diff --git a/.vscode/launch.json b/.vscode/launch.json index 80f4077..87ed9da 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,7 +12,7 @@ "console": "integratedTerminal", "justMyCode": true, "args": [ - "--exp", "without_exp", + "--exp", "with_exp", "--reset_db", "True", "--job", "24" ] diff --git a/SQL_export_high_risk_setting.sql b/SQL_export_high_risk_setting.sql index abf784a..cfa3a17 100644 --- a/SQL_export_high_risk_setting.sql +++ b/SQL_export_high_risk_setting.sql @@ -1,19 +1,15 @@ -select count(*) from iiabmdb.without_exp_sample; - -select distinct s_id from iiabmdb.without_exp_result where ts > 0; -select s_id, max(ts) as max_ts from iiabmdb.without_exp_result where ts > 0 group by s_id order by max_ts; -select e_id, count(id) as count, max(max_ts) as max_max_ts from iiabmdb.without_exp_sample as a -inner join (select s_id, max(ts) as max_ts from iiabmdb.without_exp_result where ts > 0 group by s_id) as b -on a.id = b.s_id +select e_id, n_disrupt_sample, total_n_disrupt_firm_prod_experiment, dct_lst_init_disrupt_firm_prod from iiabmdb.without_exp_experiment as experiment +inner join ( +select e_id, count(id) as n_disrupt_sample, sum(n_disrupt_firm_prod_sample) as total_n_disrupt_firm_prod_experiment from iiabmdb.without_exp_sample as sample +inner join ( +select * from +(select s_id, COUNT(DISTINCT id_firm, id_product) as n_disrupt_firm_prod_sample from iiabmdb.without_exp_result group by s_id +) as count_disrupt_firm_prod_sample +where n_disrupt_firm_prod_sample > 1 +) as disrupt_sample +on sample.id = disrupt_sample.s_id group by e_id -order by count desc; - -select e_id, count, max_max_ts, dct_lst_init_remove_firm_prod from iiabmdb.without_exp_experiment as a -inner join -(select e_id, count(id) as count, max(max_ts) as max_max_ts from iiabmdb.without_exp_sample as a -inner join (select s_id, max(ts) as max_ts from iiabmdb.without_exp_result where ts > 0 group by s_id) as b -on a.id = b.s_id -group by e_id) as b -on a.id = b.e_id -where count > 10 -order by count desc; +) as disrupt_experiment +on experiment.id = disrupt_experiment.e_id +order by n_disrupt_sample desc, total_n_disrupt_firm_prod_experiment desc +limit 0, 95; \ No newline at end of file diff --git a/SQL_find_high_risk_setting.sql b/SQL_find_high_risk_setting.sql new file mode 100644 index 0000000..21e7d21 --- /dev/null +++ b/SQL_find_high_risk_setting.sql @@ -0,0 +1,44 @@ +select max(ts_done) from iiabmdb.without_exp_sample; +select min(ts_done) from iiabmdb.without_exp_sample; +select count(*) from iiabmdb.without_exp_sample; + +select distinct s_id from iiabmdb.without_exp_result where ts > 0; +select s_id, max(ts) as max_ts from iiabmdb.without_exp_result where ts > 0 group by s_id order by max_ts; +select e_id, count(id) as count, max(max_ts) as max_max_ts from iiabmdb.without_exp_sample as a +inner join (select s_id, max(ts) as max_ts from iiabmdb.without_exp_result where ts > 0 group by s_id) as b +on a.id = b.s_id +group by e_id +order by count desc; + +select e_id, count, max_max_ts, dct_lst_init_remove_firm_prod from iiabmdb.without_exp_experiment as a +inner join +(select e_id, count(id) as count, max(max_ts) as max_max_ts from iiabmdb.without_exp_sample as a +inner join (select s_id, max(ts) as max_ts from iiabmdb.without_exp_result where ts > 0 group by s_id) as b +on a.id = b.s_id +group by e_id) as b +on a.id = b.e_id +where count > 10 +order by count desc; + +select s_id, max(ts) as max_ts from iiabmdb.without_exp_result where ts > 0 group by s_id; +select * from iiabmdb.without_exp_result order by s_id limit 0,50; +select s_id, COUNT(DISTINCT id_firm, id_product) as n_disrupt_firm_prod from iiabmdb.without_exp_result group by s_id; +select * from +(select s_id, COUNT(DISTINCT id_firm, id_product) as n_disrupt_firm_prod_sample from iiabmdb.without_exp_result group by s_id) as count_disrupt_firm_prod_sample +where n_disrupt_firm_prod_sample > 1; + +select e_id, n_disrupt_sample, total_n_disrupt_firm_prod_experiment, dct_lst_init_disrupt_firm_prod from iiabmdb.without_exp_experiment as experiment +inner join ( +select e_id, count(id) as n_disrupt_sample, sum(n_disrupt_firm_prod_sample) as total_n_disrupt_firm_prod_experiment from iiabmdb.without_exp_sample as sample +inner join ( +select * from +(select s_id, COUNT(DISTINCT id_firm, id_product) as n_disrupt_firm_prod_sample from iiabmdb.without_exp_result group by s_id +) as count_disrupt_firm_prod_sample +where n_disrupt_firm_prod_sample > 1 +) as disrupt_sample +on sample.id = disrupt_sample.s_id +group by e_id +) as disrupt_experiment +on experiment.id = disrupt_experiment.e_id +order by n_disrupt_sample desc, total_n_disrupt_firm_prod_experiment desc +limit 0, 95; # 20% of 475 experiment \ No newline at end of file diff --git a/__pycache__/controller_db.cpython-38.pyc b/__pycache__/controller_db.cpython-38.pyc index b4a7e5a..a6891e5 100644 Binary files a/__pycache__/controller_db.cpython-38.pyc and b/__pycache__/controller_db.cpython-38.pyc differ diff --git a/__pycache__/firm.cpython-38.pyc b/__pycache__/firm.cpython-38.pyc index fa98168..26921ff 100644 Binary files a/__pycache__/firm.cpython-38.pyc and b/__pycache__/firm.cpython-38.pyc differ diff --git a/__pycache__/model.cpython-38.pyc b/__pycache__/model.cpython-38.pyc index 5a359a9..1154905 100644 Binary files a/__pycache__/model.cpython-38.pyc and b/__pycache__/model.cpython-38.pyc differ diff --git a/controller_db.py b/controller_db.py index 2eeea65..fbd227a 100644 --- a/controller_db.py +++ b/controller_db.py @@ -54,18 +54,8 @@ class ControllerDB: # fill dct_lst_init_disrupt_firm_prod list_dct = [] if self.is_with_exp: - str_sql = "select e_id, count, max_max_ts, " \ - "dct_lst_init_disrupt_firm_prod from " \ - "iiabmdb.without_exp_experiment as a " \ - "inner join " \ - "(select e_id, count(id) as count, max(max_ts) as max_max_ts "\ - "from iiabmdb.without_exp_sample as a " \ - "inner join (select s_id, max(ts) as max_ts from " \ - "iiabmdb.without_exp_result where ts > 0 group by s_id) as b "\ - "on a.id = b.s_id " \ - "group by e_id) as b " \ - "on a.id = b.e_id " \ - "order by count desc;" + with open('SQL_export_high_risk_setting.sql', 'r') as f: + str_sql = f.read() result = pd.read_sql(sql=str_sql, con=engine) result['dct_lst_init_disrupt_firm_prod'] = \ result['dct_lst_init_disrupt_firm_prod'].apply( diff --git a/oa_with_exp.xlsx b/oa_with_exp.xlsx index a880df4..1d16ed2 100644 Binary files a/oa_with_exp.xlsx and b/oa_with_exp.xlsx differ diff --git a/test.ipynb b/test.ipynb index e52427d..e417a09 100644 --- a/test.ipynb +++ b/test.ipynb @@ -357,6 +357,166 @@ "\n", "lst[-5:]" ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | e_id | \n", + "n_disrupt_sample | \n", + "total_n_disrupt_firm_prod_experiment | \n", + "dct_lst_init_disrupt_firm_prod | \n", + "
---|---|---|---|---|
0 | \n", + "383 | \n", + "50 | \n", + "300.0 | \n", + "b'\\x80\\x05\\x95\\x17\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
1 | \n", + "227 | \n", + "50 | \n", + "250.0 | \n", + "b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
2 | \n", + "83 | \n", + "50 | \n", + "200.0 | \n", + "b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
3 | \n", + "135 | \n", + "50 | \n", + "200.0 | \n", + "b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
4 | \n", + "179 | \n", + "50 | \n", + "200.0 | \n", + "b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
90 | \n", + "76 | \n", + "24 | \n", + "56.0 | \n", + "b'\\x80\\x05\\x95\\x14\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
91 | \n", + "89 | \n", + "24 | \n", + "54.0 | \n", + "b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
92 | \n", + "90 | \n", + "24 | \n", + "54.0 | \n", + "b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
93 | \n", + "335 | \n", + "24 | \n", + "54.0 | \n", + "b'\\x80\\x05\\x95\\x17\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
94 | \n", + "449 | \n", + "24 | \n", + "53.0 | \n", + "b'\\x80\\x05\\x95\\x15\\x00\\x00\\x00\\x00\\x00\\x00\\x00... | \n", + "
95 rows × 4 columns
\n", + "