diff --git a/.vscode/launch.json b/.vscode/launch.json
index 596058b..e7403d9 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -12,9 +12,8 @@
             "console": "integratedTerminal",
             "justMyCode": true,
             "args": [
-                "--exp", "without_exp",
+                "--exp", "with_exp",
                 "--job", "24",
-                "--reset_db", "True",
             ]
         }
     ]
diff --git a/SQL_running_check.sql b/SQL_running_check.sql
index 2145b42..672e031 100644
--- a/SQL_running_check.sql
+++ b/SQL_running_check.sql
@@ -1,4 +1,34 @@
 select id, e_id, idx_sample, seed, ts_done from iiabmdb.without_exp_sample where is_done_flag != -1 order by ts_done;
 select count(id) from iiabmdb.without_exp_sample where is_done_flag != -1;
-select count(id) from iiabmdb.without_exp_sample;
-select count(id) from iiabmdb.without_exp_sample where is_done_flag != -1;
\ No newline at end of file
+select count(id) from iiabmdb.without_exp_sample;
+select count(id) from iiabmdb.with_exp_sample where is_done_flag != -1;
+
+select * from
+(select distinct idx_scenario, n_max_trial, crit_supplier, firm_pref_request,
+firm_pref_accept, netw_pref_cust_n, netw_pref_cust_size, cap_limit,
+diff_new_conn, diff_remove from iiabmdb.with_exp_experiment) as a
+inner join
+(
+select idx_scenario,
+sum(n_disrupt_s) as n_disrupt_s, sum(n_disrupt_t) as n_disrupt_t from
+iiabmdb.with_exp_experiment as a
+inner join
+(
+select e_id, count(n_s_disrupt_t) as n_disrupt_s,
+sum(n_s_disrupt_t) as n_disrupt_t from
+iiabmdb.with_exp_sample as a
+inner join
+(select a.s_id as s_id, count(id) as n_s_disrupt_t from
+iiabmdb.with_exp_result as a
+inner join
+(select distinct s_id from iiabmdb.with_exp_result where ts > 0) as b
+on a.s_id = b.s_id
+group by s_id
+) as b
+on a.id = b.s_id
+group by e_id
+) as b
+on a.id = b.e_id
+group by idx_scenario) as b
+on a.idx_scenario = b.idx_scenario;
+
diff --git a/__pycache__/controller_db.cpython-38.pyc b/__pycache__/controller_db.cpython-38.pyc
index 65bf1ad..c056717 100644
Binary files a/__pycache__/controller_db.cpython-38.pyc and b/__pycache__/controller_db.cpython-38.pyc differ
diff --git a/__pycache__/model.cpython-38.pyc b/__pycache__/model.cpython-38.pyc
index deb91cb..af723fe 100644
Binary files a/__pycache__/model.cpython-38.pyc and b/__pycache__/model.cpython-38.pyc differ
diff --git a/analysis/experiment_result.csv b/analysis/experiment_result.csv
new file mode 100644
index 0000000..7708325
--- /dev/null
+++ b/analysis/experiment_result.csv
@@ -0,0 +1,28 @@
+,n_max_trial,crit_supplier,firm_pref_request,firm_pref_accept,netw_pref_cust_n,netw_pref_cust_size,cap_limit,diff_new_conn,diff_remove,X10,X11,X12,X13,n_disrupt_s,n_disrupt_t
+0,15,2.0,2.0,2.0,0.5,2.0,4,0.5,0.5,0,0,0,0,888.0,2114.0
+1,15,2.0,2.0,2.0,1.0,1.0,2,1.0,1.0,1,1,1,1,1297.0,2810.0
+2,15,2.0,2.0,2.0,2.0,0.5,1,2.0,2.0,2,2,2,2,1826.0,3809.0
+3,15,1.0,1.0,1.0,0.5,2.0,4,1.0,1.0,1,2,2,2,1372.0,3055.0
+4,15,1.0,1.0,1.0,1.0,1.0,2,2.0,2.0,2,0,0,0,2118.0,4519.0
+5,15,1.0,1.0,1.0,2.0,0.5,1,0.5,0.5,0,1,1,1,815.0,2073.0
+6,15,0.5,0.5,0.5,0.5,2.0,4,2.0,2.0,2,1,1,1,2378.0,5528.0
+7,15,0.5,0.5,0.5,1.0,1.0,2,0.5,0.5,0,2,2,2,968.0,2300.0
+8,15,0.5,0.5,0.5,2.0,0.5,1,1.0,1.0,1,0,0,0,1531.0,3317.0
+9,10,2.0,1.0,0.5,0.5,1.0,1,0.5,1.0,2,0,1,2,881.0,1972.0
+10,10,2.0,1.0,0.5,1.0,0.5,4,1.0,2.0,0,1,2,0,1298.0,2763.0
+11,10,2.0,1.0,0.5,2.0,2.0,2,2.0,0.5,1,2,0,1,1717.0,3837.0
+12,10,1.0,0.5,2.0,0.5,1.0,1,1.0,2.0,0,2,0,1,1327.0,2855.0
+13,10,1.0,0.5,2.0,1.0,0.5,4,2.0,0.5,1,0,1,2,2126.0,4788.0
+14,10,1.0,0.5,2.0,2.0,2.0,2,0.5,1.0,2,1,2,0,801.0,1814.0
+15,10,0.5,2.0,1.0,0.5,1.0,1,2.0,0.5,1,1,2,0,2442.0,5980.0
+16,10,0.5,2.0,1.0,1.0,0.5,4,0.5,1.0,2,2,0,1,991.0,2186.0
+17,10,0.5,2.0,1.0,2.0,2.0,2,1.0,2.0,0,0,1,2,1311.0,2776.0
+18,5,2.0,0.5,1.0,0.5,0.5,2,0.5,2.0,1,0,2,1,879.0,1909.0
+19,5,2.0,0.5,1.0,1.0,2.0,1,1.0,0.5,2,1,0,2,1354.0,3132.0
+20,5,2.0,0.5,1.0,2.0,1.0,4,2.0,1.0,0,2,1,0,1727.0,3673.0
+21,5,1.0,2.0,0.5,0.5,0.5,2,1.0,0.5,2,2,1,0,1379.0,3184.0
+22,5,1.0,2.0,0.5,1.0,2.0,1,2.0,1.0,0,0,2,1,2145.0,4658.0
+23,5,1.0,2.0,0.5,2.0,1.0,4,0.5,2.0,1,1,0,2,810.0,1764.0
+24,5,0.5,1.0,2.0,0.5,0.5,2,2.0,1.0,0,1,0,2,2412.0,5783.0
+25,5,0.5,1.0,2.0,1.0,2.0,1,0.5,2.0,1,2,1,0,915.0,1973.0
+26,5,0.5,1.0,2.0,2.0,1.0,4,1.0,0.5,2,0,2,1,1336.0,3087.0
diff --git a/anova.py b/anova.py
new file mode 100644
index 0000000..7d3e009
--- /dev/null
+++ b/anova.py
@@ -0,0 +1,156 @@
+import numpy as np
+import pandas as pd
+from orm import engine
+from scipy.stats import f
+
+
+"""
+    Fill in the run info in the *__main__* block below,
+    then run this file; it calls the anova function.
+"""
+
+
+def do_print(lst_value, str_col):
+    """
+    Print one row of the ANOVA table in tab-separated form.
+
+    :param lst_value: values of the row
+    :param str_col: label prefix of the row
+    :return:
+    """
+    str_data = '\t'.join(
+        [str(round(e, 4 if 'P value' in str_col else 3)) for e in lst_value])
+    print(f'{str_col}\t{str_data}')
+
+
+def anova(lst_col_seg, n_level, oa_file, result_file, alpha=0.1):
+    """
+    Given the files and info, compute the significance of each X for each Y.
+
+    :param lst_col_seg: numbers of factor (X), error (E), and indicator (Y) columns
+    :param n_level: number of levels of each factor
+    :param oa_file: path of the orthogonal array csv file
+    :param result_file: data frame holding the experiment results
+    :param alpha: significance level, usually 0.1, 0.05, or 0.01
+    :return:
+    """
+    # read and check the files
+    df_oa = pd.read_csv(oa_file, index_col=None)
+    df_res = result_file
+    assert df_res.shape[1] == sum(lst_col_seg), 'the column number is wrong'
+    assert df_oa.shape[1] == lst_col_seg[0] + \
+        lst_col_seg[1], 'the column number is wrong'
+    lst_head = [f"{idx+1}_{ind_name}" for idx,
+                ind_name in enumerate(df_res.columns)]
+
+    # The three lines below define some coefficients for further computation
+    n_col_input = lst_col_seg[0] + lst_col_seg[1]
+    n_exp_row = df_res.shape[0]
+    n_degree_error = n_exp_row - 1 - (n_level - 1) * lst_col_seg[0]
+
+    df_output = df_res.iloc[:, n_col_input:]
+
+    print("Source\tSource\t" + '\t'.join(lst_head[:lst_col_seg[0]]) + "\te")
+    print("DOF\tDOF\t" + '\t'.join([str(n_level-1)]
+                                   * lst_col_seg[0]) + f"\t{n_degree_error}")
+
+    lst_report = []
+
+    # loop over each Y (indicator) column
+    for idx_col in range(lst_col_seg[2]):
+        str_ind_name = lst_head[idx_col+n_col_input]
+
+        df_y_col = df_output.iloc[:, idx_col]  # the y column
+        df_y_col_repeated = np.tile(
+            df_y_col, (n_col_input, 1)).T  # repeat the y column
+        big_t = df_y_col.sum()  # the big T
+
+        # generate T1, ..., T(n_levels)
+        lst_2d_big_t = []  # Table 1, row 10, 11, 12
+        for level in range(n_level):
+            arr_big_t = np.sum(df_y_col_repeated *
+                               np.where(df_oa == level, 1, 0), axis=0)
+            lst_2d_big_t.append(arr_big_t.tolist())
+        arr_big_t_2 = np.power(np.array(lst_2d_big_t), 2)
+        arr_s = np.sum(arr_big_t_2, axis=0) / (n_exp_row / n_level) - \
+            big_t * big_t / n_exp_row  # Table 1, last row
+        assert arr_s.size == n_col_input, 'wrong arr_s size'
+
+        # so far, the first table is computed. Now, compute the second table
+        df_s = pd.DataFrame(arr_s.reshape((1, n_col_input)),
+                            columns=lst_head[:n_col_input])
+        do_print(arr_s.tolist(), f'{str_ind_name}\tS')  # Table 2, col 2
+
+        df_s_non_error = df_s.iloc[:, :lst_col_seg[0]] / (n_level - 1)
+        ms_of_error = \
+            df_s.iloc[:, lst_col_seg[0]:].sum().sum() / n_degree_error
+
+        do_print(df_s_non_error.values.tolist()[0] + [ms_of_error],
+                 f'{str_ind_name}\tMS')  # Table 2, col 4
+
+        arr_f = df_s_non_error / ms_of_error
+        # Table 2, col 5
+        do_print(arr_f.values.tolist()[0], f'{str_ind_name}\tF ratio')
+
+        # right-tail probability of the F distribution
+        arr_p_value = f.sf(arr_f, n_level - 1, n_degree_error)
+        # Table 2, col 6
+        do_print(arr_p_value.tolist()[0], f'{str_ind_name}\tP value')
+
+        lst_sig = [c for c, p in zip(
+            lst_head[:lst_col_seg[0]], arr_p_value[0].tolist()) if p < alpha]
+
+        if len(lst_sig) > 0:
+            lst_report.append(
+                f"For indicator {str_ind_name}, the sig factors are {lst_sig}")
+
+    for s in lst_report:
+        print(s)
+
+
+if __name__ == '__main__':
+    # prep data
+    str_sql = """
+    select * from
+    (select distinct idx_scenario, n_max_trial, crit_supplier,
+    firm_pref_request, firm_pref_accept, netw_pref_cust_n,
+    netw_pref_cust_size, cap_limit, diff_new_conn, diff_remove
+    from iiabmdb.with_exp_experiment) as a
+    inner join
+    (
+    select idx_scenario,
+    sum(n_disrupt_s) as n_disrupt_s, sum(n_disrupt_t) as n_disrupt_t from
+    iiabmdb.with_exp_experiment as a
+    inner join
+    (
+    select e_id, count(n_s_disrupt_t) as n_disrupt_s,
+    sum(n_s_disrupt_t) as n_disrupt_t from
+    iiabmdb.with_exp_sample as a
+    inner join
+    (select a.s_id as s_id, count(id) as n_s_disrupt_t from
+    iiabmdb.with_exp_result as a
+    inner join
+    (select distinct s_id from iiabmdb.with_exp_result where ts > 0) as b
+    on a.s_id = b.s_id
+    group by s_id
+    ) as b
+    on a.id = b.s_id
+    group by e_id
+    ) as b
+    on a.id = b.e_id
+    group by idx_scenario) as b
+    on a.idx_scenario = b.idx_scenario;
+
+    """
+    result = pd.read_sql(sql=str_sql,
+                         con=engine)
+    result.drop(columns='idx_scenario', inplace=True)
+    df_oa = pd.read_csv("oa_with_exp.csv", index_col=None)
+    result = pd.concat(
+        [result.iloc[:, 0:9], df_oa.iloc[:, -4:], result.iloc[:, -2:]], axis=1)
+    result.to_csv('analysis/experiment_result.csv')
+
+    # 9 factors (X), 4 for error (E), and 2 indicators (Y)
+    the_lst_col_seg = [9, 4, 2]
+    the_n_level = 3
+    anova(the_lst_col_seg, the_n_level, "oa_with_exp.csv", result, 0.1)
diff --git a/anova.xlsx b/anova.xlsx
new file mode 100644
index 0000000..e7a33ce
Binary files /dev/null and b/anova.xlsx differ
diff --git a/conf_db_prefix.yaml b/conf_db_prefix.yaml
index 0e48fd8..1391188 100644
--- a/conf_db_prefix.yaml
+++ b/conf_db_prefix.yaml
@@ -1 +1 @@
-db_name_prefix: without_exp
+db_name_prefix: with_exp
diff --git a/controller_db.py b/controller_db.py
index d28b951..5da4297 100644
--- a/controller_db.py
+++ b/controller_db.py
@@ -94,6 +94,7 @@ class ControllerDB:
         df_xv = pd.read_csv("xv.csv", index_col=None)
         # read the OA table
         df_oa = pd.read_csv("oa_with_exp.csv", index_col=None)
+        df_oa = df_oa.iloc[:, 0:9]
         for idx_scenario, row in df_oa.iterrows():
             dct_exp_para = {}
             for idx_col, para_level in enumerate(row):
diff --git a/oa_with_exp.csv b/oa_with_exp.csv
index b000b5f..ec93a67 100644
--- a/oa_with_exp.csv
+++ b/oa_with_exp.csv
@@ -1,28 +1,28 @@
-X1,X2,X3,X4,X5,X6,X7,X8,X9
-0,0,0,0,0,0,0,0,0
-0,0,0,0,1,1,1,1,1
-0,0,0,0,2,2,2,2,2
-0,1,1,1,0,0,0,1,1
-0,1,1,1,1,1,1,2,2
-0,1,1,1,2,2,2,0,0
-0,2,2,2,0,0,0,2,2
-0,2,2,2,1,1,1,0,0
-0,2,2,2,2,2,2,1,1
-1,0,1,2,0,1,2,0,1
-1,0,1,2,1,2,0,1,2
-1,0,1,2,2,0,1,2,0 -1,1,2,0,0,1,2,1,2 -1,1,2,0,1,2,0,2,0 -1,1,2,0,2,0,1,0,1 -1,2,0,1,0,1,2,2,0 -1,2,0,1,1,2,0,0,1 -1,2,0,1,2,0,1,1,2 -2,0,2,1,0,2,1,0,2 -2,0,2,1,1,0,2,1,0 -2,0,2,1,2,1,0,2,1 -2,1,0,2,0,2,1,1,0 -2,1,0,2,1,0,2,2,1 -2,1,0,2,2,1,0,0,2 -2,2,1,0,0,2,1,2,1 -2,2,1,0,1,0,2,0,2 -2,2,1,0,2,1,0,1,0 +X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13 +0,0,0,0,0,0,0,0,0,0,0,0,0 +0,0,0,0,1,1,1,1,1,1,1,1,1 +0,0,0,0,2,2,2,2,2,2,2,2,2 +0,1,1,1,0,0,0,1,1,1,2,2,2 +0,1,1,1,1,1,1,2,2,2,0,0,0 +0,1,1,1,2,2,2,0,0,0,1,1,1 +0,2,2,2,0,0,0,2,2,2,1,1,1 +0,2,2,2,1,1,1,0,0,0,2,2,2 +0,2,2,2,2,2,2,1,1,1,0,0,0 +1,0,1,2,0,1,2,0,1,2,0,1,2 +1,0,1,2,1,2,0,1,2,0,1,2,0 +1,0,1,2,2,0,1,2,0,1,2,0,1 +1,1,2,0,0,1,2,1,2,0,2,0,1 +1,1,2,0,1,2,0,2,0,1,0,1,2 +1,1,2,0,2,0,1,0,1,2,1,2,0 +1,2,0,1,0,1,2,2,0,1,1,2,0 +1,2,0,1,1,2,0,0,1,2,2,0,1 +1,2,0,1,2,0,1,1,2,0,0,1,2 +2,0,2,1,0,2,1,0,2,1,0,2,1 +2,0,2,1,1,0,2,1,0,2,1,0,2 +2,0,2,1,2,1,0,2,1,0,2,1,0 +2,1,0,2,0,2,1,1,0,2,2,1,0 +2,1,0,2,1,0,2,2,1,0,0,2,1 +2,1,0,2,2,1,0,0,2,1,1,0,2 +2,2,1,0,0,2,1,2,1,0,1,0,2 +2,2,1,0,1,0,2,0,2,1,2,1,0 +2,2,1,0,2,1,0,1,0,2,0,2,1 diff --git a/oa_without_exp.csv b/oa_without_exp.csv index f139061..a70f612 100644 --- a/oa_without_exp.csv +++ b/oa_without_exp.csv @@ -1,2 +1,2 @@ -X1,X2,X3,X4,X5,X6,X7,X8,X9 -1,1,1,1,1,1,1,1,1 +X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13 +1,1,1,1,1,1,1,1,1,1,1,1,1 diff --git a/xv.csv b/xv.csv index 08b3cb4..de72a3f 100644 --- a/xv.csv +++ b/xv.csv @@ -1,4 +1,4 @@ n_max_trial,crit_supplier,firm_pref_request,firm_pref_accept,netw_pref_cust_n,netw_pref_cust_size,cap_limit,diff_new_conn,diff_remove 15,2,2,2,0.5,2,4,0.5,0.5 10,1,1,1,1,1,2,1,1 -5,0.5,0.5,0.5,2,0.5,0,2,2 +5,0.5,0.5,0.5,2,0.5,1,2,2
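As a minimal, hypothetical sketch of the significance test that anova.py runs for a single factor (the response values, the pooled error mean square, and its degrees of freedom below are invented for illustration and are not taken from the experiment data), the sum of squares of a 3-level factor is turned into an F ratio and a right-tail p-value like this:

import numpy as np
from scipy.stats import f

# hypothetical responses of 9 runs; the factor takes levels 0, 1, 2, each three times
y = np.array([8.1, 7.9, 8.3, 9.2, 9.0, 9.4, 10.1, 9.8, 10.3])
levels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])

n, n_level = y.size, 3
big_t = y.sum()  # grand total T

# factor sum of squares: sum of (level total)^2 over runs per level, minus the correction term
level_totals = np.array([y[levels == k].sum() for k in range(n_level)])
s_factor = (level_totals ** 2).sum() / (n / n_level) - big_t ** 2 / n

ms_error, dof_error = 0.05, 4          # assumed pooled error mean square and its DOF
ms_factor = s_factor / (n_level - 1)   # factor mean square
f_ratio = ms_factor / ms_error
p_value = f.sf(f_ratio, n_level - 1, dof_error)  # right-tail F probability, as in anova.py
print(f"F = {f_ratio:.3f}, p = {p_value:.4f}")

A factor would then be reported as significant when p_value falls below the chosen alpha (0.1 in the script above).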