diff --git a/anova.py b/anova.py new file mode 100644 index 0000000..0a0f382 --- /dev/null +++ b/anova.py @@ -0,0 +1,81 @@ +import numpy as np +import pandas as pd +from scipy.stats import f + + +def do_print(lst_value, str_col): + str_data = '\t'.join([str(round(e, 4 if 'P value' in str_col else 3)) for e in lst_value]) + print(f'{str_col}\t{str_data}') + + +def anova(lst_col_seg, n_level, oa_file, result_file): + df_oa = pd.read_fwf("oa25.txt", header=None, widths=[1]*6) + df_res = pd.read_excel(result_file, sheet_name=0, engine='openpyxl', index_col=0) + assert df_res.shape[1] == sum(lst_col_seg), 'the column number is wrong' + assert df_oa.shape[1] == lst_col_seg[0] + lst_col_seg[1], 'the column number is wrong' + lst_head = [f"{idx+1}_{ind_name}" for idx, ind_name in enumerate(df_res.columns)] + + n_col_input = lst_col_seg[0] + lst_col_seg[1] + n_exp_row = df_res.shape[0] + n_degree_error = n_exp_row - 1 - (n_level - 1) * lst_col_seg[0] + + df_output = df_res.iloc[:, n_col_input:] + + print("Source\tSource\t" + '\t'.join(lst_head[:lst_col_seg[0]]) + "\te") + print("DOF\tDOF\t" + '\t'.join([str(n_level-1)] * lst_col_seg[0]) + f"\t{n_degree_error}") + + lst_report = [] + + for idx_col in range(lst_col_seg[2]): + str_ind_name = lst_head[idx_col+n_col_input] + + df_y_col = df_output.iloc[:, idx_col] # the y column + # sum_y_2 = np.sum(np.power(df_y_col, 2)) # sum(y^2) + df_y_col_repeated = np.tile(df_y_col, (n_col_input, 1)).T # repeat the y column + big_t = df_y_col.sum() # the big T + + # generate T1, ..., T(n_levels) + lst_2d_big_t = [] + for level in range(n_level): + arr_big_t = np.sum(df_y_col_repeated * np.where(df_oa == level, 1, 0), axis=0) + lst_2d_big_t.append(arr_big_t.tolist()) + arr_big_t_2 = np.power(np.array(lst_2d_big_t), 2) + arr_s = np.sum(arr_big_t_2, axis=0) / (n_exp_row / n_level) - big_t * big_t / n_exp_row + assert arr_s.size == n_col_input, 'wrong arr_s size' + # sum_s = np.sum(arr_s) + + # so far, the first table is computed. Now, compute the second table + df_s = pd.DataFrame(arr_s.reshape((1, n_col_input)), columns=lst_head[:n_col_input]) + do_print(arr_s.tolist(), f'{str_ind_name}\tS') + + df_s_non_error = df_s.iloc[:, :lst_col_seg[0]] / (n_level - 1) + ms_of_error = df_s.iloc[:, lst_col_seg[0]:].sum().sum() / n_degree_error + + do_print(df_s_non_error.values.tolist()[0] + [ms_of_error], f'{str_ind_name}\tMS') + + arr_f = df_s_non_error / ms_of_error + do_print(arr_f.values.tolist()[0], f'{str_ind_name}\tF ratio') + + alpha = 0.1 + arr_p_value = f.sf(arr_f, n_level - 1, n_degree_error) + do_print(arr_p_value.tolist()[0], f'{str_ind_name}\tP value') + + lst_sig = [c for c, p in zip(lst_head[:lst_col_seg[0]], arr_p_value[0].tolist()) if p < alpha] + # print(df_output.columns[idx_col], arr_p_value, lst_sig) + if len(lst_sig) > 0: + lst_report.append(f"For indicator {str_ind_name}, the sig factors are {lst_sig}") + + for s in lst_report: + print(s) + + +if __name__ == '__main__': + # first test + the_lst_col_seg = [5, 1, 5] # 11 factors, 2 for error, and 6 indicators + the_n_level = 5 + anova(the_lst_col_seg, the_n_level, "oa25.txt", "result/experiment_result_25.xlsx") + + # second test + # the_lst_col_seg = [3, 1, 1] # 11 factors, 2 for error, and 6 indicators + # the_n_level = 3 + # anova(the_lst_col_seg, the_n_level, "oaL9-3-4.csv", "result.xlsx") diff --git a/anova.xlsx b/anova.xlsx new file mode 100644 index 0000000..a9a53ca Binary files /dev/null and b/anova.xlsx differ diff --git a/env.py b/env.py index f099617..f19f0ed 100644 --- a/env.py +++ b/env.py @@ -13,6 +13,10 @@ class Env(ap.Model): float_market_size: float # percent_rh: float percent_search: float + + is_RH_ratio: float + is_FH_ratio: float + n_worker: int n_firm: int e_revenue: float @@ -31,11 +35,15 @@ class Env(ap.Model): out_f_gini_profit: float out_w_percent_hired: float - def setup(self): + def __init__(self, dct_all): + super().__init__() + # 工作人员、企业数量、搜寻企业数量赋值 - self.n_worker = self.p.n_worker - self.n_firm = self.p.n_firm - self.percent_search = self.p.percent_search + self.n_worker = int(dct_all['n_worker']) + self.n_firm = int(dct_all['n_firm']) + self.percent_search = float(dct_all['percent_search']) + self.is_RH_ratio = float(dct_all['is_RH_ratio']) + self.is_FH_ratio = float(dct_all['is_FH_ratio']) # 工人、企业列表 self.a_lst_worker = ap.AgentList(self) self.a_lst_firm = ap.AgentList(self) @@ -44,15 +52,18 @@ class Env(ap.Model): # 在工人列表中添加工人 for i in range(self.n_worker): # 初始化 worker agent,并把alpha属性传过去 - w = WorkerAgent(self, self.p.alpha) + w = WorkerAgent(self, float(dct_all['alpha'])) self.a_lst_worker.append(w) # 在企业列表中添加企业,放入一个is_RH_ratio, 即有多大比例的企业是属于RH类型的 for i in range(self.n_firm): # 对于企业属性true or false 的判断, 影响到firm 板块下, self.s_IsRH = is_RH 语句的判断 - f = FirmAgent(self, self.p.is_RH_ratio >= uniform(0, 1), self.p.is_FH_ratio >= uniform(0, 1)) + f = FirmAgent(self, self.is_RH_ratio >= uniform(0, 1), self.is_FH_ratio >= uniform(0, 1)) self.a_lst_firm.append(f) + self.running = True + self.t = 0 + def update_e_revenue(self): self.e_revenue += 0.01 * self.e_revenue @@ -79,9 +90,9 @@ class Env(ap.Model): self.update() if self.t == 200: - self.stop() - # self.picture_out() - # pass + self.running = False + else: + self.t += 1 def update(self): lst_salary = [] @@ -123,7 +134,7 @@ class Env(ap.Model): worker.working_firm = None self.a_lst_firm.remove(f) del f - new_f = FirmAgent(self, self.p.is_RH_ratio >= uniform(0, 1), self.p.is_FH_ratio >= uniform(0, 1)) + new_f = FirmAgent(self, self.is_RH_ratio >= uniform(0, 1), self.is_FH_ratio >= uniform(0, 1)) self.a_lst_firm.append(new_f) else: if f.s_profit < 0: diff --git a/experiment.py b/experiment.py new file mode 100644 index 0000000..0daa25a --- /dev/null +++ b/experiment.py @@ -0,0 +1,67 @@ +import numpy as np +import pandas as pd +from env import Env +import datetime + + +idx_start = 10 # first index is 1 ! + +n_sample = 30 + +df_xv = pd.read_csv("xv.csv", header=None, index_col=None).transpose() +lst_xv_keys = [ + 'alpha', + 'percent_search', + 'is_RH_ratio', + 'is_FH_ratio' +] +df_xv.columns = lst_xv_keys + +df_oa = pd.read_fwf("oa25.txt", header=None, widths=[1]*6) +n_row, n_col = df_oa.shape +model_para = { + "n_worker": 1000, + "n_firm": 100 +} + +lst_op_key = ['out_w_avg_salary', 'out_w_gini_salary', 'out_f_avg_profit', 'out_f_gini_profit', 'out_w_percent_hired'] +lst_2d_op_avg = [] +lst_2d_xv = [] +for idx_row in range(idx_start-1, n_row, 1): + print(f"Running the {idx_row + 1}-th experiment at {datetime.datetime.now()}.") + lst_value = [] + for idx_col, k in enumerate(lst_xv_keys): + oa_idx_level = int(df_oa.iat[idx_row, idx_col]) + lst_value.append(df_xv.iloc[oa_idx_level][k]) + lst_2d_xv.append(lst_value) + dct_merge = {**model_para, **dict(zip(lst_xv_keys, lst_value))} + # pprint.pprint(dct_merge) + lst_2d_op = [] + + for i in range(n_sample): + print(f"-- {i + 1}-th sample at {datetime.datetime.now()}.") + the_model = Env(dct_merge) + + while 1: + the_model.step() + if not the_model.running: + break + + lst_2d_op.append([the_model.out_w_avg_salary, the_model.out_w_gini_salary, + the_model.out_f_avg_profit, the_model.out_f_gini_profit, + the_model.out_w_percent_hired]) + + arr_op = np.array(lst_2d_op) + lst_2d_op_avg.append(arr_op.mean(axis=0).tolist()) + + # these codes below should be outside of loop. but temply inside + arr_op_avg = np.array(lst_2d_op_avg) + + df_final = pd.concat([pd.DataFrame(lst_2d_xv, columns=lst_xv_keys).reset_index(drop=True), + df_oa.iloc[:, len(lst_xv_keys):].reset_index(drop=True)], axis=1) + + df_final = pd.concat([df_final.reset_index(drop=True), + pd.DataFrame(lst_2d_op_avg, columns=lst_op_key).reset_index(drop=True)], axis=1) + + df_final.to_excel(f'result/experiment_result_{idx_row + 1}.xlsx') + df_final.to_csv(f'result/experiment_result_{idx_row + 1}.csv') diff --git a/result/experiment_result_25.csv b/result/experiment_result_25.csv new file mode 100644 index 0000000..be7782d --- /dev/null +++ b/result/experiment_result_25.csv @@ -0,0 +1,26 @@ +,alpha,percent_search,is_RH_ratio,is_FH_ratio,4,5,out_w_avg_salary,out_w_gini_salary,out_f_avg_profit,out_f_gini_profit,out_w_percent_hired +0,0.3,0.8,0.75,0.5,0,0,3264853080.7861195,0.07448611065757892,27068180827.948566,0.028471074467998493,0.9926999999999998 +1,0.5,0.1,0.5,0.5,3,4,3637864305.7383804,0.07297697986009385,30978395677.329544,0.025035570583021458,0.9932 +2,0.5,0.2,1.0,0.75,4,1,2179839879.06716,0.09840417843256091,14870250970.957703,0.09615406368119288,0.9886333333333329 +3,0.5,0.3,0.75,0.25,1,2,3294909225.056388,0.07458035794885493,27529372104.086937,0.027549550048933245,0.9904999999999996 +4,0.5,0.6,0.25,0.0,2,3,3716906250.022679,0.08529021609092702,30937889107.04533,0.025723945674350865,0.9942999999999997 +5,0.5,0.8,0.0,1.0,1,1,3869912345.136378,0.09546657392456424,32080217202.03705,0.02533111889073941,1.0 +6,0.7,0.1,0.75,0.75,0,3,3323780234.219529,0.07324360974831064,27481709495.610508,0.02827211821311022,0.9934666666666666 +7,0.7,0.2,0.0,0.25,3,2,3890360391.8347297,0.09312614979722612,32025419370.64081,0.027331743681722087,0.9998666666666668 +8,0.7,0.3,0.25,1.0,2,4,3724730633.529055,0.08585608644546336,30966818797.24322,0.02556013680036385,0.9997333333333333 +9,0.7,0.6,1.0,0.5,4,0,2059080141.3863091,0.13011386291850205,11919309572.784859,0.17383571880300247,0.9850999999999996 +10,0.7,0.8,0.5,0.0,2,2,3558046321.01991,0.07852769877145456,29326434059.91672,0.02645991757420794,0.9920333333333334 +11,0.9,0.1,1.0,1.0,1,0,2217056469.2871513,0.09085566892663295,14019663732.042246,0.10640474650259805,0.9943333333333331 +12,0.9,0.2,0.75,0.0,0,4,3288880578.2219296,0.07786092688764147,27583988860.902683,0.028509652392809604,0.9885666666666665 +13,0.9,0.3,0.0,0.5,4,3,3879623567.573062,0.09407352725749332,32151544223.643482,0.026003986467554037,0.9999666666666667 +14,0.9,0.6,0.5,0.25,3,1,3577219455.771972,0.0785081831642713,29903884548.088444,0.02491509062583037,0.9924 +15,0.9,0.8,0.25,0.75,3,3,3710277766.924487,0.08632310052044662,30554824167.139378,0.026183286845803396,0.9967333333333335 +16,,,,,4,2,,,,, +17,,,,,2,0,,,,, +18,,,,,0,1,,,,, +19,,,,,1,4,,,,, +20,,,,,4,4,,,,, +21,,,,,2,1,,,,, +22,,,,,1,3,,,,, +23,,,,,3,0,,,,, +24,,,,,0,2,,,,, diff --git a/result/experiment_result_25.xlsx b/result/experiment_result_25.xlsx new file mode 100644 index 0000000..9b07d16 Binary files /dev/null and b/result/experiment_result_25.xlsx differ diff --git a/xv.csv b/xv.csv new file mode 100644 index 0000000..731f9bf --- /dev/null +++ b/xv.csv @@ -0,0 +1,4 @@ +0.1, 0.3, 0.5, 0.7, 0.9 +0.1, 0.2, 0.3, 0.6, 0.8 +0, 0.25, 0.5, 0.75, 1 +0, 0.25, 0.5, 0.75, 1 \ No newline at end of file