First DOE. Only C is significant.

This commit is contained in:
He Zhou 2023-01-19 11:37:21 +08:00
parent 9de2115ce8
commit f4dc9f7783
7 changed files with 199 additions and 10 deletions

81
anova.py Normal file
View File

@ -0,0 +1,81 @@
import numpy as np
import pandas as pd
from scipy.stats import f
def do_print(lst_value, str_col):
    """Print one ANOVA table row: the label, then tab-separated rounded values.

    P-value rows are rounded to 4 decimals, all other rows to 3.
    """
    n_digits = 4 if 'P value' in str_col else 3
    rounded = (str(round(value, n_digits)) for value in lst_value)
    str_data = '\t'.join(rounded)
    print(f'{str_col}\t{str_data}')
def anova(lst_col_seg, n_level, oa_file, result_file, alpha=0.1):
    """Run a one-way ANOVA per output indicator of an orthogonal-array DOE.

    Args:
        lst_col_seg: [n_factor, n_error, n_indicator] — column counts in the
            result sheet: factor columns, then error columns, then outputs.
        n_level: number of levels of every factor (levels coded 0..n_level-1).
        oa_file: fixed-width text file holding the orthogonal array, one digit
            per column.
        result_file: Excel workbook whose first sheet holds the experiment
            results, laid out as described by lst_col_seg.
        alpha: significance threshold for the P value (default 0.1, matching
            the previous hard-coded value).

    Prints the S / MS / F-ratio / P-value tables to stdout, then one report
    line per indicator that has at least one significant factor.
    """
    n_col_input = lst_col_seg[0] + lst_col_seg[1]
    # BUG FIX: the OA file name was hard-coded as "oa25.txt", silently
    # ignoring the oa_file argument. The widths are now derived from
    # lst_col_seg as well (the assert below already required this count).
    df_oa = pd.read_fwf(oa_file, header=None, widths=[1] * n_col_input)
    df_res = pd.read_excel(result_file, sheet_name=0, engine='openpyxl', index_col=0)
    assert df_res.shape[1] == sum(lst_col_seg), 'the column number is wrong'
    assert df_oa.shape[1] == n_col_input, 'the column number is wrong'
    lst_head = [f"{idx+1}_{ind_name}" for idx, ind_name in enumerate(df_res.columns)]
    n_exp_row = df_res.shape[0]
    # Error dof = total dof minus the dof consumed by the factor columns.
    n_degree_error = n_exp_row - 1 - (n_level - 1) * lst_col_seg[0]
    df_output = df_res.iloc[:, n_col_input:]
    print("Source\tSource\t" + '\t'.join(lst_head[:lst_col_seg[0]]) + "\te")
    print("DOF\tDOF\t" + '\t'.join([str(n_level-1)] * lst_col_seg[0]) + f"\t{n_degree_error}")
    lst_report = []
    for idx_col in range(lst_col_seg[2]):
        str_ind_name = lst_head[idx_col + n_col_input]
        df_y_col = df_output.iloc[:, idx_col]  # the y column
        # One copy of y per OA column, so level masks can be applied at once.
        df_y_col_repeated = np.tile(df_y_col, (n_col_input, 1)).T
        big_t = df_y_col.sum()  # the grand total T
        # Generate T1, ..., T(n_level): per-level sums of y for every column.
        lst_2d_big_t = []
        for level in range(n_level):
            arr_big_t = np.sum(df_y_col_repeated * np.where(df_oa == level, 1, 0), axis=0)
            lst_2d_big_t.append(arr_big_t.tolist())
        arr_big_t_2 = np.power(np.array(lst_2d_big_t), 2)
        # S = sum(T_i^2) / r - T^2 / N, with r = replicates per level.
        arr_s = np.sum(arr_big_t_2, axis=0) / (n_exp_row / n_level) - big_t * big_t / n_exp_row
        assert arr_s.size == n_col_input, 'wrong arr_s size'
        # First table done; now build the second (MS / F / P) table.
        df_s = pd.DataFrame(arr_s.reshape((1, n_col_input)), columns=lst_head[:n_col_input])
        do_print(arr_s.tolist(), f'{str_ind_name}\tS')
        df_s_non_error = df_s.iloc[:, :lst_col_seg[0]] / (n_level - 1)
        ms_of_error = df_s.iloc[:, lst_col_seg[0]:].sum().sum() / n_degree_error
        do_print(df_s_non_error.values.tolist()[0] + [ms_of_error], f'{str_ind_name}\tMS')
        arr_f = df_s_non_error / ms_of_error
        do_print(arr_f.values.tolist()[0], f'{str_ind_name}\tF ratio')
        # Right-tail probability of the F distribution = the P value.
        arr_p_value = f.sf(arr_f, n_level - 1, n_degree_error)
        do_print(arr_p_value.tolist()[0], f'{str_ind_name}\tP value')
        lst_sig = [c for c, p in zip(lst_head[:lst_col_seg[0]], arr_p_value[0].tolist()) if p < alpha]
        if len(lst_sig) > 0:
            lst_report.append(f"For indicator {str_ind_name}, the sig factors are {lst_sig}")
    for s in lst_report:
        print(s)
if __name__ == '__main__':
    # First DOE: L25 array, 5 factors + 1 error column, 5 indicators,
    # 5 levels per factor.
    the_lst_col_seg = [5, 1, 5]
    the_n_level = 5
    anova(the_lst_col_seg, the_n_level, "oa25.txt", "result/experiment_result_25.xlsx")
    # Second test (kept for reference): L9 array, 3 factors + 1 error
    # column, 1 indicator, 3 levels per factor.
    # the_lst_col_seg = [3, 1, 1]
    # the_n_level = 3
    # anova(the_lst_col_seg, the_n_level, "oaL9-3-4.csv", "result.xlsx")

BIN
anova.xlsx Normal file

Binary file not shown.

31
env.py
View File

@ -13,6 +13,10 @@ class Env(ap.Model):
float_market_size: float
# percent_rh: float
percent_search: float
is_RH_ratio: float
is_FH_ratio: float
n_worker: int
n_firm: int
e_revenue: float
@ -31,11 +35,15 @@ class Env(ap.Model):
out_f_gini_profit: float
out_w_percent_hired: float
def setup(self):
def __init__(self, dct_all):
super().__init__()
# 工作人员、企业数量、搜寻企业数量赋值
self.n_worker = self.p.n_worker
self.n_firm = self.p.n_firm
self.percent_search = self.p.percent_search
self.n_worker = int(dct_all['n_worker'])
self.n_firm = int(dct_all['n_firm'])
self.percent_search = float(dct_all['percent_search'])
self.is_RH_ratio = float(dct_all['is_RH_ratio'])
self.is_FH_ratio = float(dct_all['is_FH_ratio'])
# 工人、企业列表
self.a_lst_worker = ap.AgentList(self)
self.a_lst_firm = ap.AgentList(self)
@ -44,15 +52,18 @@ class Env(ap.Model):
# 在工人列表中添加工人
for i in range(self.n_worker):
# 初始化 worker agent并把alpha属性传过去
w = WorkerAgent(self, self.p.alpha)
w = WorkerAgent(self, float(dct_all['alpha']))
self.a_lst_worker.append(w)
# 在企业列表中添加企业放入一个is_RH_ratio, 即有多大比例的企业是属于RH类型的
for i in range(self.n_firm):
# 对于企业属性true or false 的判断, 影响到firm 板块下, self.s_IsRH = is_RH 语句的判断
f = FirmAgent(self, self.p.is_RH_ratio >= uniform(0, 1), self.p.is_FH_ratio >= uniform(0, 1))
f = FirmAgent(self, self.is_RH_ratio >= uniform(0, 1), self.is_FH_ratio >= uniform(0, 1))
self.a_lst_firm.append(f)
self.running = True
self.t = 0
def update_e_revenue(self):
self.e_revenue += 0.01 * self.e_revenue
@ -79,9 +90,9 @@ class Env(ap.Model):
self.update()
if self.t == 200:
self.stop()
# self.picture_out()
# pass
self.running = False
else:
self.t += 1
def update(self):
lst_salary = []
@ -123,7 +134,7 @@ class Env(ap.Model):
worker.working_firm = None
self.a_lst_firm.remove(f)
del f
new_f = FirmAgent(self, self.p.is_RH_ratio >= uniform(0, 1), self.p.is_FH_ratio >= uniform(0, 1))
new_f = FirmAgent(self, self.is_RH_ratio >= uniform(0, 1), self.is_FH_ratio >= uniform(0, 1))
self.a_lst_firm.append(new_f)
else:
if f.s_profit < 0:

67
experiment.py Normal file
View File

@ -0,0 +1,67 @@
import numpy as np
import pandas as pd
from env import Env
import datetime
idx_start = 10  # first OA row to run; 1-based ("first index is 1 !")
n_sample = 30   # repeated simulation runs averaged per OA row

# Factor level table: xv.csv has one row per factor; after the transpose each
# column is a factor and each row index is a level (0-based).
df_xv = pd.read_csv("xv.csv", header=None, index_col=None).transpose()
lst_xv_keys = [
    'alpha',
    'percent_search',
    'is_RH_ratio',
    'is_FH_ratio'
]
df_xv.columns = lst_xv_keys

# Orthogonal array: fixed-width digits, one column per OA factor slot.
df_oa = pd.read_fwf("oa25.txt", header=None, widths=[1]*6)
n_row = df_oa.shape[0]

# Model parameters held constant across every experiment.
model_para = {
    "n_worker": 1000,
    "n_firm": 100
}
lst_op_key = ['out_w_avg_salary', 'out_w_gini_salary', 'out_f_avg_profit', 'out_f_gini_profit', 'out_w_percent_hired']

lst_2d_op_avg = []  # one averaged output row per executed OA row
lst_2d_xv = []      # concrete factor values used for each executed OA row
for idx_row in range(idx_start - 1, n_row, 1):
    print(f"Running the {idx_row + 1}-th experiment at {datetime.datetime.now()}.")
    # Map this OA row's level indices to the concrete factor values.
    lst_value = []
    for idx_col, k in enumerate(lst_xv_keys):
        oa_idx_level = int(df_oa.iat[idx_row, idx_col])
        lst_value.append(df_xv.iloc[oa_idx_level][k])
    lst_2d_xv.append(lst_value)
    dct_merge = {**model_para, **dict(zip(lst_xv_keys, lst_value))}
    lst_2d_op = []
    for i in range(n_sample):
        print(f"-- {i + 1}-th sample at {datetime.datetime.now()}.")
        the_model = Env(dct_merge)
        # Step the model until it flags itself finished via `running`.
        while the_model.running:
            the_model.step()
        lst_2d_op.append([the_model.out_w_avg_salary, the_model.out_w_gini_salary,
                          the_model.out_f_avg_profit, the_model.out_f_gini_profit,
                          the_model.out_w_percent_hired])
    # Average the outputs over the n_sample repetitions of this OA row.
    lst_2d_op_avg.append(np.array(lst_2d_op).mean(axis=0).tolist())
    # NOTE(review): saving inside the loop is deliberate for now so partial
    # results survive a crash; move outside the loop once runs are stable.
    # (Removed a dead `arr_op_avg = np.array(...)` assignment that was never
    # read — df_final is built from lst_2d_op_avg directly.)
    df_final = pd.concat([pd.DataFrame(lst_2d_xv, columns=lst_xv_keys).reset_index(drop=True),
                          df_oa.iloc[:, len(lst_xv_keys):].reset_index(drop=True)], axis=1)
    df_final = pd.concat([df_final.reset_index(drop=True),
                          pd.DataFrame(lst_2d_op_avg, columns=lst_op_key).reset_index(drop=True)], axis=1)
    df_final.to_excel(f'result/experiment_result_{idx_row + 1}.xlsx')
    df_final.to_csv(f'result/experiment_result_{idx_row + 1}.csv')

View File

@ -0,0 +1,26 @@
,alpha,percent_search,is_RH_ratio,is_FH_ratio,4,5,out_w_avg_salary,out_w_gini_salary,out_f_avg_profit,out_f_gini_profit,out_w_percent_hired
0,0.3,0.8,0.75,0.5,0,0,3264853080.7861195,0.07448611065757892,27068180827.948566,0.028471074467998493,0.9926999999999998
1,0.5,0.1,0.5,0.5,3,4,3637864305.7383804,0.07297697986009385,30978395677.329544,0.025035570583021458,0.9932
2,0.5,0.2,1.0,0.75,4,1,2179839879.06716,0.09840417843256091,14870250970.957703,0.09615406368119288,0.9886333333333329
3,0.5,0.3,0.75,0.25,1,2,3294909225.056388,0.07458035794885493,27529372104.086937,0.027549550048933245,0.9904999999999996
4,0.5,0.6,0.25,0.0,2,3,3716906250.022679,0.08529021609092702,30937889107.04533,0.025723945674350865,0.9942999999999997
5,0.5,0.8,0.0,1.0,1,1,3869912345.136378,0.09546657392456424,32080217202.03705,0.02533111889073941,1.0
6,0.7,0.1,0.75,0.75,0,3,3323780234.219529,0.07324360974831064,27481709495.610508,0.02827211821311022,0.9934666666666666
7,0.7,0.2,0.0,0.25,3,2,3890360391.8347297,0.09312614979722612,32025419370.64081,0.027331743681722087,0.9998666666666668
8,0.7,0.3,0.25,1.0,2,4,3724730633.529055,0.08585608644546336,30966818797.24322,0.02556013680036385,0.9997333333333333
9,0.7,0.6,1.0,0.5,4,0,2059080141.3863091,0.13011386291850205,11919309572.784859,0.17383571880300247,0.9850999999999996
10,0.7,0.8,0.5,0.0,2,2,3558046321.01991,0.07852769877145456,29326434059.91672,0.02645991757420794,0.9920333333333334
11,0.9,0.1,1.0,1.0,1,0,2217056469.2871513,0.09085566892663295,14019663732.042246,0.10640474650259805,0.9943333333333331
12,0.9,0.2,0.75,0.0,0,4,3288880578.2219296,0.07786092688764147,27583988860.902683,0.028509652392809604,0.9885666666666665
13,0.9,0.3,0.0,0.5,4,3,3879623567.573062,0.09407352725749332,32151544223.643482,0.026003986467554037,0.9999666666666667
14,0.9,0.6,0.5,0.25,3,1,3577219455.771972,0.0785081831642713,29903884548.088444,0.02491509062583037,0.9924
15,0.9,0.8,0.25,0.75,3,3,3710277766.924487,0.08632310052044662,30554824167.139378,0.026183286845803396,0.9967333333333335
16,,,,,4,2,,,,,
17,,,,,2,0,,,,,
18,,,,,0,1,,,,,
19,,,,,1,4,,,,,
20,,,,,4,4,,,,,
21,,,,,2,1,,,,,
22,,,,,1,3,,,,,
23,,,,,3,0,,,,,
24,,,,,0,2,,,,,
1 alpha percent_search is_RH_ratio is_FH_ratio 4 5 out_w_avg_salary out_w_gini_salary out_f_avg_profit out_f_gini_profit out_w_percent_hired
2 0 0.3 0.8 0.75 0.5 0 0 3264853080.7861195 0.07448611065757892 27068180827.948566 0.028471074467998493 0.9926999999999998
3 1 0.5 0.1 0.5 0.5 3 4 3637864305.7383804 0.07297697986009385 30978395677.329544 0.025035570583021458 0.9932
4 2 0.5 0.2 1.0 0.75 4 1 2179839879.06716 0.09840417843256091 14870250970.957703 0.09615406368119288 0.9886333333333329
5 3 0.5 0.3 0.75 0.25 1 2 3294909225.056388 0.07458035794885493 27529372104.086937 0.027549550048933245 0.9904999999999996
6 4 0.5 0.6 0.25 0.0 2 3 3716906250.022679 0.08529021609092702 30937889107.04533 0.025723945674350865 0.9942999999999997
7 5 0.5 0.8 0.0 1.0 1 1 3869912345.136378 0.09546657392456424 32080217202.03705 0.02533111889073941 1.0
8 6 0.7 0.1 0.75 0.75 0 3 3323780234.219529 0.07324360974831064 27481709495.610508 0.02827211821311022 0.9934666666666666
9 7 0.7 0.2 0.0 0.25 3 2 3890360391.8347297 0.09312614979722612 32025419370.64081 0.027331743681722087 0.9998666666666668
10 8 0.7 0.3 0.25 1.0 2 4 3724730633.529055 0.08585608644546336 30966818797.24322 0.02556013680036385 0.9997333333333333
11 9 0.7 0.6 1.0 0.5 4 0 2059080141.3863091 0.13011386291850205 11919309572.784859 0.17383571880300247 0.9850999999999996
12 10 0.7 0.8 0.5 0.0 2 2 3558046321.01991 0.07852769877145456 29326434059.91672 0.02645991757420794 0.9920333333333334
13 11 0.9 0.1 1.0 1.0 1 0 2217056469.2871513 0.09085566892663295 14019663732.042246 0.10640474650259805 0.9943333333333331
14 12 0.9 0.2 0.75 0.0 0 4 3288880578.2219296 0.07786092688764147 27583988860.902683 0.028509652392809604 0.9885666666666665
15 13 0.9 0.3 0.0 0.5 4 3 3879623567.573062 0.09407352725749332 32151544223.643482 0.026003986467554037 0.9999666666666667
16 14 0.9 0.6 0.5 0.25 3 1 3577219455.771972 0.0785081831642713 29903884548.088444 0.02491509062583037 0.9924
17 15 0.9 0.8 0.25 0.75 3 3 3710277766.924487 0.08632310052044662 30554824167.139378 0.026183286845803396 0.9967333333333335
18 16 4 2
19 17 2 0
20 18 0 1
21 19 1 4
22 20 4 4
23 21 2 1
24 22 1 3
25 23 3 0
26 24 0 2

Binary file not shown.

4
xv.csv Normal file
View File

@ -0,0 +1,4 @@
0.1, 0.3, 0.5, 0.7, 0.9
0.1, 0.2, 0.3, 0.6, 0.8
0, 0.25, 0.5, 0.75, 1
0, 0.25, 0.5, 0.75, 1
1 0.1 0.3 0.5 0.7 0.9
2 0.1 0.2 0.3 0.6 0.8
3 0 0.25 0.5 0.75 1
4 0 0.25 0.5 0.75 1