First DOE. Only C is significant.

2023-01-19 11:37:21 +08:00
parent 9de2115ce8
commit f4dc9f7783
7 changed files with 199 additions and 10 deletions
--- a/anova.py
+++ b/anova.py
@@ -0,0 +1,81 @@
 import numpy as np
 import pandas as pd
 from scipy.stats import f
 def do_print(lst_value, str_col):
    str_data = '\t'.join([str(round(e, 4 if 'P value' in str_col else 3)) for e in lst_value])
    print(f'{str_col}\t{str_data}')
 def anova(lst_col_seg, n_level, oa_file, result_file):
    df_oa = pd.read_fwf("oa25.txt", header=None, widths=[1]*6)
    df_res = pd.read_excel(result_file, sheet_name=0, engine='openpyxl', index_col=0)
    assert df_res.shape[1] == sum(lst_col_seg), 'the column number is wrong'
    assert df_oa.shape[1] == lst_col_seg[0] + lst_col_seg[1], 'the column number is wrong'
    lst_head = [f"{idx+1}_{ind_name}" for idx, ind_name in enumerate(df_res.columns)]
    n_col_input = lst_col_seg[0] + lst_col_seg[1]
    n_exp_row = df_res.shape[0]
    n_degree_error = n_exp_row - 1 - (n_level - 1) * lst_col_seg[0]
    df_output = df_res.iloc[:, n_col_input:]
    print("Source\tSource\t" + '\t'.join(lst_head[:lst_col_seg[0]]) + "\te")
    print("DOF\tDOF\t" + '\t'.join([str(n_level-1)] * lst_col_seg[0]) + f"\t{n_degree_error}")
    lst_report = []
    for idx_col in range(lst_col_seg[2]):
        str_ind_name = lst_head[idx_col+n_col_input]
        df_y_col = df_output.iloc[:, idx_col]  # the y column
        # sum_y_2 = np.sum(np.power(df_y_col, 2))  # sum(y^2)
        df_y_col_repeated = np.tile(df_y_col, (n_col_input, 1)).T  # repeat the y column
        big_t = df_y_col.sum()  # the big T
        # generate T1, ..., T(n_levels)
        lst_2d_big_t = []
        for level in range(n_level):
            arr_big_t = np.sum(df_y_col_repeated * np.where(df_oa == level, 1, 0), axis=0)
            lst_2d_big_t.append(arr_big_t.tolist())
        arr_big_t_2 = np.power(np.array(lst_2d_big_t), 2)
        arr_s = np.sum(arr_big_t_2, axis=0) / (n_exp_row / n_level) - big_t * big_t / n_exp_row
        assert arr_s.size == n_col_input, 'wrong arr_s size'
        # sum_s = np.sum(arr_s)
        # so far, the first table is computed. Now, compute the second table
        df_s = pd.DataFrame(arr_s.reshape((1, n_col_input)), columns=lst_head[:n_col_input])
        do_print(arr_s.tolist(), f'{str_ind_name}\tS')
        df_s_non_error = df_s.iloc[:, :lst_col_seg[0]] / (n_level - 1)
        ms_of_error = df_s.iloc[:, lst_col_seg[0]:].sum().sum() / n_degree_error
        do_print(df_s_non_error.values.tolist()[0] + [ms_of_error], f'{str_ind_name}\tMS')
        arr_f = df_s_non_error / ms_of_error
        do_print(arr_f.values.tolist()[0], f'{str_ind_name}\tF ratio')
        alpha = 0.1
        arr_p_value = f.sf(arr_f, n_level - 1, n_degree_error)
        do_print(arr_p_value.tolist()[0], f'{str_ind_name}\tP value')
        lst_sig = [c for c, p in zip(lst_head[:lst_col_seg[0]], arr_p_value[0].tolist()) if p < alpha]
        # print(df_output.columns[idx_col], arr_p_value, lst_sig)
        if len(lst_sig) > 0:
            lst_report.append(f"For indicator {str_ind_name}, the sig factors are {lst_sig}")
    for s in lst_report:
        print(s)
 if __name__ == '__main__':
    # first test
    the_lst_col_seg = [5, 1, 5]  # 11 factors, 2 for error, and 6 indicators
    the_n_level = 5
    anova(the_lst_col_seg, the_n_level, "oa25.txt", "result/experiment_result_25.xlsx")
    # second test
    # the_lst_col_seg = [3, 1, 1]  # 11 factors, 2 for error, and 6 indicators
    # the_n_level = 3
    # anova(the_lst_col_seg, the_n_level, "oaL9-3-4.csv", "result.xlsx")
--- a/anova.xlsx
+++ b/anova.xlsx
--- a/env.py
+++ b/env.py
@@ -13,6 +13,10 @@ class Env(ap.Model):
    float_market_size: float
    # percent_rh: float
    percent_search: float
    is_RH_ratio: float
    is_FH_ratio: float
    n_worker: int
    n_firm: int
    e_revenue: float
@@ -31,11 +35,15 @@ class Env(ap.Model):
    out_f_gini_profit: float
    out_w_percent_hired: float
-    def setup(self):
+    def __init__(self, dct_all):
        super().__init__()
        # 工作人员、企业数量、搜寻企业数量赋值
-        self.n_worker = self.p.n_worker
+        self.n_worker = int(dct_all['n_worker'])
-        self.n_firm = self.p.n_firm
+        self.n_firm = int(dct_all['n_firm'])
-        self.percent_search = self.p.percent_search
+        self.percent_search = float(dct_all['percent_search'])
        self.is_RH_ratio = float(dct_all['is_RH_ratio'])
        self.is_FH_ratio = float(dct_all['is_FH_ratio'])
        # 工人、企业列表
        self.a_lst_worker = ap.AgentList(self)
        self.a_lst_firm = ap.AgentList(self)
@@ -44,15 +52,18 @@ class Env(ap.Model):
        # 在工人列表中添加工人
        for i in range(self.n_worker):
            # 初始化 worker agent，并把alpha属性传过去
-            w = WorkerAgent(self, self.p.alpha)
+            w = WorkerAgent(self, float(dct_all['alpha']))
            self.a_lst_worker.append(w)
        # 在企业列表中添加企业，放入一个is_RH_ratio, 即有多大比例的企业是属于RH类型的
        for i in range(self.n_firm):
            # 对于企业属性true or false 的判断， 影响到firm 板块下， self.s_IsRH = is_RH 语句的判断
-            f = FirmAgent(self, self.p.is_RH_ratio >= uniform(0, 1), self.p.is_FH_ratio >= uniform(0, 1))
+            f = FirmAgent(self, self.is_RH_ratio >= uniform(0, 1), self.is_FH_ratio >= uniform(0, 1))
            self.a_lst_firm.append(f)
        self.running = True
        self.t = 0
    def update_e_revenue(self):
        self.e_revenue += 0.01 * self.e_revenue
@@ -79,9 +90,9 @@ class Env(ap.Model):
        self.update()
        if self.t == 200:
-            self.stop()
+            self.running = False
-            # self.picture_out()
+        else:
-        # pass
+            self.t += 1
    def update(self):
        lst_salary = []
@@ -123,7 +134,7 @@ class Env(ap.Model):
                    worker.working_firm = None
                self.a_lst_firm.remove(f)
                del f
-                new_f = FirmAgent(self, self.p.is_RH_ratio >= uniform(0, 1), self.p.is_FH_ratio >= uniform(0, 1))
+                new_f = FirmAgent(self, self.is_RH_ratio >= uniform(0, 1), self.is_FH_ratio >= uniform(0, 1))
                self.a_lst_firm.append(new_f)
            else:
                if f.s_profit < 0:
--- a/experiment.py
+++ b/experiment.py
@@ -0,0 +1,67 @@
 import numpy as np
 import pandas as pd
 from env import Env
 import datetime
 idx_start = 10  # first index is 1 !
 n_sample = 30
 df_xv = pd.read_csv("xv.csv", header=None, index_col=None).transpose()
 lst_xv_keys = [
    'alpha',
    'percent_search',
    'is_RH_ratio',
    'is_FH_ratio'
 ]
 df_xv.columns = lst_xv_keys
 df_oa = pd.read_fwf("oa25.txt", header=None, widths=[1]*6)
 n_row, n_col = df_oa.shape
 model_para = {
    "n_worker": 1000,
    "n_firm": 100
 }
 lst_op_key = ['out_w_avg_salary', 'out_w_gini_salary', 'out_f_avg_profit', 'out_f_gini_profit', 'out_w_percent_hired']
 lst_2d_op_avg = []
 lst_2d_xv = []
 for idx_row in range(idx_start-1, n_row, 1):
    print(f"Running the {idx_row + 1}-th experiment at {datetime.datetime.now()}.")
    lst_value = []
    for idx_col, k in enumerate(lst_xv_keys):
        oa_idx_level = int(df_oa.iat[idx_row, idx_col])
        lst_value.append(df_xv.iloc[oa_idx_level][k])
    lst_2d_xv.append(lst_value)
    dct_merge = {**model_para, **dict(zip(lst_xv_keys, lst_value))}
    # pprint.pprint(dct_merge)
    lst_2d_op = []
    for i in range(n_sample):
        print(f"-- {i + 1}-th sample at {datetime.datetime.now()}.")
        the_model = Env(dct_merge)
        while 1:
            the_model.step()
            if not the_model.running:
                break
        lst_2d_op.append([the_model.out_w_avg_salary, the_model.out_w_gini_salary,
                          the_model.out_f_avg_profit, the_model.out_f_gini_profit,
                          the_model.out_w_percent_hired])
    arr_op = np.array(lst_2d_op)
    lst_2d_op_avg.append(arr_op.mean(axis=0).tolist())
    # these codes below should be outside of loop. but temply inside
    arr_op_avg = np.array(lst_2d_op_avg)
    df_final = pd.concat([pd.DataFrame(lst_2d_xv, columns=lst_xv_keys).reset_index(drop=True),
                          df_oa.iloc[:, len(lst_xv_keys):].reset_index(drop=True)], axis=1)
    df_final = pd.concat([df_final.reset_index(drop=True),
                          pd.DataFrame(lst_2d_op_avg, columns=lst_op_key).reset_index(drop=True)], axis=1)
    df_final.to_excel(f'result/experiment_result_{idx_row + 1}.xlsx')
    df_final.to_csv(f'result/experiment_result_{idx_row + 1}.csv')
--- a/result/experiment_result_25.csv
+++ b/result/experiment_result_25.csv
@@ -0,0 +1,26 @@
 ,alpha,percent_search,is_RH_ratio,is_FH_ratio,4,5,out_w_avg_salary,out_w_gini_salary,out_f_avg_profit,out_f_gini_profit,out_w_percent_hired
 0,0.3,0.8,0.75,0.5,0,0,3264853080.7861195,0.07448611065757892,27068180827.948566,0.028471074467998493,0.9926999999999998
 1,0.5,0.1,0.5,0.5,3,4,3637864305.7383804,0.07297697986009385,30978395677.329544,0.025035570583021458,0.9932
 2,0.5,0.2,1.0,0.75,4,1,2179839879.06716,0.09840417843256091,14870250970.957703,0.09615406368119288,0.9886333333333329
 3,0.5,0.3,0.75,0.25,1,2,3294909225.056388,0.07458035794885493,27529372104.086937,0.027549550048933245,0.9904999999999996
 4,0.5,0.6,0.25,0.0,2,3,3716906250.022679,0.08529021609092702,30937889107.04533,0.025723945674350865,0.9942999999999997
 5,0.5,0.8,0.0,1.0,1,1,3869912345.136378,0.09546657392456424,32080217202.03705,0.02533111889073941,1.0
 6,0.7,0.1,0.75,0.75,0,3,3323780234.219529,0.07324360974831064,27481709495.610508,0.02827211821311022,0.9934666666666666
 7,0.7,0.2,0.0,0.25,3,2,3890360391.8347297,0.09312614979722612,32025419370.64081,0.027331743681722087,0.9998666666666668
 8,0.7,0.3,0.25,1.0,2,4,3724730633.529055,0.08585608644546336,30966818797.24322,0.02556013680036385,0.9997333333333333
 9,0.7,0.6,1.0,0.5,4,0,2059080141.3863091,0.13011386291850205,11919309572.784859,0.17383571880300247,0.9850999999999996
 10,0.7,0.8,0.5,0.0,2,2,3558046321.01991,0.07852769877145456,29326434059.91672,0.02645991757420794,0.9920333333333334
 11,0.9,0.1,1.0,1.0,1,0,2217056469.2871513,0.09085566892663295,14019663732.042246,0.10640474650259805,0.9943333333333331
 12,0.9,0.2,0.75,0.0,0,4,3288880578.2219296,0.07786092688764147,27583988860.902683,0.028509652392809604,0.9885666666666665
 13,0.9,0.3,0.0,0.5,4,3,3879623567.573062,0.09407352725749332,32151544223.643482,0.026003986467554037,0.9999666666666667
 14,0.9,0.6,0.5,0.25,3,1,3577219455.771972,0.0785081831642713,29903884548.088444,0.02491509062583037,0.9924
 15,0.9,0.8,0.25,0.75,3,3,3710277766.924487,0.08632310052044662,30554824167.139378,0.026183286845803396,0.9967333333333335
 16,,,,,4,2,,,,,
 17,,,,,2,0,,,,,
 18,,,,,0,1,,,,,
 19,,,,,1,4,,,,,
 20,,,,,4,4,,,,,
 21,,,,,2,1,,,,,
 22,,,,,1,3,,,,,
 23,,,,,3,0,,,,,
 24,,,,,0,2,,,,,
--- a/result/experiment_result_25.xlsx
+++ b/result/experiment_result_25.xlsx
--- a/xv.csv
+++ b/xv.csv
@@ -0,0 +1,4 @@
 0.1, 0.3, 0.5, 0.7, 0.9
 0.1, 0.2, 0.3, 0.6, 0.8
 0, 0.25, 0.5, 0.75, 1
 0, 0.25, 0.5, 0.75, 1