First DOE. Only C is significant.

This commit is contained in:
He Zhou 2023-01-19 11:37:21 +08:00
parent 9de2115ce8
commit f4dc9f7783
7 changed files with 199 additions and 10 deletions

81
anova.py Normal file
View File

@ -0,0 +1,81 @@
import numpy as np
import pandas as pd
from scipy.stats import f
def do_print(lst_value, str_col):
    """Print one ANOVA table row: the label, then tab-separated rounded values.

    P-value rows are rounded to 4 decimals, all other rows to 3.
    """
    n_digits = 4 if 'P value' in str_col else 3
    rounded = (str(round(value, n_digits)) for value in lst_value)
    str_data = '\t'.join(rounded)
    print(f'{str_col}\t{str_data}')
def anova(lst_col_seg, n_level, oa_file, result_file, alpha=0.1):
    """Run a one-way ANOVA per output indicator of an orthogonal-array DOE.

    Args:
        lst_col_seg: [n_factor, n_error, n_indicator] — column counts in the
            result sheet: factor columns, then error columns, then outputs.
        n_level: number of levels of every factor (levels coded 0..n_level-1).
        oa_file: fixed-width text file holding the orthogonal array, one digit
            per column.
        result_file: Excel workbook whose first sheet holds the experiment
            results, laid out as described by lst_col_seg.
        alpha: significance threshold for the P value (default 0.1, matching
            the previous hard-coded value).

    Prints the S / MS / F-ratio / P-value tables to stdout, then one report
    line per indicator that has at least one significant factor.
    """
    n_col_input = lst_col_seg[0] + lst_col_seg[1]
    # BUG FIX: the OA file name was hard-coded as "oa25.txt", silently
    # ignoring the oa_file argument. The widths are now derived from
    # lst_col_seg as well (the assert below already required this count).
    df_oa = pd.read_fwf(oa_file, header=None, widths=[1] * n_col_input)
    df_res = pd.read_excel(result_file, sheet_name=0, engine='openpyxl', index_col=0)
    assert df_res.shape[1] == sum(lst_col_seg), 'the column number is wrong'
    assert df_oa.shape[1] == n_col_input, 'the column number is wrong'
    lst_head = [f"{idx+1}_{ind_name}" for idx, ind_name in enumerate(df_res.columns)]
    n_exp_row = df_res.shape[0]
    # Error dof = total dof minus the dof consumed by the factor columns.
    n_degree_error = n_exp_row - 1 - (n_level - 1) * lst_col_seg[0]
    df_output = df_res.iloc[:, n_col_input:]
    print("Source\tSource\t" + '\t'.join(lst_head[:lst_col_seg[0]]) + "\te")
    print("DOF\tDOF\t" + '\t'.join([str(n_level-1)] * lst_col_seg[0]) + f"\t{n_degree_error}")
    lst_report = []
    for idx_col in range(lst_col_seg[2]):
        str_ind_name = lst_head[idx_col + n_col_input]
        df_y_col = df_output.iloc[:, idx_col]  # the y column
        # One copy of y per OA column, so level masks can be applied at once.
        df_y_col_repeated = np.tile(df_y_col, (n_col_input, 1)).T
        big_t = df_y_col.sum()  # the grand total T
        # Generate T1, ..., T(n_level): per-level sums of y for every column.
        lst_2d_big_t = []
        for level in range(n_level):
            arr_big_t = np.sum(df_y_col_repeated * np.where(df_oa == level, 1, 0), axis=0)
            lst_2d_big_t.append(arr_big_t.tolist())
        arr_big_t_2 = np.power(np.array(lst_2d_big_t), 2)
        # S = sum(T_i^2) / r - T^2 / N, with r = replicates per level.
        arr_s = np.sum(arr_big_t_2, axis=0) / (n_exp_row / n_level) - big_t * big_t / n_exp_row
        assert arr_s.size == n_col_input, 'wrong arr_s size'
        # First table done; now build the second (MS / F / P) table.
        df_s = pd.DataFrame(arr_s.reshape((1, n_col_input)), columns=lst_head[:n_col_input])
        do_print(arr_s.tolist(), f'{str_ind_name}\tS')
        df_s_non_error = df_s.iloc[:, :lst_col_seg[0]] / (n_level - 1)
        ms_of_error = df_s.iloc[:, lst_col_seg[0]:].sum().sum() / n_degree_error
        do_print(df_s_non_error.values.tolist()[0] + [ms_of_error], f'{str_ind_name}\tMS')
        arr_f = df_s_non_error / ms_of_error
        do_print(arr_f.values.tolist()[0], f'{str_ind_name}\tF ratio')
        # Right-tail probability of the F distribution = the P value.
        arr_p_value = f.sf(arr_f, n_level - 1, n_degree_error)
        do_print(arr_p_value.tolist()[0], f'{str_ind_name}\tP value')
        lst_sig = [c for c, p in zip(lst_head[:lst_col_seg[0]], arr_p_value[0].tolist()) if p < alpha]
        if len(lst_sig) > 0:
            lst_report.append(f"For indicator {str_ind_name}, the sig factors are {lst_sig}")
    for s in lst_report:
        print(s)
if __name__ == '__main__':
    # First DOE: L25 array, 5 factors + 1 error column, 5 indicators,
    # 5 levels per factor.
    the_lst_col_seg = [5, 1, 5]
    the_n_level = 5
    anova(the_lst_col_seg, the_n_level, "oa25.txt", "result/experiment_result_25.xlsx")
    # Second test (kept for reference): L9 array, 3 factors + 1 error
    # column, 1 indicator, 3 levels per factor.
    # the_lst_col_seg = [3, 1, 1]
    # the_n_level = 3
    # anova(the_lst_col_seg, the_n_level, "oaL9-3-4.csv", "result.xlsx")

BIN
anova.xlsx Normal file

Binary file not shown.

31
env.py
View File

@ -13,6 +13,10 @@ class Env(ap.Model):
float_market_size: float
# percent_rh: float
percent_search: float
is_RH_ratio: float
is_FH_ratio: float
n_worker: int
n_firm: int
e_revenue: float
@ -31,11 +35,15 @@ class Env(ap.Model):
out_f_gini_profit: float
out_w_percent_hired: float
def setup(self):
def __init__(self, dct_all):
super().__init__()
# 工作人员、企业数量、搜寻企业数量赋值
self.n_worker = self.p.n_worker
self.n_firm = self.p.n_firm
self.percent_search = self.p.percent_search
self.n_worker = int(dct_all['n_worker'])
self.n_firm = int(dct_all['n_firm'])
self.percent_search = float(dct_all['percent_search'])
self.is_RH_ratio = float(dct_all['is_RH_ratio'])
self.is_FH_ratio = float(dct_all['is_FH_ratio'])
# 工人、企业列表
self.a_lst_worker = ap.AgentList(self)
self.a_lst_firm = ap.AgentList(self)
@ -44,15 +52,18 @@ class Env(ap.Model):
# 在工人列表中添加工人
for i in range(self.n_worker):
# 初始化 worker agent并把alpha属性传过去
w = WorkerAgent(self, self.p.alpha)
w = WorkerAgent(self, float(dct_all['alpha']))
self.a_lst_worker.append(w)
# 在企业列表中添加企业放入一个is_RH_ratio, 即有多大比例的企业是属于RH类型的
for i in range(self.n_firm):
# 对于企业属性true or false 的判断, 影响到firm 板块下, self.s_IsRH = is_RH 语句的判断
f = FirmAgent(self, self.p.is_RH_ratio >= uniform(0, 1), self.p.is_FH_ratio >= uniform(0, 1))
f = FirmAgent(self, self.is_RH_ratio >= uniform(0, 1), self.is_FH_ratio >= uniform(0, 1))
self.a_lst_firm.append(f)
self.running = True
self.t = 0
def update_e_revenue(self):
self.e_revenue += 0.01 * self.e_revenue
@ -79,9 +90,9 @@ class Env(ap.Model):
self.update()
if self.t == 200:
self.stop()
# self.picture_out()
# pass
self.running = False
else:
self.t += 1
def update(self):
lst_salary = []
@ -123,7 +134,7 @@ class Env(ap.Model):
worker.working_firm = None
self.a_lst_firm.remove(f)
del f
new_f = FirmAgent(self, self.p.is_RH_ratio >= uniform(0, 1), self.p.is_FH_ratio >= uniform(0, 1))
new_f = FirmAgent(self, self.is_RH_ratio >= uniform(0, 1), self.is_FH_ratio >= uniform(0, 1))
self.a_lst_firm.append(new_f)
else:
if f.s_profit < 0:

67
experiment.py Normal file
View File

@ -0,0 +1,67 @@
import numpy as np
import pandas as pd
from env import Env
import datetime
idx_start = 10  # first OA row to run; 1-based ("first index is 1 !")
n_sample = 30   # repeated simulation runs averaged per OA row

# Factor level table: xv.csv has one row per factor; after the transpose each
# column is a factor and each row index is a level (0-based).
df_xv = pd.read_csv("xv.csv", header=None, index_col=None).transpose()
lst_xv_keys = [
    'alpha',
    'percent_search',
    'is_RH_ratio',
    'is_FH_ratio'
]
df_xv.columns = lst_xv_keys

# Orthogonal array: fixed-width digits, one column per OA factor slot.
df_oa = pd.read_fwf("oa25.txt", header=None, widths=[1]*6)
n_row = df_oa.shape[0]

# Model parameters held constant across every experiment.
model_para = {
    "n_worker": 1000,
    "n_firm": 100
}
lst_op_key = ['out_w_avg_salary', 'out_w_gini_salary', 'out_f_avg_profit', 'out_f_gini_profit', 'out_w_percent_hired']

lst_2d_op_avg = []  # one averaged output row per executed OA row
lst_2d_xv = []      # concrete factor values used for each executed OA row
for idx_row in range(idx_start - 1, n_row, 1):
    print(f"Running the {idx_row + 1}-th experiment at {datetime.datetime.now()}.")
    # Map this OA row's level indices to the concrete factor values.
    lst_value = []
    for idx_col, k in enumerate(lst_xv_keys):
        oa_idx_level = int(df_oa.iat[idx_row, idx_col])
        lst_value.append(df_xv.iloc[oa_idx_level][k])
    lst_2d_xv.append(lst_value)
    dct_merge = {**model_para, **dict(zip(lst_xv_keys, lst_value))}
    lst_2d_op = []
    for i in range(n_sample):
        print(f"-- {i + 1}-th sample at {datetime.datetime.now()}.")
        the_model = Env(dct_merge)
        # Step the model until it flags itself finished via `running`.
        while the_model.running:
            the_model.step()
        lst_2d_op.append([the_model.out_w_avg_salary, the_model.out_w_gini_salary,
                          the_model.out_f_avg_profit, the_model.out_f_gini_profit,
                          the_model.out_w_percent_hired])
    # Average the outputs over the n_sample repetitions of this OA row.
    lst_2d_op_avg.append(np.array(lst_2d_op).mean(axis=0).tolist())
    # NOTE(review): saving inside the loop is deliberate for now so partial
    # results survive a crash; move outside the loop once runs are stable.
    # (Removed a dead `arr_op_avg = np.array(...)` assignment that was never
    # read — df_final is built from lst_2d_op_avg directly.)
    df_final = pd.concat([pd.DataFrame(lst_2d_xv, columns=lst_xv_keys).reset_index(drop=True),
                          df_oa.iloc[:, len(lst_xv_keys):].reset_index(drop=True)], axis=1)
    df_final = pd.concat([df_final.reset_index(drop=True),
                          pd.DataFrame(lst_2d_op_avg, columns=lst_op_key).reset_index(drop=True)], axis=1)
    df_final.to_excel(f'result/experiment_result_{idx_row + 1}.xlsx')
    df_final.to_csv(f'result/experiment_result_{idx_row + 1}.csv')

View File

@ -0,0 +1,26 @@
,alpha,percent_search,is_RH_ratio,is_FH_ratio,4,5,out_w_avg_salary,out_w_gini_salary,out_f_avg_profit,out_f_gini_profit,out_w_percent_hired
0,0.3,0.8,0.75,0.5,0,0,3264853080.7861195,0.07448611065757892,27068180827.948566,0.028471074467998493,0.9926999999999998
1,0.5,0.1,0.5,0.5,3,4,3637864305.7383804,0.07297697986009385,30978395677.329544,0.025035570583021458,0.9932
2,0.5,0.2,1.0,0.75,4,1,2179839879.06716,0.09840417843256091,14870250970.957703,0.09615406368119288,0.9886333333333329
3,0.5,0.3,0.75,0.25,1,2,3294909225.056388,0.07458035794885493,27529372104.086937,0.027549550048933245,0.9904999999999996
4,0.5,0.6,0.25,0.0,2,3,3716906250.022679,0.08529021609092702,30937889107.04533,0.025723945674350865,0.9942999999999997
5,0.5,0.8,0.0,1.0,1,1,3869912345.136378,0.09546657392456424,32080217202.03705,0.02533111889073941,1.0
6,0.7,0.1,0.75,0.75,0,3,3323780234.219529,0.07324360974831064,27481709495.610508,0.02827211821311022,0.9934666666666666
7,0.7,0.2,0.0,0.25,3,2,3890360391.8347297,0.09312614979722612,32025419370.64081,0.027331743681722087,0.9998666666666668
8,0.7,0.3,0.25,1.0,2,4,3724730633.529055,0.08585608644546336,30966818797.24322,0.02556013680036385,0.9997333333333333
9,0.7,0.6,1.0,0.5,4,0,2059080141.3863091,0.13011386291850205,11919309572.784859,0.17383571880300247,0.9850999999999996
10,0.7,0.8,0.5,0.0,2,2,3558046321.01991,0.07852769877145456,29326434059.91672,0.02645991757420794,0.9920333333333334
11,0.9,0.1,1.0,1.0,1,0,2217056469.2871513,0.09085566892663295,14019663732.042246,0.10640474650259805,0.9943333333333331
12,0.9,0.2,0.75,0.0,0,4,3288880578.2219296,0.07786092688764147,27583988860.902683,0.028509652392809604,0.9885666666666665
13,0.9,0.3,0.0,0.5,4,3,3879623567.573062,0.09407352725749332,32151544223.643482,0.026003986467554037,0.9999666666666667
14,0.9,0.6,0.5,0.25,3,1,3577219455.771972,0.0785081831642713,29903884548.088444,0.02491509062583037,0.9924
15,0.9,0.8,0.25,0.75,3,3,3710277766.924487,0.08632310052044662,30554824167.139378,0.026183286845803396,0.9967333333333335
16,,,,,4,2,,,,,
17,,,,,2,0,,,,,
18,,,,,0,1,,,,,
19,,,,,1,4,,,,,
20,,,,,4,4,,,,,
21,,,,,2,1,,,,,
22,,,,,1,3,,,,,
23,,,,,3,0,,,,,
24,,,,,0,2,,,,,
1 alpha percent_search is_RH_ratio is_FH_ratio 4 5 out_w_avg_salary out_w_gini_salary out_f_avg_profit out_f_gini_profit out_w_percent_hired
2 0 0.3 0.8 0.75 0.5 0 0 3264853080.7861195 0.07448611065757892 27068180827.948566 0.028471074467998493 0.9926999999999998
3 1 0.5 0.1 0.5 0.5 3 4 3637864305.7383804 0.07297697986009385 30978395677.329544 0.025035570583021458 0.9932
4 2 0.5 0.2 1.0 0.75 4 1 2179839879.06716 0.09840417843256091 14870250970.957703 0.09615406368119288 0.9886333333333329
5 3 0.5 0.3 0.75 0.25 1 2 3294909225.056388 0.07458035794885493 27529372104.086937 0.027549550048933245 0.9904999999999996
6 4 0.5 0.6 0.25 0.0 2 3 3716906250.022679 0.08529021609092702 30937889107.04533 0.025723945674350865 0.9942999999999997
7 5 0.5 0.8 0.0 1.0 1 1 3869912345.136378 0.09546657392456424 32080217202.03705 0.02533111889073941 1.0
8 6 0.7 0.1 0.75 0.75 0 3 3323780234.219529 0.07324360974831064 27481709495.610508 0.02827211821311022 0.9934666666666666
9 7 0.7 0.2 0.0 0.25 3 2 3890360391.8347297 0.09312614979722612 32025419370.64081 0.027331743681722087 0.9998666666666668
10 8 0.7 0.3 0.25 1.0 2 4 3724730633.529055 0.08585608644546336 30966818797.24322 0.02556013680036385 0.9997333333333333
11 9 0.7 0.6 1.0 0.5 4 0 2059080141.3863091 0.13011386291850205 11919309572.784859 0.17383571880300247 0.9850999999999996
12 10 0.7 0.8 0.5 0.0 2 2 3558046321.01991 0.07852769877145456 29326434059.91672 0.02645991757420794 0.9920333333333334
13 11 0.9 0.1 1.0 1.0 1 0 2217056469.2871513 0.09085566892663295 14019663732.042246 0.10640474650259805 0.9943333333333331
14 12 0.9 0.2 0.75 0.0 0 4 3288880578.2219296 0.07786092688764147 27583988860.902683 0.028509652392809604 0.9885666666666665
15 13 0.9 0.3 0.0 0.5 4 3 3879623567.573062 0.09407352725749332 32151544223.643482 0.026003986467554037 0.9999666666666667
16 14 0.9 0.6 0.5 0.25 3 1 3577219455.771972 0.0785081831642713 29903884548.088444 0.02491509062583037 0.9924
17 15 0.9 0.8 0.25 0.75 3 3 3710277766.924487 0.08632310052044662 30554824167.139378 0.026183286845803396 0.9967333333333335
18 16 4 2
19 17 2 0
20 18 0 1
21 19 1 4
22 20 4 4
23 21 2 1
24 22 1 3
25 23 3 0
26 24 0 2

Binary file not shown.

4
xv.csv Normal file
View File

@ -0,0 +1,4 @@
0.1, 0.3, 0.5, 0.7, 0.9
0.1, 0.2, 0.3, 0.6, 0.8
0, 0.25, 0.5, 0.75, 1
0, 0.25, 0.5, 0.75, 1
1 0.1 0.3 0.5 0.7 0.9
2 0.1 0.2 0.3 0.6 0.8
3 0 0.25 0.5 0.75 1
4 0 0.25 0.5 0.75 1