salary02/experiment.py

87 lines
3.3 KiB
Python
Raw Permalink Normal View History

2023-01-19 11:37:21 +08:00
import datetime
import os

import numpy as np
import pandas as pd

from env import Env
2023-03-23 23:05:51 +08:00
"""
experiment.py reads the candidate input values from the file xv.csv and selects them according to the
Orthogonal Array (OA) table in the file oa25.txt.
The name oa25 means that the table defines 25 experiments in total; it supports at most 6 inputs, each with 5 levels.
Therefore, oa25.txt has 6 columns, and the levels are labelled 0, 1, 2, 3 and 4.
After reading these two files, the code below runs the model one experiment at a time (which is very inefficient),
records the outputs in lists, and saves the results in the sub-folder result.
"""
2023-01-19 11:37:21 +08:00
2023-03-23 23:05:51 +08:00
# Resume point for a bulk run that stopped unexpectedly: the number of the first experiment to run.
# Counting starts at 1 (not 0); with oa25.txt the largest valid value is 25.
idx_start = 1

# How many times the model is run for each experiment.
n_sample = 50

# Names of the four input indicators.
lst_xv_keys = ['alpha', 'percent_search', 'is_RH_ratio', 'is_FH_ratio']

# xv.csv is read without a header row and transposed; the resulting columns are then
# labelled with the indicator names above.
df_xv = pd.read_csv("xv.csv", header=None, index_col=None).T
df_xv.columns = lst_xv_keys

# The OA table is read as a fixed-width file: six columns, each one character wide.
df_oa = pd.read_fwf("oa25.txt", header=None, widths=[1] * 6)
n_row = df_oa.shape[0]
n_col = df_oa.shape[1]

# Model parameters that stay the same in every experiment.
model_para = dict(n_worker=1000, n_firm=100)

# Names of the six recorded outputs.
lst_op_key = [
    'out_w_avg_salary',
    'out_w_gini_salary',
    'out_f_avg_profit',
    'out_f_avg_yield',
    'out_f_gini_profit',
    'out_w_percent_hired',
]
2023-01-19 11:37:21 +08:00
# Create the output folder up front: failing here is cheap, whereas failing at the first
# save would throw away a full experiment's worth of simulation time.
os.makedirs("result", exist_ok=True)

n_xv = len(lst_xv_keys)
lst_2d_op_avg = []  # one row of sample-averaged outputs per finished experiment
lst_2d_xv = []  # one row of input values per finished experiment
for idx_row in range(idx_start - 1, n_row):
    print(f"Running the {idx_row + 1}-th experiment at {datetime.datetime.now()}.")

    # Translate the OA level indices of this row into the actual input values from xv.csv.
    lst_value = []
    for idx_col, k in enumerate(lst_xv_keys):
        oa_idx_level = int(df_oa.iat[idx_row, idx_col])
        lst_value.append(df_xv.iloc[oa_idx_level][k])
    lst_2d_xv.append(lst_value)

    # Merge the fixed and the varied parameters into one dictionary for the model.
    dct_merge = {**model_para, **dict(zip(lst_xv_keys, lst_value))}

    lst_2d_op = []
    for i in range(n_sample):
        print(f"-- {i + 1}-th sample at {datetime.datetime.now()}.")
        the_model = Env(dct_merge)
        # Step at least once, then keep stepping until the model reports it is no longer running.
        the_model.step()
        while the_model.running:
            the_model.step()
        # Record the six outputs, in the order given by lst_op_key, so the values always
        # line up with the column names used for the result table below.
        lst_2d_op.append([getattr(the_model, key) for key in lst_op_key])

    # Average every output over the samples of this experiment.
    lst_2d_op_avg.append(np.array(lst_2d_op).mean(axis=0).tolist())

    # Save a cumulative snapshot after every experiment, so an interrupted bulk run can be
    # resumed through idx_start. The OA columns that are not used as inputs are restricted to
    # the experiments executed in this run (idx_start .. idx_row + 1); taking all OA rows from
    # row 0 would pair them with the wrong experiments whenever idx_start > 1 and would pad
    # the snapshot with empty rows.
    df_oa_rest = df_oa.iloc[idx_start - 1:idx_row + 1, n_xv:].reset_index(drop=True)
    df_final = pd.concat(
        [
            pd.DataFrame(lst_2d_xv, columns=lst_xv_keys),
            df_oa_rest,
            pd.DataFrame(lst_2d_op_avg, columns=lst_op_key),
        ],
        axis=1,
    )

    # Write the snapshot both as an Excel file and as a CSV file.
    df_final.to_excel(f'result/experiment_result_{idx_row + 1}.xlsx')
    df_final.to_csv(f'result/experiment_result_{idx_row + 1}.csv')