# salary02/experiment.py

import datetime
import os

import numpy as np
import pandas as pd

from env import Env

"""
    experiment.py reads the input values from the file xv.csv according to the Orthogonal Array (OA) table
    in the file oa25.txt.
    The name oa25 means that the total number of experiments is 25; the table supports at most 6 inputs, each
    with 5 levels. Therefore, oa25.txt has 6 columns, and the levels are labelled 0, 1, 2, 3 and 4.
    After reading these two files, the code below runs the model one experiment at a time (which is very
    inefficient), uses lists to record the outputs, and saves the results in the sub-folder result.
"""

# 1-based index of the first experiment to run (the first index is 1, not 0).
# Change it to resume a bulk run that stopped unexpectedly; with oa25.txt the
# largest valid value is 25.
idx_start: int = 1

# How many times the model is run for each experiment.
n_sample: int = 50

# Names of the four input indicators, in the order they are assigned as column
# labels below.
lst_xv_keys = ['alpha', 'percent_search', 'is_RH_ratio', 'is_FH_ratio']

# xv.csv is read without a header row and transposed, so that each input
# indicator becomes one column and each row position is one level.
df_xv = pd.read_csv("xv.csv", header=None, index_col=None).T
df_xv.columns = lst_xv_keys

# Load the OA table: fixed-width text with six one-character columns and no
# header row.
df_oa = pd.read_fwf("oa25.txt", widths=[1, 1, 1, 1, 1, 1], header=None)
n_row = len(df_oa.index)
n_col = len(df_oa.columns)
# Model parameters that stay the same across all experiments.
model_para = dict(n_worker=1000, n_firm=100)

# The six outputs recorded for every sample. Each name is both the column
# label in the result files and the attribute read from the finished model.
lst_op_key = ['out_w_avg_salary', 'out_w_gini_salary', 'out_f_avg_profit', 'out_f_avg_yield',
              'out_f_gini_profit', 'out_w_percent_hired']

# idx_start is 1-based; a smaller value would silently wrap around to the end
# of the OA table through negative indexing.
if idx_start < 1:
    raise ValueError(f"idx_start is 1-based and must be >= 1, got {idx_start}.")

# The writers below do not create missing folders, so make sure the output
# folder exists before spending time on the simulations.
os.makedirs("result", exist_ok=True)

n_xv = len(lst_xv_keys)
lst_2d_op_avg = []  # one row of averaged outputs per finished experiment
lst_2d_xv = []      # one row of input values per finished experiment
for idx_row in range(idx_start - 1, n_row):
    print(f"Running the {idx_row + 1}-th experiment at {datetime.datetime.now()}.")

    # Map the OA level index of every input to its actual value in xv.csv.
    lst_value = [df_xv.iloc[int(df_oa.iat[idx_row, idx_col])][k]
                 for idx_col, k in enumerate(lst_xv_keys)]
    lst_2d_xv.append(lst_value)

    # Merge the fixed and the varying parameters into one dictionary and send
    # it to the model.
    dct_merge = {**model_para, **dict(zip(lst_xv_keys, lst_value))}

    lst_2d_op = []
    for i in range(n_sample):
        print(f"-- {i + 1}-th sample at {datetime.datetime.now()}.")
        the_model = Env(dct_merge)

        # Step at least once, then keep stepping until the model stops itself.
        while True:
            the_model.step()
            if not the_model.running:
                break

        # Record the six outputs of this sample.
        lst_2d_op.append([getattr(the_model, key) for key in lst_op_key])

    # Average every output over the samples of this experiment.
    lst_2d_op_avg.append(np.array(lst_2d_op).mean(axis=0).tolist())

    # Checkpoint after every experiment so that an interrupted bulk run keeps
    # its finished results. Only the OA rows of the experiments run so far
    # (idx_start .. idx_row + 1) are attached, which keeps the unused OA
    # columns aligned with the input and output rows.
    df_final = pd.concat(
        [
            pd.DataFrame(lst_2d_xv, columns=lst_xv_keys),
            df_oa.iloc[idx_start - 1:idx_row + 1, n_xv:].reset_index(drop=True),
            pd.DataFrame(lst_2d_op_avg, columns=lst_op_key),
        ],
        axis=1,
    )

    # Save to Excel and CSV files.
    df_final.to_excel(f'result/experiment_result_{idx_row + 1}.xlsx')
    df_final.to_csv(f'result/experiment_result_{idx_row + 1}.csv')