# IIabm/analysis.py
#
# 188 lines
# 7.3 KiB
# Python

from orm import engine
import pandas as pd
import networkx as nx
import json
import matplotlib.pyplot as plt
# prep data
# Firm table: 'Code' is cast to pandas' string dtype and every missing value
# in the table is replaced by 0.
Firm = pd.read_csv("Firm_amended.csv")
Firm['Code'] = Firm['Code'].astype('string')
Firm.fillna(0, inplace=True)
# BomNodes table; the first CSV column becomes the index.
BomNodes = pd.read_csv('BomNodes.csv', index_col=0)

# Start from every result row with ts > 0 ...
result = pd.read_sql(sql='select * from iiabmdb.not_test_result where ts > 0;',
                     con=engine)
lst_s_id = list(set(result['s_id'].to_list()))
# ... then fetch the ts = 0 rows of each s_id seen above. The per-s_id frames
# are collected first and concatenated in a single call: growing `result`
# with pd.concat inside the loop re-copies the whole accumulated frame on
# every iteration.
ts0_frames = [
    pd.read_sql(
        sql=
        f'select * from iiabmdb.not_test_result where ts = 0 and s_id = {s_id};',
        con=engine)
    for s_id in lst_s_id
]
result = pd.concat([result, *ts0_frames])
# Index by the row id and order by it, so the fetch order above is irrelevant.
result.set_index('id', inplace=True)
result.sort_index(inplace=True)
# Same dtype as Firm['Code'], which 'id_firm' is merged against below.
result['id_firm'] = result['id_firm'].astype('string')
# G bom
# Load the graph stored in the g_bom column of the experiment row with
# id = exp_id, lay it out with graphviz "twopi" and save the drawing as a PNG.
exp_id = 1
# NOTE(review): SimHei is presumably selected so that non-Latin node names
# render -- confirm.
plt.rcParams['font.sans-serif'] = 'SimHei'

g_bom_column = pd.read_sql(
    sql=f'select g_bom from iiabmdb.not_test_experiment where id = {exp_id};',
    con=engine)['g_bom']
G_bom_str = g_bom_column.tolist()[0]
# The column holds the graph as JSON in networkx adjacency format.
G_bom = nx.adjacency_graph(json.loads(G_bom_str))

pos = nx.nx_agraph.graphviz_layout(G_bom, prog="twopi", args="")
# Nodes are labelled with their 'Name' attribute.
node_labels = nx.get_node_attributes(G_bom, 'Name')

plt.figure(figsize=(12, 12), dpi=300)
nx.draw_networkx_nodes(G_bom, pos)
nx.draw_networkx_edges(G_bom, pos)
nx.draw_networkx_labels(G_bom, pos, labels=node_labels, font_size=6)
# NOTE(review): the backslash is a Windows path separator; on POSIX it would
# become part of the file name.
plt.savefig(f"analysis\\g_bom_exp_id_{exp_id}.png")
plt.close()
# G firm
# Load the graph stored in the g_firm column of one not_test_sample row, lay it
# out with graphviz "twopi" and save the drawing as a PNG.
plt.rcParams['font.sans-serif'] = 'SimHei'
sample_id = 1
# The row comes from the not_test_sample table and the output file is named
# g_firm_sample_id_*, so both are keyed by sample_id. (They previously
# interpolated exp_id, which left sample_id unused and only worked while the
# two ids happened to be equal.)
G_firm_str = pd.read_sql(
    sql=f'select g_firm from iiabmdb.not_test_sample where id = {sample_id};',
    con=engine)['g_firm'].tolist()[0]
# The column holds the graph as JSON in networkx adjacency format.
G_firm = nx.adjacency_graph(json.loads(G_firm_str))
pos = nx.nx_agraph.graphviz_layout(G_firm, prog="twopi", args="")

# Node label: "<Name> <out-degree>".
node_degree = dict(G_firm.out_degree())
node_label = {
    node: f"{name} {node_degree[node]}"
    for node, name in nx.get_node_attributes(G_firm, 'Name').items()
}
# Node size: the 'Revenue_Log' attribute squared.
node_size = [
    revenue_log**2
    for revenue_log in nx.get_node_attributes(G_firm, 'Revenue_Log').values()
]
# Edge attributes are keyed by (u, v, key) triples; drop the third element so
# the labels are keyed by (u, v). If several edges join the same pair of
# nodes, the last 'Product' value wins.
edge_label = {
    (n1, n2): label
    for (n1, n2, _), label in nx.get_edge_attributes(G_firm,
                                                     "Product").items()
}

plt.figure(figsize=(12, 12), dpi=300)
nx.draw(G_firm, pos, node_size=node_size, labels=node_label, font_size=6)
nx.draw_networkx_edge_labels(G_firm, pos, edge_label, font_size=4)
# NOTE(review): the backslash is a Windows path separator; on POSIX it would
# become part of the file name.
plt.savefig(f"analysis\\g_firm_sample_id_{sample_id}.png")
plt.close()
# count firm product
# Number of result rows per (id_firm, id_product) pair, with the firm and
# product names looked up from Firm and BomNodes, written to CSV and printed.
firm_product_counts = result.value_counts(subset=['id_firm', 'id_product'])
count_firm_prod = (
    firm_product_counts.to_frame(name='count').reset_index()
    # Firm name: left join on the firm code, then drop the duplicate key column.
    .merge(Firm[['Code', 'Name']],
           how='left',
           left_on='id_firm',
           right_on='Code')
    .drop(columns='Code')
    .rename(columns={'Name': 'name_firm'})
    # Product name: same pattern against BomNodes.
    .merge(BomNodes[['Code', 'Name']],
           how='left',
           left_on='id_product',
           right_on='Code')
    .drop(columns='Code')
    .rename(columns={'Name': 'name_product'})
)
# Fix the column order of the exported table.
export_columns = ['id_firm', 'name_firm', 'id_product', 'name_product', 'count']
count_firm_prod = count_firm_prod[export_columns]
count_firm_prod.to_csv('analysis\\count_firm_prod.csv',
                       index=False,
                       encoding='utf-8-sig')
print(count_firm_prod)
# count firm
# Per-firm total of the (firm, product) counts, with the firm name attached,
# ordered from the highest count to the lowest, written to CSV and printed.
firm_totals = count_firm_prod.groupby('id_firm')['count'].sum().reset_index()
count_firm = (
    firm_totals
    .merge(Firm[['Code', 'Name']],
           how='left',
           left_on='id_firm',
           right_on='Code')
    .drop(columns='Code')
    .sort_values('count', ascending=False)
)
count_firm = count_firm[['id_firm', 'Name', 'count']]
count_firm.to_csv('analysis\\count_firm.csv',
                  index=False,
                  encoding='utf-8-sig')
print(count_firm)
# count product
# Per-product total of the (firm, product) counts, with the product name
# attached, ordered from the highest count to the lowest, written to CSV and
# printed.
product_totals = (
    count_firm_prod.groupby('id_product')['count'].sum().reset_index())
product_totals_named = product_totals.merge(BomNodes[['Code', 'Name']],
                                            how='left',
                                            left_on='id_product',
                                            right_on='Code')
# 'Code' duplicates 'id_product' after the join, so it is dropped.
count_prod = product_totals_named.drop(columns='Code').sort_values(
    'count', ascending=False)
count_prod = count_prod[['id_product', 'Name', 'count']]
count_prod.to_csv('analysis\\count_prod.csv',
                  index=False,
                  encoding='utf-8-sig')
print(count_prod)
# DCP disruption causing probability
# NOTE(review): this filtered frame is only printed; the pairing loop below
# iterates over the unfiltered `result`. Confirm whether the is_disrupted
# filter was meant to apply to the pairing as well.
result_disrupt_ts_above_0 = result[(result['ts'] > 0)
                                   & (result['is_disrupted'] == 1)]
print(result_disrupt_ts_above_0)

# Within every s_id, pair each row at an earlier time step ("up") with each
# row at a later time step ("down"): one output row per (up, down) combination
# for every ts_end < ts_start, with ts_start running from the largest ts of
# the group down to 1 and ts_end from ts_start - 1 down to 0.
#
# Rows are accumulated in a plain list and the DataFrame is built once at the
# end; appending with `result_dcp.loc[len(result_dcp.index)] = row` copies the
# frame on every insert and is quadratic in the number of pairs.
dcp_columns = [
    's_id', 'up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product'
]
dcp_rows = []
for sid, group in result.groupby('s_id'):
    # int() so that range() also works if 'ts' came back as a float or numpy
    # scalar.
    ts_max = int(max(group['ts']))
    for ts_start in range(ts_max, 0, -1):
        # The "down" side depends only on ts_start, so select it once per
        # ts_start instead of once per ts_end.
        down_pairs = group.loc[group['ts'] == ts_start,
                               ['id_firm', 'id_product']].values.tolist()
        for ts_end in range(ts_start - 1, -1, -1):
            up_pairs = group.loc[group['ts'] == ts_end,
                                 ['id_firm', 'id_product']].values.tolist()
            # Same nesting order as before: every "up" row against every
            # "down" row.
            dcp_rows.extend([sid, *up_pair, *down_pair]
                            for up_pair in up_pairs
                            for down_pair in down_pairs)
result_dcp = pd.DataFrame(dcp_rows, columns=dcp_columns)
# Number of occurrences of every (up firm, up product, down firm, down product)
# combination in result_dcp, with all four names looked up, written to CSV and
# printed.
dcp_key_columns = [
    'up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product'
]
count_dcp = result_dcp.value_counts(subset=dcp_key_columns)
count_dcp = count_dcp.to_frame(name='count').reset_index()

# (key column in count_dcp, lookup table, name column to create), applied in
# this order. Each step is a left join on the lookup's 'Code', after which the
# duplicate 'Code' column is dropped and 'Name' is renamed.
name_lookups = (
    ('up_id_firm', Firm, 'up_name_firm'),
    ('up_id_product', BomNodes, 'up_name_product'),
    ('down_id_firm', Firm, 'down_name_firm'),
    ('down_id_product', BomNodes, 'down_name_product'),
)
for key_column, lookup_table, name_column in name_lookups:
    count_dcp = (
        count_dcp
        .merge(lookup_table[['Code', 'Name']],
               how='left',
               left_on=key_column,
               right_on='Code')
        .drop(columns='Code')
        .rename(columns={'Name': name_column})
    )

# Fix the column order of the exported table.
count_dcp = count_dcp[[
    'up_id_firm', 'up_name_firm', 'up_id_product', 'up_name_product',
    'down_id_firm', 'down_name_firm', 'down_id_product', 'down_name_product',
    'count'
]]
count_dcp.to_csv('analysis\\count_dcp.csv', index=False, encoding='utf-8-sig')
print(count_dcp)