2023-05-15 10:42:16 +08:00
|
|
|
import pandas as pd
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
import networkx as nx
|
|
|
|
|
|
|
|
# Select 'SimHei' as the sans-serif font for every figure in this script
# (presumably so CJK text renders -- confirm the font is installed).
plt.rcParams.update({'font.sans-serif': 'SimHei'})
|
|
|
|
|
|
|
|
# count firm category
# Load the per-firm count table and print its summary statistics; the
# table is not used again in this section.
count_firm = pd.read_csv("analysis\\count_firm.csv")
print(count_firm.describe())

# Load the firm-to-firm link counts. Both firm id columns are forced to
# str so the CSV parser does not coerce them to numbers.
count_dcp = pd.read_csv(
    "analysis\\count_dcp.csv",
    dtype={
        'up_id_firm': str,
        'down_id_firm': str,
    },
)

# Keep only the links whose 'count' is strictly greater than 20.
count_dcp = count_dcp[count_dcp['count'] > 20]

# Every firm id that appears at either end of a retained link, without
# duplicates. dict.fromkeys keeps first-seen order, so the resulting node
# order is identical on every run; list(set(...)) would reorder the ids
# from run to run because str hashing is randomized per process.
list_firm = list(dict.fromkeys(
    count_dcp['up_id_firm'].tolist() + count_dcp['down_id_firm'].tolist()
))
|
|
|
|
|
|
|
|
# init graph firm
# Read the firm table, store 'Code' with the pandas string dtype, and
# replace every missing value with 0.
Firm = pd.read_csv("Firm_amended.csv")
Firm['Code'] = Firm['Code'].astype('string')
Firm = Firm.fillna(0)

# Descriptive columns that become node attributes.
attr_columns = ["Code", "Name", "Type_Region", "Revenue_Log"]
Firm_attr = Firm.loc[:, attr_columns]

# Columns from the one labelled '1' through the last column are scanned
# row by row; each firm gets the list of column labels whose value == 1.
product_flags = Firm.loc[:, '1':]
firm_product = [
    flags[flags == 1].index.to_list()
    for _, flags in product_flags.iterrows()
]
Firm_attr.loc[:, 'Product_Code'] = firm_product

# Index by firm code so attributes can be looked up with .loc[code].
Firm_attr = Firm_attr.set_index('Code')
|
|
|
|
|
|
|
|
# Directed multigraph with one node per firm id in list_firm.
G_firm = nx.MultiDiGraph()
G_firm.add_nodes_from(list_firm)

# Attach each firm's row of Firm_attr to its node as an attribute dict.
# A node whose code is absent from Firm_attr's index raises KeyError here.
firm_labels_dict = {
    node: Firm_attr.loc[node].to_dict()
    for node in G_firm.nodes
}
nx.set_node_attributes(G_firm, firm_labels_dict)
|
|
|
|
|
|
|
|
# Linear scaling of 'count' onto an edge width in [0, 5]:
#   edge_width = k * (count - count_min)
count_max = count_dcp['count'].max()
count_min = count_dcp['count'].min()
count_span = count_max - count_min
# When every retained link has the same count (or no link is retained)
# the span is not positive; dividing by it would produce inf/NaN widths.
# Fall back to k = 0 so every width is 0, matching what the minimum
# count maps to in the normal case.
k = 5 / count_span if count_span > 0 else 0.0

# One edge per row of count_dcp, from the upstream firm to the downstream
# firm, carrying the product ids/names, a display label, the scaled
# width and the raw count as edge attributes.
lst_add_edge = [
    (
        row['up_id_firm'],
        row['down_id_firm'],
        {
            'up_id_product': row['up_id_product'],
            'up_name_product': row['up_name_product'],
            'down_id_product': row['down_id_product'],
            'down_name_product': row['down_name_product'],
            'edge_label': f"{row['up_id_product']} - {row['down_id_product']}",
            'edge_width': k * (row['count'] - count_min),
            'count': row['count'],
        },
    )
    for _, row in count_dcp.iterrows()
]
# G_firm is a MultiDiGraph, so repeated (up, down) pairs are kept as
# parallel edges rather than merged.
G_firm.add_edges_from(lst_add_edge)
|
|
|
|
|
|
|
|
# dcp_networkx
# Node positions computed by Graphviz's "dot" program through networkx's
# nx_agraph interface.
pos = nx.nx_agraph.graphviz_layout(G_firm, prog="dot", args="")

# desensitize
# Each node that has a 'Name' attribute is labelled with its own graph
# key instead of that name.
node_label = {code: code for code in nx.get_node_attributes(G_firm, 'Name')}

# Node size is the square of the node's 'Revenue_Log' attribute.
node_size = [
    revenue_log**2
    for revenue_log in nx.get_node_attributes(G_firm, 'Revenue_Log').values()
]

# Edge attributes of the multigraph are keyed by (u, v, key). Labels are
# re-keyed by (u, v), so parallel edges between the same pair of nodes
# keep only the label of the last one.
edge_label = {
    (src, dst): text
    for (src, dst, _), text
    in nx.get_edge_attributes(G_firm, "edge_label").items()
}

# Per-edge scaled widths and raw counts, in the graph's edge order.
edge_width = list(nx.get_edge_attributes(G_firm, "edge_width").values())
colors = list(nx.get_edge_attributes(G_firm, "count").values())

# Colour scale limits and colormap used for the edges and the colorbar.
vmin = min(colors)
vmax = max(colors)
cmap = plt.cm.Blues
|
|
|
|
# Draw the firm graph: nodes sized by node_size, edges coloured by their
# raw count through cmap. Edges are drawn with a fixed width of 3; the
# edge_width list computed earlier is not passed to this call.
fig = plt.figure(figsize=(10, 8), dpi=300)
nx.draw(
    G_firm,
    pos,
    node_size=node_size,
    labels=node_label,
    font_size=8,
    width=3,
    edge_color=colors,
    edge_cmap=cmap,
    edge_vmin=vmin,
    edge_vmax=vmax,
)
nx.draw_networkx_edge_labels(G_firm, pos, edge_label, font_size=6)

# Stand-alone mappable so a colorbar can be drawn for the edge colours.
# It is given an empty data array through the public set_array() API
# instead of assigning to the private _A attribute.
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])

# Dedicated axes for the colorbar, given as [left, bottom, width, height]
# in figure coordinates.
position = fig.add_axes([0.95, 0.05, 0.01, 0.3])
cb = plt.colorbar(sm, fraction=0.01, cax=position)
cb.ax.tick_params(labelsize=10)
cb.outline.set_visible(False)

# Write the figure to disk, then close it.
plt.savefig("analysis\\count_dcp_network")
plt.close()
|