# mesa/risk_analysis_sum_result.py

import pickle
from sqlalchemy import text
from orm import engine, connection
import pandas as pd
import networkx as nx
import json
import matplotlib.pyplot as plt
# Prepare data
Firm = pd.read_csv("input_data/input_firm_data/Firm_amended.csv")
Firm['Code'] = Firm['Code'].astype('string')
Firm.fillna(0, inplace=True)
BomNodes = pd.read_csv('input_data/input_product_data/BomNodes.csv', index_col=0)
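# BomNodes.csv appears to be the BOM (bill-of-materials) node reference table
# (an assumption based on the file path); it is loaded here but not referenced
# again in this section of the script.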
# SQL query
with open('SQL_analysis_risk.sql', 'r') as f:
    str_sql = text(f.read())
result = pd.read_sql(sql=str_sql, con=connection)
result.to_csv('output_result/risk/count.csv', index=False, encoding='utf-8-sig')
print(result)
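# Columns of `result` that the rest of this script relies on (inferred from the
# usage below, not restated from the SQL itself): `s_id` (simulation sample),
# `id_firm`, `id_product`, and `ts` (the disruption time step).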
# G_bom
plt.rcParams['font.sans-serif'] = 'SimHei'
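# SimHei is a common Chinese sans-serif font; setting it lets any CJK characters
# in labels render correctly (harmless for the ASCII labels used below).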
exp_id = 1
G_bom_df = pd.read_sql(
    sql=text(f'select g_bom from iiabmdb.without_exp_experiment where id = {exp_id};'),
    con=connection
)
if G_bom_df.empty:
    raise ValueError(f"No g_bom found for exp_id = {exp_id}")
G_bom_str = G_bom_df['g_bom'].tolist()[0]
if G_bom_str is None:
    raise ValueError(f"g_bom data is None for exp_id = {exp_id}")
G_bom = nx.adjacency_graph(json.loads(G_bom_str))
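# `g_bom` is stored as an adjacency-format JSON string (the counterpart of
# nx.adjacency_data), roughly of the form
#   {"directed": ..., "multigraph": ..., "graph": ...,
#    "nodes": [{"id": 0, ...}, ...], "adjacency": [[{"id": 1}, ...], ...]}
# which nx.adjacency_graph turns back into a graph object.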
pos = nx.nx_agraph.graphviz_layout(G_bom, prog="twopi", args="")
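# graphviz_layout requires Graphviz and pygraphviz to be installed; if they are
# unavailable, a pure-Python fallback (with a different layout quality) would be e.g.
#   pos = nx.spring_layout(G_bom, seed=0)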
node_labels = nx.get_node_attributes(G_bom, 'Name')
node_labels = {
7: 'Si Raw Mtl.',
8: 'Photoresist & Reagents',
9: 'Etch Solution',
10: 'SiF4',
11: 'Developer',
12: 'PCE Superplasticizer',
13: 'Metal Protectant',
14: 'Deep Hole Cu Plating',
15: 'Thinner',
16: 'HP Boric Acid (Nuc.)',
17: 'E-Grade Epoxy',
18: 'Stripper',
19: 'HP-MOC',
20: 'CMP Slurry & Consumables',
21: 'PR Remover',
22: 'Poly-Si Cutting Fluid',
23: 'Passivation',
24: 'E-Grade Phenolic',
25: 'Surfactant',
26: 'Mag. Carrier',
27: 'Wet Chems.',
28: 'Plating Chems.',
29: 'E-FR Materials',
30: 'LC Alignment Agent',
31: 'Func. Wet Chems.',
32: 'InP',
33: 'SiC',
34: 'GaAs',
35: 'GaN',
36: 'AlN',
37: 'Si3N4',
38: 'SiC Substrate',
39: 'GaN Substrate',
40: 'Si Wafer',
41: 'AlN Substrate',
42: 'DUV LED Substrate',
43: 'InP Substrate',
44: 'Mono-Si Wafer',
45: 'Poly-Si Wafer',
46: 'InP Cryst./Wafer',
47: 'SiC Cryst./Wafer',
48: 'GaAs Wafer',
49: 'GaN Cryst./Wafer',
50: 'Si Epi Wafer',
51: 'SiC Epi Wafer',
52: 'AlN Epi',
53: 'GaN Epi',
54: 'InP Epi',
55: 'LED Epi',
56: 'EDA/IP',
57: 'MPW Service',
58: 'IC Design',
59: 'Track System',
60: 'Wafer Grinder',
61: 'Etcher',
62: 'Ox/Diff Furnace',
63: 'Wafer Metrology',
64: 'Crystal Grower',
65: 'CMP Tool',
66: 'Stepper',
67: 'Wafer Dicer',
68: 'Deposition System',
69: 'Edge Profiler',
70: 'Descum Tool',
71: 'Clean System',
72: 'SAF',
73: 'Plating Eqpt.',
74: 'Implanter',
75: 'Trim/Form',
76: 'Probe Card',
77: 'ATE',
78: 'PCM Eqpt.',
79: 'Inspection Sys.',
80: 'Prober',
81: 'Dicing Saw',
82: 'Handler',
83: 'Backgrinder',
84: 'Die Bonder',
85: 'Reflow Oven',
86: 'FT Tester',
87: 'Wire Bonder',
88: 'BGA Mounter',
89: 'Molding Press',
90: 'Power Devices',
91: 'Diode',
92: 'Transistor',
93: 'Thyristor',
94: 'Rectifier',
95: 'IC Fab',
96: 'IC PKG',
97: 'DV',
98: 'IPM',
99: 'CP Test',
100: 'FT Test',
101: 'Bumping',
102: 'DA Materials',
103: 'Leadframe',
104: 'Solder Ball',
105: 'Substrate',
106: 'EMC',
107: 'Bond Wire',
108: 'Underfill',
109: 'Dicing Tape'
}
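# Only the node ids listed above receive a text label: draw_networkx_labels with
# an explicit `labels` dict draws labels solely for the keys it contains.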
plt.figure(figsize=(12, 12), dpi=500)
plt.axis('off')  # hide the axis frame
# Node drawing parameters
nx.draw_networkx_nodes(
    G_bom, pos,
    node_size=100,   # tuned node size
    linewidths=0.0   # no node border
)
# Edge drawing parameters
nx.draw_networkx_edges(
    G_bom, pos,
    width=0.3,   # thinner edges
    alpha=0.5    # semi-transparent edges
)
# Label parameters
nx.draw_networkx_labels(
    G_bom, pos,
    labels=node_labels,
    font_size=3,               # slightly larger font size
    font_family='sans-serif',  # sans-serif font
    font_weight='bold',        # bold for readability
)
# Publication-quality save settings
plt.savefig(
    f"output_result/risk/g_bom_exp_id_{exp_id}.png",
    bbox_inches='tight',  # trim surrounding whitespace
    pad_inches=0.1,       # small padding
    facecolor='white'     # pure white background
)
plt.close()
# G_firm
plt.rcParams['font.sans-serif'] = 'SimHei'
sample_id = 1
# G_firm_df = pd.read_sql(
# sql=text(f'select g_firm from iiabmdb.without_exp_sample where id = {sample_id};'),
# con=connection
# )
#
# if G_firm_df.empty:
# raise ValueError(f"No g_firm found for sample_id = {sample_id}")
#
# G_firm_str = G_firm_df['g_firm'].tolist()[0]
# if G_firm_str is None:
# raise ValueError(f"g_firm data is None for sample_id = {sample_id}")
#
# G_firm = nx.adjacency_graph(json.loads(G_firm_str))
with open("firm_network.pkl", 'rb') as f:
G_firm = pickle.load(f)
print(f"Successfully loaded cached data from firm_network.pkl")
# 1. 移除孤立节点
isolated_nodes = list(nx.isolates(G_firm)) # 找出所有没有连接的孤立节点
G_firm.remove_nodes_from(isolated_nodes) # 从图中移除这些节点
# 2. 重新布局和绘图
pos = nx.nx_agraph.graphviz_layout(G_firm, prog="twopi", args="")
node_label = {key: key for key in nx.get_node_attributes(G_firm, 'Revenue_Log').keys()}
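# The auto-generated identity mapping above is immediately overridden by the
# hand-curated dict below, which relabels raw firm codes with compact display
# indices for the plot.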
node_label = {
"7": "1",
"9": "2",
"829768": "4",
"863079": "5",
"1452048": "6",
"2010673": "7",
"2624175": "8",
"2728939": "9",
"5278074": "10",
"5849940": "11",
"7299120": "12",
"9746245": "13",
"11807506": "14",
"15613202": "15",
"24284343": "19",
"24673506": "20",
"25036634": "21",
"25685135": "24",
"25945288": "25",
"26162741": "26",
"26516263": "27",
"27075840": "28",
"27731896": "29",
"29954548": "30",
"43407343": "33",
"70634828": "36",
"71271700": "37",
"80158773": "39",
"118882692": "40",
"145511905": "42",
"151606446": "43",
"152008168": "44",
"159511306": "45",
"191912252": "46",
"194210021": "47",
"203314437": "48",
"213386023": "49",
"218633337": "50",
"251189644": "53",
"271860868": "55",
"278221281": "56",
"301209792": "57",
"343012684": "59",
"354897041": "60",
"400488703": "62",
"400692942": "63",
"413274977": "64",
"420984285": "65",
"448033045": "66",
"453289520": "67",
"474279224": "68",
"483081978": "69",
"495782506": "70",
"503176785": "73",
"549184982": "75",
"560866402": "76",
"561545339": "77",
"571058167": "78",
"581407487": "79",
"591452402": "80",
"593312758": "81",
"594378026": "82",
"607512171": "83",
"615763365": "84",
"620220747": "85",
"631449822": "86",
"644292599": "87",
"653528340": "88",
"654825436": "89",
"688155470": "92",
"695995052": "93",
"750610681": "95",
"762985858": "96",
"771821595": "97",
"857978527": "100",
"868012326": "101",
"887840774": "102",
"888356483": "103",
"888395016": "104",
"888478182": "105",
"930767828": "107",
"996174506": "108",
"1033972427": "110",
"1128343125": "111",
"1217957486": "113",
"1307012237": "115",
"1375606900": "116",
"1549474227": "118",
"1606833003": "120",
"1679596339": "121",
"2310825263": "122",
"2311838590": "124",
"2312490120": "125",
"2316990095": "128",
"2317245827": "129",
"2317841563": "131",
"2320102626": "132",
"2320475044": "133",
"2321109759": "134",
"2324787028": "137",
"2324844174": "138",
"2326478786": "139",
"2327031723": "140",
"2327979389": "141",
"2329375731": "142",
"2333843479": "143",
"2337952436": "146",
"2339188563": "147",
"2339684065": "148",
"2341555098": "149",
"2343704209": "150",
"2348941764": "151",
"2352036411": "155",
"2354145351": "157",
"2424229017": "159",
"2545430247": "161",
"2820140348": "163",
"2944892892": "165",
"3025036704": "168",
"3026382513": "169",
"3045721313": "171",
"3047163873": "172",
"3048263744": "173",
"3069206426": "174",
"3070859372": "175",
"3072715478": "176",
"3103797386": "177",
"3111033905": "178",
"3113895788": "179",
"3120341363": "180",
"3122923980": "181",
"3127420424": "182",
"3133307899": "183",
"3147511625": "184",
"3177507356": "185",
"3188903709": "186",
"3195502499": "187",
"3203777710": "188",
"3211956484": "189",
"3215814536": "190",
"3221190269": "191",
"3226664625": "192",
"3267688490": "193",
"3269039233": "194",
"3269940677": "195",
"3271705843": "196",
"3299144127": "197",
"3312358902": "198",
"3344297292": "200",
"3372913783": "201",
"3373311444": "202",
"3384021594": "203",
"3395900897": "205",
"3398677646": "206",
"3407754893": "207",
"3433628561": "209",
"3445244192": "212",
"3445928818": "213",
"4208851809": "216",
"5007015990": "218",
"11164476478": "219",
"517717050": "223",
"737770776": "224",
"872394725": "225",
"2311581270": "226",
"2313209417": "227",
"2347013470": "228",
"2350418059": "229",
"3031009366": "234",
"3089095447": "235",
"3100891962": "236",
"3188352290": "238",
"3288105727": "239",
"3462551351": "240"
}
node_size = [value * 5 for value in nx.get_node_attributes(G_firm, 'Revenue_Log').values()]
edge_label = {(n1, n2): label for (n1, n2, _), label in nx.get_edge_attributes(G_firm, "Product").items()}
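# nx.get_edge_attributes on a multigraph returns keys of the form (u, v, key);
# the comprehension above unpacks and drops the multi-edge key, so parallel
# edges between the same firms collapse to a single "Product" label.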
plt.figure(figsize=(15, 15), dpi=500)
plt.axis('off')  # turn the axes off entirely
# Draw the network components layer by layer
nodes = nx.draw_networkx_nodes(
    G_firm, pos,
    node_size=node_size,  # keep the original size settings
)
edges = nx.draw_networkx_edges(
    G_firm, pos,
    width=0.3,  # keep the original line width
)
# Node labels
labels = nx.draw_networkx_labels(
    G_firm, pos,
    labels=node_label,
    font_size=6,  # keep the original font size
)
# Edge labels, kept small for readability
edge_labels = nx.draw_networkx_edge_labels(
    G_firm, pos,
    edge_labels=edge_label,
    font_size=2,
    label_pos=0.5,  # label position along the edge
    rotate=False,   # do not rotate labels
)
# Publication-quality output settings
plt.savefig(
    f"output_result/risk/g_firm_sample_id_{sample_id}_de.png",
    bbox_inches='tight',
    pad_inches=0.05,    # tighter margins
    facecolor='white',  # force a white background
    metadata={
        'Title': f"Supply Chain Risk Map - Sample {sample_id}",
        'Author': 'USTB Risk Analytics',
        'Copyright': 'Confidential'
    }
)
plt.close()
# Count firm product
count_firm_prod = result.value_counts(subset=['id_firm', 'id_product'])
count_firm_prod.name = 'count'
count_firm_prod = count_firm_prod.to_frame().reset_index()
count_firm_prod.to_csv('output_result/risk/count_firm_prod.csv', index=False, encoding='utf-8-sig')
print(count_firm_prod)
# Count firm
count_firm = count_firm_prod.groupby('id_firm')['count'].sum()
count_firm = count_firm.to_frame().reset_index()
count_firm.sort_values('count', inplace=True, ascending=False)
count_firm.to_csv('output_result/risk/count_firm.csv', index=False, encoding='utf-8-sig')
print(count_firm)
# Count product
count_prod = count_firm_prod.groupby('id_product')['count'].sum()
count_prod = count_prod.to_frame().reset_index()
count_prod.sort_values('count', inplace=True, ascending=False)
count_prod.to_csv('output_result/risk/count_prod.csv', index=False, encoding='utf-8-sig')
print(count_prod)
# DCP: disruption causing probability
result_disrupt_ts_above_0 = result[result['ts'] > 0]
print(result_disrupt_ts_above_0)
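# Note: `result_disrupt_ts_above_0` is only printed for inspection; the pair
# construction below iterates over the full `result` frame.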
result_dcp = pd.DataFrame(columns=[
    's_id', 'up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product'
])
result_dcp_list = []  # collect rows in a list to avoid growing the DataFrame row by row
for sid, group in result.groupby('s_id'):
    ts_start = max(group['ts'])
    while ts_start >= 1:
        ts_end = ts_start - 1
        while ts_end >= 0:
            up = group.loc[group['ts'] == ts_end, ['id_firm', 'id_product']]
            down = group.loc[group['ts'] == ts_start, ['id_firm', 'id_product']]
            for _, up_row in up.iterrows():
                for _, down_row in down.iterrows():
                    result_dcp_list.append([sid] + up_row.tolist() + down_row.tolist())
            ts_end -= 1
        ts_start -= 1
# Convert the collected rows to a DataFrame
result_dcp = pd.DataFrame(result_dcp_list, columns=[
    's_id', 'up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product'
])
# Tally the (upstream, downstream) pairs
count_dcp = result_dcp.value_counts(
    subset=['up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product']
).reset_index(name='count')
# Save to file
count_dcp.to_csv('output_result/risk/count_dcp.csv', index=False, encoding='utf-8-sig')
# Print the result
print(count_dcp)
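# Optional post-processing, a minimal sketch rather than part of the original
# pipeline: convert DCP counts into an empirical probability by normalising with
# the number of distinct simulation samples (assumptions: each `s_id` is one
# sample, and `prob_dcp.csv` is a hypothetical output name).
n_samples = result['s_id'].nunique()
if n_samples > 0:
    count_dcp['prob'] = count_dcp['count'] / n_samples
    count_dcp.to_csv('output_result/risk/prob_dcp.csv', index=False, encoding='utf-8-sig')
    print(count_dcp[['up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product', 'prob']])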