# mesa/risk_analysis_sum_result.py

import pickle
from sqlalchemy import text
from orm import engine, connection
import pandas as pd
import networkx as nx
import json
import matplotlib.pyplot as plt
# Prepare data
Firm = pd.read_csv("input_data/input_firm_data/Firm_amended.csv")
Firm['Code'] = Firm['Code'].astype('string')
Firm.fillna(0, inplace=True)
BomNodes = pd.read_csv('input_data/input_product_data/BomNodes.csv', index_col=0)
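# BomNodes.csv appears to be the BOM (bill-of-materials) node reference table
# (an assumption based on the file path); it is loaded here but not referenced
# again in this section of the script.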
# SQL query
with open('SQL_analysis_risk.sql', 'r') as f:
    str_sql = text(f.read())
result = pd.read_sql(sql=str_sql, con=connection)
result.to_csv('output_result/risk/count.csv', index=False, encoding='utf-8-sig')
print(result)
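# Columns of `result` that the rest of this script relies on (inferred from the
# usage below, not restated from the SQL itself): `s_id` (simulation sample),
# `id_firm`, `id_product`, and `ts` (the disruption time step).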
# G_bom
plt.rcParams['font.sans-serif'] = 'SimHei'
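# SimHei is a common Chinese sans-serif font; setting it lets any CJK characters
# in labels render correctly (harmless for the ASCII labels used below).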
exp_id = 1
G_bom_df = pd.read_sql(
    sql=text(f'select g_bom from iiabmdb.without_exp_experiment where id = {exp_id};'),
    con=connection
)
if G_bom_df.empty:
    raise ValueError(f"No g_bom found for exp_id = {exp_id}")
G_bom_str = G_bom_df['g_bom'].tolist()[0]
if G_bom_str is None:
    raise ValueError(f"g_bom data is None for exp_id = {exp_id}")
G_bom = nx.adjacency_graph(json.loads(G_bom_str))
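# `g_bom` is stored as an adjacency-format JSON string (the counterpart of
# nx.adjacency_data), roughly of the form
#   {"directed": ..., "multigraph": ..., "graph": ...,
#    "nodes": [{"id": 0, ...}, ...], "adjacency": [[{"id": 1}, ...], ...]}
# which nx.adjacency_graph turns back into a graph object.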
pos = nx.nx_agraph.graphviz_layout(G_bom, prog="twopi", args="")
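# graphviz_layout requires Graphviz and pygraphviz to be installed; if they are
# unavailable, a pure-Python fallback (with a different layout quality) would be e.g.
#   pos = nx.spring_layout(G_bom, seed=0)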
node_labels = nx.get_node_attributes(G_bom, 'Name')
node_labels = {
7: 'Si Raw Mtl.',
8: 'Photoresist & Reagents',
9: 'Etch Solution',
10: 'SiF4',
11: 'Developer',
12: 'PCE Superplasticizer',
13: 'Metal Protectant',
14: 'Deep Hole Cu Plating',
15: 'Thinner',
16: 'HP Boric Acid (Nuc.)',
17: 'E-Grade Epoxy',
18: 'Stripper',
19: 'HP-MOC',
20: 'CMP Slurry & Consumables',
21: 'PR Remover',
22: 'Poly-Si Cutting Fluid',
23: 'Passivation',
24: 'E-Grade Phenolic',
25: 'Surfactant',
26: 'Mag. Carrier',
27: 'Wet Chems.',
28: 'Plating Chems.',
29: 'E-FR Materials',
30: 'LC Alignment Agent',
31: 'Func. Wet Chems.',
32: 'InP',
33: 'SiC',
34: 'GaAs',
35: 'GaN',
36: 'AlN',
37: 'Si3N4',
38: 'SiC Substrate',
39: 'GaN Substrate',
40: 'Si Wafer',
41: 'AlN Substrate',
42: 'DUV LED Substrate',
43: 'InP Substrate',
44: 'Mono-Si Wafer',
45: 'Poly-Si Wafer',
46: 'InP Cryst./Wafer',
47: 'SiC Cryst./Wafer',
48: 'GaAs Wafer',
49: 'GaN Cryst./Wafer',
50: 'Si Epi Wafer',
51: 'SiC Epi Wafer',
52: 'AlN Epi',
53: 'GaN Epi',
54: 'InP Epi',
55: 'LED Epi',
56: 'EDA/IP',
57: 'MPW Service',
58: 'IC Design',
59: 'Track System',
60: 'Wafer Grinder',
61: 'Etcher',
62: 'Ox/Diff Furnace',
63: 'Wafer Metrology',
64: 'Crystal Grower',
65: 'CMP Tool',
66: 'Stepper',
67: 'Wafer Dicer',
68: 'Deposition System',
69: 'Edge Profiler',
70: 'Descum Tool',
71: 'Clean System',
72: 'SAF',
73: 'Plating Eqpt.',
74: 'Implanter',
75: 'Trim/Form',
76: 'Probe Card',
77: 'ATE',
78: 'PCM Eqpt.',
79: 'Inspection Sys.',
80: 'Prober',
81: 'Dicing Saw',
82: 'Handler',
83: 'Backgrinder',
84: 'Die Bonder',
85: 'Reflow Oven',
86: 'FT Tester',
87: 'Wire Bonder',
88: 'BGA Mounter',
89: 'Molding Press',
90: 'Power Devices',
91: 'Diode',
92: 'Transistor',
93: 'Thyristor',
94: 'Rectifier',
95: 'IC Fab',
96: 'IC PKG',
97: 'DV',
98: 'IPM',
99: 'CP Test',
100: 'FT Test',
101: 'Bumping',
102: 'DA Materials',
103: 'Leadframe',
104: 'Solder Ball',
105: 'Substrate',
106: 'EMC',
107: 'Bond Wire',
108: 'Underfill',
109: 'Dicing Tape'
}
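# Only the node ids listed above receive a text label: draw_networkx_labels with
# an explicit `labels` dict draws labels solely for the keys it contains.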
plt.figure(figsize=(12, 12), dpi=500)
plt.axis('off')  # hide the axis frame
# Node drawing parameters
nx.draw_networkx_nodes(
    G_bom, pos,
    node_size=100,   # tuned node size
    linewidths=0.0   # no node border
)
# Edge drawing parameters
nx.draw_networkx_edges(
    G_bom, pos,
    width=0.3,   # thinner edges
    alpha=0.5    # semi-transparent edges
)
# Label parameters
nx.draw_networkx_labels(
    G_bom, pos,
    labels=node_labels,
    font_size=3,               # slightly larger font size
    font_family='sans-serif',  # sans-serif font
    font_weight='bold',        # bold for readability
)
# Publication-quality save settings
plt.savefig(
    f"output_result/risk/g_bom_exp_id_{exp_id}.png",
    bbox_inches='tight',  # trim surrounding whitespace
    pad_inches=0.1,       # small padding
    facecolor='white'     # pure white background
)
plt.close()
# G_firm
plt.rcParams['font.sans-serif'] = 'SimHei'
sample_id = 1
# G_firm_df = pd.read_sql(
# sql=text(f'select g_firm from iiabmdb.without_exp_sample where id = {sample_id};'),
# con=connection
# )
#
# if G_firm_df.empty:
# raise ValueError(f"No g_firm found for sample_id = {sample_id}")
#
# G_firm_str = G_firm_df['g_firm'].tolist()[0]
# if G_firm_str is None:
# raise ValueError(f"g_firm data is None for sample_id = {sample_id}")
#
# G_firm = nx.adjacency_graph(json.loads(G_firm_str))
with open("firm_network.pkl", 'rb') as f:
G_firm = pickle.load(f)
print(f"Successfully loaded cached data from firm_network.pkl")
# 1. 移除孤立节点
isolated_nodes = list(nx.isolates(G_firm)) # 找出所有没有连接的孤立节点
G_firm.remove_nodes_from(isolated_nodes) # 从图中移除这些节点
# 2. 重新布局和绘图
pos = nx.nx_agraph.graphviz_layout(G_firm, prog="twopi", args="")
node_label = {key: key for key in nx.get_node_attributes(G_firm, 'Revenue_Log').keys()}
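# The auto-generated identity mapping above is immediately overridden by the
# hand-curated dict below, which relabels raw firm codes with compact display
# indices for the plot.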
node_label = {
"7": "1",
"9": "2",
"829768": "4",
"863079": "5",
"1452048": "6",
"2010673": "7",
"2624175": "8",
"2728939": "9",
"5278074": "10",
"5849940": "11",
"7299120": "12",
"9746245": "13",
"11807506": "14",
"15613202": "15",
"24284343": "19",
"24673506": "20",
"25036634": "21",
"25685135": "24",
"25945288": "25",
"26162741": "26",
"26516263": "27",
"27075840": "28",
"27731896": "29",
"29954548": "30",
"43407343": "33",
"70634828": "36",
"71271700": "37",
"80158773": "39",
"118882692": "40",
"145511905": "42",
"151606446": "43",
"152008168": "44",
"159511306": "45",
"191912252": "46",
"194210021": "47",
"203314437": "48",
"213386023": "49",
"218633337": "50",
"251189644": "53",
"271860868": "55",
"278221281": "56",
"301209792": "57",
"343012684": "59",
"354897041": "60",
"400488703": "62",
"400692942": "63",
"413274977": "64",
"420984285": "65",
"448033045": "66",
"453289520": "67",
"474279224": "68",
"483081978": "69",
"495782506": "70",
"503176785": "73",
"549184982": "75",
"560866402": "76",
"561545339": "77",
"571058167": "78",
"581407487": "79",
"591452402": "80",
"593312758": "81",
"594378026": "82",
"607512171": "83",
"615763365": "84",
"620220747": "85",
"631449822": "86",
"644292599": "87",
"653528340": "88",
"654825436": "89",
"688155470": "92",
"695995052": "93",
"750610681": "95",
"762985858": "96",
"771821595": "97",
"857978527": "100",
"868012326": "101",
"887840774": "102",
"888356483": "103",
"888395016": "104",
"888478182": "105",
"930767828": "107",
"996174506": "108",
"1033972427": "110",
"1128343125": "111",
"1217957486": "113",
"1307012237": "115",
"1375606900": "116",
"1549474227": "118",
"1606833003": "120",
"1679596339": "121",
"2310825263": "122",
"2311838590": "124",
"2312490120": "125",
"2316990095": "128",
"2317245827": "129",
"2317841563": "131",
"2320102626": "132",
"2320475044": "133",
"2321109759": "134",
"2324787028": "137",
"2324844174": "138",
"2326478786": "139",
"2327031723": "140",
"2327979389": "141",
"2329375731": "142",
"2333843479": "143",
"2337952436": "146",
"2339188563": "147",
"2339684065": "148",
"2341555098": "149",
"2343704209": "150",
"2348941764": "151",
"2352036411": "155",
"2354145351": "157",
"2424229017": "159",
"2545430247": "161",
"2820140348": "163",
"2944892892": "165",
"3025036704": "168",
"3026382513": "169",
"3045721313": "171",
"3047163873": "172",
"3048263744": "173",
"3069206426": "174",
"3070859372": "175",
"3072715478": "176",
"3103797386": "177",
"3111033905": "178",
"3113895788": "179",
"3120341363": "180",
"3122923980": "181",
"3127420424": "182",
"3133307899": "183",
"3147511625": "184",
"3177507356": "185",
"3188903709": "186",
"3195502499": "187",
"3203777710": "188",
"3211956484": "189",
"3215814536": "190",
"3221190269": "191",
"3226664625": "192",
"3267688490": "193",
"3269039233": "194",
"3269940677": "195",
"3271705843": "196",
"3299144127": "197",
"3312358902": "198",
"3344297292": "200",
"3372913783": "201",
"3373311444": "202",
"3384021594": "203",
"3395900897": "205",
"3398677646": "206",
"3407754893": "207",
"3433628561": "209",
"3445244192": "212",
"3445928818": "213",
"4208851809": "216",
"5007015990": "218",
"11164476478": "219",
"517717050": "223",
"737770776": "224",
"872394725": "225",
"2311581270": "226",
"2313209417": "227",
"2347013470": "228",
"2350418059": "229",
"3031009366": "234",
"3089095447": "235",
"3100891962": "236",
"3188352290": "238",
"3288105727": "239",
"3462551351": "240"
}
node_size = [value * 5 for value in nx.get_node_attributes(G_firm, 'Revenue_Log').values()]
edge_label = {(n1, n2): label for (n1, n2, _), label in nx.get_edge_attributes(G_firm, "Product").items()}
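# nx.get_edge_attributes on a multigraph returns keys of the form (u, v, key);
# the comprehension above unpacks and drops the multi-edge key, so parallel
# edges between the same firms collapse to a single "Product" label.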
plt.figure(figsize=(15, 15), dpi=500)
plt.axis('off')  # turn the axes off entirely
# Draw the network components layer by layer
nodes = nx.draw_networkx_nodes(
    G_firm, pos,
    node_size=node_size,  # keep the original size settings
)
edges = nx.draw_networkx_edges(
    G_firm, pos,
    width=0.3,  # keep the original line width
)
# Node labels
labels = nx.draw_networkx_labels(
    G_firm, pos,
    labels=node_label,
    font_size=6,  # keep the original font size
)
# Edge labels, kept small for readability
edge_labels = nx.draw_networkx_edge_labels(
    G_firm, pos,
    edge_labels=edge_label,
    font_size=2,
    label_pos=0.5,  # label position along the edge
    rotate=False,   # do not rotate labels
)
# Publication-quality output settings
plt.savefig(
    f"output_result/risk/g_firm_sample_id_{sample_id}_de.png",
    bbox_inches='tight',
    pad_inches=0.05,    # tighter margins
    facecolor='white',  # force a white background
    metadata={
        'Title': f"Supply Chain Risk Map - Sample {sample_id}",
        'Author': 'USTB Risk Analytics',
        'Copyright': 'Confidential'
    }
)
plt.close()
# Count firm product
count_firm_prod = result.value_counts(subset=['id_firm', 'id_product'])
count_firm_prod.name = 'count'
count_firm_prod = count_firm_prod.to_frame().reset_index()
count_firm_prod.to_csv('output_result/risk/count_firm_prod.csv', index=False, encoding='utf-8-sig')
print(count_firm_prod)
# Count firm
count_firm = count_firm_prod.groupby('id_firm')['count'].sum()
count_firm = count_firm.to_frame().reset_index()
count_firm.sort_values('count', inplace=True, ascending=False)
count_firm.to_csv('output_result/risk/count_firm.csv', index=False, encoding='utf-8-sig')
print(count_firm)
# Count product
count_prod = count_firm_prod.groupby('id_product')['count'].sum()
count_prod = count_prod.to_frame().reset_index()
count_prod.sort_values('count', inplace=True, ascending=False)
count_prod.to_csv('output_result/risk/count_prod.csv', index=False, encoding='utf-8-sig')
print(count_prod)
# DCP: disruption causing probability
result_disrupt_ts_above_0 = result[result['ts'] > 0]
print(result_disrupt_ts_above_0)
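# Note: `result_disrupt_ts_above_0` is only printed for inspection; the pair
# construction below iterates over the full `result` frame.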
result_dcp = pd.DataFrame(columns=[
    's_id', 'up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product'
])
result_dcp_list = []  # collect rows in a list to avoid growing the DataFrame row by row
for sid, group in result.groupby('s_id'):
    ts_start = max(group['ts'])
    while ts_start >= 1:
        ts_end = ts_start - 1
        while ts_end >= 0:
            up = group.loc[group['ts'] == ts_end, ['id_firm', 'id_product']]
            down = group.loc[group['ts'] == ts_start, ['id_firm', 'id_product']]
            for _, up_row in up.iterrows():
                for _, down_row in down.iterrows():
                    result_dcp_list.append([sid] + up_row.tolist() + down_row.tolist())
            ts_end -= 1
        ts_start -= 1
# Convert the collected rows to a DataFrame
result_dcp = pd.DataFrame(result_dcp_list, columns=[
    's_id', 'up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product'
])
# Tally the (upstream, downstream) pairs
count_dcp = result_dcp.value_counts(
    subset=['up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product']
).reset_index(name='count')
# Save to file
count_dcp.to_csv('output_result/risk/count_dcp.csv', index=False, encoding='utf-8-sig')
# Print the result
print(count_dcp)
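# Optional post-processing, a minimal sketch rather than part of the original
# pipeline: convert DCP counts into an empirical probability by normalising with
# the number of distinct simulation samples (assumptions: each `s_id` is one
# sample, and `prob_dcp.csv` is a hypothetical output name).
n_samples = result['s_id'].nunique()
if n_samples > 0:
    count_dcp['prob'] = count_dcp['count'] / n_samples
    count_dcp.to_csv('output_result/risk/prob_dcp.csv', index=False, encoding='utf-8-sig')
    print(count_dcp[['up_id_firm', 'up_id_product', 'down_id_firm', 'down_id_product', 'prob']])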