IIabm/AmendFirm_20230216.ipynb

241 lines
1.3 MiB
Plaintext
Raw Normal View History

2023-02-16 16:11:54 +08:00
{
"cells": [
{
"cell_type": "code",
2023-02-18 16:46:38 +08:00
"execution_count": 57,
2023-02-16 16:11:54 +08:00
"metadata": {},
2023-02-18 16:46:38 +08:00
"outputs": [],
2023-02-16 16:11:54 +08:00
"source": [
"import pandas as pd\n",
2023-02-18 16:46:38 +08:00
"import numpy as np\n",
"from sklearn.preprocessing import MinMaxScaler\n",
2023-02-16 16:11:54 +08:00
"import networkx as nx\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.rcParams['font.sans-serif'] = 'SimHei'\n",
"\n",
"# Node attribute table, re-indexed by its 'Code' column.\n",
"BomNodes = pd.read_csv('BomNodes.csv', index_col=0).set_index('Code')\n",
"# Adjacency table used to build the graph; missing cells are replaced by 0.\n",
"BomCateNet = pd.read_csv('BomCateNet.csv', index_col=0).fillna(0)\n",
"\n",
"G = nx.from_pandas_adjacency(BomCateNet, create_using=nx.MultiDiGraph())\n",
"\n",
"# Map every graph node to the dict of its BomNodes row (column name -> value).\n",
"labels_dict = {code: BomNodes.loc[code].to_dict() for code in G.nodes}\n",
2023-02-18 16:46:38 +08:00
"nx.set_node_attributes(G, labels_dict)\n"
2023-02-16 16:11:54 +08:00
]
},
{
"cell_type": "code",
2023-02-18 16:46:38 +08:00
"execution_count": 58,
2023-02-16 16:11:54 +08:00
"metadata": {},
2023-02-16 16:58:02 +08:00
"outputs": [],
2023-02-16 16:11:54 +08:00
"source": [
2023-02-18 16:46:38 +08:00
"# amend\n",
2023-02-16 16:11:54 +08:00
"# Tier 0 holds the nodes with no outgoing edges; every following tier holds the\n",
"# predecessors of the tier before it, and the walk stops at the first empty tier.\n",
"dict_nodes = {0: sorted(node for node in G.nodes() if G.out_degree(node) == 0)}\n",
"level = 1\n",
"while True:\n",
"    nodes = sorted({pred for node in dict_nodes[level - 1] for pred in G.predecessors(node)})\n",
"    if not nodes:\n",
"        break\n",
"    dict_nodes[level] = nodes\n",
"    level += 1\n",
"\n",
"Firm = pd.read_csv(\"Firm.csv\")\n",
2023-02-18 16:46:38 +08:00
"# fillna(inplace=True) on a .loc selection fills a temporary object, so Firm itself\n",
"# is not reliably updated; assign the filled columns back explicitly instead.\n",
"Firm.loc[:, '1':] = Firm.loc[:, '1':].fillna(0)\n",
2023-02-16 16:11:54 +08:00
"Firm_copy = Firm.copy()\n",
"\n",
2023-02-18 16:46:38 +08:00
"# size\n",
"# Firm_copy['Assets_Std_Log'] = Firm_copy['Assets'].map()\n",
"\n",
"\n",
2023-02-16 16:11:54 +08:00
"# Walk every tier after the first one. When a firm has a 1 in all neighbor\n",
"# (successor) columns of a node, set the node's own column to 1 for that firm\n",
"# and reset those neighbor columns to 0.\n",
"for tier, tier_nodes in dict_nodes.items():\n",
"    if tier == 0:\n",
"        continue\n",
"    for node in tier_nodes:\n",
"        successors = list(G.neighbors(node))\n",
"        has_all_successors = (Firm_copy[successors] == 1).all(axis=1)\n",
"        matched_firms = Firm_copy.index[has_all_successors].to_list()\n",
"        if not matched_firms:\n",
"            continue\n",
"        Firm_copy.loc[matched_firms, node] = 1\n",
"        Firm_copy.loc[matched_firms, successors] = 0\n",
"Firm_copy.to_csv('Firm_amended.csv', index=False, encoding='utf-8-sig')"
]
},
{
"cell_type": "code",
2023-02-18 16:46:38 +08:00
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\25759\\AppData\\Local\\Temp\\ipykernel_1648\\3829775229.py:1: RuntimeWarning: divide by zero encountered in log\n",
" np.log(MinMaxScaler().fit_transform(Firm_copy['Assets'].array.reshape(-1,1)).reshape(-1))\n"
]
},
{
"data": {
"text/plain": [
"array([-5.79695865e+00, -1.05315349e+01, nan, nan,\n",
" nan, nan, -9.82489222e+00, nan,\n",
" nan, nan, nan, -8.54791316e+00,\n",
" nan, -7.39462390e+00, -6.49144375e+00, -6.63297162e+00,\n",
" -8.74295074e+00, nan, nan, nan,\n",
" nan, -4.98672129e+00, nan, -7.15116331e+00,\n",
" -8.67997329e+00, -9.03351444e+00, nan, nan,\n",
" -6.41683918e+00, -1.19415961e+00, nan, nan,\n",
" nan, nan, nan, nan,\n",
" -8.92590857e+00, -8.00385163e+00, nan, -5.62126006e+00,\n",
" -6.94229562e+00, -7.35938613e+00, nan, -8.47199108e+00,\n",
" -8.46956851e+00, nan, nan, nan,\n",
" -6.27245916e+00, -1.22964831e+01, nan, nan,\n",
" nan, -7.09439697e+00, nan, nan,\n",
" -1.07941709e+01, nan, -6.68846593e+00, nan,\n",
" nan, -1.02805224e+01, nan, nan,\n",
" nan, -2.89581363e+00, -3.14642534e+00, -2.01442648e+00,\n",
" -8.44633549e+00, -5.33806488e+00, -4.00155152e+00, -7.47168894e+00,\n",
" -5.76243404e+00, -5.11629547e+00, -3.43739566e+00, -2.72060618e+00,\n",
" nan, -2.72740307e+00, nan, -6.10253090e+00,\n",
" -3.40938096e+00, -3.33850521e+00, nan, -4.39675140e+00,\n",
" -2.87207515e+00, -4.89774846e+00, -2.78787497e+00, -4.39833209e+00,\n",
" -3.54839710e+00, -5.20610112e+00, nan, nan,\n",
" -5.80176128e+00, -6.12994085e+00, -4.03249194e+00, -3.55212392e+00,\n",
" -3.02365487e+00, -2.31068645e+00, -1.79283273e+00, -2.61379988e+00,\n",
" -5.43652599e+00, -3.64625906e+00, -1.56173681e+00, -4.99005631e+00,\n",
" -4.36943191e+00, -2.47700081e+00, -1.11022302e-16, -7.98515146e+00,\n",
" -3.51829256e+00, -6.65807194e+00, nan, -8.62279785e+00,\n",
" -1.04760385e+01, nan, nan, -3.94669845e+00,\n",
" -8.74978915e+00, nan, nan, -7.23791464e+00,\n",
" nan, nan, -8.61078329e+00, -8.64809500e+00,\n",
" -4.14946141e+00, nan, nan, nan,\n",
" -8.23878056e+00, -8.35536371e+00, -7.06209095e+00, nan,\n",
" -8.37466693e+00, -7.72472197e+00, -1.07697369e+01, -8.14581821e+00,\n",
" -3.57151721e+00, -1.10235174e+01, -8.85687933e+00, -1.00163709e+01,\n",
" -8.95076486e+00, nan, -7.15287212e+00, nan,\n",
" -9.55820652e+00, nan, -1.14908579e+01, -7.28574198e+00,\n",
" -2.07280506e+00, nan, -8.89899629e+00, -inf,\n",
" -7.58795439e+00, nan, nan, -3.85259181e+00,\n",
" nan, nan, -1.08449064e+01, -4.41480330e+00,\n",
" -7.22166822e+00, nan, nan, nan,\n",
" nan, nan, nan, -5.17433354e+00,\n",
" -7.21006323e+00, -4.09344850e+00])"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Min-max scale the 'Assets' column, then take the natural log of the scaled values.\n",
"assets_column = Firm_copy['Assets'].array.reshape(-1, 1)\n",
"assets_scaled = MinMaxScaler().fit_transform(assets_column).reshape(-1)\n",
"# NOTE(review): the smallest value scales to 0, so its log is -inf (see the stored\n",
"# RuntimeWarning), and the stored output also contains nan entries - confirm intended.\n",
"np.log(assets_scaled)\n"
]
},
{
"cell_type": "code",
"execution_count": 60,
2023-02-16 16:11:54 +08:00
"metadata": {},
"outputs": [
{
"data": {
2023-02-18 16:46:38 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAACyIAAAsQCAYAAABS037aAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAC4jAAAuIwF4pT92AAEAAElEQVR4nOzdd5idZZk/8O85Z3oyyaR3Oop0kKaIiB17QUXsbdfd365r36qr21RwbausfVHUVQyiEaQoICAQEIGEAAk1IT2TTKZl+jnn98eEgSEBOSsQyudzXXNdc973fZ7nfss5XE6+57ZQrVarAQAAAAAAAAAAAACoQXFXFwAAAAAAAAAAAAAAPPEIIgMAAAAAAAAAAAAANRNEBgAAAAAAAAAAAABqJogMAAAAAAAAAAAAANRMEBkAAAAAAAAAAAAAqJkgMgAAAAAAAAAAAABQM0FkAAAAAAAAAAAAAKBmgsgAAAAAAAAAAAAAQM0EkQEAAAAAAAAAAACAmgkiAwAAAAAAAAAAAAA1E0QGAAAAAAAAAAAAAGomiAwAAAAAAAAAAAAA1EwQGQAAAAAAAAAAAAComSAyAAAAAAAAAAAAAFAzQWQAAAAAAAAAAAAAoGaCyAAAAAAAAAAAAABAzQSRAQAAAAAAAAAAAICaCSIDAAAAAAAAAAAAADUTRAYAAAAAAAAAAAAAaiaIDAAAAAAAAAAAAADUTBAZAAAAAAAAAAAAAKiZIDIAAAAAAAAAAAAAUDNBZAAAAAAAAAAAAACgZoLIAAAAAAAAAAAAAEDNBJEBAAAAAAAAAAAAgJoJIgMAAAAAAAAAAAAANRNEBgAAAAAAAAAAAABqVvdYL9jZ2ZnLLrts7PWCBQvS2Nj4WJcBAAAAAAAAAAAAAE9og4ODWb169djr448/Pm1tbY/Z+o95EPmyyy7La17zmsd6WQAAAAAAAAAAAAB4Uvv5z3+eV7/61Y/ZesXHbCUAAAAAAAAAAAAA4ElDEBkAAAAAAAAAAAAAqFndY73gggULxr3++c9/nn322eexLgMAAAAAAAAAAAAAntDuuOOOvOY1rxl7/cCc7qPtMQ8iNzY2jnu9zz775IADDnisywAAAAAAAAAAAACAJ5UH5nQfbcXHdDUAAAAAAAAAAAAA4ElBEBkAAAAAAAAAAAAAqJkgMgAAAAAAAAAAAABQM0FkAAAAAAAAAAAAAKBmgsgAAAAAAAAAAAAAQM0EkQEAAAAAAAAAAACAmgkiAwAAAAAAAAAAAAA1E0QGAAAAAAAAAAAAAGomiAwAAAAAAAAAAAAA1EwQGQAAAAAAAAAAAAComSAyAAAAAAAAAAAAAFAzQWQAAAAAAAAAAAAAoGaCyAAAAAAAAAAAAABAzQSRAQAAAAAAAAAAAICaCSIDAAAAAAAAAAAAADUTRAYAAAAAAAAAAAAAaiaIDAAAAAAAAAAAAADUTBAZAAAAAAAAAAAAAKiZIDIAAAAAAAAAAAAAUDNBZAAAAAAAAAAAAACgZoLIAAAAAAAAAAAAAEDNBJEBAAAAAAAAAAAAgJoJIgMAAAAAAAAAAAAANRNEBgAAAAAAAAAAAABqJogMAAAAAAAAAAAAANRMEBkAAAAAAAAAAAAAqJkgMgAAAAAAAAAAAABQM0FkAAAAAAAAAAAAAKBmgsgAAAAAAAAAAAAAQM0EkQEAAAAAAAAAAACAmgkiAwAAAAAAAAAAAAA1E0QGAAAAAAAAAAAAAGomiAwAAAAAAAAAAAAA1EwQGQAAAAAAAAAAAAComSAyAAAAAAAAAAAAAFAzQWQAAAAAAAAAAAAAoGaCyAAAAAAAAAAAAABAzQSRAQAAAAAAAAAAAICaCSIDAAAAAAAAAAAAADUTRAYAAAAAAAAAAAAAaiaIDAAAAAAAAAAAAADUTBAZAAAAAAAAAAAAAKiZIDIAAAAAAAAAAAAAUDNBZAAAAAAAAAAAAACgZoLIAAAAAAAAAAAAAEDNBJEBAAAAAA
AAAAAAgJoJIgMAAAAAAAAAAAAANRNEBgAAAAAAAAAAAABqJogMAAAAAAAAAAAAANRMEBkAAAAAAAAAAAAAqJkgMgAAAAAAAAAAAABQM0FkAAAAAAAAAAAAAKBmgsgAAAAAAAAAAAAAQM0EkQEAAAAAAAAAAACAmgkiAwAAAAAAAAAAAAA1E0QGAAAAAAAAAAAAAGomiAwAAAAAAAAAAAAA1EwQGQAAAAAAAAAAAAComSAyAAAAAAAAAAAAAFAzQWQAAAAAAAAAAAAAoGaCyAAAAAAAAAAAAABAzQSRAQAAAAAAAAAAAICaCSIDAAAAAAAAAAAAADUTRAYAAAAAAAAAAAAAaiaIDAAAAAAAAAAAAADUTBAZAAAAAAAAAAAAAKiZIDIAAAAAAAAAAAAAUDNBZAAAAAAAAAAAAACgZoLIAAAAAAAAAAAAAEDNBJEBAAAAAAAAAAAAgJoJIgMAAAAAAAAAAAAANRNEBgAAAAAAAAAAAABqJogMAAAAAAAAAAAAANRMEBkAAAAAAAAAAAAAqJkgMgAAAAAAAAAAAABQM0FkAAAAAAAAAAAAAKBmgsgAAAAAAAAAAAAAQM0EkQEAAAAAAAAAAACAmgkiAwAAAAAAAAAAAAA1E0QGAAAAAAAAAAAAAGomiAwAAAAAAAAAAAAA1EwQGQAAAAAAAAAAAAComSAyAAAAAAAAAAAAAFAzQWQAAAAAAAAAAAAAoGaCyAAAAAAAAAAAAABAzQSRAQAAAAAAAAAAAICaCSIDAAAAAAAAAAAAADUTRAYAAAAAAAAAAAAAaiaIDAAAAAAAAAAAAADUTBAZAAAAAAAAAAAAAKiZIDIAAAAAAAAAAAAAUDNBZAAAAAAAAAAAAACgZoLIAAAAAAAAAAAAAEDNBJEBAAAAAAAAAAAAgJoJIgMAAAAAAAAAAAAANRNEBgAAAAAAAAAAAABqJogMAAAAAAAAAAAAANRMEBkAAAAAAAAAAAAAqJkgMgAAAAAAAAAAAABQM0FkAAAAAAAAAAAAAKBmgsgAAAAAAAAAAAAAQM0EkQEAAAAAAAAAAACAmgkiAwAAAAAAAAAAAAA1E0QGAAAAAAAAAAAAAGomiAwAAAAAAAAAAAAA1EwQGQAAAAAAAAAAAAComSAyAAAAAAAAAAAAAFAzQWQAAAAAAAAAAAAAoGaCyAAAAAAAAAAAAABAzQSRAQAAAAAAAAAAAICaCSIDAAAAAAAAAAAAADUTRAYAAAAAAAAAAAAAaiaIDAAAAAAAAAAAAADUTBAZAAAAAAAAAAAAAKiZIDIAAAAAAAAAAAAAUDNBZAAAAAAAAAAAAACgZoLIAAAAAAAAAAAAAEDNBJEBAAAAAAAAAAAAgJoJIgMAAAAAAAAAAAAANRNEBgAAAAAAAAAAAABqJogMAAAAAAAAAAAAANRMEBkAAAAAAAAAAAAAqJkgMgAAAAAAAAAAAABQM0FkAAAAAAAAAAAAAKBmgsgAAAAAAAAAAAAAQM0EkQEAAAAAAAAAAACAmgkiAwAAAAAAAAAAAAA1E0QGAAAAAAAAAAAAAGomiAwAAAAAAAAAAAAA1EwQGQAAAAAAAAAAAAComSAyAAAAAAAAAAAAAFAzQWQAAAAAAAAAAAAAoGaCyAAAAAAAAAAAAABAzQSRAQAAAAAAAAAAAICaCSIDAAAAAAAAAAAAADUTRAYAAAAAAAAAAAAAaiaIDAAAAAAAAAAAAADUTBAZAAAAAAAAAAAAAKiZIDIAAAAAAAAAAAAAUDNBZAAAAAAAAAAAAACgZoLIAAAAAAAAAAAAAEDNBJEBAAAAAAAAAAAAgJoJIgMAAAAAAAAAAAAANRNEBgAAAAAAAAAAAABqJogMAAAAAAAAAAAAANRMEBkAAAAAAAAAAAAAqJkgMgAAAAAAAAAAAABQM0FkAAAAAAAAAAAAAKBmgsgAAAAAAAAAAAAAQM0EkQEAAAAAAAAAAACAmgkiAwAAAAAAAAAAAAA1E0QGAAAAAAAAAA
AAAGomiAwAAAAAAAAAAAAA1EwQGQAAAAAAAAAAAAComSAyAAAAAAAAAAAAAFAzQWQAAAAAAAAAAAAAoGaCyAAAAAA
2023-02-16 16:11:54 +08:00
"text/plain": [
2023-02-16 16:58:02 +08:00
"<Figure size 3600x3600 with 1 Axes>"
2023-02-16 16:11:54 +08:00
]
},
"metadata": {},
2023-02-16 16:58:02 +08:00
"output_type": "display_data"
2023-02-16 16:11:54 +08:00
}
],
"source": [
2023-02-18 16:46:38 +08:00
"# visualization\n",
2023-02-16 16:58:02 +08:00
"# For each graph node, count the firms whose column for that node equals 1,\n",
"# then store the count on the node as the 'Num_Firm' attribute.\n",
"firm_num_dict = {node: sum(Firm_copy[node] == 1) for node in nx.nodes(G)}\n",
"nx.set_node_attributes(G, firm_num_dict, name=\"Num_Firm\")\n",
"\n",
"pos = nx.nx_agraph.graphviz_layout(G, prog=\"twopi\", args=\"\")\n",
2023-02-18 16:46:38 +08:00
"dict_num_firm = nx.get_node_attributes(G, 'Num_Firm')\n",
"dict_node_name = nx.get_node_attributes(G, 'Name')\n",
"# Label text per node: node id, its 'Name' attribute and its firm count, space-separated.\n",
"node_labels = {\n",
"    node: f\"{node} {dict_node_name[node]!s} {dict_num_firm[node]!s}\"\n",
"    for node in nx.nodes(G)\n",
"}\n",
2023-02-16 16:58:02 +08:00
"plt.figure(figsize=(12, 12), dpi=300)\n",
"nx.draw_networkx_nodes(G, pos)\n",
"nx.draw_networkx_edges(G, pos)\n",
2023-02-18 16:46:38 +08:00
"nx.draw_networkx_labels(G, pos, labels = node_labels, font_size=4)\n",
2023-02-16 16:58:02 +08:00
"plt.show()"
2023-02-16 16:11:54 +08:00
]
},
{
"cell_type": "code",
2023-02-18 16:46:38 +08:00
"execution_count": 64,
2023-02-16 16:11:54 +08:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2023-02-18 16:46:38 +08:00
"0 True\n",
"1 True\n",
"2 True\n",
"3 True\n",
"4 True\n",
" ... \n",
"165 True\n",
"166 True\n",
"167 True\n",
"168 True\n",
"169 True\n",
"Name: Num_Employ, Length: 170, dtype: bool"
2023-02-16 16:11:54 +08:00
]
},
2023-02-18 16:46:38 +08:00
"execution_count": 64,
2023-02-16 16:11:54 +08:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2023-02-18 16:46:38 +08:00
"import seaborn as sns\n",
"# Keep only the 'Num_Employ' values that are strictly positive.\n",
"data = Firm_copy.loc[Firm_copy['Num_Employ'] > 0, 'Num_Employ']\n",
"data"
2023-02-16 16:11:54 +08:00
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "bcdafc093860683ffb58d6956591562b7f8ed5d58147d17d71a5d4d6605a08df"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}