550 lines
16 KiB
Plaintext
550 lines
16 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"85\n",
|
||
"63\n",
|
||
"51\n",
|
||
"26\n",
|
||
"30\n",
|
||
"4\n",
|
||
"7\n",
|
||
"1\n",
|
||
"17\n",
|
||
"81\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([2, 2])"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"import numpy as np\n",
"\n",
"# Legacy randint works on integer bounds. The former float bounds (0.5, 3.5)\n",
"# were presumably truncated to (0, 3), i.e. draws from {0, 1, 2}.\n",
"# TODO confirm that {0, 1, 2}, and not {1, 2, 3}, is the intended range.\n",
"np.random.randint(0, 3)\n",
"\n",
"# Two-outcome draw: True with probability p_remove, False otherwise.\n",
"p_remove = 0.9\n",
"np.random.choice([True, False], p=[p_remove, 1 - p_remove])\n",
"\n",
"# A seeded Generator yields a repeatable stream; the legacy calls in this\n",
"# cell are unseeded, so their results differ from run to run.\n",
"rng = np.random.default_rng(0)\n",
"for _ in range(10):\n",
"    print(rng.integers(0, 100))\n",
"\n",
"# Two weighted draws; as the last expression this is the value the cell displays.\n",
"np.random.choice([1, 2, 3], 2, p=[0.4, 0.4, 0.2])\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"2"
|
||
]
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Take the given share of the list length, rounded, but never less than one.\n",
"share = 0.8\n",
"list_succ_firms = [1, 1]\n",
"max(1, round(share * len(list_succ_firms)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[0.17307692307692307, 0.19230769230769232, 0.20192307692307693, 0.21153846153846154, 0.22115384615384615]\n",
|
||
"[0.14899116146026878, 0.1819782155490595, 0.20111703154812216, 0.22226869439668717, 0.24564489704586234]\n",
|
||
"[0.10801741721030356, 0.16114305076975205, 0.19682056666851946, 0.2403971829915773, 0.29362178235984765]\n",
|
||
"[0.07643198434626533, 0.13926815562848321, 0.18799234648997357, 0.25376312466637047, 0.34254438886890737]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"import math\n",
"\n",
"size = [18, 20, 21, 22, 23]\n",
"\n",
"# Plain proportional shares: each size divided by the total.\n",
"total_size = sum(size)\n",
"p = [s / total_size for s in size]\n",
"print(p)\n",
"\n",
"# Exponentially weighted shares exp(beta * s) / sum(exp(beta * s)).\n",
"# In the printed rows a larger beta shifts more weight onto the larger sizes.\n",
"for beta in [0.1, 0.2, 0.3]:\n",
"    damp_size = [math.exp(beta * s) for s in size]\n",
"    total_damp = sum(damp_size)  # summed once instead of once per element\n",
"    print([w / total_damp for w in damp_size])\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[0.16666666666666666, 0.5, 0.6666666666666666, 0.8333333333333334, 1.0]\n",
|
||
"[0.8359588020779368, 0.9330329915368074, 0.960264500792218, 0.9819330445619127, 1.0]\n",
|
||
"[0.408248290463863, 0.7071067811865476, 0.816496580927726, 0.9128709291752769, 1.0]\n",
|
||
"[0.23849484685087588, 0.5743491774985174, 0.7229811807984657, 0.8642810744472068, 1.0]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"import math\n",
"\n",
"size = [18, 20, 21, 22, 23]\n",
"\n",
"# Min-max scaling shifted by one, so the smallest size maps to 1/span\n",
"# rather than 0 and the largest maps to 1.\n",
"lo, hi = min(size), max(size)  # computed once instead of once per element\n",
"span = hi - lo + 1\n",
"p = [(s - lo + 1) / span for s in size]\n",
"print(p)\n",
"\n",
"# Raising the scaled values to a power beta < 1 pulls them towards 1;\n",
"# in the printed rows the smallest beta gives the flattest result.\n",
"for beta in [0.1, 0.5, 0.8]:\n",
"    p = [((s - lo + 1) / span) ** beta for s in size]\n",
"    print(p)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"32\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"import multiprocess as mp\n",
"\n",
"# Show the CPU count as reported by the multiprocess package.\n",
"n_cpus = mp.cpu_count()\n",
"print(n_cpus)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"71\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"from orm import engine\n",
"import pandas as pd\n",
"import pickle\n",
"\n",
"# Per experiment: `count` is the number of its samples that have result rows\n",
"# with ts > 0, and `max_max_ts` is the largest ts among those samples.\n",
"str_sql = (\n",
"    \"select e_id, count, max_max_ts, dct_lst_init_remove_firm_prod from iiabmdb.without_exp_experiment as a \"\n",
"    \"inner join \"\n",
"    \"(select e_id, count(id) as count, max(max_ts) as max_max_ts from iiabmdb.without_exp_sample as a \"\n",
"    \"inner join (select s_id, max(ts) as max_ts from iiabmdb.without_exp_result where ts > 0 group by s_id) as b \"\n",
"    \"on a.id = b.s_id \"\n",
"    \"group by e_id) as b \"\n",
"    \"on a.id = b.e_id \"\n",
"    \"order by count desc;\"\n",
")\n",
"result = pd.read_sql(sql=str_sql, con=engine)\n",
"\n",
"# NOTE(review): pickle.loads can execute arbitrary code on crafted input; this\n",
"# is only acceptable while the column is written exclusively by trusted code.\n",
"col = 'dct_lst_init_remove_firm_prod'\n",
"result[col] = result[col].apply(pickle.loads)\n",
"\n",
"# Collect the unpickled values of the experiments whose count is at least 9.\n",
"list_dct = result.loc[result['count'] >= 9, col].to_list()\n",
"print(len(list_dct))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "ValueError",
|
||
"evalue": "probabilities do not sum to 1",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
||
"Cell \u001b[1;32mIn[2], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnumpy\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mnp\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m np\u001b[39m.\u001b[39;49mrandom\u001b[39m.\u001b[39;49mchoice([\u001b[39m1\u001b[39;49m], p\u001b[39m=\u001b[39;49m[\u001b[39m0.9\u001b[39;49m])\n",
|
||
"File \u001b[1;32mmtrand.pyx:933\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[1;34m()\u001b[0m\n",
|
||
"\u001b[1;31mValueError\u001b[0m: probabilities do not sum to 1"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"import numpy as np\n",
"\n",
"# Demonstration: the weights in `p` must sum to 1 even for a single candidate,\n",
"# so p=[0.9] is rejected with 'probabilities do not sum to 1'.\n",
"# The error is caught and printed so that Run All is not aborted here.\n",
"try:\n",
"    np.random.choice([1], p=[0.9])\n",
"except ValueError as err:\n",
"    print(f'{type(err).__name__}: {err}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0.004495606232695251"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Jitter a base probability by up to 0.1 either way, then clamp into [0, 1].\n",
"base_prob = 0\n",
"draw = np.random.uniform(base_prob - 0.1, base_prob + 0.1)\n",
"prob_remove = max(min(draw, 1), 0)\n",
"prob_remove"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[8]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Seeded generator, so the single pick below is repeatable.\n",
"nprandom = np.random.default_rng(0)\n",
"candidates = range(10)\n",
"lst_choose_firm = nprandom.choice(candidates, size=1, replace=False)\n",
"print(lst_choose_firm)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "ValueError",
|
||
"evalue": "Cannot take a larger sample than population when replace is False",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
||
"Cell \u001b[1;32mIn[9], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m nprandom \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mrandom\u001b[39m.\u001b[39mdefault_rng(\u001b[39m0\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m lst_choose_firm \u001b[39m=\u001b[39m nprandom\u001b[39m.\u001b[39;49mchoice([\u001b[39m1\u001b[39;49m,\u001b[39m2\u001b[39;49m],\n\u001b[0;32m 3\u001b[0m \u001b[39m3\u001b[39;49m,\n\u001b[0;32m 4\u001b[0m replace\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m\n\u001b[0;32m 5\u001b[0m )\n\u001b[0;32m 6\u001b[0m lst_choose_firm\n",
|
||
"File \u001b[1;32m_generator.pyx:753\u001b[0m, in \u001b[0;36mnumpy.random._generator.Generator.choice\u001b[1;34m()\u001b[0m\n",
|
||
"\u001b[1;31mValueError\u001b[0m: Cannot take a larger sample than population when replace is False"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"nprandom = np.random.default_rng(0)\n",
"\n",
"# Demonstration: without replacement the sample (3) cannot be larger than the\n",
"# population (2), so choice raises ValueError. The error is caught and printed\n",
"# so that Run All is not aborted here.\n",
"try:\n",
"    lst_choose_firm = nprandom.choice([1, 2], 3, replace=False)\n",
"except ValueError as err:\n",
"    print(f'{type(err).__name__}: {err}')\n",
"else:\n",
"    print(lst_choose_firm)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"0 0\n",
|
||
"0 1\n",
|
||
"0 2\n",
|
||
"1 0\n",
|
||
"1 1\n",
|
||
"break\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Leaving both loops at once: the inner loop's `else` runs only when that loop\n",
"# ended without `break`, so `continue` skips the outer `break` in that case.\n",
"for j in range(3):\n",
"    for k in range(3):\n",
"        print(j, k)\n",
"        if (j, k) == (1, 1):\n",
"            print('break')\n",
"            break\n",
"    else:\n",
"        continue\n",
"    break\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2.25\n",
|
||
"2.25\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Dividing by a product and dividing by each factor in turn both print 2.25 here.\n",
"numerator = 27\n",
"print(numerator / (4 * 3))\n",
"print(numerator / 4 / 3)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# range(1, 1) is empty, so the loop body never runs and nothing is printed.\n",
"empty_range = range(1, 1)\n",
"for i in empty_range:\n",
"    print(i)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[6, 7, 8, 9, 10]"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"lst = [*range(1, 11)]\n",
"print(lst)\n",
"\n",
"# A negative start counts from the end: this is the last five items.\n",
"lst[-5:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>e_id</th>\n",
|
||
" <th>n_disrupt_sample</th>\n",
|
||
" <th>total_n_disrupt_firm_prod_experiment</th>\n",
|
||
" <th>dct_lst_init_disrupt_firm_prod</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>383</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>300.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x17\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>227</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>250.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>83</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>200.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>135</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>200.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>179</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>200.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>90</th>\n",
|
||
" <td>76</td>\n",
|
||
" <td>24</td>\n",
|
||
" <td>56.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x14\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>91</th>\n",
|
||
" <td>89</td>\n",
|
||
" <td>24</td>\n",
|
||
" <td>54.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>92</th>\n",
|
||
" <td>90</td>\n",
|
||
" <td>24</td>\n",
|
||
" <td>54.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>93</th>\n",
|
||
" <td>335</td>\n",
|
||
" <td>24</td>\n",
|
||
" <td>54.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x17\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>94</th>\n",
|
||
" <td>449</td>\n",
|
||
" <td>24</td>\n",
|
||
" <td>53.0</td>\n",
|
||
" <td>b'\\x80\\x05\\x95\\x15\\x00\\x00\\x00\\x00\\x00\\x00\\x00...</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>95 rows × 4 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" e_id n_disrupt_sample total_n_disrupt_firm_prod_experiment \\\n",
|
||
"0 383 50 300.0 \n",
|
||
"1 227 50 250.0 \n",
|
||
"2 83 50 200.0 \n",
|
||
"3 135 50 200.0 \n",
|
||
"4 179 50 200.0 \n",
|
||
".. ... ... ... \n",
|
||
"90 76 24 56.0 \n",
|
||
"91 89 24 54.0 \n",
|
||
"92 90 24 54.0 \n",
|
||
"93 335 24 54.0 \n",
|
||
"94 449 24 53.0 \n",
|
||
"\n",
|
||
" dct_lst_init_disrupt_firm_prod \n",
|
||
"0 b'\\x80\\x05\\x95\\x17\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"1 b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"2 b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"3 b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"4 b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
".. ... \n",
|
||
"90 b'\\x80\\x05\\x95\\x14\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"91 b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"92 b'\\x80\\x05\\x95\\x16\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"93 b'\\x80\\x05\\x95\\x17\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"94 b'\\x80\\x05\\x95\\x15\\x00\\x00\\x00\\x00\\x00\\x00\\x00... \n",
|
||
"\n",
|
||
"[95 rows x 4 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"import pandas as pd\n",
"from orm import engine\n",
"\n",
"# Read the query text from the SQL file, run it and display the resulting frame.\n",
"with open('SQL_export_high_risk_setting.sql', 'r') as sql_file:\n",
"    contents = sql_file.read()\n",
"\n",
"result = pd.read_sql(sql=contents, con=engine)\n",
"result"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.8"
|
||
},
|
||
"orig_nbformat": 4,
|
||
"vscode": {
|
||
"interpreter": {
|
||
"hash": "bcdafc093860683ffb58d6956591562b7f8ed5d58147d17d71a5d4d6605a08df"
|
||
}
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|