Firm Addition
This commit is contained in:
@@ -2,74 +2,141 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"132001000000.0\n"
|
||||
"['Dassault Systemes SE']\n",
|
||||
"['Schneider Electric SE']\n",
|
||||
"['China Electronics Huada Technology Co. Ltd.']\n",
|
||||
"['Kingsoft Cloud Holdings Ltd. ADR']\n",
|
||||
"['Baidu Inc.']\n",
|
||||
"['Kingdee International Software Group Co. Ltd.']\n",
|
||||
"['Tencent Holdings Ltd.']\n",
|
||||
"['Xiaomi Corp.']\n",
|
||||
"['AsiaInfo Technologies Ltd.']\n",
|
||||
"['Fanuc Corp.']\n",
|
||||
"['Omron Corp.']\n",
|
||||
"['Mitsubishi Electric Corp.']\n",
|
||||
"['Hexagon AB Series B']\n",
|
||||
"['Hollysys Automation Technologies Ltd.']\n",
|
||||
"['JD.com Inc. ADR']\n",
|
||||
"['Autodesk Inc.']\n",
|
||||
"['Altair Engineering Inc. Cl A']\n",
|
||||
"['Ansys Inc.']\n",
|
||||
"['Honeywell International Inc.']\n",
|
||||
"['PTC Inc.']\n",
|
||||
"['Texas Instruments Inc.']\n",
|
||||
"['Cadence Design Systems Inc.']\n",
|
||||
"['Cisco Systems Inc.']\n",
|
||||
"['Microsoft Corp.']\n",
|
||||
"['Analog Devices Inc.']\n",
|
||||
"['Amazon.com Inc.']\n",
|
||||
"['Intel Corp.']\n",
|
||||
"['ABB Ltd. ADR']\n",
|
||||
"['International Business Machines Corp.']\n",
|
||||
"['Oracle Corp.']\n",
|
||||
"['Salesforce Inc.']\n",
|
||||
"['SAP SE ADR']\n",
|
||||
"['Emerson Electric Co.']\n",
|
||||
"['Dell Technologies Inc. Cl C']\n",
|
||||
"['Hewlett Packard Enterprise Co.']\n",
|
||||
"['Rockwell Automation Inc.']\n",
|
||||
"['General Electric Co.']\n",
|
||||
"['Synopsys Inc.']\n",
|
||||
"['STMicroelectronics N.V.']\n",
|
||||
"['Alibaba Group Holding Ltd. ADR']\n",
|
||||
"['Siemens AG']\n",
|
||||
"['Infineon Technologies AG']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from urllib.request import urlopen\n",
|
||||
"from urllib.parse import quote\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"import time\n",
|
||||
"import re\n",
|
||||
"import json\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
"# Reporting year whose figures are read from each financial statement.\n",
"select_year = 2021\n",
"Firm = pd.read_csv(\"Firm.csv\", index_col=0)\n",
"# .copy() makes select_firm an independent frame, so the column assignments\n",
"# below do not write into a slice of Firm (no SettingWithCopyWarning).\n",
"select_firm = Firm.loc[Firm[\"Source\"]==\"marketwatch\",:\"Type_Region\"].copy()\n",
"# These columns start empty and are filled in from the crawled pages.\n",
"select_firm[\"Exchange\"] = None\n",
"select_firm[\"Crawled_Name\"] = None\n",
"select_firm[\"Stock_Currency\"] = None\n",
|
||||
"\n",
|
"# Single-ticker check: print IBM's total assets for select_year.\n",
"try:\n",
"    # The context manager closes the HTTP response once the page has been read.\n",
"    with urlopen(\n",
"        \"https://www.marketwatch.com/investing/stock/ibm/financials/balance-sheet\"\n",
"    ) as html:\n",
"        bsObj = BeautifulSoup(html.read(), 'html.parser')\n",
"    time.sleep(0.1)\n",
"except Exception as e:\n",
"    print(e)\n",
"    print(\"Can't open url.\")\n",
"else:\n",
"    bs_table = bsObj.find(name=\"table\", attrs={\"aria-label\":\"Financials - Assets data table\"})\n",
"    if bs_table is None:\n",
"        # The page loaded but has no assets table; report it instead of raising AttributeError.\n",
"        print(\"Can't find the assets table.\")\n",
"    else:\n",
"        # Exact class match: headers carrying extra classes are not selected.\n",
"        bs = bs_table.find(name=\"tr\", attrs={\"class\":\"table__row\"}).find_all(\n",
"            lambda tag: tag.name == 'th' and tag.get('class') == ['overflow__heading'])\n",
"        # The last selected header is dropped before the int conversion.\n",
"        years_list = [int(i.div.contents[0]) for i in bs[0:-1]]\n",
"        total_assets = None\n",
"        for i in bs_table.find_all(name=\"tr\", attrs={\"class\":\"table__row is-highlighted\"}):\n",
"            if i.td.div.contents[0] == 'Total Assets':\n",
"                string = i.find(\"div\", attrs={\"class\":\"chart--financials js-financial-chart\"}).attrs['data-chart-data']\n",
"                # Blank entries would make float() raise; count them as 0.\n",
"                total_assets_list = [float(x) if x!='' else 0 for x in string.split(',')]\n",
"                total_assets = pd.DataFrame({\"year\":years_list,'total_asset':total_assets_list})\n",
"        if total_assets is None:\n",
"            print(\"Can't find the Total Assets row.\")\n",
"        else:\n",
"            matches = total_assets.loc[total_assets['year']==select_year,'total_asset'].to_list()\n",
"            # An absent year yields an empty list; say so instead of raising IndexError.\n",
"            if matches:\n",
"                print(matches[0])\n",
"            else:\n",
"                print(f\"No total assets for {select_year}.\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<th class=\"overflow__heading fixed--column m70\">\n",
|
||||
"<div class=\"cell__content fixed--cell\">Item</div>\n",
|
||||
"<div class=\"cell__content\">Item</div>\n",
|
||||
"</th>"
|
||||
]
|
||||
},
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"bs_table.th"
|
"# Statement pages crawled per firm: (URL suffix, table aria-label, row label, output column).\n",
"STATEMENTS = [\n",
"    (\"financials/balance-sheet\", \"Financials - Assets data table\", \"Total Assets\", \"Assets\"),\n",
"    (\"financials\", \"Financials - data table\", \"Sales/Revenue\", \"Revenue\"),\n",
"]\n",
"\n",
"\n",
"def fetch_page(url):\n",
"    \"\"\"Download url and return it parsed by BeautifulSoup, or None when that fails.\"\"\"\n",
"    try:\n",
"        # The context manager closes the HTTP response once the page has been read.\n",
"        with urlopen(url) as html:\n",
"            page = BeautifulSoup(html.read(), 'html.parser')\n",
"        time.sleep(0.1)\n",
"    except Exception as e:\n",
"        print(e)\n",
"        print(\"Can't open url.\")\n",
"        return None\n",
"    return page\n",
"\n",
"\n",
"def read_basic_info(page):\n",
"    \"\"\"Return the page's ld+json metadata as a dict ({} when missing or unparsable).\"\"\"\n",
"    script = page.find(name=\"script\", attrs={'type':\"application/ld+json\"})\n",
"    if script is None or not script.contents:\n",
"        return {}\n",
"    try:\n",
"        info = json.loads(script.contents[0].replace('\\n','').replace('\\t',''))\n",
"    except ValueError:\n",
"        return {}\n",
"    return info if isinstance(info, dict) else {}\n",
"\n",
"\n",
"def read_yearly_row(page, table_label, row_label):\n",
"    \"\"\"Return {year: value} for the highlighted row row_label in the table labelled table_label.\n",
"\n",
"    Returns {} when the table, its header row or the requested row is missing.\n",
"    \"\"\"\n",
"    bs_table = page.find(name=\"table\", attrs={\"aria-label\":table_label})\n",
"    if bs_table is None:\n",
"        return {}\n",
"    header = bs_table.find(name=\"tr\", attrs={\"class\":\"table__row\"})\n",
"    if header is None:\n",
"        return {}\n",
"    # Exact class match: headers carrying extra classes are not selected.\n",
"    headings = header.find_all(lambda tag: tag.name == 'th' and tag.get('class') == ['overflow__heading'])\n",
"    # The last selected header is dropped before the int conversion.\n",
"    years_list = [int(i.div.contents[0]) for i in headings[0:-1]]\n",
"    yearly = {}\n",
"    for i in bs_table.find_all(name=\"tr\", attrs={\"class\":\"table__row is-highlighted\"}):\n",
"        if i.td.div.contents[0] == row_label:\n",
"            string = i.find(\"div\", attrs={\"class\":\"chart--financials js-financial-chart\"}).attrs['data-chart-data']\n",
"            # NOTE(review): blank entries become 0, which cannot be told apart from a reported zero.\n",
"            values = [float(x) if x!='' else 0 for x in string.split(',')]\n",
"            if len(values) != len(years_list):\n",
"                print(f\"Can't align {row_label} values with years.\")\n",
"                continue\n",
"            yearly = dict(zip(years_list, values))\n",
"    return yearly\n",
"\n",
"\n",
"# Create the result columns up front so the conversions after the loop still work\n",
"# when no page yields a value.\n",
"for column in (\"Assets\", \"Revenue\"):\n",
"    if column not in select_firm.columns:\n",
"        select_firm[column] = float(\"nan\")\n",
"\n",
"for _, row in select_firm.iterrows():\n",
"    same_code = select_firm[\"Stock_Code\"]==row['Stock_Code']\n",
"    for suffix, table_label, row_label, column in STATEMENTS:\n",
"        # quote() keeps a code with URL-special characters from breaking the path.\n",
"        url = f\"https://www.marketwatch.com/investing/stock/{quote(str(row['Stock_Code']))}/{suffix}\"\n",
"        if row['Stock_Region'] != \"US\":\n",
"            url += f\"?countrycode={row['Stock_Region']}\"\n",
"        page = fetch_page(url)\n",
"        if page is None:\n",
"            continue\n",
"        basic_info = read_basic_info(page)\n",
"        for target, key in ((\"Exchange\", \"exchange\"), (\"Crawled_Name\", \"name\"), (\"Stock_Currency\", \"priceCurrency\")):\n",
"            # A key absent from the metadata leaves the cell as it was instead of raising KeyError.\n",
"            if key in basic_info:\n",
"                select_firm.loc[same_code, target] = basic_info[key]\n",
"        yearly = read_yearly_row(page, table_label, row_label)\n",
"        # A firm with no figure for select_year keeps its empty cell instead of raising IndexError.\n",
"        if select_year in yearly:\n",
"            select_firm.loc[same_code, column] = yearly[select_year]\n",
"    print(select_firm.loc[same_code, \"Crawled_Name\"].to_list())\n",
"\n",
"# NOTE(review): hard-coded rates, presumably CNY per unit of currency; currencies\n",
"# not listed map to NaN -- confirm the values and coverage.\n",
"select_firm['Exchange_Rate'] = select_firm['Stock_Currency'].map({'EUR': 7.2505, 'HKD': 0.88, 'JPY': 0.051, 'SEK': 0.65, 'USD': 6.88})\n",
"# The 0.00000001 factor expresses the converted amounts in units of 1e8.\n",
"select_firm['Assets_zh'] = select_firm['Assets'] * select_firm['Exchange_Rate'] * 0.00000001\n",
"select_firm['Revenue_zh'] = select_firm['Revenue'] * select_firm['Exchange_Rate'] * 0.00000001\n",
"select_firm.to_csv('MarketWatch.csv', index=False, encoding='utf-8-sig')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user