from io import StringIO
import pandas as pd
url = 'website'
r = requests.get(url)
#let it can read chinese
r.encoding = 'big5'
dfs = pd.read_html(StringIO(r.text))
#get the first table of html
df = dfs[0]
# in [:,:10],before comma it indicate to row id,after comma,it indicates columns id
df = df.iloc[:,:10]
# we can get the row which first column is '公司代號'
column_name = df[df[0]=='公司代號']
# select any row of column_name as column 's name' (because in here,all the row is the same)
df.columns = column_name.iloc[0]
df['當月營收'] = pd.to_numeric(df['當月營收'],errors = 'coerce')
# delect row under 當月營收 which shows NaN by using .dropna
df = df.dropna(subset=['當月營收'])
# 刪除「公司代號」中出現「合計」的行數,其中「~」是否定的意思
#delect df['公司代號'] which shows 合計 ,and ~ means "not"
df = df.loc[~(df['公司代號']=='合計')]
df = df.set_index(['公司代號','公司名稱'])
df = df.apply(pd.to_numeric)
#df.head()
**********************************************************************
#check pandas version
pd.__version__
**********************************************************************
df.to_csv('test.csv',encoding='utf_8_sig')
df = pd.read_csv('test.csv',index_col=['公司代號','公司名稱'])
**********************************************************************
import sqlite3
conn = sqlite3.connect('test.sqlite3')
df.to_sql('monthly_report', conn, if_exists='replace')
df = pd.read_sql('select * from monthly_report', conn, index_col=['公司代號','公司名稱'])
df.head()
**********************************************************************
沒有留言:
張貼留言