2019年3月19日 星期二

python dataframe#read row and column#.iloc #.loc #dropna #set_index #how to check pandas version #how to save CSV and read #how to read and save sqlit3

import requests
from io import StringIO
import pandas as pd

url = 'website'
r = requests.get(url)
#let it can read chinese
r.encoding = 'big5'

dfs = pd.read_html(StringIO(r.text))
#get the first table of html
df = dfs[0]

# in [:,:10],before comma it indicate to row id,after comma,it indicates columns id
df = df.iloc[:,:10]
# we can get the row which first column is '公司代號'
column_name = df[df[0]=='公司代號']
# select any row of column_name as column 's name' (because in here,all the row is the same)
df.columns = column_name.iloc[0]
df['當月營收'] = pd.to_numeric(df['當月營收'],errors = 'coerce')
# delect row under 當月營收 which shows NaN by using   .dropna 
df = df.dropna(subset=['當月營收'])
# 刪除「公司代號」中出現「合計」的行數,其中「~」是否定的意思
#delect df['公司代號'] which shows 合計 ,and ~ means "not"
df = df.loc[~(df['公司代號']=='合計')]

df = df.set_index(['公司代號','公司名稱'])
df = df.apply(pd.to_numeric)
#df.head()


**********************************************************************
#check pandas version
pd.__version__

**********************************************************************
df.to_csv('test.csv',encoding='utf_8_sig')

df = pd.read_csv('test.csv',index_col=['公司代號','公司名稱'])

**********************************************************************
import sqlite3

conn = sqlite3.connect('test.sqlite3')
df.to_sql('monthly_report', conn, if_exists='replace')


df = pd.read_sql('select * from monthly_report', conn, index_col=['公司代號','公司名稱'])
df.head()
********************************************************************** 

沒有留言:

張貼留言