主要有兩種資料結構
- Series : 一條時間序列
- DataFrame : 由多條時間序列(Series)組成的二維表格
匯入 pandas,並取別名為 pd
import pandas as pd
傳入一個 list 來建立 Series
s = pd.Series([1,2,3,4,5,6,7,8])
結果為
0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
dtype: int64
0~7為index
1~8為對應到的值
*************************************
# Build a daily DatetimeIndex of six consecutive days starting 2019-03-09.
date = pd.date_range('20190309', periods=6)
# Use the dates as the index so every value is labelled by its day.
s = pd.Series([1, 2, 3, 4, 5, 6], index=date)
s
****************************************************
2019-03-09    1
2019-03-10    2
2019-03-11    3
2019-03-12    4
2019-03-13    5
2019-03-14    6
Freq: D, dtype: int64
********************************************************
# Build a time series
import pandas as pd

date = pd.date_range('20190309', periods=6)
s = pd.Series([1, 2, 3, 4, 5, 6], index=date)

# Look up the value stored for a single date (label-based lookup)
s.loc['20190312']
***********************************************
4
******************************************************
s.loc['20190310':'20190312']
**********************************
2019-03-10 2 2019-03-11 3 2019-03-12 4 Freq: D, dtype: int64
*********************************
s.iloc[1]
****************************
2
**********************************
s.iloc[1:4]
*********************************
2019-03-10 2 2019-03-11 3 2019-03-12 4 Freq: D, dtype: int64
***********************************
# Print the series followed by its basic summary statistics.
print(s)
print(s.max())
print(s.min())
print(s.mean())
print(s.std())
***********************************
2019-03-09 1 2019-03-10 2 2019-03-11 3 2019-03-12 4 2019-03-13 5 2019-03-14 6 Freq: D, dtype: int64 6 1 3.5 1.8708286933869707
*********************************************
# Print the series, then its running sum and its running product.
print(s)
print(s.cumsum())
print(s.cumprod())
*********************************************
2019-03-09 1 2019-03-10 2 2019-03-11 3 2019-03-12 4 2019-03-13 5 2019-03-14 6 Freq: D, dtype: int64 2019-03-09 1 2019-03-10 3 2019-03-11 6 2019-03-12 10 2019-03-13 15 2019-03-14 21 Freq: D, dtype: int64 2019-03-09 1 2019-03-10 2 2019-03-11 6 2019-03-12 24 2019-03-13 120 2019-03-14 720 Freq: D, dtype: int64
*************************************************
print(s)
# rolling(2) aggregates each value together with the one before it; the first
# row has no previous value, so its result is NaN.
print(s.rolling(2).sum())
print(s.rolling(2).max())
print(s.rolling(2).min())
print(s.rolling(2).mean())
print(s.rolling(2).std())
*********************************************
2019-03-09 1 2019-03-10 2 2019-03-11 3 2019-03-12 4 2019-03-13 5 2019-03-14 6 Freq: D, dtype: int64 2019-03-09 NaN 2019-03-10 3.0 2019-03-11 5.0 2019-03-12 7.0 2019-03-13 9.0 2019-03-14 11.0 Freq: D, dtype: float64 2019-03-09 NaN 2019-03-10 2.0 2019-03-11 3.0 2019-03-12 4.0 2019-03-13 5.0 2019-03-14 6.0 Freq: D, dtype: float64 2019-03-09 NaN 2019-03-10 1.0 2019-03-11 2.0 2019-03-12 3.0 2019-03-13 4.0 2019-03-14 5.0 Freq: D, dtype: float64 2019-03-09 NaN 2019-03-10 1.5 2019-03-11 2.5 2019-03-12 3.5 2019-03-13 4.5 2019-03-14 5.5 Freq: D, dtype: float64 2019-03-09 NaN 2019-03-10 0.707107 2019-03-11 0.707107 2019-03-12 0.707107 2019-03-13 0.707107 2019-03-14 0.707107 Freq: D, dtype: float64
*************************************************
print(s) s < 3
Out[12]:
*****************************************************************
# Boolean mask that is True for the entries greater than 3.
larger_than_3 = s > 3
# Add 1 to the selected entries only; the other entries stay unchanged.
s.loc[larger_than_3] = s.loc[larger_than_3] + 1
s
*****************************************************************
2019-03-09 1 2019-03-10 2 2019-03-11 3 2019-03-12 5 2019-03-13 6 2019-03-14 7 Freq: D, dtype: int64
*****************************************************************
# Three series that share the same date index.
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=date)
s2 = pd.Series([5, 6, 7, 8, 9, 10], index=date)
s3 = pd.Series([11, 12, 5, 7, 8, 2], index=date)

# Each dictionary key becomes a column name of the DataFrame.
dictionary = {
    'C1': s1,
    'C2': s2,
    'C3': s3,
}

df = pd.DataFrame(dictionary)
df
*****************************************************************
*****************************************************************
%matplotlib inline df.plot()
*****************************************************************
*****************************************************************
df.loc['2019-03-11']
*****************************************************************
C1 3 C2 7 C3 5
*****************************************************************
df.iloc[1]
*****************************************************************
C1 2 C2 6 C3 12
*****************************************************************
print(df)
# Label-based selection: the rows from 2019-03-09 through 2019-03-11 (both
# ends included) and only the columns C1 and C2.
df.loc['2019-03-09':'2019-03-11', ['C1', 'C2']]
*****************************************************************
C1 C2 C3 2019-03-09 1 5 11 2019-03-10 2 6 12 2019-03-11 3 7 5 2019-03-12 4 8 7 2019-03-13 5 9 8 2019-03-14 6 10 2
| | C1 | C2 |
---|---|---|
2019-03-09 | 1 | 5 |
2019-03-10 | 2 | 6 |
2019-03-11 | 3 | 7 |
*****************************************************************
df.iloc[1:4, [0, 1]]
*****************************************************************
| | C1 | C2 |
---|---|---|
2019-03-10 | 2 | 6 |
2019-03-11 | 3 | 7 |
2019-03-12 | 4 | 8 |
*****************************************************************
df.cumsum()
*****************************************************************
| | C1 | C2 | C3 |
---|---|---|---|
2019-03-09 | 1 | 5 | 11 |
2019-03-10 | 3 | 11 | 23 |
2019-03-11 | 6 | 18 | 28 |
2019-03-12 | 10 | 26 | 35 |
2019-03-13 | 15 | 35 | 43 |
2019-03-14 | 21 | 45 | 45 |
*****************************************************************
df.cumprod()
*****************************************************************
| | C1 | C2 | C3 |
---|---|---|---|
2019-03-09 | 1 | 5 | 11 |
2019-03-10 | 2 | 30 | 132 |
2019-03-11 | 6 | 210 | 660 |
2019-03-12 | 24 | 1680 | 4620 |
2019-03-13 | 120 | 15120 | 36960 |
2019-03-14 | 720 | 151200 | 73920 |
*****************************************************************
df.rolling(2).mean()
*****************************************************************
| | C1 | C2 | C3 |
---|---|---|---|
2019-03-09 | NaN | NaN | NaN |
2019-03-10 | 1.5 | 5.5 | 11.5 |
2019-03-11 | 2.5 | 6.5 | 8.5 |
2019-03-12 | 3.5 | 7.5 | 6.0 |
2019-03-13 | 4.5 | 8.5 | 7.5 |
2019-03-14 | 5.5 | 9.5 | 5.0 |
*****************************************************************
print(df)
# axis=1 accumulates across the columns within each row (C1, then C1+C2, ...).
df.cumsum(axis=1)
*****************************************************************
**********************************************************************
import pandas as pd


def create_list(start_value, end_value):
    """Return the consecutive integers from start_value through end_value.

    Both arguments are integers and both ends are inclusive. Values below 1
    are never produced, so a start_value smaller than 1 behaves like 1. An
    end_value below the effective start yields an empty list.
    """
    return list(range(max(start_value, 1), end_value + 1))


list1 = create_list(1, 10)
list2 = create_list(11, 20)
list3 = create_list(21, 30)

# Label the rows 1..10 instead of the default 0..9.
list1_series = pd.Series(list1, range(1, 11, 1))
list2_series = pd.Series(list2, range(1, 11, 1))
list3_series = pd.Series(list3, range(1, 11, 1))

# Each dictionary key becomes a column name of the DataFrame.
dictionary = {
    'C1': list1_series,
    'C2': list2_series,
    'C3': list3_series
}

df = pd.DataFrame(dictionary)
print(df)
print("************************")
# .loc selects by index label: the row labelled 2.
print(df.loc[2])
print("************************")
# .iloc selects by position: the first row (labelled 1).
print(df.iloc[0])
print("************************")
print(df.loc[2, ["C2"]])
print("************************")
# Second row, second column, both by position.
print(df.iloc[1, 1])
print("************************")
print(df)
print("************************")
# Running total down each column.
print(df.cumsum())
print("************************")
# Running total across the columns of each row.
print(df.cumsum(axis=1))
print("************************")
# Element-wise comparison; the result is a DataFrame of booleans.
df_larger_than_3 = df > 3
print(df_larger_than_3)
print("************************")
%matplotlib inline df.plot()
*******************************************************************
C1 C2 C3 1 1 11 21 2 2 12 22 3 3 13 23 4 4 14 24 5 5 15 25 6 6 16 26 7 7 17 27 8 8 18 28 9 9 19 29 10 10 20 30 ************************ C1 2 C2 12 C3 22 Name: 2, dtype: int64 ************************ C1 1 C2 11 C3 21 Name: 1, dtype: int64 ************************ C2 12 Name: 2, dtype: int64 ************************ 12 ************************ C1 C2 C3 1 1 11 21 2 2 12 22 3 3 13 23 4 4 14 24 5 5 15 25 6 6 16 26 7 7 17 27 8 8 18 28 9 9 19 29 10 10 20 30 ************************ C1 C2 C3 1 1 11 21 2 3 23 43 3 6 36 66 4 10 50 90 5 15 65 115 6 21 81 141 7 28 98 168 8 36 116 196 9 45 135 225 10 55 155 255 ************************ C1 C2 C3 1 1 12 33 2 2 14 36 3 3 16 39 4 4 18 42 5 5 20 45 6 6 22 48 7 7 24 51 8 8 26 54 9 9 28 57 10 10 30 60 ************************ C1 C2 C3 1 False True True 2 False True True 3 False True True 4 True True True 5 True True True 6 True True True 7 True True True 8 True True True 9 True True True 10 True True True ************************
沒有留言:
張貼留言