Pandas 可以用來分析time series
主要有兩種資料結構
- Series:單一條時間序列(一維,每個值對應一個 index)
- DataFrame:由多條時間序列組成的表格(每一欄是一條 Series)
匯入 pandas,並取別名為 pd
import pandas as pd
傳入一個 list 來建立 Series
s = pd.Series([1,2,3,4,5,6,7,8])
結果為
0 1
1 2
2 3
3 4
4 5
5 6
6 7
7 8
0~7為index
1~8為對應到的值
*************************************
date = pd.date_range('20190309',periods=6)
s = pd.Series([1,2,3,4,5,6], date)
s
****************************************************
2019-03-09 1
2019-03-10 2
2019-03-11 3
2019-03-12 4
2019-03-13 5
2019-03-14 6
Freq: D, dtype: int64
********************************************************
# Build a daily time series: six values indexed by consecutive dates starting 2019-03-09
import pandas as pd
date = pd.date_range('20190309', periods=6)
s = pd.Series([1,2,3,4,5,6], index=date)
# Look up the value stored under a single date label
s.loc['20190312']
***********************************************
4
******************************************************
s.loc['20190310':'20190312']
**********************************
2019-03-10 2
2019-03-11 3
2019-03-12 4
Freq: D, dtype: int64
*********************************
s.iloc[1]
****************************
2
**********************************
s.iloc[1:4]
*********************************
2019-03-10 2
2019-03-11 3
2019-03-12 4
Freq: D, dtype: int64
***********************************
print(s)
print(s.max())
print(s.min())
print(s.mean())
print(s.std())
***********************************
2019-03-09 1
2019-03-10 2
2019-03-11 3
2019-03-12 4
2019-03-13 5
2019-03-14 6
Freq: D, dtype: int64
6
1
3.5
1.8708286933869707
*********************************************
print(s)
print(s.cumsum())
print(s.cumprod())
*********************************************
2019-03-09 1
2019-03-10 2
2019-03-11 3
2019-03-12 4
2019-03-13 5
2019-03-14 6
Freq: D, dtype: int64
2019-03-09 1
2019-03-10 3
2019-03-11 6
2019-03-12 10
2019-03-13 15
2019-03-14 21
Freq: D, dtype: int64
2019-03-09 1
2019-03-10 2
2019-03-11 6
2019-03-12 24
2019-03-13 120
2019-03-14 720
Freq: D, dtype: int64
*************************************************
print(s)
# rolling(2) combines each value with the one before it; the first row has
# no predecessor, so its result is NaN (and the results become floats)
print(s.rolling(2).sum())
print(s.rolling(2).max())
print(s.rolling(2).min())
print(s.rolling(2).mean())
print(s.rolling(2).std())
*********************************************
2019-03-09 1
2019-03-10 2
2019-03-11 3
2019-03-12 4
2019-03-13 5
2019-03-14 6
Freq: D, dtype: int64
2019-03-09 NaN
2019-03-10 3.0
2019-03-11 5.0
2019-03-12 7.0
2019-03-13 9.0
2019-03-14 11.0
Freq: D, dtype: float64
2019-03-09 NaN
2019-03-10 2.0
2019-03-11 3.0
2019-03-12 4.0
2019-03-13 5.0
2019-03-14 6.0
Freq: D, dtype: float64
2019-03-09 NaN
2019-03-10 1.0
2019-03-11 2.0
2019-03-12 3.0
2019-03-13 4.0
2019-03-14 5.0
Freq: D, dtype: float64
2019-03-09 NaN
2019-03-10 1.5
2019-03-11 2.5
2019-03-12 3.5
2019-03-13 4.5
2019-03-14 5.5
Freq: D, dtype: float64
2019-03-09 NaN
2019-03-10 0.707107
2019-03-11 0.707107
2019-03-12 0.707107
2019-03-13 0.707107
2019-03-14 0.707107
Freq: D, dtype: float64
*************************************************
print(s)
s < 3
2019-03-09 1
2019-03-10 2
2019-03-11 3
2019-03-12 4
2019-03-13 5
2019-03-14 6
Freq: D, dtype: int64
Out[12]:
2019-03-09 True
2019-03-10 True
2019-03-11 False
2019-03-12 False
2019-03-13 False
2019-03-14 False
Freq: D, dtype: bool
*****************************************************
# Render the plot inline in the notebook page (viewed in the browser)
%matplotlib inline
s.plot()
*****************************************************************
larger_than_3 = s > 3
print (larger_than_3)
s.loc[larger_than_3]
*****************************************************************
2019-03-09 False
2019-03-10 False
2019-03-11 False
2019-03-12 True
2019-03-13 True
2019-03-14 True
Freq: D, dtype: bool
Out[35]:
2019-03-12 4
2019-03-13 5
2019-03-14 6
Freq: D, dtype: int64
*****************************************************************
s.loc[larger_than_3] = s.loc[larger_than_3] + 1
s
*****************************************************************
2019-03-09 1
2019-03-10 2
2019-03-11 3
2019-03-12 5
2019-03-13 6
2019-03-14 7
Freq: D, dtype: int64
*****************************************************************
# Three daily series on the shared `date` index; each one becomes a column.
s1, s2, s3 = (
    pd.Series(values, index=date)
    for values in (
        [1, 2, 3, 4, 5, 6],
        [5, 6, 7, 8, 9, 10],
        [11, 12, 5, 7, 8, 2],
    )
)
# Column name -> series, in the order C1, C2, C3.
dictionary = dict(zip(('C1', 'C2', 'C3'), (s1, s2, s3)))
df = pd.DataFrame(dictionary)
# Bare expression so the notebook displays the resulting table.
df
*****************************************************************
*****************************************************************
%matplotlib inline
df.plot()
*****************************************************************
*****************************************************************
df.loc['2019-03-11']
*****************************************************************
C1 3
C2 7
C3 5
*****************************************************************
df.iloc[1]
*****************************************************************
C1 2
C2 6
C3 12
*****************************************************************
print(df)
df.loc['2019-03-09':'2019-03-11', ['C1','C2']]
*****************************************************************
C1 C2 C3
2019-03-09 1 5 11
2019-03-10 2 6 12
2019-03-11 3 7 5
2019-03-12 4 8 7
2019-03-13 5 9 8
2019-03-14 6 10 2
| C1 | C2 |
2019-03-09 | 1 | 5 |
2019-03-10 | 2 | 6 |
2019-03-11 | 3 | 7 |
*****************************************************************
df.iloc[1:4, [0, 1]]
*****************************************************************
| C1 | C2 |
2019-03-10 | 2 | 6 |
2019-03-11 | 3 | 7 |
2019-03-12 | 4 | 8 |
*****************************************************************
df.cumsum()
*****************************************************************
| C1 | C2 | C3 |
2019-03-09 | 1 | 5 | 11 |
2019-03-10 | 3 | 11 | 23 |
2019-03-11 | 6 | 18 | 28 |
2019-03-12 | 10 | 26 | 35 |
2019-03-13 | 15 | 35 | 43 |
2019-03-14 | 21 | 45 | 45 |
*****************************************************************
df.cumprod()
*****************************************************************
| C1 | C2 | C3 |
2019-03-09 | 1 | 5 | 11 |
2019-03-10 | 2 | 30 | 132 |
2019-03-11 | 6 | 210 | 660 |
2019-03-12 | 24 | 1680 | 4620 |
2019-03-13 | 120 | 15120 | 36960 |
2019-03-14 | 720 | 151200 | 73920 |
*****************************************************************
df.rolling(2).mean()
*****************************************************************
| C1 | C2 | C3 |
2019-03-09 | NaN | NaN | NaN |
2019-03-10 | 1.5 | 5.5 | 11.5 |
2019-03-11 | 2.5 | 6.5 | 8.5 |
2019-03-12 | 3.5 | 7.5 | 6.0 |
2019-03-13 | 4.5 | 8.5 | 7.5 |
2019-03-14 | 5.5 | 9.5 | 5.0 |
*****************************************************************
print(df)
df.cumsum(axis=1)
*****************************************************************
C1 C2 C3
2019-03-09 1 5 11
2019-03-10 2 6 12
2019-03-11 3 7 5
2019-03-12 4 8 7
2019-03-13 5 9 8
2019-03-14 6 10 2
| C1 | C2 | C3 |
2019-03-09 | 1 | 6 | 17 |
2019-03-10 | 2 | 8 | 20 |
2019-03-11 | 3 | 10 | 15 |
2019-03-12 | 4 | 12 | 19 |
2019-03-13 | 5 | 14 | 22 |
2019-03-14 | 6 | 16 | 18 |
**********************************************************************
import pandas as pd
def create_list(start_value, end_value):
    """Return the consecutive integers from start_value through end_value.

    Both bounds are inclusive, so ``create_list(1, 10)`` yields 1..10.
    Zero and negative bounds are handled like any other integer.

    Args:
        start_value: First integer of the list.
        end_value: Last integer of the list (included).

    Returns:
        A new list of ints; empty when ``start_value`` is greater than
        ``end_value``.
    """
    # range() excludes its stop value, hence the +1 to keep end_value inclusive.
    return list(range(start_value, end_value + 1))
list1 = create_list(1,10)
list2 = create_list(11,20)
list3 = create_list(21,30)
list1_series = pd.Series(list1,range(1,11,1))
list2_series = pd.Series(list2,range(1,11,1))
list3_series = pd.Series(list3,range(1,11,1))
dictionary = {
'C1':list1_series,
'C2':list2_series,
'C3':list3_series
}
df = pd.DataFrame(dictionary)
print(df)
print("************************")
print(df.loc[2])
print("************************")
print(df.iloc[0])
print("************************")
print(df.loc[2,["C2"]])
print("************************")
print(df.iloc[1,1])
print("************************")
print(df)
print("************************")
print(df.cumsum())
print("************************")
print(df.cumsum(axis = 1))
print("************************")
df_larger_than_3 = df > 3
print(df_larger_than_3)
print("************************")
%matplotlib inline
df.plot()
*******************************************************************
C1 C2 C3
1 1 11 21
2 2 12 22
3 3 13 23
4 4 14 24
5 5 15 25
6 6 16 26
7 7 17 27
8 8 18 28
9 9 19 29
10 10 20 30
************************
C1 2
C2 12
C3 22
Name: 2, dtype: int64
************************
C1 1
C2 11
C3 21
Name: 1, dtype: int64
************************
C2 12
Name: 2, dtype: int64
************************
12
************************
C1 C2 C3
1 1 11 21
2 2 12 22
3 3 13 23
4 4 14 24
5 5 15 25
6 6 16 26
7 7 17 27
8 8 18 28
9 9 19 29
10 10 20 30
************************
C1 C2 C3
1 1 11 21
2 3 23 43
3 6 36 66
4 10 50 90
5 15 65 115
6 21 81 141
7 28 98 168
8 36 116 196
9 45 135 225
10 55 155 255
************************
C1 C2 C3
1 1 12 33
2 2 14 36
3 3 16 39
4 4 18 42
5 5 20 45
6 6 22 48
7 7 24 51
8 8 26 54
9 9 28 57
10 10 30 60
************************
C1 C2 C3
1 False True True
2 False True True
3 False True True
4 True True True
5 True True True
6 True True True
7 True True True
8 True True True
9 True True True
10 True True True
************************