[英]How do I subtract pandas columns that correspond to different time intervals in python?
如何在 Python 中,對同一個 csv 檔案裡對應不同時間點的 pandas 列做減法?
例如,我想用 09:15:00 的 HIGH 值減去 09:30:00 的 HIGH 值。
我嘗試了幾種不同的方法,但一直沒有成功。
這就是我嘗試過的。
exm = pd.read_csv('exm')
a915 = exm.HIGH.at_time("09:15:00")
a930 = exm.HIGH.at_time("09:30:00")
exm.sub13 = a915 - a930
也試過:
sub13 = a915 - a930
還試過:
a915 = exm.at_time("09:15:00")
a930 = exm.at_time("09:30:00")
exm.sub13 = a915 - a930
也試過:
sub13 = a915 - a930
甚至連一個獨立的列(column)都得不到。
感謝你的幫助!!!!
DATE,TIME,OPEN,HIGH,LOW,CLOSE,VOLUME
02/03/1997,09:04:00,3046.00,3048.50,3046.00,3047.50,505
02/03/1997,09:05:00,3047.00,3048.00,3046.00,3047.00,162
02/03/1997,09:06:00,3047.50,3048.00,3047.00,3047.50,98
02/03/1997,09:07:00,3047.50,3047.50,3047.00,3047.50,228
02/03/1997,09:08:00,3048.00,3048.00,3047.50,3048.00,136
02/03/1997,09:09:00,3048.00,3048.00,3046.50,3046.50,174
02/03/1997,09:10:00,3046.50,3046.50,3045.00,3045.00,134
02/03/1997,09:11:00,3045.50,3046.00,3044.00,3045.00,43
02/03/1997,09:12:00,3045.00,3045.50,3045.00,3045.00,214
02/03/1997,09:13:00,3045.50,3045.50,3045.50,3045.50,8
02/03/1997,09:14:00,3045.50,3046.00,3044.50,3044.50,152
02/03/1997,09:15:00,3044.00,3044.00,3042.50,3042.50,126
02/03/1997,09:16:00,3043.50,3043.50,3043.00,3043.00,128
02/03/1997,09:17:00,3042.50,3043.50,3042.50,3043.50,23
02/03/1997,09:18:00,3043.50,3044.50,3043.00,3044.00,51
02/03/1997,09:19:00,3044.50,3044.50,3043.00,3043.00,18
02/03/1997,09:20:00,3043.00,3045.00,3043.00,3045.00,23
02/03/1997,09:21:00,3045.00,3045.00,3044.50,3045.00,51
02/03/1997,09:22:00,3045.00,3045.00,3045.00,3045.00,47
02/03/1997,09:23:00,3045.50,3046.00,3045.00,3045.00,77
02/03/1997,09:24:00,3045.00,3045.00,3045.00,3045.00,131
02/03/1997,09:25:00,3044.50,3044.50,3043.50,3043.50,138
02/03/1997,09:26:00,3043.50,3043.50,3043.50,3043.50,6
02/03/1997,09:27:00,3043.50,3043.50,3043.00,3043.00,56
02/03/1997,09:28:00,3043.00,3044.00,3043.00,3044.00,32
02/03/1997,09:29:00,3044.50,3044.50,3044.50,3044.50,63
02/03/1997,09:30:00,3045.00,3045.00,3045.00,3045.00,28
02/03/1997,09:31:00,3045.00,3045.50,3045.00,3045.50,75
02/03/1997,09:32:00,3045.50,3045.50,3044.00,3044.00,54
02/03/1997,09:33:00,3043.50,3044.50,3043.50,3044.00,96
02/03/1997,09:34:00,3044.00,3044.50,3044.00,3044.50,27
02/03/1997,09:35:00,3044.50,3044.50,3043.50,3044.50,44
02/03/1997,09:36:00,3044.00,3044.00,3043.00,3043.00,61
02/03/1997,09:37:00,3043.50,3043.50,3043.50,3043.50,18
02/03/1997,09:38:00,3043.50,3045.00,3043.50,3045.00,156
您可以使用 datetime 模組中的 strptime 把時間字串轉換成 datetime 物件,然後將它們相減以得到差值。例如:
>>> import datetime
>>> t1=datetime.datetime.strptime('01/01/2016 20:00:00', "%d/%m/%Y %H:%M:%S")
>>> t2=datetime.datetime.strptime('01/01/2016 21:00:00', "%d/%m/%Y %H:%M:%S")
>>> t2-t1
datetime.timedelta(0, 3600)
>>> (t2-t1).seconds
3600
我認為您可以先在 read_csv 中通過參數 parse_dates 將 DATE 和 TIME 兩列合併並轉換為 datetime,再把這個新的 DATE_TIME 列設置為索引:
import pandas as pd
import io
temp=u"""DATE,TIME,OPEN,HIGH,LOW,CLOSE,VOLUME
02/03/1997,09:04:00,3046.00,3048.50,3046.00,3047.50,505
02/03/1997,09:05:00,3047.00,3048.00,3046.00,3047.00,162
02/03/1997,09:06:00,3047.50,3048.00,3047.00,3047.50,98
02/03/1997,09:07:00,3047.50,3047.50,3047.00,3047.50,228
02/03/1997,09:08:00,3048.00,3048.00,3047.50,3048.00,136
02/03/1997,09:09:00,3048.00,3048.00,3046.50,3046.50,174
02/03/1997,09:10:00,3046.50,3046.50,3045.00,3045.00,134
02/03/1997,09:11:00,3045.50,3046.00,3044.00,3045.00,43
02/03/1997,09:12:00,3045.00,3045.50,3045.00,3045.00,214
02/03/1997,09:13:00,3045.50,3045.50,3045.50,3045.50,8
02/03/1997,09:14:00,3045.50,3046.00,3044.50,3044.50,152
02/03/1997,09:15:00,3044.00,3044.00,3042.50,3042.50,126
02/03/1997,09:16:00,3043.50,3043.50,3043.00,3043.00,128
02/03/1997,09:17:00,3042.50,3043.50,3042.50,3043.50,23
02/03/1997,09:18:00,3043.50,3044.50,3043.00,3044.00,51
02/03/1997,09:19:00,3044.50,3044.50,3043.00,3043.00,18
02/03/1997,09:20:00,3043.00,3045.00,3043.00,3045.00,23
02/03/1997,09:21:00,3045.00,3045.00,3044.50,3045.00,51
02/03/1997,09:22:00,3045.00,3045.00,3045.00,3045.00,47
02/03/1997,09:23:00,3045.50,3046.00,3045.00,3045.00,77
02/03/1997,09:24:00,3045.00,3045.00,3045.00,3045.00,131
02/03/1997,09:25:00,3044.50,3044.50,3043.50,3043.50,138
02/03/1997,09:26:00,3043.50,3043.50,3043.50,3043.50,6
02/03/1997,09:27:00,3043.50,3043.50,3043.00,3043.00,56
02/03/1997,09:28:00,3043.00,3044.00,3043.00,3044.00,32
02/03/1997,09:29:00,3044.50,3044.50,3044.50,3044.50,63
02/03/1997,09:30:00,3045.00,3045.00,3045.00,3045.00,28
02/03/1997,09:31:00,3045.00,3045.50,3045.00,3045.50,75"""
# after testing, replace io.StringIO(temp) with the filename
exm = pd.read_csv(io.StringIO(temp), parse_dates = [['DATE', 'TIME']], index_col=0)
print exm
OPEN HIGH LOW CLOSE VOLUME
DATE_TIME
1997-02-03 09:04:00 3046.0 3048.5 3046.0 3047.5 505
1997-02-03 09:05:00 3047.0 3048.0 3046.0 3047.0 162
1997-02-03 09:06:00 3047.5 3048.0 3047.0 3047.5 98
1997-02-03 09:07:00 3047.5 3047.5 3047.0 3047.5 228
1997-02-03 09:08:00 3048.0 3048.0 3047.5 3048.0 136
1997-02-03 09:09:00 3048.0 3048.0 3046.5 3046.5 174
1997-02-03 09:10:00 3046.5 3046.5 3045.0 3045.0 134
1997-02-03 09:11:00 3045.5 3046.0 3044.0 3045.0 43
1997-02-03 09:12:00 3045.0 3045.5 3045.0 3045.0 214
1997-02-03 09:13:00 3045.5 3045.5 3045.5 3045.5 8
1997-02-03 09:14:00 3045.5 3046.0 3044.5 3044.5 152
1997-02-03 09:15:00 3044.0 3044.0 3042.5 3042.5 126
1997-02-03 09:16:00 3043.5 3043.5 3043.0 3043.0 128
1997-02-03 09:17:00 3042.5 3043.5 3042.5 3043.5 23
1997-02-03 09:18:00 3043.5 3044.5 3043.0 3044.0 51
1997-02-03 09:19:00 3044.5 3044.5 3043.0 3043.0 18
1997-02-03 09:20:00 3043.0 3045.0 3043.0 3045.0 23
1997-02-03 09:21:00 3045.0 3045.0 3044.5 3045.0 51
1997-02-03 09:22:00 3045.0 3045.0 3045.0 3045.0 47
1997-02-03 09:23:00 3045.5 3046.0 3045.0 3045.0 77
1997-02-03 09:24:00 3045.0 3045.0 3045.0 3045.0 131
1997-02-03 09:25:00 3044.5 3044.5 3043.5 3043.5 138
1997-02-03 09:26:00 3043.5 3043.5 3043.5 3043.5 6
1997-02-03 09:27:00 3043.5 3043.5 3043.0 3043.0 56
1997-02-03 09:28:00 3043.0 3044.0 3043.0 3044.0 32
1997-02-03 09:29:00 3044.5 3044.5 3044.5 3044.5 63
1997-02-03 09:30:00 3045.0 3045.0 3045.0 3045.0 28
1997-02-03 09:31:00 3045.0 3045.5 3045.0 3045.5 75
a915 = exm.HIGH.at_time("09:15:00")
a930 = exm.HIGH.at_time("09:30:00")
print a915
DATE_TIME
1997-02-03 09:15:00 3044.0
Name: HIGH, dtype: float64
print a930
DATE_TIME
1997-02-03 09:30:00 3045.0
Name: HIGH, dtype: float64
如果要對兩個 Series(列)做減法,它們必須具有相同的索引(index);否則由於索引無法對齊,結果會是 NaN:
print a915 - a930
DATE_TIME
1997-02-03 09:15:00 NaN
1997-02-03 09:30:00 NaN
Name: HIGH, dtype: float64
如果您只需要對 HIGH 列中的值做減法,則可以通過 values 將 Series(列)轉換為 numpy 陣列:
print a915.values - a930.values
[-1.]
但是,如果你需要添加新列 sub13,就需要把 Series a930 的索引改成 a915 的索引。然後就可以相減,結果會出現在 a915 的索引 1997-02-03 09:15:00 處,其他行的值則為缺失值 NaN:
print a915
DATE_TIME
1997-02-03 09:15:00 3044.0
Name: HIGH, dtype: float64
print pd.Series(a930.values, index=a915.index)
DATE_TIME
1997-02-03 09:15:00 3045.0
dtype: float64
exm['sub13'] = a915 - pd.Series(a930.values, index=a915.index)
print exm
OPEN HIGH LOW CLOSE VOLUME sub13
DATE_TIME
1997-02-03 09:04:00 3046.0 3048.5 3046.0 3047.5 505 NaN
1997-02-03 09:05:00 3047.0 3048.0 3046.0 3047.0 162 NaN
1997-02-03 09:06:00 3047.5 3048.0 3047.0 3047.5 98 NaN
1997-02-03 09:07:00 3047.5 3047.5 3047.0 3047.5 228 NaN
1997-02-03 09:08:00 3048.0 3048.0 3047.5 3048.0 136 NaN
1997-02-03 09:09:00 3048.0 3048.0 3046.5 3046.5 174 NaN
1997-02-03 09:10:00 3046.5 3046.5 3045.0 3045.0 134 NaN
1997-02-03 09:11:00 3045.5 3046.0 3044.0 3045.0 43 NaN
1997-02-03 09:12:00 3045.0 3045.5 3045.0 3045.0 214 NaN
1997-02-03 09:13:00 3045.5 3045.5 3045.5 3045.5 8 NaN
1997-02-03 09:14:00 3045.5 3046.0 3044.5 3044.5 152 NaN
1997-02-03 09:15:00 3044.0 3044.0 3042.5 3042.5 126 -1.0
1997-02-03 09:16:00 3043.5 3043.5 3043.0 3043.0 128 NaN
1997-02-03 09:17:00 3042.5 3043.5 3042.5 3043.5 23 NaN
1997-02-03 09:18:00 3043.5 3044.5 3043.0 3044.0 51 NaN
1997-02-03 09:19:00 3044.5 3044.5 3043.0 3043.0 18 NaN
1997-02-03 09:20:00 3043.0 3045.0 3043.0 3045.0 23 NaN
1997-02-03 09:21:00 3045.0 3045.0 3044.5 3045.0 51 NaN
1997-02-03 09:22:00 3045.0 3045.0 3045.0 3045.0 47 NaN
1997-02-03 09:23:00 3045.5 3046.0 3045.0 3045.0 77 NaN
1997-02-03 09:24:00 3045.0 3045.0 3045.0 3045.0 131 NaN
1997-02-03 09:25:00 3044.5 3044.5 3043.5 3043.5 138 NaN
1997-02-03 09:26:00 3043.5 3043.5 3043.5 3043.5 6 NaN
1997-02-03 09:27:00 3043.5 3043.5 3043.0 3043.0 56 NaN
1997-02-03 09:28:00 3043.0 3044.0 3043.0 3044.0 32 NaN
1997-02-03 09:29:00 3044.5 3044.5 3044.5 3044.5 63 NaN
1997-02-03 09:30:00 3045.0 3045.0 3045.0 3045.0 28 NaN
1997-02-03 09:31:00 3045.0 3045.5 3045.0 3045.5 75 NaN
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.