I have a time-series DataFrame and I want to find the stretches of rows where the values in one column stay constant while the values in the other column stay constant as well. Let's assume this is the DataFrame:
# Sample data: 48 readings taken every 15 minutes, laid out one hour per row.
temp = [
    27.18, 27.18, 27.18, 27.18,  # 12:00
    20.82, 20.82, 20.82, 20.82,  # 13:00
    15.18, 15.18, 15.18, 15.18,  # 14:00
    15.24, 15.24, 15.24, 15.24,  # 15:00
    20.4, 20.4, 20.4, 20.4,      # 16:00
    21.48, 21.48, 21.48, 21.48,  # 17:00
    27.66, 27.66, 27.66, 27.66,  # 18:00
    27.9, 27.9, 27.9, 27.9,      # 19:00
    27.9, 27.9, 27.9, 27.9,      # 20:00
    27.84, 27.84, 27.84, 27.84,  # 21:00
    27.84, 27.84, 27.84, 27.84,  # 22:00
    21.72, 21.72, 21.72, 21.72,  # 23:00
]
heat = [
    11.94, 12.0, 10.56, 6.0,     # 12:00
    6.0, 6.0, 6.0, 6.0,          # 13:00
    6.0, 6.0, 6.0, 6.0,          # 14:00
    6.0, 6.78, 9.0, 9.0,         # 15:00
    9.0, 9.0, 9.0, 9.0,          # 16:00
    9.0, 11.58, 12.0, 11.94,     # 17:00
    11.94, 12.0, 12.0, 11.94,    # 18:00
    11.94, 12.0, 11.94, 12.0,    # 19:00
    11.94, 12.0, 12.0, 11.94,    # 20:00
    12.0, 11.94, 11.94, 12.0,    # 21:00
    11.94, 9.48, 9.0, 9.0,       # 22:00
    9.0, 9.0, 8.94, 9.0,         # 23:00
]
# Timestamps are kept as plain strings, 2016-01-29 12:00:00 .. 23:45:00.
date = [
    f"2016-01-29 {hour:02d}:{minute:02d}:00"
    for hour in range(12, 24)
    for minute in (0, 15, 30, 45)
]

# Two value columns, indexed by the timestamp strings; the index is named
# 'date' and no 'date' column remains in the frame.
df = pd.DataFrame(
    {'temp': temp, 'heat': heat},
    index=pd.Index(date, name='date'),
)
The plot looks like this :
I need to find the regions marked between two yellow lines, where the values are almost constant, and leave out the ramp sections. I have been using the shift method for this, but it does not work well. Any idea how to achieve this? Thanks in advance. The shift approach I am trying: (df.heat != df.heat.shift(1)).cumsum()
desired output:
The second try:
# Rebuild the frame with the timestamps parsed into a datetime index.
df = pd.DataFrame({"temp": temp, "heat": heat}, index=pd.to_datetime(date))

# A row counts as "constant" when neither column moved by this much or more
# relative to the previous row.
thtemp = 0.5  # threshold
thheat = 0.5
temp_flat = df["temp"].diff().abs() < thtemp
heat_flat = df["heat"].diff().abs() < thheat
crit = temp_flat & heat_flat

# 1-based row numbers. A row that fails the criterion keeps its own number;
# forward-filling then hands that number to the constant rows that follow,
# so every segment shares the label of the row that opened it.
rng = np.arange(1, len(df) + 1)
segment_start = pd.Series(rng, index=df.index).where(~crit)
df["const"] = segment_start.ffill()
temp heat const
2016-01-29 12:00:00 27.18 11.94 1.0
2016-01-29 12:15:00 27.18 12.00 1.0
2016-01-29 12:30:00 27.18 10.56 3.0
2016-01-29 12:45:00 27.18 6.00 4.0
2016-01-29 13:00:00 20.82 6.00 5.0
2016-01-29 13:15:00 20.82 6.00 5.0
2016-01-29 13:30:00 20.82 6.00 5.0
2016-01-29 13:45:00 20.82 6.00 5.0
2016-01-29 14:00:00 15.18 6.00 9.0
2016-01-29 14:15:00 15.18 6.00 9.0
2016-01-29 14:30:00 15.18 6.00 9.0
2016-01-29 14:45:00 15.18 6.00 9.0
2016-01-29 15:00:00 15.24 6.00 9.0
...
# Group rows by their segment label and report the first and last timestamp
# of every segment that spans more than a single row.
G = df.groupby(df["const"])
for _, segment in G:
    if len(segment) <= 1:
        continue
    start, stop = segment.index[0], segment.index[-1]
    print(f"\t{start}\n\t{stop}\n")
2016-01-29 12:00:00
2016-01-29 12:15:00
2016-01-29 13:00:00
2016-01-29 13:45:00
2016-01-29 14:00:00
2016-01-29 15:00:00
2016-01-29 15:30:00
2016-01-29 15:45:00
2016-01-29 16:00:00
2016-01-29 16:45:00
2016-01-29 17:15:00
2016-01-29 17:45:00
2016-01-29 18:00:00
2016-01-29 22:00:00
2016-01-29 22:15:00
2016-01-29 22:45:00
2016-01-29 23:00:00
2016-01-29 23:45:00
Plotting:
# y-level at which the segment markers are drawn.
# Alternative: the midpoint of the two column means,
# (df.temp.mean() + df.heat.mean()) / 2
vrep = 13

# One orange horizontal bar per multi-row segment. Replacing the segment label
# with vrep and reindexing onto the full index leaves NaN outside the segment,
# so the line is only drawn across the segment itself.
for key, grp in G:
    if len(grp) <= 1:
        continue
    marker = grp["const"].replace(key, vrep).reindex(df.index)
    plt.plot(marker.index, marker, color="orange", linewidth=2)

# The two measured series, drawn once each so the legend has one entry apiece.
for column, colour in (("temp", "darkgreen"), ("heat", "darkblue")):
    plt.plot(df.index, df[column], color=colour, label=column)

plt.legend(loc="best")
plt.grid()
plt.show()
EDIT: This was the first solution but didn't provide all the constant segments:
# A row counts as "constant" when neither column moved by this much or more
# relative to the previous row.
thtemp = 0.5  # threshold
thheat = 0.5
temp_flat = df.temp.diff().abs().lt(thtemp)
heat_flat = df.heat.diff().abs().lt(thheat)
crit = temp_flat & heat_flat

# 1 on constant rows, NaN everywhere else.
flags = crit.astype(int)
df["const"] = flags.replace(0, np.nan)

# List the times:
rng = np.arange(len(df))  # serves as key for groupby
# Rows that fail the criterion are masked out of the key again by the final
# where(), so the row that opens a segment is not part of its group; the
# printed ranges below therefore start one sample late.
segment_key = df.const.replace({1: np.nan, np.nan: rng}).ffill().where(crit, np.nan)
G = df.groupby(segment_key)
for stamps in G.groups.values():
    first, last = stamps[0], stamps[-1]
    if first == last:
        continue
    print(f"{first}\n{last}\n")
2016-01-29 13:15:00
2016-01-29 13:45:00
2016-01-29 14:15:00
2016-01-29 15:00:00
2016-01-29 16:15:00
2016-01-29 16:45:00
2016-01-29 17:30:00
2016-01-29 17:45:00
2016-01-29 18:15:00
2016-01-29 22:00:00
2016-01-29 22:30:00
2016-01-29 22:45:00
2016-01-29 23:15:00
2016-01-29 23:45:00
Is this plot mask the one you are looking for:
# Keep the rows whose temp value AND heat value have each already appeared
# earlier in their column, then plot what is left.
repeated_temp = df["temp"].duplicated()
repeated_heat = df["heat"].duplicated()
df.loc[repeated_temp & repeated_heat].plot()
Expanding on the currently accepted answer: first, create your DataFrame:
import pandas as pd
# Sample data: 48 readings taken every 15 minutes on 2016-01-29, 12:00-23:45.
temp = [27.18, 27.18, 27.18, 27.18, 20.82, 20.82, 20.82, 20.82, 15.18,
        15.18, 15.18, 15.18, 15.24, 15.24, 15.24, 15.24, 20.4 , 20.4 ,
        20.4 , 20.4 , 21.48, 21.48, 21.48, 21.48, 27.66, 27.66, 27.66,
        27.66, 27.9 , 27.9 , 27.9 , 27.9 , 27.9 , 27.9 , 27.9 , 27.9 ,
        27.84, 27.84, 27.84, 27.84, 27.84, 27.84, 27.84, 27.84, 21.72,
        21.72, 21.72, 21.72]
heat = [11.94, 12. , 10.56, 6. , 6. , 6. , 6. , 6. , 6. ,
        6. , 6. , 6. , 6. , 6.78, 9. , 9. , 9. , 9. ,
        9. , 9. , 9. , 11.58, 12. , 11.94, 11.94, 12. , 12. ,
        11.94, 11.94, 12. , 11.94, 12. , 11.94, 12. , 12. , 11.94,
        12. , 11.94, 11.94, 12. , 11.94, 9.48, 9. , 9. , 9. ,
        9. , 8.94, 9. ]
date = ['2016-01-29 12:00:00', '2016-01-29 12:15:00',
        '2016-01-29 12:30:00', '2016-01-29 12:45:00',
        '2016-01-29 13:00:00', '2016-01-29 13:15:00',
        '2016-01-29 13:30:00', '2016-01-29 13:45:00',
        '2016-01-29 14:00:00', '2016-01-29 14:15:00',
        '2016-01-29 14:30:00', '2016-01-29 14:45:00',
        '2016-01-29 15:00:00', '2016-01-29 15:15:00',
        '2016-01-29 15:30:00', '2016-01-29 15:45:00',
        '2016-01-29 16:00:00', '2016-01-29 16:15:00',
        '2016-01-29 16:30:00', '2016-01-29 16:45:00',
        '2016-01-29 17:00:00', '2016-01-29 17:15:00',
        '2016-01-29 17:30:00', '2016-01-29 17:45:00',
        '2016-01-29 18:00:00', '2016-01-29 18:15:00',
        '2016-01-29 18:30:00', '2016-01-29 18:45:00',
        '2016-01-29 19:00:00', '2016-01-29 19:15:00',
        '2016-01-29 19:30:00', '2016-01-29 19:45:00',
        '2016-01-29 20:00:00', '2016-01-29 20:15:00',
        '2016-01-29 20:30:00', '2016-01-29 20:45:00',
        '2016-01-29 21:00:00', '2016-01-29 21:15:00',
        '2016-01-29 21:30:00', '2016-01-29 21:45:00',
        '2016-01-29 22:00:00', '2016-01-29 22:15:00',
        '2016-01-29 22:30:00', '2016-01-29 22:45:00',
        '2016-01-29 23:00:00', '2016-01-29 23:15:00',
        '2016-01-29 23:30:00', '2016-01-29 23:45:00']

# Build the frame directly on a DatetimeIndex named 'date'.
# `infer_datetime_format=True` is deprecated since pandas 2.0 and only emits a
# warning there, so the timestamp layout is spelled out explicitly instead;
# an explicit format also makes parsing strict about malformed entries.
df = pd.DataFrame(
    {'temp': temp, 'heat': heat},
    index=pd.to_datetime(date, format='%Y-%m-%d %H:%M:%S').rename('date'),
)
Then create a boolean column that is True where both values are nearly constant relative to the previous row:
# A row counts as "constant" when neither column moved by this much or more
# relative to the previous row.
thtemp = 0.5  # threshold
thheat = 0.5

temp_step = df["temp"].diff().abs()
heat_step = df["heat"].diff().abs()
# The first row has no predecessor, so its step is NaN and it is flagged False.
df["const"] = (temp_step < thtemp) & (heat_step < thheat)
df.head()
temp heat const
date
2016-01-29 12:00:00 27.18 11.94 False
2016-01-29 12:15:00 27.18 12.00 True
2016-01-29 12:30:00 27.18 10.56 False
2016-01-29 12:45:00 27.18 6.00 False
2016-01-29 13:00:00 20.82 6.00 False
Finally, plot both series and shade the regions where `const` is True:
import matplotlib.pyplot as plt
import numpy as np
# Draw both series on a single axes.
fig, ax = plt.subplots()
for column in ("temp", "heat"):
    ax.plot(df.index, df[column])

# Shade the time spans flagged as constant. With the x-axis transform the
# 0..1 bounds are axes fractions, so the band covers the full plot height
# regardless of the data range.
ax.fill_between(
    df.index,
    0,
    1,
    where=df["const"],
    alpha=0.1,
    transform=ax.get_xaxis_transform(),
)

# Slant the date tick labels so they do not overlap.
fig.autofmt_xdate()
plt.show()
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.