I'm trying to merge the two data frames below on "day", but the `days` column of the second frame holds timedelta values, which prevents the merge. My understanding is that I should be able to do something like print(df['days'].days)
and get the day count as a plain number, without the "days" suffix.
df1
import pandas as pd
from itertools import product
# Scaffold of expected observations: one row per (day, time_of_day) pair,
# repeated once for every id in `vec`.
# NOTE(review): `vec` is not defined in this snippet; presumably an iterable
# of participant ids -- confirm against the surrounding notebook.
# The first column is named 'day' (not 'date') so that it matches the
# ['pid', 'day'] keys used by the merge further down.
missing = pd.DataFrame(
    product(range(1, 31), range(1, 5)),
    columns=['day', 'time_of_day'],
)
missing = pd.concat(
    [missing.assign(pid=_id) for _id in vec],
    ignore_index=True,
)
# Store pid as strings. Bracket assignment is used because attribute-style
# assignment silently creates a plain attribute if the column is missing.
# NOTE(review): the other frame takes pid straight from `vec`; if those ids
# are not already strings the merge keys will differ in dtype -- confirm.
missing['pid'] = missing['pid'].astype(str)
df2
from timedelta import date
def random_dates(start, end, n=12):
    """Return ``n`` random whole-second timestamps drawn from ``[start, end)``.

    Args:
        start: Lower bound (inclusive). Must expose ``.value`` as nanoseconds
            since the epoch, as ``pd.Timestamp`` does.
        end: Upper bound (exclusive), same type as ``start``.
        n: How many timestamps to draw.

    Returns:
        The result of ``pd.to_datetime`` on ``n`` random epoch-second
        integers (a ``DatetimeIndex``).
    """
    # Imported locally because the surrounding snippet uses ``np`` without
    # ever importing numpy at module level.
    import numpy as np

    # Convert the nanosecond epoch values to whole seconds.
    start_u = start.value // 10**9
    end_u = end.value // 10**9
    # Request 64-bit integers explicitly: the bounds are around 1.4e9-1.5e9,
    # and the default integer type is platform dependent.
    seconds = np.random.randint(start_u, end_u, n, dtype=np.int64)
    return pd.to_datetime(seconds, unit='s')
# Sampling window for the random timestamps.
start = pd.to_datetime('2015-01-01')
end = pd.to_datetime('2018-01-01')

# One column of random timestamps; random_dates returns pd.to_datetime
# output, so the .dt accessor can be used on it directly.
df = pd.DataFrame({'datestamp': random_dates(start, end)})
df['date'] = df['datestamp'].dt.date
df['time'] = df['datestamp'].dt.time

# Repeat the sampled rows once for every id in `vec`.
# NOTE(review): `vec` is not defined in this snippet -- confirm its contents.
df = pd.concat([df.assign(pid=_id) for _id in vec], ignore_index=True)

# Latest date within each pid, broadcast back onto every row of that pid.
df['last'] = (
    df.sort_values('date')
    .groupby('pid')['date']
    .transform('last')
)
# Gap between each row's date and the latest date for its pid.
df['days'] = df['last'] - df['date']
df

# A Series has no `.days` attribute; the per-element day count lives behind
# the `.dt` accessor. pd.to_timedelta guarantees a timedelta64 dtype first,
# in case the subtraction above produced an object-dtype column.
pd.to_timedelta(df['days']).dt.days
merging
# Left join: keep every row of `missing` and attach the rows of `df` that
# share the same pid and day.
pd.merge(missing, df, on=['pid', 'day'], how='left')
So this converts the timedelta values to whole days as plain integers (no longer timedelta objects):
def get_custom_str_time(x):
    """Return the whole number of days in the timedelta ``x`` as a plain int.

    Args:
        x: A ``datetime.timedelta`` or ``pd.Timedelta``.

    Returns:
        The day count rounded toward negative infinity (so minus one hour
        gives -1), i.e. the floor of ``x.total_seconds() / 86400``.
    """
    # `.days` is already the floored day component, so no float division is
    # needed -- and neither is the `math` module, which the snippet called
    # without importing.
    return x.days
# Add the integer day count as a new leading 'day' column (position 0).
df.insert(
    loc=0,
    column='day',
    value=df['days'].apply(get_custom_str_time),
)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.