I have a dataframe like this *EDITED
StartDate EndDate Company Location
2019-01-15 2019-01-31 1.0 121.0
2019-02-01 2020-03-10 1.0 136.0
2006-10-02 2020-03-10 2.0 136.0
2003-07-31 2020-03-10 2.0 321.0
2010-11-03 2020-03-10 3.0 322.0
2013-02-01 2017-02-07 4.0 375.0
2017-02-08 2019-01-14 4.0 375.0
2019-01-15 2019-04-29 4.0 375.0
2019-04-30 2020-03-10 4.0 375.0
As described in the linked question, "Pandas: decompress date range to individual dates", I want to decompress each date range into a single date field, with one row per date. I followed the solution there step by step. However, when I do the groupby with resample, I get this error: ValueError: cannot reindex a non-unique index with a method or limit
Why does this error occur?
To be clearer, this is my code (the index of the original dataframe is just the normal index 1, 2, 3, ...):
# Load the company/location history from a parquet file.
# NOTE(review): read_parquet is called without the `pd.` prefix -- presumably
# it was imported by name elsewhere; confirm.
df=read_parquet('company_location.parquet')
# Keep only the four columns used below.
df=df[['COMPANY','STARTDATE','ENDDATE','LOCATION']]
# Parse both date columns as datetimes.
df['STARTDATE']=pd.to_datetime(df['STARTDATE'])
df['ENDDATE']=pd.to_datetime(df['ENDDATE'])
# Drop every row that has a missing value in any column.
df=df.dropna(axis=0,how='any')
# Number the rows 0..n-1 so the start and end of one range share a key.
df['rows']=range(len(df))
# One frame of range starts and one of range ends, both with a common DATE column.
starts=df[['COMPANY','STARTDATE','LOCATION','rows']].rename(columns={'STARTDATE':'DATE'})
ends=df[['COMPANY','ENDDATE','LOCATION','rows']].rename(columns={'ENDDATE':'DATE'})
# Stack them: each original row now appears twice, once per endpoint.
df_decomp=pd.concat([starts,ends])
# Add 'rows' as a second index level next to the original index.
df_decomp=df_decomp.set_index('rows', append=True)
# NOTE: sort_index() returns a new frame; the result is not assigned here,
# so df_decomp itself is left unchanged by this line.
df_decomp.sort_index()
Everything is fine up to here.
Then, when I run this line, I get an error:
# Group by both index levels (original index, 'rows'), then within each group
# index by DATE, upsample to daily frequency and forward-fill the other columns.
# NOTE(review): the upsample reindexes with a fill method, which raises
# ValueError when a group's DATE index is not unique -- presumably this happens
# for rows whose STARTDATE equals ENDDATE; confirm against the data.
df_decomp=df_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
The error is: (this is Jupyter Notebook)
ValueError Traceback (most recent call last)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
688 try:
--> 689 result = self._python_apply_general(f)
690 except Exception:
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
706 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 707 self.axis)
708
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
189 group_axes = _get_axes(group)
--> 190 res = f(group)
191 if not _is_indexed_like(res, group_axes):
<ipython-input-29-e5d0ce53cd1c> in <lambda>(x)
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in fillna(self, method, limit)
759 """
--> 760 return self._upsample(method, limit=limit)
761
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in _upsample(self, method, limit, fill_value)
1072 result = obj.reindex(res_index, method=method,
-> 1073 limit=limit, fill_value=fill_value)
1074
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
196 def wrapper(*args, **kwargs):
--> 197 return func(*args, **kwargs)
198
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
3808 kwargs.pop('labels', None)
-> 3809 return super(DataFrame, self).reindex(**kwargs)
3810
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4355 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356 fill_value, copy).__finalize__(self)
4357
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3740 frame = frame._reindex_index(index, method, copy, level,
-> 3741 fill_value, limit, tolerance)
3742
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3748 level=level, limit=limit,
-> 3749 tolerance=tolerance)
3750 return self._reindex_with_indexers({0: [new_index, indexer]},
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
3137 if method is not None or limit is not None:
-> 3138 raise ValueError("cannot reindex a non-unique index "
3139 "with a method or limit")
ValueError: cannot reindex a non-unique index with a method or limit
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-29-e5d0ce53cd1c> in <module>()
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
699
700 with _group_selection_context(self):
--> 701 return self._python_apply_general(f)
702
703 return result
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
705 def _python_apply_general(self, f):
706 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 707 self.axis)
708
709 return self._wrap_applied_output(
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
188 # group might be modified
189 group_axes = _get_axes(group)
--> 190 res = f(group)
191 if not _is_indexed_like(res, group_axes):
192 mutated = True
<ipython-input-29-e5d0ce53cd1c> in <lambda>(x)
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in fillna(self, method, limit)
758 2018-01-01 02:00:00 6.0 5
759 """
--> 760 return self._upsample(method, limit=limit)
761
762 @Appender(_shared_docs['interpolate'] % _shared_docs_kwargs)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in _upsample(self, method, limit, fill_value)
1071 else:
1072 result = obj.reindex(res_index, method=method,
-> 1073 limit=limit, fill_value=fill_value)
1074
1075 result = self._apply_loffset(result)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
195 @wraps(func)
196 def wrapper(*args, **kwargs):
--> 197 return func(*args, **kwargs)
198
199 if not PY2:
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
3807 kwargs.pop('axis', None)
3808 kwargs.pop('labels', None)
-> 3809 return super(DataFrame, self).reindex(**kwargs)
3810
3811 @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4354 # perform the reindex on the axes
4355 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356 fill_value, copy).__finalize__(self)
4357
4358 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3739 if index is not None:
3740 frame = frame._reindex_index(index, method, copy, level,
-> 3741 fill_value, limit, tolerance)
3742
3743 return frame
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3747 new_index, indexer = self.index.reindex(new_index, method=method,
3748 level=level, limit=limit,
-> 3749 tolerance=tolerance)
3750 return self._reindex_with_indexers({0: [new_index, indexer]},
3751 copy=copy, fill_value=fill_value,
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
3136 else:
3137 if method is not None or limit is not None:
-> 3138 raise ValueError("cannot reindex a non-unique index "
3139 "with a method or limit")
3140 indexer, missing = self.get_indexer_non_unique(target)
ValueError: cannot reindex a non-unique index with a method or limit
I was able to follow "Pandas: decompress date range to individual dates" and did not get any errors on your data set. See the code below:
import pandas as pd

# Sample data from the question: one row per (company, location) date range.
df = pd.DataFrame(
    [
        ['2019-01-15', '2019-01-31', 'A', 121.0],
        ['2019-02-01', '2020-03-10', 'A', 136.0],
        ['2006-10-02', '2020-03-10', 'B', 136.0],
        ['2003-07-31', '2020-03-10', 'B', 321.0],
        ['2010-11-03', '2020-03-10', 'C', 322.0],
        ['2013-02-01', '2017-02-07', 'D', 375.0],
        ['2017-02-08', '2019-01-14', 'D', 375.0],
        ['2019-01-15', '2019-04-29', 'D', 375.0],
        ['2019-04-30', '2020-03-10', 'D', 375.0],
    ],
    columns=['StartDate', 'EndDate', 'Company', 'Location'],
)
df['StartDate'] = pd.to_datetime(df['StartDate'])
df['EndDate'] = pd.to_datetime(df['EndDate'])
df = df.set_index('Company')
# Number the rows so the start and the end of one range share a key.
df['row'] = range(len(df))
print(df)

# One frame of range starts and one of range ends, sharing a common 'Date' column.
starts = df[['StartDate', 'Location', 'row']].rename(columns={'StartDate': 'Date'})
ends = df[['EndDate', 'Location', 'row']].rename(columns={'EndDate': 'Date'})

# Stack them (each range now contributes two rows) and index by (Company, row).
df_decomp = pd.concat([starts, ends])
df_decomp = df_decomp.set_index('row', append=True)
# sort_index() returns a new frame, so the result has to be assigned.
df_decomp = df_decomp.sort_index()
print(df_decomp)

# For every (Company, row) pair, expand the start/end pair to one row per day.
# Resampler.ffill() replaces the deprecated Resampler.fillna(method='pad').
df_decomp = df_decomp.groupby(level=[0, 1]).apply(
    lambda x: x.set_index('Date').resample('D').ffill()
)
# The helper 'row' level is no longer needed once the ranges are expanded.
df_decomp = df_decomp.reset_index(level=1, drop=True)
print(df_decomp.loc['D'])
StartDate EndDate Company Location
0 1/15/2019 1/31/2019 A 121
1 2/1/2019 3/10/2020 A 136
2 10/2/2006 3/10/2020 B 136
3 7/31/2003 3/10/2020 B 321
4 11/3/2010 3/10/2020 C 322
5 2/7/2017 2/7/2017 D 375
6 2/8/2017 1/14/2019 D 375
7 1/15/2019 4/29/2019 D 375
8 4/30/2019 3/10/2020 D 375
I'm not sure what is going wrong, but copy the table above to your clipboard and run the code below:
import pandas as pd
# Build the DataFrame from the table text currently on the system clipboard.
df = pd.read_clipboard()
Then add these two lines to the code from the post you linked:
# Convert both date columns to datetime values.
df['StartDate'] = pd.to_datetime(df['StartDate'])
df['EndDate'] = pd.to_datetime(df['EndDate'])
Run the below, and it should work:
# Convert both date columns to datetime values.
df['StartDate'] = pd.to_datetime(df['StartDate'])
df['EndDate'] = pd.to_datetime(df['EndDate'])
# Number the rows so the start and the end of one range share a key.
df['row'] = range(len(df))

# One frame of range starts and one of range ends, sharing a common 'date' column.
starts = df[['StartDate', 'Location', 'Company', 'row']].rename(columns={'StartDate': 'date'})
ends = df[['EndDate', 'Location', 'Company', 'row']].rename(columns={'EndDate': 'date'})

# Stack them and drop exact duplicate rows: a range whose start equals its end
# would otherwise put the same date twice into its group, and the daily
# upsample below cannot fill a non-unique date index.
df_decomp = pd.concat([starts, ends]).drop_duplicates()
df_decomp = df_decomp.set_index('row', append=True)
# sort_index() returns a new frame, so the result has to be assigned.
df_decomp = df_decomp.sort_index()

# For every (original index, row) pair, expand the range to one row per day.
# Resampler.ffill() replaces the deprecated Resampler.fillna(method='pad').
df_decomp = df_decomp.groupby(level=[0, 1]).apply(
    lambda x: x.set_index('date').resample('D').ffill()
)
# The helper 'row' level is no longer needed once the ranges are expanded.
df_decomp = df_decomp.reset_index(level=1, drop=True)
df_decomp
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.