I get an error whenever I try to perform actions on U objects. I always get a KeyError. I have tried many times to plot U (last statement.)
import pandas as pd
users =pd.read_csv('./users.csv',sep=',',parse_dates=[30])
users.head()
users.columns.get_loc("WhenCreated")
users['Month'] = users['WhenCreated'].dt.month
users.head()
UsersCreatedin2017=users['WhenCreated'] > '2017-01-01'
users2017=users[UsersCreatedin2017].copy()
users2017[['Month','UserPrincipalName']].groupby('Month').count()
u=users2017[['Month','UserPrincipalName']].groupby('Month').count().copy()
u.plot(kind='line', x='Month', y='UserPrincipalName')
where I go the following error
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2392 try:
-> 2393 return self._engine.get_loc(key)
2394 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)()
KeyError: 'Month'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-7-841cfe522e84> in <module>()
13 #u.sort_values(by='Month', ascending=True)
14 #u.plot.line('Month','UserPrincipalName')
---> 15 u.plot(kind='line', x='Month', y='UserPrincipalName')
16
17
C:\ProgramData\Anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2618 fontsize=fontsize, colormap=colormap, table=table,
2619 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2620 sort_columns=sort_columns, **kwds)
2621 __call__.__doc__ = plot_frame.__doc__
2622
C:\ProgramData\Anaconda3\lib\site-packages\pandas\plotting\_core.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
1855 yerr=yerr, xerr=xerr,
1856 secondary_y=secondary_y, sort_columns=sort_columns,
-> 1857 **kwds)
1858
1859
C:\ProgramData\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _plot(data, x, y, subplots, ax, kind, **kwds)
1660 if is_integer(x) and not data.columns.holds_integer():
1661 x = data.columns[x]
-> 1662 data = data.set_index(x)
1663
1664 if y is not None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
2926 names.append(None)
2927 else:
-> 2928 level = frame[col]._values
2929 names.append(col)
2930 if drop:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2060 return self._getitem_multilevel(key)
2061 else:
-> 2062 return self._getitem_column(key)
2063
2064 def _getitem_column(self, key):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2067 # get column
2068 if self.columns.is_unique:
-> 2069 return self._get_item_cache(key)
2070
2071 # duplicate columns & possible reduce dimensionality
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1532 res = cache.get(item)
1533 if res is None:
-> 1534 values = self._data.get(item)
1535 res = self._box_item_values(item, values)
1536 cache[item] = res
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2393 return self._engine.get_loc(key)
2394 except KeyError:
-> 2395 return self._engine.get_loc(self._maybe_cast_indexer(key))
2396
2397 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)()
KeyError: 'Month'
I try to u.plot() but i got the following output
where data I am trying to plot is : UserPrincipalName Month
1 61 2 53 3 44 4 31 5 34 6 21 7 28 8 196 9 42
You want to use as_index=False in the groupby (or reset_index after):
In [11]: df = pd.DataFrame([[1, 2], [1, 3], [2, 4]], columns=["A", "B"])
In [12]: df
Out[12]:
A B
0 1 2
1 1 3
2 2 4
In [13]: df.groupby("A").count()
Out[13]:
B
A
1 2
2 1
In [14]: df.groupby("A", as_index=False).count()
Out[14]:
A B
0 1 2
1 2 1
As you can see without the as_index=False the A is in the index rather than a column, that's why the plot method can't find it (raises a KeyError).
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.