KeyError when I try to plot

Question

I get a KeyError whenever I try to perform operations on the `u` object. I have tried many times to plot `u` (the last statement below).

import pandas as pd
users =pd.read_csv('./users.csv',sep=',',parse_dates=[30])
users.head()
users.columns.get_loc("WhenCreated")
users['Month'] = users['WhenCreated'].dt.month
users.head()
UsersCreatedin2017=users['WhenCreated'] > '2017-01-01'
users2017=users[UsersCreatedin2017].copy()
users2017[['Month','UserPrincipalName']].groupby('Month').count()
u=users2017[['Month','UserPrincipalName']].groupby('Month').count().copy()
u.plot(kind='line', x='Month', y='UserPrincipalName')

which gives the following error:

KeyError                                  Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2392             try:
-> 2393                 return self._engine.get_loc(key)
   2394             except KeyError:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)()

KeyError: 'Month'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-7-841cfe522e84> in <module>()
     13 #u.sort_values(by='Month', ascending=True)
     14 #u.plot.line('Month','UserPrincipalName')
---> 15 u.plot(kind='line', x='Month', y='UserPrincipalName')
     16 
     17 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2618                           fontsize=fontsize, colormap=colormap, table=table,
   2619                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2620                           sort_columns=sort_columns, **kwds)
   2621     __call__.__doc__ = plot_frame.__doc__
   2622 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\plotting\_core.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   1855                  yerr=yerr, xerr=xerr,
   1856                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 1857                  **kwds)
   1858 
   1859 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   1660                 if is_integer(x) and not data.columns.holds_integer():
   1661                     x = data.columns[x]
-> 1662                 data = data.set_index(x)
   1663 
   1664             if y is not None:

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
   2926                 names.append(None)
   2927             else:
-> 2928                 level = frame[col]._values
   2929                 names.append(col)
   2930                 if drop:

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2060             return self._getitem_multilevel(key)
   2061         else:
-> 2062             return self._getitem_column(key)
   2063 
   2064     def _getitem_column(self, key):

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
   2067         # get column
   2068         if self.columns.is_unique:
-> 2069             return self._get_item_cache(key)
   2070 
   2071         # duplicate columns & possible reduce dimensionality

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
   1532         res = cache.get(item)
   1533         if res is None:
-> 1534             values = self._data.get(item)
   1535             res = self._box_item_values(item, values)
   1536             cache[item] = res

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
   3588 
   3589             if not isnull(item):
-> 3590                 loc = self.items.get_loc(item)
   3591             else:
   3592                 indexer = np.arange(len(self.items))[isnull(self.items)]

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2393                 return self._engine.get_loc(key)
   2394             except KeyError:
-> 2395                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2396 
   2397         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)()

KeyError: 'Month'

I also tried u.plot(), but I got the following output

The data I am trying to plot is:

       UserPrincipalName
Month
1                     61
2                     53
3                     44
4                     31
5                     34
6                     21
7                     28
8                    196
9                     42

Answer 1

You want to use as_index=False in the groupby (or reset_index after):

In [11]: df = pd.DataFrame([[1, 2], [1, 3], [2, 4]], columns=["A", "B"])

In [12]: df
Out[12]:
   A  B
0  1  2
1  1  3
2  2  4

In [13]: df.groupby("A").count()
Out[13]:
   B
A
1  2
2  1

In [14]: df.groupby("A", as_index=False).count()
Out[14]:
   A  B
0  1  2
1  2  1

As you can see, without as_index=False, A ends up in the index rather than in a column; that is why the plot method can't find it (and raises a KeyError).

KeyError when I try to plot

Question

1 answer

solution1
0 ACCEPTED 2017-10-19 19:53:51

KeyError when I try to plot

Question

1 answer

solution1 0 ACCEPTED 2017-10-19 19:53:51

solution1
0 ACCEPTED 2017-10-19 19:53:51