I've seen several posts with the same error, but I'm sorry, I still can't properly solve my issue.
Here is my code from a.py file:
def get_sum(self, data, list_of_items):
# I coded this return line, which worked according to one of the cells of the .ipynb file
# Returns an integer summing up all of the 1s in a given column (or set of columns)
return data.iloc[:, list_of_items].all(axis = 'columns').sum()
def get_list(self, data):
basket = [[i] for i in data.columns]
product_list = []
for item in basket:
# I coded these two lines, which I am unable to test due to the error
if self.get_sum(data, [item]) >= 5:
product_list.append(item)
return product_list
Which will be accessed by this cell in an.ipynb:
basket_list = groceries.get_list(df)
print(basket_list)
# expected to be something like [0, 2]. In the case of the given sample, [1]
The error shows up in the first line of the.ipynb cell, but that cannot be as I only changed the.py file (both files were supplied). That being said, I don't know where the error really is because it doesn't point towards anything I've written. If someone could at least drop a hint to what I did wrong, that would be greatly appreciated.
Edit: Here is a sample of the data:
0 1 2
0 0 0 1
1 1 1 1
2 1 1 0
3 0 1 0
4 1 1 1
5 1 1 0
And here is the full error traceback,
ValueError Traceback (most recent call last)
<ipython-input-142-6c2c13d12cb0> in <module>()
----> 1 frequent_itemsets = rule_miner.get_frequent_itemsets(syn_df)
2 print(frequent_itemsets)
/content/rule_miner.py in get_frequent_itemsets(self, data)
121 # this class.
122
--> 123 if self.get_support(data, [itemset]) >= self.support_t:
124 new_itemsets.append(itemset)
125
/content/rule_miner.py in get_support(self, data, itemset)
28 # function.
29
---> 30 return data.iloc[:, itemset].all(axis = 'columns').sum()
31
32 def merge_itemsets(self, itemsets):
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in __getitem__(self, key)
871 # AttributeError for IntervalTree get_value
872 pass
--> 873 return self._getitem_tuple(key)
874 else:
875 # we by definition only have the 0th axis
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
1447 pass
1448
-> 1449 return self._getitem_tuple_same_dim(tup)
1450
1451 def _get_list_axis(self, key, axis: int):
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _getitem_tuple_same_dim(self, tup)
748 continue
749
--> 750 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
751 # We should never have retval.ndim < self.ndim, as that should
752 # be handled by the _getitem_lowerdim call above.
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1485 # a list of integers
1486 elif is_list_like_indexer(key):
-> 1487 return self._get_list_axis(key, axis=axis)
1488
1489 # a single integer
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _get_list_axis(self, key, axis)
1467 """
1468 try:
-> 1469 return self.obj._take_with_is_copy(key, axis=axis)
1470 except IndexError as err:
1471 # re-raise with different error message
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in _take_with_is_copy(self, indices, axis)
3361 See the docstring of `take` for full explanation of the parameters.
3362 """
-> 3363 result = self.take(indices=indices, axis=axis)
3364 # Maybe set copy if we didn't actually change the index.
3365 if not result._get_axis(axis).equals(self._get_axis(axis)):
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in take(self, indices, axis, is_copy, **kwargs)
3349
3350 new_data = self._mgr.take(
-> 3351 indices, axis=self._get_block_manager_axis(axis), verify=True
3352 )
3353 return self._constructor(new_data).__finalize__(self, method="take")
/usr/local/lib/python3.7/dist-packages/pandas/core/internals/managers.py in take(self, indexer, axis, verify, convert)
1455 new_labels = self.axes[axis].take(indexer)
1456 return self.reindex_indexer(
-> 1457 new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
1458 )
1459
/usr/local/lib/python3.7/dist-packages/pandas/core/internals/managers.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy, consolidate)
1289
1290 if axis == 0:
-> 1291 new_blocks = self._slice_take_blocks_ax0(indexer, fill_value=fill_value)
1292 else:
1293 new_blocks = [
/usr/local/lib/python3.7/dist-packages/pandas/core/internals/managers.py in _slice_take_blocks_ax0(self, slice_or_indexer, fill_value, only_slice)
1369 else:
1370 blknos = algos.take_1d(
-> 1371 self.blknos, slobj, fill_value=-1, allow_fill=allow_fill
1372 )
1373 blklocs = algos.take_1d(
/usr/local/lib/python3.7/dist-packages/pandas/core/algorithms.py in take_nd(arr, indexer, axis, out, fill_value, allow_fill)
1735 arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info
1736 )
-> 1737 func(arr, indexer, out, fill_value)
1738
1739 if flip_order:
pandas/_libs/algos_take_helper.pxi in pandas._libs.algos.take_1d_int64_int64()
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
No need for a complex custom function, you can use pandas vector functions for that:
get the name of columns with at least five 1:
>>> df.columns[df.eq(1).sum().ge(5)]
Index(['1'], dtype='object')
filter those columns:
df.loc[:, df.eq(1).sum().ge(5)]
output:
1
0 0
1 1
2 1
3 1
4 1
5 1
How it works:
(df.eq(1) # values equal to 1 -> True
.sum() # count number of True
.ge(5) # True if sum ≥ 5
)
loc
used in place of iloc
and wrong list passed to get_sum
def get_sum(data, list_of_items):
# I coded this return line, which worked according to one of the cells of the .ipynb file
return data.loc[:, list_of_items].all(axis = 'columns').sum()
def get_list(data):
product_list = []
for item in df.columns:
# I coded these two lines, which I am unable to test due to the error
if get_sum(data, [item]) >= 5:
product_list.append(item)
return product_list
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.