I have two DataFrames, all_df and df_good_sample. When I apply one_hot_encoding to each of them separately, everything works fine. But when I combine the two DataFrames, an error is raised.
My implementation of one_hot_encoding is:
def one_hot_encoding(register_info, fea):
    """One-hot encode the selection ``register_info[fea]``.

    Args:
        register_info: pandas DataFrame holding the raw features.
        fea: label used to select the data to encode via
            ``register_info[fea]``.

    Returns:
        A tuple ``(X, X_df)`` where ``X_df`` is the DataFrame returned by
        ``pd.get_dummies`` and ``X`` is its underlying array (``X_df.values``).
    """
    # Exactly one selection is encoded per call, so there is nothing to
    # accumulate or concatenate: the dummy frame is the final result.
    X_df = pd.get_dummies(register_info[fea])
    X = X_df.values
    return X, X_df
When I call it on all_df, I get the expected one-hot encoding result for all_df.
The same works fine for df_good_sample.
But when I call it on their combination, I get:
NotImplementedError: > 1 ndim Categorical are not supported at this time
Detailed error message:
NotImplementedError Traceback (most recent call last)
<ipython-input-325-54e4d184cdb1> in <module>()
2 record_column_length = []
3 for i in range(0, len(category_feature)):
----> 4 category_df[i] = one_hot_encoding(all_df.append(df_good_sample).replace(0, np.nan), category_feature[i])[1]
5 record_column_length.append(len(category_df[i].columns))
6 concat_group = pd.concat(category_df, ignore_index=True, axis=1)
<ipython-input-312-82e782b3856b> in one_hot_encoding(register_info, fea)
16 # print fea
17 if flag:
---> 18 X_df = pd.get_dummies(register_info[fea])
19 fea_group_ids = [fea_g_id for i in range(X_df.shape[1])]
20 flag = False
/home/ubuntu/app/anaconda2/lib/python2.7/site-packages/pandas/core/reshape/reshape.pyc in get_dummies(data, prefix, prefix_sep, dummy_na, columns, sparse, drop_first)
1211 else:
1212 result = _get_dummies_1d(data, prefix, prefix_sep, dummy_na,
-> 1213 sparse=sparse, drop_first=drop_first)
1214 return result
1215
/home/ubuntu/app/anaconda2/lib/python2.7/site-packages/pandas/core/reshape/reshape.pyc in _get_dummies_1d(data, prefix, prefix_sep, dummy_na, sparse, drop_first)
1218 sparse=False, drop_first=False):
1219 # Series avoids inconsistent NaN handling
-> 1220 codes, levels = _factorize_from_iterable(Series(data))
1221
1222 def get_empty_Frame(data, sparse):
/home/ubuntu/app/anaconda2/lib/python2.7/site-packages/pandas/core/categorical.pyc in _factorize_from_iterable(values)
2142 codes = values.codes
2143 else:
-> 2144 cat = Categorical(values, ordered=True)
2145 categories = cat.categories
2146 codes = cat.codes
/home/ubuntu/app/anaconda2/lib/python2.7/site-packages/pandas/core/categorical.pyc in __init__(self, values, categories, ordered, fastpath)
294
295 # FIXME
--> 296 raise NotImplementedError("> 1 ndim Categorical are not "
297 "supported at this time")
298
NotImplementedError: > 1 ndim Categorical are not supported at this time
I hope someone can help me with this!
I got this error when trying to get dummies on a column with Unicode characters in its name or in the actual values of its records. I replaced the column names and their values with plain placeholder strings, and that solved the problem:
import pandas as pd

# Replace the column names with 'col1', 'col2' and so forth.
# The labels are assigned by position in one step: renaming one column at a
# time can collide with an existing 'colN' label (and then rename both
# columns), and string-concatenating the old label fails for non-string
# column labels.
df.columns = ['col' + str(colnum) for colnum in range(1, len(df.columns) + 1)]

# Replace the column values with 'val0', 'val1' and so forth:
for colname in list(df):
    f_values = df[colname].unique().tolist()
    mapping = {value: 'val' + str(i) for i, value in enumerate(f_values)}
    df.replace({colname: mapping}, inplace=True)

# Now running get_dummies will work.
df = pd.get_dummies(df)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.