I have a dataframe that has over 200 columns of dummy variables:
Row1 Feature1 Feature2 Feature3 Feature4 Feature5
A 0 1 1 1 0
B 0 0 1 1 1
C 1 0 1 0 1
D 0 1 0 1 0
I want to iterate over the features and create 3 extra dataframes: df1 keeps only the first feature that equals 1 in each row and sets all later columns to 0, df2 keeps only the second feature that equals 1 and sets all earlier and later columns to 0, and so on.
I have written code to do this, but I figure there must be a better way. Please help me find a more efficient approach. Thank you!
Below is my code:
# For each frame hcitN, keep only the N-th feature (left to right) that equals
# 1 in every row and set every other feature column to 0.
#
# This replaces the nested iterrows()/.loc loops, which (a) wrote their results
# into hcit1 even while iterating hcit2..hcit4, and (b) touched one cell at a
# time. The mask below is computed for the whole frame in one pass.

# Dummy-variable columns handled by this script: feature1 .. feature260.
FEATURE_COLS = ["feature" + str(i) for i in range(1, 261)]


def _keep_nth_one(frame, n, cols=FEATURE_COLS):
    """Zero every column in *cols* except each row's n-th 1; modifies *frame* in place.

    Rows that contain fewer than *n* ones end up all-zero across *cols*.
    Columns outside *cols* are left untouched.
    """
    is_one = frame[cols].eq(1)
    # Running left-to-right count of 1s per row: the n-th 1 is the cell that is
    # itself a 1 and whose running count equals n.
    running = is_one.cumsum(axis=1)
    keep = is_one & running.eq(n)
    # Assign raw values so no index/column alignment is attempted (safe with
    # duplicate index labels).
    frame.loc[:, cols] = keep.astype(int).to_numpy()


for n, frame in enumerate((hcit1, hcit2, hcit3, hcit4), start=1):
    _keep_nth_one(frame, n)
Here:
# Build one frame per feature: the rows of df where that feature equals 1,
# with every column set to 0 and then only that feature set back to 1.
# The explicit .copy() makes each subset an independent frame, so the
# assignments below do not raise SettingWithCopyWarning.
df1 = df[df['Feature1'] == 1].copy()
df1.iloc[:, :] = 0
df1.loc[:, 'Feature1'] = 1
df2 = df[df['Feature2'] == 1].copy()
df2.iloc[:, :] = 0
df2.loc[:, 'Feature2'] = 1
# Filter on Feature3 (the original filtered on Feature2 here, so df3 held the
# wrong rows).
df3 = df[df['Feature3'] == 1].copy()
df3.iloc[:, :] = 0
df3.loc[:, 'Feature3'] = 1
That should be what you are looking for.
The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.