I'm looking for a more efficient way of doing the below (perhaps using boolean masks and vectorization). I'm new to this forum, so apologies if my first question is not quite what is expected.
# Goal: for every row of df, list its non-zero cells ordered by value
# descending, emitting one (index_row, header, score) record per cell.
test_data = {
    'a': [1, 0, 8, 5],
    'b': [36, 2, 0, 6],
    'c': [2, 8, 100, 0],
    'd': [7, 8, 9, 50],
}
df = pd.DataFrame(test_data, columns=['a', 'b', 'c', 'd'])
column_names = ['index_row', 'header', 'score']

# Gather every record first and build the frame once: concatenating inside
# the loop re-copies the accumulated result on each iteration.
records = []
for row, values in df.iterrows():
    # Filter zeros (and missing values) directly instead of the
    # replace(0, NaN) / dropna round-trip, which relied on pd.np -- an alias
    # that was removed in pandas 2.0.
    kept = [
        (header, score)
        for header, score in values.items()
        if score != 0 and pd.notna(score)
    ]
    # sorted() stays stable with reverse=True, so equal scores keep their
    # original column order.
    for header, score in sorted(kept, key=lambda pair: pair[1], reverse=True):
        records.append((row, header, score))

df_result = pd.DataFrame(records, columns=column_names)
df_result
You could do it directly with melt
and some additional processing:
# Long format: one (index_row, header, score) record per cell of df.
indexed = df.reset_index().rename(columns={'index': 'index_row'})
melted = indexed.melt(
    id_vars='index_row',
    var_name='header',
    value_name='score',
)
# Drop zero scores, then order by row and, within a row, by score descending.
nonzero = melted[melted['score'] != 0]
df_result = nonzero.sort_values(
    ['index_row', 'score'], ascending=[True, False]
).reset_index(drop=True)
It gives the expected result:
index_row header score
0 0 b 36
1 0 d 7
2 0 c 2
3 0 a 1
4 1 c 8
5 1 d 8
6 1 b 2
7 2 c 100
8 2 d 9
9 2 a 8
10 3 d 50
11 3 b 6
12 3 a 5
# For each row of df, collect its non-zero scores (highest first) and add
# them to the existing df_result.
row_frames = []
for index in df.index:
    # Two reset_index calls turn the row Series into three columns:
    # a positional counter, the original column label, and the cell value.
    temp_df = df.loc[index].reset_index().reset_index()
    temp_df.columns = ['index_row', 'header', 'score']
    # Overwrite the positional counter with the actual row label.
    temp_df['index_row'] = index
    temp_df = temp_df.sort_values(by=['score'], ascending=False)
    row_frames.append(temp_df[temp_df.score != 0])
# DataFrame.append was removed in pandas 2.0; a single pd.concat replaces the
# per-iteration append and avoids re-copying df_result on every pass.
df_result = pd.concat([df_result, *row_frames], ignore_index=True)
# Melt the frame to long format, drop zero scores, and print each row's
# entries ordered by score descending.
test_data = {
    'a': [1, 0, 8, 5],
    'b': [36, 2, 0, 6],
    'c': [2, 8, 100, 0],
    'd': [7, 8, 9, 50],
}
df = pd.DataFrame(test_data, columns=['a', 'b', 'c', 'd'])
# reset_index exposes the row labels as an 'index' column for melt to keep.
df = df.reset_index()
results = pd.melt(df, id_vars='index', var_name='header', value_name='score')
# Named nonzero_mask rather than `filter` so the builtin is not shadowed.
nonzero_mask = results['score'] != 0
ordered = results[nonzero_mask].sort_values(
    by=['index', 'score'], ascending=[True, False]
)
print(ordered)
output:
index header score
4 0 b 36
12 0 d 7
8 0 c 2
0 0 a 1
9 1 c 8
13 1 d 8
5 1 b 2
10 2 c 100
14 2 d 9
2 2 a 8
15 3 d 50
7 3 b 6
3 3 a 5
The technical posts on this site are licensed under CC BY-SA 4.0. If you need to reprint, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.