简体   繁体   中英

Remove any 0 value from row, order values descending for row, for each non 0 value in row return the index, column name, and score to a new df

I'm looking for a more efficient way of doing the below (perhaps using boolean masks and vectorization). I'm new to this forum, so apologies if my first question is not quite what is expected.

#order each row by values descending
#remove any 0 value column from row
#for each non 0 value return the index, column name, and score to a new dataframe

# Sample score matrix: one row per record, one column per header.
test_data = {
    'a': [1, 0, 8, 5],
    'b': [36, 2, 0, 6],
    'c': [2, 8, 100, 0],
    'd': [7, 8, 9, 50],
}

df = pd.DataFrame(test_data, columns=['a', 'b', 'c', 'd'])

# Column layout of the final long-format result.
column_names = ['index_row', 'header', 'score']

# Reshape once to long format instead of looping over rows: every
# (row label, column name, value) triple becomes one record.
long_form = (
    df.rename_axis('index_row')
    .reset_index()
    .melt(id_vars='index_row', var_name='header', value_name='score')
)

# Drop zero scores; missing values are dropped as well, which matches the
# earlier replace(0, NaN) + dropna behaviour.
keep = long_form['score'].ne(0) & long_form['score'].notna()

# Keep the rows in their original order and rank scores high-to-low inside
# each row. Sorting on several keys is stable, so tied scores stay in the
# original column order.
df_result = (
    long_form[keep]
    .sort_values(['index_row', 'score'], ascending=[True, False])
    .reset_index(drop=True)[column_names]
)

df_result

You could do it directly with melt and some additional processing:

# Move the row labels into a regular column named 'index_row'.
labelled = df.reset_index().rename(columns={'index': 'index_row'})

# Unpivot: one record per (index_row, header) pair with its score.
long_form = labelled.melt(
    id_vars='index_row', var_name='header', value_name='score')

# Discard zero scores, then order by row (ascending) and score (descending).
non_zero = long_form.query("score!=0")
ordered = non_zero.sort_values(
    ['index_row', 'score'], ascending=[True, False])

# Renumber the result 0..n-1.
df_result = ordered.reset_index(drop=True)

It gives the expected result:

    index_row header  score
0           0      b     36
1           0      d      7
2           0      c      2
3           0      a      1
4           1      c      8
5           1      d      8
6           1      b      2
7           2      c    100
8           2      d      9
9           2      a      8
10          3      d     50
11          3      b      6
12          3      a      5
# Build one small frame per row of df, then append them all to df_result.
# Relies on df and df_result already existing, with df_result using the
# columns index_row / header / score.
row_frames = []
for index in df.index:
    # Turn the row into two columns: the column name and its value.
    temp_df = df.loc[index].rename_axis('header').reset_index(name='score')
    # Tag every record with the label of the row it came from.
    temp_df.insert(0, 'index_row', index)
    # Highest score first within the row.
    temp_df = temp_df.sort_values(by=['score'], ascending=False)
    # Zero scores are not reported.
    row_frames.append(temp_df[temp_df.score != 0])

# DataFrame.append was removed in pandas 2.0; concatenate once instead of
# growing df_result inside the loop.
df_result = pd.concat([df_result, *row_frames], ignore_index=True)
  # Sample score matrix: one row per record, one column per header.
  test_data = {'a': [1, 0, 8, 5],
               'b': [36, 2, 0, 6],
               'c': [2, 8, 100, 0],
               'd': [7, 8, 9, 50]}
  df = pd.DataFrame(test_data, columns=['a', 'b', 'c', 'd'])
  # Expose the row labels as a regular 'index' column so melt can keep them.
  df = df.reset_index()

  # Unpivot to long format: one record per (index, header) pair.
  results = pd.melt(df, id_vars='index', var_name='header', value_name='score')
  # Boolean mask of the non-zero scores (named so it does not shadow the
  # builtin filter()).
  nonzero_mask = results['score'] != 0
  # Rows in original order, scores high-to-low within each row.
  print(results[nonzero_mask].sort_values(by=['index', 'score'], ascending=[True, False]))

  output:

  index header  score
  4       0      b     36
  12      0      d      7
  8       0      c      2
  0       0      a      1
  9       1      c      8
  13      1      d      8
  5       1      b      2
  10      2      c    100
  14      2      d      9
  2       2      a      8
  15      3      d     50
  7       3      b      6
  3       3      a      5

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM