简体   繁体   中英

Convert multiple rows to columns in a pandas DataFrame with calculations

I have the following DataFrame `df`:

# Sample input frame (32 rows): two cells, each holding a 'pvt1' group followed
# by a 'pvt2' group; every group cycles through two 'cond_one' levels with four
# 'cond_two' levels each. The 'value' column carries the labels val1..val32.
df = pd.DataFrame({
    'cell': ['cell1'] * 16 + ['cell2'] * 16,
    'cond_one': (['cond1'] * 4 + ['cond2'] * 4) * 4,
    'cond_two': (['cond_1', 'cond_2', 'cond_3', 'cond_4'] * 4
                 + ['cond_5', 'cond_6', 'cond_7', 'cond_8'] * 4),
    'pvt': (['pvt1'] * 8 + ['pvt2'] * 8) * 2,
    'value': ['val%d' % k for k in range(1, 33)],
})
cell cond_one cond_two  pvt value
cell1    cond1   cond_1 pvt1  val1
cell1    cond1   cond_2 pvt1  val2
cell1    cond1   cond_3 pvt1  val3
cell1    cond1   cond_4 pvt1  val4
cell1    cond2   cond_1 pvt1  val5
cell1    cond2   cond_2 pvt1  val6
cell1    cond2   cond_3 pvt1  val7
cell1    cond2   cond_4 pvt1  val8
cell1    cond1   cond_1 pvt2  val9
cell1    cond1   cond_2 pvt2 val10
cell1    cond1   cond_3 pvt2 val11
cell1    cond1   cond_4 pvt2 val12
cell1    cond2   cond_1 pvt2 val13
cell1    cond2   cond_2 pvt2 val14
cell1    cond2   cond_3 pvt2 val15
cell1    cond2   cond_4 pvt2 val16
cell2    cond1   cond_5 pvt1 val17
cell2    cond1   cond_6 pvt1 val18
cell2    cond1   cond_7 pvt1 val19
cell2    cond1   cond_8 pvt1 val20
cell2    cond2   cond_5 pvt1 val21
cell2    cond2   cond_6 pvt1 val22
cell2    cond2   cond_7 pvt1 val23
cell2    cond2   cond_8 pvt1 val24
cell2    cond1   cond_5 pvt2 val25
cell2    cond1   cond_6 pvt2 val26
cell2    cond1   cond_7 pvt2 val27
cell2    cond1   cond_8 pvt2 val28
cell2    cond2   cond_5 pvt2 val29
cell2    cond2   cond_6 pvt2 val30
cell2    cond2   cond_7 pvt2 val31
cell2    cond2   cond_8 pvt2 val32

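Column types: `cell`, `cond_one`, `cond_two` and `pvt` are strings; `value` is float64 (the `val1`, `val2`, … entries shown above are placeholder labels for the numeric values).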

I want to create one new column for each unique value in the `pvt` column and fill it with the calculation shown below. This is the DataFrame I want to create:

# Expected result: the five input columns plus one column per distinct 'pvt'
# label ('pvt1', 'pvt2'). Each new entry is "<own value>-<reference value>",
# where the reference is the value of the row that has the same cell, cond_one
# and cond_two in the named 'pvt' group.
#
# Row k (0-based) lives in cell block k // 16; within a block the first eight
# rows are the 'pvt1' group and the next eight the 'pvt2' group, so the matching
# reference rows are at offsets k % 8 and 8 + k % 8 respectively. Generating the
# labels from that rule also removes two typos of the hand-written version
# ('val13-va5' and 'val28-va120').
df2 = pd.DataFrame({
    'cell': ['cell1'] * 16 + ['cell2'] * 16,
    'cond_one': (['cond1'] * 4 + ['cond2'] * 4) * 4,
    'cond_two': (['cond_1', 'cond_2', 'cond_3', 'cond_4'] * 4
                 + ['cond_5', 'cond_6', 'cond_7', 'cond_8'] * 4),
    'pvt': (['pvt1'] * 8 + ['pvt2'] * 8) * 2,
    'value': ['val%d' % (k + 1) for k in range(32)],
    'pvt1': ['val%d-val%d' % (k + 1, 16 * (k // 16) + k % 8 + 1)
             for k in range(32)],
    'pvt2': ['val%d-val%d' % (k + 1, 16 * (k // 16) + k % 8 + 9)
             for k in range(32)],
})

 cell cond_one cond_two  pvt value        pvt1        pvt2
cell1    cond1   cond_1 pvt1  val1   val1-val1   val1-val9
cell1    cond1   cond_2 pvt1  val2   val2-val2  val2-val10
cell1    cond1   cond_3 pvt1  val3   val3-val3  val3-val11
cell1    cond1   cond_4 pvt1  val4   val4-val4  val4-val12
cell1    cond2   cond_1 pvt1  val5   val5-val5  val5-val13
cell1    cond2   cond_2 pvt1  val6   val6-val6  val6-val14
cell1    cond2   cond_3 pvt1  val7   val7-val7  val7-val15
cell1    cond2   cond_4 pvt1  val8   val8-val8  val8-val16
cell1    cond1   cond_1 pvt2  val9   val9-val1   val9-val9
cell1    cond1   cond_2 pvt2 val10  val10-val2 val10-val10
cell1    cond1   cond_3 pvt2 val11  val11-val3 val11-val11
cell1    cond1   cond_4 pvt2 val12  val12-val4 val12-val12
cell1    cond2   cond_1 pvt2 val13  val13-val5 val13-val13
cell1    cond2   cond_2 pvt2 val14  val14-val6 val14-val14
cell1    cond2   cond_3 pvt2 val15  val15-val7 val15-val15
cell1    cond2   cond_4 pvt2 val16  val16-val8 val16-val16
cell2    cond1   cond_5 pvt1 val17 val17-val17 val17-val25
cell2    cond1   cond_6 pvt1 val18 val18-val18 val18-val26
cell2    cond1   cond_7 pvt1 val19 val19-val19 val19-val27
cell2    cond1   cond_8 pvt1 val20 val20-val20 val20-val28
cell2    cond2   cond_5 pvt1 val21 val21-val21 val21-val29
cell2    cond2   cond_6 pvt1 val22 val22-val22 val22-val30
cell2    cond2   cond_7 pvt1 val23 val23-val23 val23-val31
cell2    cond2   cond_8 pvt1 val24 val24-val24 val24-val32
cell2    cond1   cond_5 pvt2 val25 val25-val17 val25-val25
cell2    cond1   cond_6 pvt2 val26 val26-val18 val26-val26
cell2    cond1   cond_7 pvt2 val27 val27-val19 val27-val27
cell2    cond1   cond_8 pvt2 val28 val28-val20 val28-val28
cell2    cond2   cond_5 pvt2 val29 val29-val21 val29-val29
cell2    cond2   cond_6 pvt2 val30 val30-val22 val30-val30
cell2    cond2   cond_7 pvt2 val31 val31-val23 val31-val31
cell2    cond2   cond_8 pvt2 val32 val32-val24 val32-val32

I tried to implement this using nested for loops, but it takes too long because my DataFrame has more than 10,000 rows. Is there a more efficient way to solve this problem?

If I understand correctly, you can use a self-merge as the starting point for your result:

# Self-merge on the condition keys so that every row is paired with each row
# sharing its (cond_one, cond_two) combination -- one partner per 'pvt' group.
# NOTE(review): 'cell' is not a merge key; this presumes a (cond_one, cond_two)
# pair never occurs in more than one cell -- confirm against the real data.
pvt = (df.reset_index().merge(df, on=['cond_one', 'cond_two'], how='left')
         # value_x is the row's own value, value_y the partner row's value.
         .assign(value=lambda x: x['value_x'] - x['value_y'],
                 # Label each difference with the partner's actual 'pvt' value
                 # instead of its position within the group, so the column
                 # names do not depend on row order.
                 column=lambda x: x['pvt_y'])
         # 'index' carries the original row label: one output row per input
         # row, one output column per 'pvt' label.
         .pivot_table(index='index', columns='column', values='value'))
# Attach the new per-'pvt' columns to the original frame, aligned on the index.
out = pd.concat([df, pvt], axis=1)

Output:

>>> out
     cell cond_one cond_two   pvt  value  pvt1  pvt2
0   cell1    cond1   cond_1  pvt1      0     0    -8
1   cell1    cond1   cond_2  pvt1      1     0    -8
2   cell1    cond1   cond_3  pvt1      2     0    -8
3   cell1    cond1   cond_4  pvt1      3     0    -8
4   cell1    cond2   cond_1  pvt1      4     0    -8
5   cell1    cond2   cond_2  pvt1      5     0    -8
6   cell1    cond2   cond_3  pvt1      6     0    -8
7   cell1    cond2   cond_4  pvt1      7     0    -8
8   cell1    cond1   cond_1  pvt2      8     8     0
9   cell1    cond1   cond_2  pvt2      9     8     0
10  cell1    cond1   cond_3  pvt2     10     8     0
11  cell1    cond1   cond_4  pvt2     11     8     0
12  cell1    cond2   cond_1  pvt2     12     8     0
13  cell1    cond2   cond_2  pvt2     13     8     0
14  cell1    cond2   cond_3  pvt2     14     8     0
15  cell1    cond2   cond_4  pvt2     15     8     0

The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM