I have the following DataFrame `df`:
# Sample input: 2 cells x 2 'pvt' groups x 2 'cond_one' values x 4 'cond_two'
# values = 32 rows. Each column is built from its repeating pattern.
df = pd.DataFrame({
    'cell': ['cell1'] * 16 + ['cell2'] * 16,
    'cond_one': (['cond1'] * 4 + ['cond2'] * 4) * 4,
    # cell1 rows cycle through cond_1..cond_4, cell2 rows through cond_5..cond_8.
    'cond_two': (['cond_1', 'cond_2', 'cond_3', 'cond_4'] * 4
                 + ['cond_5', 'cond_6', 'cond_7', 'cond_8'] * 4),
    'pvt': (['pvt1'] * 8 + ['pvt2'] * 8) * 2,
    # Placeholder labels 'val1' .. 'val32', one per row.
    'value': [f'val{i}' for i in range(1, 33)],
})
cell cond_one cond_two pvt value
cell1 cond1 cond_1 pvt1 val1
cell1 cond1 cond_2 pvt1 val2
cell1 cond1 cond_3 pvt1 val3
cell1 cond1 cond_4 pvt1 val4
cell1 cond2 cond_1 pvt1 val5
cell1 cond2 cond_2 pvt1 val6
cell1 cond2 cond_3 pvt1 val7
cell1 cond2 cond_4 pvt1 val8
cell1 cond1 cond_1 pvt2 val9
cell1 cond1 cond_2 pvt2 val10
cell1 cond1 cond_3 pvt2 val11
cell1 cond1 cond_4 pvt2 val12
cell1 cond2 cond_1 pvt2 val13
cell1 cond2 cond_2 pvt2 val14
cell1 cond2 cond_3 pvt2 val15
cell1 cond2 cond_4 pvt2 val16
cell2 cond1 cond_5 pvt1 val17
cell2 cond1 cond_6 pvt1 val18
cell2 cond1 cond_7 pvt1 val19
cell2 cond1 cond_8 pvt1 val20
cell2 cond2 cond_5 pvt1 val21
cell2 cond2 cond_6 pvt1 val22
cell2 cond2 cond_7 pvt1 val23
cell2 cond2 cond_8 pvt1 val24
cell2 cond1 cond_5 pvt2 val25
cell2 cond1 cond_6 pvt2 val26
cell2 cond1 cond_7 pvt2 val27
cell2 cond1 cond_8 pvt2 val28
cell2 cond2 cond_5 pvt2 val29
cell2 cond2 cond_6 pvt2 val30
cell2 cond2 cond_7 pvt2 val31
cell2 cond2 cond_8 pvt2 val32
Column types: `cell`, `cond_one`, `cond_two`, and `pvt` are strings; `value` is float64 (the `val1`, `val2`, … entries shown above stand in for the actual numbers).
I want to create one new column for each unique value in the `pvt` column and fill it with the calculation shown below. This is the DataFrame I want to end up with:
# Expected result: the five input columns plus one column per unique 'pvt'
# value. Each new cell reads "<this row's value>-<value of the row that has
# that pvt and the same cell/cond_one/cond_two>".
#
# Rows are numbered 1..32 to match the 'valN' labels. Within each 16-row cell
# the first 8 rows are pvt1 and the last 8 are pvt2, so a row's partner in the
# other pvt group is always 8 rows away.
df2 = pd.DataFrame({
    'cell': ['cell1'] * 16 + ['cell2'] * 16,
    'cond_one': (['cond1'] * 4 + ['cond2'] * 4) * 4,
    'cond_two': (['cond_1', 'cond_2', 'cond_3', 'cond_4'] * 4
                 + ['cond_5', 'cond_6', 'cond_7', 'cond_8'] * 4),
    'pvt': (['pvt1'] * 8 + ['pvt2'] * 8) * 2,
    'value': [f'val{i}' for i in range(1, 33)],
    # pvt1 rows subtract themselves; pvt2 rows subtract the pvt1 row 8 above.
    'pvt1': [
        f'val{i}-val{i - 8 if (i - 1) % 16 >= 8 else i}'
        for i in range(1, 33)
    ],
    # pvt2 rows subtract themselves; pvt1 rows subtract the pvt2 row 8 below.
    'pvt2': [
        f'val{i}-val{i if (i - 1) % 16 >= 8 else i + 8}'
        for i in range(1, 33)
    ],
})
cell cond_one cond_two pvt value pvt1 pvt2
cell1 cond1 cond_1 pvt1 val1 val1-val1 val1-val9
cell1 cond1 cond_2 pvt1 val2 val2-val2 val2-val10
cell1 cond1 cond_3 pvt1 val3 val3-val3 val3-val11
cell1 cond1 cond_4 pvt1 val4 val4-val4 val4-val12
cell1 cond2 cond_1 pvt1 val5 val5-val5 val5-val13
cell1 cond2 cond_2 pvt1 val6 val6-val6 val6-val14
cell1 cond2 cond_3 pvt1 val7 val7-val7 val7-val15
cell1 cond2 cond_4 pvt1 val8 val8-val8 val8-val16
cell1 cond1 cond_1 pvt2 val9 val9-val1 val9-val9
cell1 cond1 cond_2 pvt2 val10 val10-val2 val10-val10
cell1 cond1 cond_3 pvt2 val11 val11-val3 val11-val11
cell1 cond1 cond_4 pvt2 val12 val12-val4 val12-val12
cell1 cond2 cond_1 pvt2 val13 val13-val5 val13-val13
cell1 cond2 cond_2 pvt2 val14 val14-val6 val14-val14
cell1 cond2 cond_3 pvt2 val15 val15-val7 val15-val15
cell1 cond2 cond_4 pvt2 val16 val16-val8 val16-val16
cell2 cond1 cond_5 pvt1 val17 val17-val17 val17-val25
cell2 cond1 cond_6 pvt1 val18 val18-val18 val18-val26
cell2 cond1 cond_7 pvt1 val19 val19-val19 val19-val27
cell2 cond1 cond_8 pvt1 val20 val20-val20 val20-val28
cell2 cond2 cond_5 pvt1 val21 val21-val21 val21-val29
cell2 cond2 cond_6 pvt1 val22 val22-val22 val22-val30
cell2 cond2 cond_7 pvt1 val23 val23-val23 val23-val31
cell2 cond2 cond_8 pvt1 val24 val24-val24 val24-val32
cell2 cond1 cond_5 pvt2 val25 val25-val17 val25-val25
cell2 cond1 cond_6 pvt2 val26 val26-val18 val26-val26
cell2 cond1 cond_7 pvt2 val27 val27-val19 val27-val27
cell2 cond1 cond_8 pvt2 val28 val28-val20 val28-val28
cell2 cond2 cond_5 pvt2 val29 val29-val21 val29-val29
cell2 cond2 cond_6 pvt2 val30 val30-val22 val30-val30
cell2 cond2 cond_7 pvt2 val31 val31-val23 val31-val31
cell2 cond2 cond_8 pvt2 val32 val32-val24 val32-val32
I tried to implement this using nested for loops, but it takes too long because my DataFrame has more than 10,000 rows. Is there a more efficient way to solve this problem?
IIUC, you can use `merge` as the starting point for your result:
# For every row, find all rows sharing the same cell/cond_one/cond_two (one
# per 'pvt' value), subtract each matched row's value from this row's value,
# and spread the differences into one column per 'pvt' value.
# NOTE: this needs a numeric 'value' column (the question states float64).
pvt = (
    df.reset_index()  # keep each original row label in an 'index' column
    # Self-join: overlapping non-key columns get the default suffixes, so
    # 'pvt_x'/'value_x' belong to the current row and 'pvt_y'/'value_y' to the
    # matched row. 'cell' is part of the key so rows from different cells are
    # never paired with each other.
    .merge(df, on=['cell', 'cond_one', 'cond_two'], how='left')
    .assign(value=lambda x: x['value_x'] - x['value_y'])
    # Name each new column after the matched row's actual 'pvt' value, not its
    # position among the matches, so the result does not depend on row order.
    # pivot_table averages by default if a (row, pvt) pair occurs more than once.
    .pivot_table(index='index', columns='pvt_y', values='value')
)
# pvt is indexed by the original row labels, so it lines up with df row by row.
out = pd.concat([df, pvt], axis=1)
Output:
>>> out
cell cond_one cond_two pvt value pvt1 pvt2
0 cell1 cond1 cond_1 pvt1 0 0 -8
1 cell1 cond1 cond_2 pvt1 1 0 -8
2 cell1 cond1 cond_3 pvt1 2 0 -8
3 cell1 cond1 cond_4 pvt1 3 0 -8
4 cell1 cond2 cond_1 pvt1 4 0 -8
5 cell1 cond2 cond_2 pvt1 5 0 -8
6 cell1 cond2 cond_3 pvt1 6 0 -8
7 cell1 cond2 cond_4 pvt1 7 0 -8
8 cell1 cond1 cond_1 pvt2 8 8 0
9 cell1 cond1 cond_2 pvt2 9 8 0
10 cell1 cond1 cond_3 pvt2 10 8 0
11 cell1 cond1 cond_4 pvt2 11 8 0
12 cell1 cond2 cond_1 pvt2 12 8 0
13 cell1 cond2 cond_2 pvt2 13 8 0
14 cell1 cond2 cond_3 pvt2 14 8 0
15 cell1 cond2 cond_4 pvt2 15 8 0
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.