![](/img/trans.png)
[英]print column values of one dataframe based on the column values of another dataframe
[英]Creating columns in one dataframe based on the values of a column in another dataset
我有两个熊猫数据框
import pandas as pd
import numpy as np
import datetime
# intialise data of lists.
data = {'group' :["A","A","A","B","B","B","B"],
'val': ["AA","AB","AC","B1","B2","AA","AB"],
'cal1' :[4,5,7,6,5,8,9],
'cal2' :[10,100,100,10,1,10,100]
}
# Create DataFrame
df1 = pd.DataFrame(data)
df1
group val cal1 cal2
0 A AA 4 10
1 A AB 5 100
2 A AC 7 100
3 B B1 6 10
4 B B2 5 1
5 B AA 8 10
6 B AB 9 100
import pandas as pd
import numpy as np
import datetime
# intialise data of lists.
data = {'group' :["A","A","A","B","B","B","B"],
'flag' : [1,0,0,1,0,0,0],
'var1': [1,2,3,7,8,9,10]
}
# Create DataFrame
df2 = pd.DataFrame(data)
df2
group flag var1
0 A 1 1
1 A 0 2
2 A 0 3
3 B 1 7
4 B 0 8
5 B 0 9
6 B 0 10
第 1 步:根据 df1 中的唯一“val”在 df2 中创建列,如下所示:
unique_val = df1['val'].unique().tolist()
new_cols = [t + '_new' for t in unique_val]
for i in new_cols:
df2[i] = 0
df2
group flag var1 AA_new AB_new AC_new B1_new B2_new
0 A 1 1 0 0 0 0 0
1 A 0 2 0 0 0 0 0
2 A 0 3 0 0 0 0 0
3 B 1 7 0 0 0 0 0
4 B 0 8 0 0 0 0 0
5 B 0 9 0 0 0 0 0
6 B 0 10 0 0 0 0 0
第 2 步:对于 flag = 1 的行,AA_new 将计算为 var1(from df2)* 来自 df1 的 'cal1' 值,用于组“A”和 val“AA” * 来自 df1 的 'cal2' 值,用于组“A” " 和 val "AA",类似地,AB_new 将计算为 var1(来自 df2) * 来自 df1 的 'cal1' 值对于组“A”和 val“AB” * 来自 df1 的 'cal2' 值对于组“A”和值“AB”
我的预期输出应如下所示:
group flag var1 AA_new AB_new AC_new B1_new B2_new
0 A 1 1 40 500 700 0 0
1 A 0 2 0 0 0 0 0
2 A 0 3 0 0 0 0 0
3 B 1 7 570 6300 0 420 35
4 B 0 8 0 0 0 0 0
5 B 0 9 0 0 0 0 0
6 B 0 10 0 0 0 0 0
使用DataFrame.pivot_table
和GroupBy.bfill
然后我们可以使用DataFrame.mul
。
df2.assign(**df1.pivot_table(columns='val',
values='cal',
index = ['group', df2.index])
.add_suffix('_new')
.groupby(level=0)
#.apply(lambda x: x.bfill().ffill()) #maybe neccesary instead bfill
.bfill()
.reset_index(level='group',drop='group')
.fillna(0)
.mul(df2['var1'], axis=0)
.where(df2['flag'].eq(1), 0)
#.astype(int) # if you want int
)
输出
group flag var1 AA_new AB_new AC_new B1_new B2_new
0 A 1 1 4.0 5.0 7.0 0.0 0.0
1 A 0 2 0.0 0.0 0.0 0.0 0.0
2 A 0 3 0.0 0.0 0.0 0.0 0.0
3 B 1 7 56.0 63.0 0.0 42.0 35.0
4 B 0 8 0.0 0.0 0.0 0.0 0.0
5 B 0 9 0.0 0.0 0.0 0.0 0.0
6 B 0 10 0.0 0.0 0.0 0.0 0.0
编辑
df2.assign(**df1.assign(mul_cal = df1['cal1'].mul(df1['cal2']))
.pivot_table(columns='val',
values='mul_cal',
index = ['group', df2.index])
.add_suffix('_new')
.groupby(level=0)
#.apply(lambda x: x.bfill().ffill()) #maybe neccesary instead bfill
.bfill()
.reset_index(level='group',drop='group')
.fillna(0)
.mul(df2['var1'], axis=0)
.where(df2['flag'].eq(1), 0)
#.astype(int) # if you want int
)
group flag var1 AA_new AB_new AC_new B1_new B2_new
0 A 1 1 40.0 500.0 700.0 0.0 0.0
1 A 0 2 0.0 0.0 0.0 0.0 0.0
2 A 0 3 0.0 0.0 0.0 0.0 0.0
3 B 1 7 560.0 6300.0 0.0 420.0 35.0
4 B 0 8 0.0 0.0 0.0 0.0 0.0
5 B 0 9 0.0 0.0 0.0 0.0 0.0
6 B 0 10 0.0 0.0 0.0 0.0 0.0
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.