[英]Copy contents from one Dataframe to another based on column values in Pandas
[英]Copy values from one dataframe column to another
我有两个数据框 SF 和 OF。
SF:
PartNumber ParentPartNumber Webname Brand Value_Size Full Description ImagePath Short Description Weight RetailPriceEUR
2.5 2 Sidi Si S Honeycomb elastic https://link1,https://link2 Honey 2.3 331
2.6 2 Sidi Si M Honeycomb elastic https://link1,https://link2 Honey 2.3 331
2.7 2 Sidi Si L Honeycomb elastic https://link1,https://link2 Honey 2.3 331
3.2 3 Shoei Sho S E.Q.R.S. https://link3 ERQS 1.5 331
3.3 3 Shoei Sho M E.Q.R.S. https://link3 ERQS 1.5 331
2.9 2 Sidi Si XL Honeycomb elastic https://link1,https://link2 Honey 2.3 331
OF:
Type SKU Published Name Parent Size Full Full Description Image ShortDescription Weight (kg) Regular Price Isfeatured height
simple 4 1 Bec
simple 8 1 Lin
我想做的是:对 SF 中 ParentPartNumber 重复的每一组行,在该组之前添加一个额外的(父级)行,然后把它们一并追加(append)到 OF 数据框中。例如,如果 ParentPartNumber 中有重复项(如 2,2,3,3),则需要复制属于 2 的所有行以及属于 3 的所有行,并且在每一组行之前额外添加一行,其中包含该组的描述等汇总信息。所以最终结果应该如下所示:
结果(附加在 OF 中的 SF 行):
Type SKU Published Name Parent Size Full Description ImagePath ShortDescription Weight Regular Price Isfeatured height
simple 4 1 Bec
simple 8 1 Lin
variable 2 1 Sidi S,M,L,XL Honeycomb elastic https://link1,https://link2 yes
variation 2.5 0 Honey 2 S Honey 2.3 331 yes
variation 2.6 0 Honey 2 M Honey 2.3 331 yes
variation 2.7 0 Honey 2 L Honey 2.3 331 yes
variation 2.9 0 Honey 2 XL Honey 2.3 331 yes
variable 3 (extra) 1 Sho E41,E42 E.Q.R.S. https://link3 yes
variation 3 0 ERQS 3 E41 EQRS 1.5 33 yes
variation 3 0 ERQS 3 E42 ERQS 1.5 33 yes
基本上这就是我想要做的
if SF ParentPartNumber has duplicates (more than one) AND SF VisibleToCustomer == Y then
Create new row in OF with values as follows (PARENT PART)
OF Type = variable
OF SKU = SF ParentPartNumber
OF Name = SF WebName
OF Published = 0
OF Is featured = yes
OF Description = SF FullDescription
OF Images = SF ImagePath (when there is more than 1 link in SF replace | with , (comma))
OF Attribute 1 value(s) = all values from SF Size that belong to the same ParentPartNumber separated with comma
then below that row copy all rows that belong to this ParentPartNumber as follows ( all the part numbers under parent part number)
OF Type = variation
OF SKU = SF PartNumber
OF Published = 0
OF Name = SF ShortDescription
OF Weight (kg) = SF Weight
OF Regular price = SF RetailPriceEUR
OF Parent = SF ParentPartNumber
OF Is featured = yes
这就是我尝试更改代码的方式
# Step 1: keep only the rows flagged as visible to the consumer.
is_visible = SF['VisibleToConsumer'] == 'Y'
SFs = SF[is_visible]

# Step 2: of those, keep only rows whose ParentPartNumber occurs more than
# once (keep=False marks every member of a duplicated group, not just the
# repeats).
has_siblings = SFs['ParentPartNumber'].duplicated(keep=False)
SFs = SFs[has_siblings]
def get_group_by_data(df1, name, parent_cols, child_cols, final_col_names):
    """Expand one ``ParentPartNumber`` group into a parent row plus child rows.

    The first output row is the "variable" (parent) row: for every column in
    ``parent_cols`` it holds the group's distinct values joined with commas.
    It is followed by one "variation" (child) row per row of ``df1``.
    Output columns that nothing writes to are filled with ``""`` so every
    column always has the same length.

    Args:
        df1: Rows of a single group. Must contain ``PartNumber`` and every
            column listed in ``parent_cols`` and ``child_cols``.
        name: The group key; written as the parent row's SKU and as each
            child row's ``Parent`` (both as ``str(name)``).
        parent_cols: Input columns summarised on the parent row.
        child_cols: Input columns copied onto each child row.
        final_col_names: Output column names, in output order.

    Returns:
        A DataFrame with ``len(df1) + 1`` rows and columns ``final_col_names``.

    Raises:
        KeyError: If an input column has no entry in the name map, or a
            value is targeted at an output column that is not listed in
            ``final_col_names``.
    """
    # Maps input (SF) column names to output (OF) column names.
    col_names_map = {
        'Type': 'Type', 'SKU': 'SKU', 'WebName': 'Name', 'Published': 'Published',
        'Isfeatured': 'yes', 'ShortDescription': 'Name',
        'FullDescription': 'Full Description',
        'Weight': 'Weight (kg)', 'height': 'height',
        'RetailPriceEUR': 'Regular price',
        'ImagePath': 'Images', 'ParentPartNumber': 'Parent',
        'Size': 'Attribute 1 value(s)',
    }

    def make_row(values):
        # Start from an all-blank row so unwritten columns stay aligned.
        out_row = dict.fromkeys(final_col_names, "")
        for out_col, value in values.items():
            if out_col not in out_row:
                # Fail loudly instead of silently dropping a value.
                raise KeyError(out_col)
            out_row[out_col] = value
        return out_row

    # --- extra (parent) row ---
    parent_values = {
        'Type': 'variable',
        'SKU': str(name),
        'Published': 1,
        'Is featured?': 'yes',
        'Parent': "",
    }
    distinct_counts = {}
    for col in parent_cols:
        # dict.fromkeys drops duplicates while keeping first-seen order.
        distinct_vals = list(dict.fromkeys(df1[col]))
        distinct_counts[col] = len(distinct_vals)
        # str() keeps the join from failing on numeric values; missing
        # values (NaN/None) are left out of the summary.
        parent_values[col_names_map[col]] = ",".join(
            str(val) for val in distinct_vals if pd.notna(val)
        )
    for col in child_cols:
        parent_values[col_names_map[col]] = ""
    rows = [make_row(parent_values)]

    # --- one child row per part number under this parent ---
    for _, src in df1.iterrows():
        child_values = {
            'Type': 'variation',
            'SKU': src['PartNumber'],
            'Published': 1,
            'Is featured?': 0,
            'Parent': str(name),
        }
        for col in parent_cols:
            # A column that varies within the group (e.g. sizes S, M, L) is
            # repeated on the child; a column with a single shared value
            # (e.g. the description) lives on the parent row only.
            child_values[col_names_map[col]] = (
                src[col] if distinct_counts[col] > 1 else ""
            )
        for col in child_cols:
            child_values[col_names_map[col]] = src[col]
        rows.append(make_row(child_values))

    return pd.DataFrame.from_dict(
        {col_name: [r[col_name] for r in rows] for col_name in final_col_names}
    )
# Input columns summarised on the parent row / copied onto each child row.
parent_cols = ['Size', 'FullDescription', 'ImagePath']
#common_cols = ['WebName']
child_cols = ['ShortDescription', 'Weight', 'RetailPriceEUR']
# Output columns, in output order. The 'Height' entry was previously a
# broken string literal that made the whole snippet a syntax error.
df_append_cols = ['Type', 'SKU', 'Name', 'Published', 'Is featured?',
                  'Short description', 'Full Description',
                  'Weight (kg)',
                  'Height', 'Regular price', 'Images',
                  'Parent', 'Attribute 1 value(s)']
# Group the filtered frame SFs (visible rows with a duplicated parent), not
# the raw SF; otherwise the filtering done before this point is ignored.
# Each group is expanded by get_group_by_data; .values flattens the stacked
# per-group frames into one plain table.
df_append = pd.DataFrame(
    SFs.groupby('ParentPartNumber')[['PartNumber'] + parent_cols + child_cols]
    .apply(lambda x: get_group_by_data(x, x.name, parent_cols,
                                       child_cols, df_append_cols)).values,  # x.name is 'ParentPartNumber'
    columns=df_append_cols)
df_append = df_append.fillna('')
def get_group_by_data(df1, name, parent_cols, child_cols, final_col_names):
    """Expand one ``ParentPartNumber`` group into a parent row plus child rows.

    The first output row is the "variable" (parent) row: for every column in
    ``parent_cols`` it holds the group's distinct values joined with commas.
    It is followed by one "variation" (child) row per row of ``df1``.
    Output columns that nothing writes to (for example ``Height``) are filled
    with ``""`` so every column always has the same length.

    Args:
        df1: Rows of a single group. Must contain ``PartNumber`` and every
            column listed in ``parent_cols`` and ``child_cols``.
        name: The group key; written as the parent row's SKU and as each
            child row's ``Parent`` (both as ``str(name)``).
        parent_cols: Input columns summarised on the parent row.
        child_cols: Input columns copied onto each child row.
        final_col_names: Output column names, in output order.

    Returns:
        A DataFrame with ``len(df1) + 1`` rows and columns ``final_col_names``.

    Raises:
        KeyError: If an input column has no entry in the name map, or a
            value is targeted at an output column that is not listed in
            ``final_col_names``.
    """
    # Maps input (SF) column names to output (OF) column names.
    col_names_map = {
        'Type': 'Type', 'SKU': 'SKU', 'WebName': 'Name', 'Published': 'Published',
        'Isfeatured': 'yes', 'Short Description': 'Name',
        'Full Description': 'Full Description',
        'Weight': 'Weight (kg)', 'height': 'height',
        'RetailPriceEUR': 'Regular price',
        'ImagePath': 'Images', 'ParentPartNumber': 'Parent',
        'Value_Size': 'Attribute 1 value(s)',
    }

    def make_row(values):
        # Start from an all-blank row so unwritten columns stay aligned.
        out_row = dict.fromkeys(final_col_names, "")
        for out_col, value in values.items():
            if out_col not in out_row:
                # Fail loudly instead of silently dropping a value.
                raise KeyError(out_col)
            out_row[out_col] = value
        return out_row

    # The child rows also carry the short description in its own output
    # column, but only when the caller asked for that column and the group
    # actually has it.
    copy_short_desc = (
        'Short Description' in final_col_names
        and 'Short Description' in df1.columns
    )

    # --- extra (parent) row ---
    parent_values = {
        'Type': 'variable',
        'SKU': str(name),
        'Published': 1,
        'Is featured?': 'yes',
        'Parent': "",
    }
    distinct_counts = {}
    for col in parent_cols:
        # dict.fromkeys drops duplicates while keeping first-seen order.
        distinct_vals = list(dict.fromkeys(df1[col]))
        distinct_counts[col] = len(distinct_vals)
        # str() keeps the join from failing on numeric values; missing
        # values (NaN/None) are left out of the summary.
        parent_values[col_names_map[col]] = ",".join(
            str(val) for val in distinct_vals if pd.notna(val)
        )
    for col in child_cols:
        parent_values[col_names_map[col]] = ""
    rows = [make_row(parent_values)]

    # --- one child row per part number under this parent ---
    for _, src in df1.iterrows():
        child_values = {
            'Type': 'variation',
            'SKU': src['PartNumber'],
            'Published': 1,
            'Is featured?': 0,
            'Parent': str(name),
        }
        if copy_short_desc:
            child_values['Short Description'] = src['Short Description']
        for col in parent_cols:
            # A column that varies within the group (e.g. sizes S, M, L) is
            # repeated on the child; a column with a single shared value
            # (e.g. the description) lives on the parent row only.
            child_values[col_names_map[col]] = (
                src[col] if distinct_counts[col] > 1 else ""
            )
        for col in child_cols:
            child_values[col_names_map[col]] = src[col]
        rows.append(make_row(child_values))

    return pd.DataFrame.from_dict(
        {col_name: [r[col_name] for r in rows] for col_name in final_col_names}
    )
按“ParentPartNumber”分组并对下面各种列表中的所有列应用分组操作。 基本思想是,当您应用分组依据时,属于该组的所有行都将作为数据框传递(只有您正在应用操作的列)。
# Keep only rows whose ParentPartNumber appears more than once
# (keep=False marks every member of a duplicated group).
SF = SF[SF['ParentPartNumber'].duplicated(keep=False)]

# Input columns summarised on the parent row.
parent_cols = ['Value_Size', 'Full Description', 'ImagePath']
#common_cols = ['WebName']
# Input columns copied onto each child row.
child_cols = ['Short Description', 'Weight', 'RetailPriceEUR']
# Output columns, in output order.
df_append_cols = [
    'Type', 'SKU', 'Name', 'Published', 'Is featured?',
    'Short Description', 'Full Description', 'Weight (kg)',
    'Height', 'Regular price', 'Images', 'Parent', 'Attribute 1 value(s)',
]

# Hand each ParentPartNumber group (restricted to the columns we need) to
# get_group_by_data; grp.name is the group's 'ParentPartNumber' value.
grouped_input = SF.groupby('ParentPartNumber')[['PartNumber', *parent_cols, *child_cols]]
per_group = grouped_input.apply(
    lambda grp: get_group_by_data(grp, grp.name, parent_cols,
                                  child_cols, df_append_cols)
)

# Flatten the stacked per-group frames into one plain table and blank out
# any missing values.
df_append = pd.DataFrame(per_group.values, columns=df_append_cols)
df_append = df_append.fillna('')
df_append
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.