[英]compare list of dicts and merge to one python
我正在編寫一個 python 程序來將類似的字典合並為一個。 我有兩個字典的列表。 我有一個空的filter_cache
字典。 我有input_completed = False
變量。 我將使用 for 循環獲取 dicts。 在輸入第一個 dict 后,它會被處理並將相似的內容合並為一個,然后我將結果分配給filter_cache
dict。 通過 for 循環輸入的列表中的下一個 dict 得到處理並將其合並為一個。 之后,由於data
為空,我將input_completed
更改為True
。 接下來,我想比較filter_cache
的前一個 dict 和當前的filter_cache
,然后使用某個函數將其再次組合並將其分配給filter_cache
。 如何使它成為可能
這是代碼:
from itertools import groupby
# Key that identifies a group, the per-row fields to collect, and the output
# key each collected field is stored under (merger[i] -> merge_name[i]).
field_to_be_check = "state"
merger = ["city", "haps"]
merge_name = ["cities", "my_haps"]

data = [
    [
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tenkasi'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad4', 'city': 'nagerkoil'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tuticorin'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kolikodu'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kottayam'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'idukki'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Akola'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Washim'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Jalna'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Latur'},
    ],
    [
        {'haps': 'hap1', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'madurai'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'chennai'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'palakad'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'guruvayor'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Nanded'},
    ],
]

input_completed = False
filter_cache = {}


def process_group(group, merger_item):
    """Return one ``{merger_item: value}`` dict per distinct value in *group*.

    *group* is an iterable of row dicts that all contain *merger_item*.
    Values are de-duplicated through a set, so the order of the returned
    list is not guaranteed.
    """
    distinct_values = {row[merger_item] for row in group}
    return [{merger_item: value} for value in distinct_values]


for datas in data:
    # Group on field_to_be_check only. groupby() merges *adjacent* rows with
    # equal keys, so each batch must already be ordered by that field.
    result = []
    for model, group in groupby(datas, key=lambda row: row[field_to_be_check]):
        # Materialise the group: it is scanned once per merger field.
        group_list = list(group)
        cities_dict = {field_to_be_check: model}
        for source_field, target_name in zip(merger, merge_name):
            cities_dict[target_name] = process_group(group_list, source_field)
        result.append(cities_dict)
    # NOTE: update() replaces the previous 'aggregate' entry, so after the
    # loop filter_cache only holds the groups of the last batch. The batches
    # are not merged with each other here.
    filter_cache.update({'aggregate': result})
    print(filter_cache)

final_result = filter_cache
我得到的輸出:
{'aggregate': [{'state': 'tamil nadu', 'my_haps': [{'haps': 'hap0'}], 'cities': [{'city': 'tuticorin'}, {'city': 'tenkasi'}, {'city': 'nagerkoil'}]}, {'state': 'kerala', 'my_haps': [{'haps': 'hap1'}], 'cities': [{'city': 'idukki'}, {'city': 'kolikodu'}, {'city': 'kottayam'}]}, {'state': 'mumbai', 'my_haps': [{'haps': 'hap2'}], 'cities': [{'city': 'Akola'}, {'city': 'Jalna'}, {'city': 'Latur'}, {'city': 'Washim'}]}]}
{'aggregate': [{'state': 'tamil nadu', 'my_haps': [{'haps': 'hap1'}, {'haps': 'hap0'}], 'cities': [{'city': 'madurai'}, {'city': 'chennai'}]}, {'state': 'kerala', 'my_haps': [{'haps': 'hap1'}], 'cities': [{'city': 'palakad'}, {'city': 'guruvayor'}]}, {'state': 'mumbai', 'my_haps': [{'haps': 'hap2'}], 'cities': [{'city': 'Nanded'}]}]}
所需輸出:
{'aggregate': [{'state': 'tamil nadu', 'my_haps': [{'haps': 'hap0'},{'haps': 'hap1'}], 'cities': [{'city': 'tuticorin'}, {'city': 'tenkasi'}, {'city': 'nagerkoil'},{'city': 'madurai'}, {'city': 'chennai'}]}, {'state': 'kerala', 'my_haps': [{'haps': 'hap1'}], 'cities': [{'city': 'idukki'}, {'city': 'kolikodu'}, {'city': 'kottayam'},{'city': 'palakad'}, {'city': 'guruvayor'}]}, {'state': 'mumbai', 'my_haps': [{'haps': 'hap2'}], 'cities': [{'city': 'Akola'}, {'city': 'Jalna'}, {'city': 'Latur'}, {'city': 'Washim'},{'city': 'Nanded'}]}]}
功能:
def group_dicts_from_list(lst, group_by, merge_rules, result):
    """Merge the dicts found in *lst* into ``result["aggregate"]``, in place.

    Dicts that share the same value for *group_by* are folded into a single
    entry of ``result["aggregate"]``. For every ``src -> dst`` pair in
    *merge_rules*, each distinct ``item[src]`` is stored once, wrapped as
    ``{src: value}``, in the entry's ``dst`` list. Order of first appearance
    is preserved for both the entries and the wrapped values.

    Args:
        lst: A list of dicts, or of (arbitrarily nested) lists of dicts.
            Elements that are neither dict nor list, and dicts without the
            *group_by* key, are ignored.
        group_by: Key whose value decides which entry a dict belongs to.
        merge_rules: Dict mapping a source key to the name of the list it is
            collected under.
        result: Dict that receives the ``"aggregate"`` list. An existing
            ``"aggregate"`` list is extended, so the function can be called
            repeatedly with the same *result*.

    Returns:
        None. If any argument is empty or of the wrong type, the function
        returns without touching *result*.
    """
    if not lst or not isinstance(lst, list) or not group_by:
        return
    if not merge_rules or not isinstance(merge_rules, dict):
        return
    if not isinstance(result, dict):
        return

    aggregate = result.setdefault("aggregate", [])

    for item in lst:
        if isinstance(item, list):
            # Nested list: fold its contents into the same result.
            group_dicts_from_list(item, group_by, merge_rules, result)
            continue
        if not isinstance(item, dict) or group_by not in item:
            continue

        group_value = item[group_by]
        # Linear scan with == so that unhashable group values keep working.
        entry = next(
            (res for res in aggregate if res[group_by] == group_value),
            None,
        )
        if entry is None:
            entry = {group_by: group_value}
            aggregate.append(entry)

        for src, dst in merge_rules.items():
            if src not in item:
                continue
            wrapped = {src: item[src]}
            bucket = entry.setdefault(dst, [])
            if wrapped not in bucket:
                bucket.append(wrapped)
用法:
# Rows are grouped by this key.
field_to_be_check = "state"

# source key in each row -> name of the list it is collected under
my_merge_rules = {
    "city": "cities",
    "haps": "my_haps",
}

# Two batches of rows; the nested lists are flattened by the function itself.
data = [
    [
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tenkasi'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad4', 'city': 'nagerkoil'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tuticorin'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kolikodu'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kottayam'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'idukki'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Akola'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Washim'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Jalna'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Latur'},
    ],
    [
        {'haps': 'hap1', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'madurai'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'chennai'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'palakad'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'guruvayor'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Nanded'},
    ],
]

# The function fills this dict in place; it is passed in empty.
result = {}
group_dicts_from_list(data, field_to_be_check, my_merge_rules, result)
print(result)
輸出:
{'aggregate': [{'state': 'tamil nadu', 'cities': [{'city': 'tenkasi'}, {'city': 'nagerkoil'}, {'city': 'tuticorin'}, {'city': 'madurai'}, {'city': 'chennai'}], 'my_haps': [{'haps': 'hap0'}, {'haps': 'hap1'}]}, {'state': 'kerala', 'cities': [{'city': 'kolikodu'}, {'city': 'kottayam'}, {'city': 'idukki'}, {'city': 'palakad'}, {'city': 'guruvayor'}], 'my_haps': [{'haps': 'hap1'}]}, {'state': 'mumbai', 'cities': [{'city': 'Akola'}, {'city': 'Washim'}, {'city': 'Jalna'}, {'city': 'Latur'}, {'city': 'Nanded'}], 'my_haps': [{'haps': 'hap2'}]}]}
注釋:
我換了
# Original form: two parallel lists, paired up by index.
merger = ["city", "haps"]
merge_name = ["cities", "my_haps"]
和
# Replacement form: one dict mapping each source key to its output list name.
my_merge_rules = {
"city": "cities",
"haps": "my_haps",
}
因為我發現它更可靠(它可以防止列表長度不同的情況) 。
您的代碼和接受的答案對我來說似乎過於復雜(也許我不理解要求)。 為什么不建立一個字典: field_to_be_check -> merger_name -> merger values
,然后將其轉換為所需的格式?
字典的創建:
import itertools
data = [
    [
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tenkasi'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad4', 'city': 'nagerkoil'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tuticorin'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kolikodu'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kottayam'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'idukki'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Akola'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Washim'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Jalna'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Latur'},
    ],
    [
        {'haps': 'hap1', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'madurai'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'chennai'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'palakad'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'guruvayor'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Nanded'},
    ],
]
field_to_be_check = "state"
# source key in each row -> name of the collection it is gathered under
name_by_merger = {"city": "cities", "haps": "my_haps"}

# Intermediate mapping: group value -> merger name -> set of merged values.
d = {}
# chain.from_iterable flattens the batches lazily, without unpacking `data`
# into positional arguments.
for row in itertools.chain.from_iterable(data):
    # setdefault creates the per-group dict the first time a group is seen.
    inner_d = d.setdefault(row[field_to_be_check], {})
    for m, n in name_by_merger.items():
        # Sets drop duplicate values (and do not keep insertion order).
        inner_d.setdefault(n, set()).add(row[m])

# Bare expression: echoes the mapping in an interactive session, no-op in a script.
d
內容:
{'tamil nadu': {'cities': {'chennai', 'nagerkoil', 'tuticorin', 'madurai', 'tenkasi'}, 'my_haps': {'hap0', 'hap1'}}, 'kerala': {'cities': {'kolikodu', 'palakad', 'idukki', 'guruvayor', 'kottayam'}, 'my_haps': {'hap1'}}, 'mumbai': {'cities': {'Nanded', 'Latur', 'Jalna', 'Akola', 'Washim'}, 'my_haps': {'hap2'}}}
函數itertools.chain
合並了兩個列表。 如果需要, setdefault
方法會在字典中創建一個新條目。
轉換為所需格式只是一個(丑陋的)字典理解:
{
    'aggregate': [
        {
            # The group value first, then one list per merge rule.
            field_to_be_check: group_value,
            **{
                # Wrap every collected value as {source_key: value}.
                list_name: [{source_key: value} for value in merged[list_name]]
                for source_key, list_name in name_by_merger.items()
            },
        }
        for group_value, merged in d.items()
    ]
}
輸出:
{'aggregate': [{'state': 'tamil nadu', 'cities': [{'city': 'chennai'}, {'city': 'nagerkoil'}, {'city': 'tuticorin'}, {'city': 'madurai'}, {'city': 'tenkasi'}], 'my_haps': [{'haps': 'hap0'}, {'haps': 'hap1'}]}, {'state': 'kerala', 'cities': [{'city': 'kolikodu'}, {'city': 'palakad'}, {'city': 'idukki'}, {'city': 'guruvayor'}, {'city': 'kottayam'}], 'my_haps': [{'haps': 'hap1'}]}, {'state': 'mumbai', 'cities': [{'city': 'Nanded'}, {'city': 'Latur'}, {'city': 'Jalna'}, {'city': 'Akola'}, {'city': 'Washim'}], 'my_haps': [{'haps': 'hap2'}]}]}
當然,您可以一次性構建所需的輸出,但是由於格式繁瑣,我認為先創建一個簡潔(nice)的字典,然后再將其轉換為所需的格式會更清晰。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.