简体   繁体   中英

python: filter list of dict based on another list of dict

I have two lists of dicts. Let's call the first one dd:

dd = [{'11': {'xx': '259', 'priority': '1', 'channels': '55'}},
      {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
      {'11': {'xx': '270', 'priority': '3', 'channels': '35'}},
      {'22': {'xx': '300', 'priority': '1', 'channels': '40'}},
      {'22': {'xx': '303', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
      {'33': {'xx': '500', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '606', 'priority': '3', 'channels': '30'}}]

The key features of each dict element are the id (i.e. 11, 22, 33) and the priority (i.e. 1, 2, 3, ...).

The second one is a list of filter dicts:

filter_dict = [{'11': 2}, {'33': 2}]

Each filter dict holds one key-value pair: the key is an id from the first list dd, and the value is the number of elements to select from dd for that id, i.e. {'11': 2} means select the top 2 elements with id 11 from dd, ordered by priority. If filter_dict has no entry for an id, select only the single top-priority element for that id.

What I have so far is a way to get only 1 top priority element from the dd based on priority :

# Bucket the records by id (the first key of each record), in input order.
tmp = {}
for record in dd:
    record_id = list(record)[0]
    if record_id not in tmp:
        tmp[record_id] = []
    tmp[record_id].append(record)

# Keep only the first record that was seen for every id.
out = []
for same_id_records in tmp.values():
    out.append(same_id_records[0])
print(out)

Desired output that I am trying to achieve:

res = [{'11': {'xx': '259', 'priority': '1', 'channels': '55'}},
          {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
          {'22': {'xx': '300', 'priority': '1', 'channels': '40'}}, # one elem because no record in the filter_dict
          {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
          {'33': {'xx': '500', 'priority': '2', 'channels': '30'}}]

EDIT:

The proposed solutions fail when the elements are more than 4.

ie for the input:

dd = [{'11': {'xx': '259', 'priority': '1', 'channels': '55'}},
      {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
      {'11': {'xx': '270', 'priority': '4', 'channels': '35'}},
      {'11': {'xx': '260', 'priority': '9', 'channels': '35'}},
      {'11': {'xx': '270', 'priority': '11', 'channels': '35'}},
      {'22': {'xx': '300', 'priority': '1', 'channels': '40'}},
      {'22': {'xx': '303', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
      {'33': {'xx': '500', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '606', 'priority': '3', 'channels': '30'}}]

the output is:

[{'11': {'xx': '259', 'priority': '1', 'channels': '55'}}, {'11': {'xx': '270', 'priority': '11', 'channels': '35'}}, {'11': {'xx': '260', 'priority': '2', 'channels': '35'}}, {'22': {'xx': '300', 'priority': '1', 'channels': '40'}}, {'33': {'xx': '400', 'priority': '1', 'channels': '40'}}]

Which is incorrect.

this worked for me:

dd = [{'11': {'xx': '259', 'priority': '1', 'channels': '55'}},
      {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
      {'11': {'xx': '270', 'priority': '3', 'channels': '35'}},
      {'22': {'xx': '300', 'priority': '1', 'channels': '40'}},
      {'22': {'xx': '303', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
      {'33': {'xx': '500', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '606', 'priority': '3', 'channels': '30'}}]

filter_dict = [{'11': 2}, {'33': 2}]


def pick_top_priority(records, filters, default_count=1):
    """Return the best-priority records of every id.

    Args:
        records: list of dicts shaped like ``{id: {'priority': '<int>', ...}}``.
        filters: list of dicts ``{id: count}`` giving how many records to keep
            for that id.
        default_count: how many records to keep for an id that has no entry
            in ``filters``.

    Returns:
        A new list holding, for each id in order of first appearance, its
        ``count`` records with the numerically smallest priority, sorted by
        ascending priority.
    """
    counts = {key: count for entry in filters for key, count in entry.items()}

    # Group by id; a plain dict keeps the ids in order of first appearance.
    grouped = {}
    for record in records:
        for record_id in record:
            grouped.setdefault(record_id, []).append(record)

    selected = []
    for record_id, group in grouped.items():
        # Rank by the numeric priority: the count limits how many records
        # are taken, it is not a threshold on the priority value itself.
        ranked = sorted(group, key=lambda rec: int(rec[record_id]['priority']))
        wanted = max(0, counts.get(record_id, default_count))
        selected.extend(ranked[:wanted])
    return selected


res = pick_top_priority(dd, filter_dict)

print(*res, sep = '\n')

Output

{'11': {'xx': '259', 'priority': '1', 'channels': '55'}}
{'11': {'xx': '260', 'priority': '2', 'channels': '35'}}
{'22': {'xx': '300', 'priority': '1', 'channels': '40'}}
{'33': {'xx': '400', 'priority': '1', 'channels': '40'}}
{'33': {'xx': '500', 'priority': '2', 'channels': '30'}}

EDIT:

Here is a more general approach that also changes the output format:

dd = [{'11': {'xx': '259', 'priority': '1', 'channels': '55'}},
      {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
      {'11': {'xx': '270', 'priority': '4', 'channels': '35'}},
      {'11': {'xx': '260', 'priority': '9', 'channels': '35'}},
      {'11': {'xx': '270', 'priority': '11', 'channels': '35'}},
      {'22': {'xx': '300', 'priority': '1', 'channels': '40'}},
      {'22': {'xx': '303', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
      {'33': {'xx': '500', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '606', 'priority': '3', 'channels': '30'}}]

filter_dict = [{'11': 2}, {'33': 2}]


def sortSecond(val):
    """Return the first item of *val*; used as the sort key (the priority)."""
    return val[0]


# Phase 1: map every id to its (priority, index-in-dd) pairs, ordered by
# ascending integer priority.
middle_dict = {}
for position, record in enumerate(dd):
    for dict_id, sub_dict in record.items():
        pair = (int(sub_dict['priority']), position)
        middle_dict.setdefault(dict_id, []).append(pair)
for ranked_pairs in middle_dict.values():
    ranked_pairs.sort(key=sortSecond)

# Phase 2: emit one {id: [payload, ...]} entry per id.  The best-ranked
# payload is always kept; further ones only while the matching filter entry
# still allows it.
res = []
for one_key, ranked_pairs in middle_dict.items():
    (_, best_position), *other_pairs = ranked_pairs
    picked = [dd[best_position][one_key]]
    res.append({one_key: picked})

    next_rank = 2  # rank the next candidate would occupy (1 is already taken)
    for _, candidate_position in other_pairs:
        for rule in filter_dict:
            rule_key = list(rule.keys())[0]
            if one_key != rule_key:
                continue
            if next_rank > rule[rule_key]:
                break
            picked.append(dd[candidate_position][one_key])
            next_rank += 1

print(res)

Output:

[{'11': [{'xx': '259', 'priority': '1', 'channels': '55'}, {'xx': '260', 'priority': '2', 'channels': '35'}]}, {'22': [{'xx': '300', 'priority': '1', 'channels': '40'}]}, {'33': [{'xx': '400', 'priority': '1', 'channels': '40'}, {'xx': '500', 'priority': '2', 'channels': '30'}]}]

Not very elegant, but this gets the job done using an intermediate dictionary.

dd = [{'11': {'xx': '259', 'priority': '1', 'channels': '55'}},
      {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
      {'11': {'xx': '270', 'priority': '3', 'channels': '35'}},
      {'22': {'xx': '300', 'priority': '1', 'channels': '40'}},
      {'22': {'xx': '303', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
      {'33': {'xx': '500', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '606', 'priority': '3', 'channels': '30'}}]

filter_dict = [{'11': 2}, {'33': 2}]


def take_by_quota(records, filters, default_count=1):
    """Keep the best-priority records of every id, preserving input order.

    Args:
        records: list of single-key dicts shaped like
            ``{id: {'priority': '<int>', ...}}``; they need not be sorted.
        filters: list of dicts ``{id: count}`` giving how many records to keep
            for that id.
        default_count: how many records to keep for an id that has no entry
            in ``filters``.

    Returns:
        A new list with the kept records, in the order they appear in
        ``records``.  ``filters`` is not modified.
    """
    # Remaining quota per id.  An entry is never removed once it reaches
    # zero: deleting it would make the id look unseen again and re-admit
    # later records of the same id.
    quota = {key: entry[key] for entry in filters for key in entry}

    def rank(index):
        record = records[index]
        return int(record[[*record][0]]['priority'])

    # Visit the records from best to worst priority (stable for ties) and
    # remember the positions that still fit in their id's quota.
    keep = set()
    for index in sorted(range(len(records)), key=rank):
        record_id = [*records[index]][0]
        left = quota.get(record_id, default_count)
        if left > 0:
            keep.add(index)
            quota[record_id] = left - 1

    return [record for index, record in enumerate(records) if index in keep]


out = take_by_quota(dd, filter_dict)

print(out)
[{'11': {'xx': '259', 'priority': '1', 'channels': '55'}}, 
 {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
 {'22': {'xx': '300', 'priority': '1', 'channels': '40'}},
 {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
 {'33': {'xx': '500', 'priority': '2', 'channels': '30'}}]

You can use defaultdict to help you with that

from collections import defaultdict

dd = [{'11': {'xx': '259', 'priority': '1', 'channels': '55'}},
      {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
      {'11': {'xx': '270', 'priority': '3', 'channels': '35'}},
      {'22': {'xx': '300', 'priority': '1', 'channels': '40'}},
      {'22': {'xx': '303', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
      {'33': {'xx': '500', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '606', 'priority': '3', 'channels': '30'}}]

filter_dict = [{'11': 2}, {'33': 2}]

# Collapse the list of one-entry dicts into a single {id: count} lookup.
filter_map = {}
for rule in filter_dict:
    filter_map[list(rule)[0]] = list(rule.values())[0]

# Bucket the records by id; defaultdict creates each bucket on first use.
res_each = defaultdict(list)
for record in dd:
    res_each[list(record)[0]].append(record)

# For every id keep its best records by numeric priority: as many as the
# lookup asks for, or one when the id has no entry.
res = []
for record_id, bucket in res_each.items():
    ranked = sorted(bucket, key=lambda rec: int(list(rec.values())[0]['priority']))
    limit = filter_map.get(record_id, 1)
    res += ranked[:limit]
print(res)

Output

[{'11': {'xx': '259', 'priority': '1', 'channels': '55'}}, 
{'11':{'xx':'260','priority': '2', 'channels': '35'}}, 
{'22': {'xx': '300', 'priority': '1', 'channels':'40'}}, 
{'33': {'xx': '400', 'priority': '1', 'channels': '40'}}, 
{'33': {'xx': '500', 'priority': '2', 'channels': '30'}}]

I assume that dd is not sorted at all. I hope this helps.

dd = [{'11': {'xx': '259', 'priority': '1', 'channels': '55'}},
      {'11': {'xx': '270', 'priority': '3', 'channels': '35'}},
      {'22': {'xx': '303', 'priority': '2', 'channels': '30'}},
      {'33': {'xx': '400', 'priority': '1', 'channels': '40'}},
      {'11': {'xx': '260', 'priority': '2', 'channels': '35'}},
      {'33': {'xx': '500', 'priority': '2', 'channels': '30'}},
      {'22': {'xx': '300', 'priority': '1', 'channels': '40'}},
      {'33': {'xx': '606', 'priority': '3', 'channels': '30'}}]
# Order the records by id first and by numeric priority second, so that the
# best records of every id are met first in the loop below.
dd.sort(key=lambda rec: ([*rec][0], int([*rec.values()][0]['priority'])))

filter_dict = [{'11': 2}, {'33': 2}]
# Flatten into one {id: remaining_count} mapping; it is consumed below.
filter_dict = dict(pair for rule in filter_dict for pair in rule.items())

res = []
for record in dd:
    key = [*record][0]
    remaining = filter_dict.get(key)
    if remaining is None:
        # Id without a filter entry: keep its first (best) record only and
        # register the id with a zero count so later records are skipped.
        res.append(record)
        filter_dict[key] = 0
    elif remaining > 0:
        res.append(record)
        filter_dict[key] = remaining - 1

for record in res:
    print(record)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM