简体   繁体   中英

Aggregate an array of dicts based on value

Revision

# Three groups of records; every record pairs a 'type' label with a 'count'.
# Not every type appears in every group.
data = [
    [
        {'type': 'low', 'count': 100},
        {'type': 'medium', 'count': 45},
        {'type': 'high', 'count': 5},
    ],
    [
        {'type': 'medium', 'count': 45},
        {'type': 'high', 'count': 5},
    ],
    [
        {'type': 'low', 'count': 100},
        {'type': 'medium', 'count': 45},
    ],
]

def unique_type(data):
    """Collect the distinct 'type' values found in a list of lists of dicts.

    Returns a ``(result, type_list)`` tuple:

    * ``type_list`` holds each distinct ``'type'`` value once, in the order
      it is first encountered.
    * ``result`` holds one ``{'type': <value>, 'count': []}`` dict per entry
      of ``type_list``, in the same order, each with its own empty list.
    """
    type_list = []
    for group in data:
        for entry in group:
            kind = entry['type']
            if kind in type_list:
                # Already recorded by an earlier entry.
                continue
            type_list.append(kind)

    result = [{'type': kind, 'count': []} for kind in type_list]
    return result, type_list

# Build one empty bucket per distinct type, plus the ordered list of type names.
result, type_list = unique_type(data)
# NOTE: plain assignment -- `empty_results` is a second name for the same list
# object as `result`, not a copy.
empty_results = result
for arr in data:
    
    # `num` indexes into `arr`, but it ranges over len(type_list); when `arr`
    # has fewer entries than there are types, arr[num] runs past its end.
    for num in range(0, len(type_list)):
        try:
            number = 0
            # Linear scan of `result` for the bucket whose type matches arr[num].
            while number < len(type_list):
                # arr[num] is evaluated here first, so this is where the
                # IndexError is raised when num >= len(arr).
                print(f"{arr[num]['type']} == {result[number]['type']}")
                if arr[num]['type'] == result[number]['type']:
                    result[number]['count'].append(arr[num]['count'])
                    number += 1
                    break
                else:
                    number += 1
        except IndexError:
    
            # TODO need some way for evaluating if a key is missing from arr compared to result
    
            # The IndexError comes from arr[num] on the first pass of the while
            # loop, so `number` is still 0 here: the placeholder below is always
            # added to result[0], whichever type is actually absent from `arr`.
            print(f"Index Error{result[number]['type']}")
                
            result[number]['count'].append(float('nan'))

Results I get

[{'type': 'low', 'count': [100, nan, 100, nan]}, {'type': 'medium', 'count': [45, 45, 45]}, {'type': 'high', 'count': [5, 5]}]

Results desired

[{'type': 'low', 'count': [100, nan, 100]}, {'type': 'medium', 'count': [45, 45, 45]}, {'type': 'high', 'count': [5, 5, nan]}]

Unique_type() allows me to iterate through my array of dictionaries and create an array of unique keys

results allows me to build my desired array of dictionaries

When iterating over the array of dictionaries, I want to ensure that there is a value whether or not that key exists in results.

Original Post

Hey folks, I'm building out a model that's been a little tricky for me.

    # Three sample records that share the same 'type' and differ only in 'count'.
    a = {'type': 'Low', 'count': 184} 
    b = {'type': 'Low', 'count': 186} 
    c = {'type': 'Low', 'count': 97}

 

    # The records collected into one list, in a, b, c order.
    new = [a,b,c]

### How do I accomplish the result below?

    result = {'type': 'Low', 'count': [184, 186, 97]}

Here is a more generic solution, assuming you can have several types:

from itertools import groupby

# groupby only merges adjacent items with equal keys, so the records are
# sorted by the same key ('type') before grouping.
[{'type': kind, 'count': [record['count'] for record in records]}
 for kind, records in groupby(sorted(new, key=lambda record: record['type']),
                              key=lambda record: record['type'])
]

input:

[{'type': 'Low', 'count': 184},
 {'type': 'Low', 'count': 186},
 {'type': 'Low', 'count': 97},
 {'type': 'High', 'count': 1000}]

output:

[{'type': 'High', 'count': [1000]},
 {'type': 'Low', 'count': [184, 186, 97]}]

As long as you have just 'Low' you can use this; otherwise you would need to change it around to be more modular:

# Sample records, all of type 'Low'.
a = {'type': 'Low', 'count': 184}
b = {'type': 'Low', 'count': 186}
c = {'type': 'Low', 'count': 97}
new = [a, b, c]

# Gather the counts of every 'Low' record, in input order, into one record.
low_counts = [record['count'] for record in new if record['type'] == 'Low']
result = {'type': 'Low', 'count': low_counts}
print(result)
# Three groups of records; a group may omit some of the types.
data = [
    [dict(type='low', count=100), dict(type='medium', count=45), dict(type='high', count=5)],
    [dict(type='medium', count=45), dict(type='high', count=5)],
    [dict(type='low', count=100), dict(type='medium', count=45)],
]


def get_all_types(dicts):
    """Return the set of distinct ``'type'`` values among the given dicts."""
    return {record['type'] for record in dicts}


def multiple(dicts, types):
    """Aggregate the counts in ``dicts`` into one record per entry of ``types``.

    Args:
        dicts: iterable of ``{'type': ..., 'count': ...}`` records. A record's
            ``'count'`` may be a single value or a list of values.
        types: iterable of type names; one output record is produced for each,
            in iteration order.

    Returns:
        A list of ``{'type': name, 'count': [...]}`` dicts. For every input
        record whose type matches ``name``:

        * a non-list count is appended as-is;
        * a non-empty list count is flattened into the output list;
        * an empty list count contributes the placeholder string ``"nan"``.

        A type with no matching record gets an empty ``'count'`` list.
    """
    finaldicts = []
    for wanted in types:
        counts = []
        for record in dicts:
            if record['type'] != wanted:
                continue
            value = record['count']
            # isinstance (rather than an exact type comparison) so that list
            # subclasses are flattened too instead of being nested as a
            # single element.
            if not isinstance(value, list):
                counts.append(value)
            elif value:
                counts.extend(value)
            else:
                # Empty list: this type had no value in that group.
                counts.append("nan")
        finaldicts.append({'type': wanted, 'count': counts})
    return finaldicts


# Every type name that occurs in any group.
types = set()
for group in data:
    types = types | get_all_types(group)

# First pass: one record per (group, type); a type missing from a group gets
# an empty 'count' list.
new_dics_with_types_aggregate = [
    per_type
    for group in data
    for per_type in multiple(group, types)
]
print(types)
print(new_dics_with_types_aggregate)

# Second pass: merge the per-group records into one record per type.
print(multiple(new_dics_with_types_aggregate, types))

Output

{'low', 'medium', 'high'}
[{'type': 'low', 'count': [100]}, {'type': 'medium', 'count': [45]}, {'type': 'high', 'count': [5]}, {'type': 'low', 'count': []}, {'type': 'medium', 'count': [45]}, {'type': 'high', 'count': [5]}, {'type': 'low', 'count': [100]}, {'type': 'medium', 'count': [45]}, {'type': 'high', 'count': []}]
[{'type': 'low', 'count': [100, 'nan', 100]}, {'type': 'medium', 'count': [45, 45, 45]}, {'type': 'high', 'count': [5, 5, 'nan']}]
# Sample records covering two types.
a = {'type': 'Low', 'count': 184}
b = {'type': 'Low', 'count': 186}
c = {'type': 'Low', 'count': 97}
d = {'type': 'High', 'count': 102}
e = {'type': 'High', 'count': 142}
new = [a, b, c, d, e]

# Bucket the counts per type; dict insertion order keeps the types in the
# order they are first seen.
counts_by_type = {}
for record in new:
    counts_by_type.setdefault(record['type'], []).append(record['count'])

type_list = list(counts_by_type)
result = [{'type': kind, 'count': counts} for kind, counts in counts_by_type.items()]
print(result)

output:

[
    {'type': 'Low', 'count': [184, 186, 97]},
    {'type': 'High', 'count': [102, 142]}
]

Updated answer:

# Three groups of records; a group may omit some of the types.
data = [
    [{'type': 'low', 'count': 100}, {'type': 'medium', 'count': 45}, {'type': 'high', 'count': 5}],
    [{'type': 'medium', 'count': 45}, {'type': 'high', 'count': 5}],
    [{'type': 'low', 'count': 100}, {'type': 'medium', 'count': 45}],
]

# Distinct type names, in the order they are first seen.
type_list = []
for sublist in data:
    for item in sublist:
        if item["type"] not in type_list:
            type_list.append(item["type"])
print(type_list)

# Create every bucket *before* filling any of them: filling while the buckets
# are still being created indexes into entries of `result` that do not exist
# yet and raises IndexError.
result = [{'type': type_name, 'count': []} for type_name in type_list]

for sublist in data:
    # First count seen for each type within this group.
    first_count = {}
    for item in sublist:
        first_count.setdefault(item['type'], item['count'])
    # Each bucket receives exactly one value per group: the group's count for
    # that type, or None when the group has no record of that type.
    for bucket in result:
        bucket['count'].append(first_count.get(bucket['type']))
print(result)

Output:

['low', 'medium', 'high']
[
    {'type': 'low', 'count': [100, None, 100]}, 
    {'type': 'medium', 'count': [45, 45, 45]}, 
    {'type': 'high', 'count': [5, 5, None]}
]

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM