[英]Construct pandas DataFrame from items in a 3-level nested dictionary with lists of values
I have the following dictionary:我有以下字典:
data ={
'Allehelgens gate 4': {
'Direkte el1': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
537,
967
]
},
'Direkte el2': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
100,
89
]
},
'Direkte el3': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
203,
191
]
},
'Direkte el4': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Direkte el5': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
8993,
10113
]
},
'Fjernvarme6': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
10280,
13630
]
},
'Direkte el7': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
1345,
1380
]
},
'Tappevann8': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
9,
15
]
},
'Tappevann9': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
8,
15
]
}
},
'Christian Krohgs gate 32': {
'Fjernvarme1': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
9147,
12761
]
},
'Fjernvarme2': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
8869,
12732
]
},
'Fjernvarme3': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
50,
1724
]
},
'Nærkjøling4': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Nærkjøling5': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Fjernvarme6': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Nærkjøling7': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Fjernvarme8': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Nærkjøling9': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Fjernvarme10': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
2,
1
]
},
'Nærkjøling11': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Fjernvarme12': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
1
]
},
'Nærkjøling13': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Direkte el14': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
8626,
16143
]
},
'Direkte el15': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
18211,
18211
]
},
'Fjernvarme16': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
20600,
35310
]
},
'Direkte el17': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
95,
138
]
},
'Direkte el18': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
358,
357
]
},
'Direkte el19': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
130,
130
]
},
'Direkte el20': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
331,
472
]
},
'Direkte el21': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
309,
355
]
},
'Direkte el22': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
382,
698
]
},
'Direkte el23': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
373,
440
]
},
'Direkte el24': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
173,
183
]
},
'Direkte el25': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
706,
1015
]
},
'Direkte el26': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
80,
141
]
},
'Direkte el27': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
175,
210
]
},
'Direkte el28': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
406,
702
]
},
'Direkte el29': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
251,
364
]
},
'Direkte el30': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
517,
510
]
},
'Direkte el31': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
146,
299
]
},
'Direkte el32': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
659,
857
]
},
'Direkte el33': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
55,
90
]
},
'Direkte el34': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
192,
224
]
},
'Direkte el35': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
976,
1009
]
},
'Tappevann36': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Tappevann37': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
0,
0
]
},
'Direkte el38': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
654,
2436
]
},
'Direkte el39': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
27,
212
]
},
'Direkte el40': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
75,
393
]
},
'Direkte el41': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
82,
323
]
},
'Direkte el42': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
8,
584
]
},
'Direkte el43': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
9,
842
]
},
'Direkte el44': {
'Datetime': [
'2020-12-28T00:00:00.000Z',
'2021-01-04T00:00:00.000Z'
],
'Value': [
695,
905
]
}
}
}
I would like to transform the dictionary to a pandas data frame.我想将字典转换为 pandas 数据框。 I think the best way would be to get something like:
我认为最好的方法是得到类似的东西:
enter image description here在此处输入图像描述
I tried several ways without success; does anybody have a clue how to solve this?尝试了几种方法都没有成功,有人知道如何解决这个问题吗?
It seems that the reply from Tranbi almost works:似乎 Tranbi 的回复几乎有效:
Suggested solution:建议的解决方案:
series = pd.DataFrame(data).unstack()
cols = series.index
s = series.apply(lambda x: dict(zip(x["Datetime"], x["Value"])))
pd.DataFrame(s.values.tolist(), index=cols).T
but when running但是在运行
series = pd.DataFrame(data).unstack()
the pandas Series gets a lot of NaN values. pandas Series 有很多 NaN。 This causes an error when running
这会导致运行时出错
s = series.apply(lambda x: dict(zip(x["Datetime"], x["Value"])))
Do you have any clue about how to fix this?你知道如何解决这个问题吗?
Just use a list to store the building numbers and use the index to reference the building.只需使用列表来存储建筑物编号并使用索引来引用建筑物。 You can also change the name of the index column to `Building`.您还可以将索引列的名称更改为 `Building`。 In other words, you do not need `Building 1`, `Building 2`, ..., `Building n`.
换句话说,你不需要 `Building 1`, `Building 2`, ..., `Building n`。
import pandas as pd
data = [
{
"Energy consumption": {
"Datetime": ["2020-12-28","2021-01-04"],
"Value": [537,967]},
"Water consumption": {
"Datetime": ["2020-12-28","2021-01-04"],
"Value": [537,967]}
},
{
"Energy consumption": {
"Datetime": ["2020-12-28","2021-01-04"],
"Value": [600,700]},
"Water consumption": {
"Datetime": ["2020-12-28","2021-01-04"],
"Value": [800,500]}
}
]
df = pd.DataFrame.from_dict(data)
print(df)
output: output:
Energy consumption Water consumption
0 {'Datetime': ['2020-12-28', '2021-01-04'], 'Va... {'Datetime': ['2020-12-28', '2021-01-04'], 'Va...
1 {'Datetime': ['2020-12-28', '2021-01-04'], 'Va... {'Datetime': ['2020-12-28', '2021-01-04'], 'Va...
The structure of the nested dictionaries is a little intricate.嵌套字典的结构有点复杂。 You can first get the multi-index columns with unstack then transform the cell values to create a new dataframe:
您可以先使用 unstack 获取多索引列,然后转换单元格值以创建新的 dataframe:
series = pd.DataFrame(data).unstack().dropna()
s = series.apply(lambda x: dict(zip(x["Datetime"], x["Value"])))
pd.DataFrame(s.values.tolist(), index=series.index).T
Edit: added `dropna` following your edit.编辑:在您的编辑之后添加了 `dropna`。
Output: Output:
Building 1 Building 2
Energy consumption Water consumption Energy consumption Water consumption
2020-12-28 537 537 600 800
2021-01-04 967 967 700 500
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.