[英]Multi Nested Json to Flat Json in Python
我正在尝试使用 pandas 的内置函数以及 Stack Overflow 上已有的一些方案,把多层嵌套的 JSON 转换为扁平结构。但这些方法要么只能规范化(normalize)到第一层,要么扁平化后产生不需要的结果,对它们做修改后也得不到期望的输出(output)。欢迎任何建议。
示例 JSON:
{
"Records": [
{
"Name": "Student1",
"Result": "Pass",
"Marks": [
{
"Sub1": "50",
"Sub2": "40",
"YOP": [
{
"prim": "2010",
"sch": "abc"
},
{
"prim": "2010",
"sch": "abc"
}
]
}
]
},
{
"Name": "Stu2",
"Result": "Pass",
"Marks": [
{
"Sub1": "33",
"Sub2": "33",
"YOP": [
{
"prim": "2010",
"sch": "def"
},
{
"high": "2010",
"sch": "abc"
}
]
}
]
}
]
}
当前代码
import json
from itertools import chain, starmap
from itertools import islice

try:
    # The abstract base classes live in collections.abc; the aliases in
    # ``collections`` were removed in Python 3.10.
    from collections.abc import MutableMapping
except ImportError:
    from collections import MutableMapping

from pandas.io.json import json_normalize
# Module-level debug switch: when truthy, flatten() prints a trace of each step.
crumbs = True


def flatten(dictionary, parent_key=False, separator='.'):
    """
    Turn a nested dictionary into a flattened dictionary.

    Nested mappings are walked recursively and their keys are joined to the
    parent key with ``separator``. List elements are treated like mapping
    entries whose key is the element's index (``"0"``, ``"1"``, ...). Empty
    mappings and empty lists are kept as a single entry with value ``None``.

    :param dictionary: The dictionary to flatten
    :param parent_key: The string to prepend to dictionary's keys; any falsy
        value (the default ``False``) means "no prefix"
    :param separator: The string used to separate flattened keys
    :return: A flattened dictionary
    """
    items = []
    for key, value in dictionary.items():
        if crumbs:
            print('checking:', key)
        # str(key) keeps the join working for non-string keys as well.
        new_key = str(parent_key) + separator + str(key) if parent_key else key
        if isinstance(value, MutableMapping):
            if crumbs:
                print(new_key, ': dict found')
            if not value:
                if crumbs:
                    print('Adding key-value pair:', new_key, None)
                items.append((new_key, None))
            else:
                items.extend(flatten(value, new_key, separator).items())
        elif isinstance(value, list):
            if crumbs:
                print(new_key, ': list found')
            if value:
                for index, element in enumerate(value):
                    # Wrap the element so the index becomes one more key
                    # segment. The separator must be forwarded here, otherwise
                    # everything below a list falls back to the default '.'.
                    items.extend(
                        flatten({str(index): element}, new_key, separator).items()
                    )
            else:
                if crumbs:
                    print('Adding key-value pair:', new_key, None)
                items.append((new_key, None))
        else:
            if crumbs:
                print('Adding key-value pair:', new_key, value)
            items.append((new_key, value))
    return dict(items)
def main():
    """Read ``aaa.json``, print the parsed object's type, then print its flattened form."""
    with open("aaa.json", "r") as handle:
        data = json.load(handle)
    print(type(data))
    print(flatten(data))


if __name__ == '__main__':
    main()
{
"Records.0.Name": "Student1",
"Records.0.Result": "Pass",
"Records.0.Marks.0.Sub1": "50",
"Records.0.Marks.0.Sub2": "40",
"Records.0.Marks.0.YOP.0.prim": "2010",
"Records.0.Marks.0.YOP.0.sch": "abc",
"Records.0.Marks.0.YOP.1.high": "2012",
"Records.0.Marks.0.YOP.1.sch": "abc",
"Records.1.Name": "Stu2",
"Records.1.Result": "Pass",
"Records.1.Marks.0.Sub1": "33",
"Records.1.Marks.0.Sub2": "33",
"Records.1.Marks.0.YOP.0.prim": "210",
"Records.1.Marks.0.YOP.0.sch": "def",
"Records.1.Marks.0.YOP.1.high": "999",
"Records.1.Marks.0.YOP.1.sch": "abc"
}
使用这段代码得到了上面的输出。请问如何去掉键名中的数字索引,并把每条记录拆分成单独的对象?
期望的最终结果:
{
"Records.Name": "Student1",
"Records.Result": "Pass",
"Records.Marks.Sub1": "50",
"Records.Marks.Sub2": "40",
"Records.Marks.YOP.prim": "2010",
"Records.Marks.YOP.sch": "abc",
"Records.Marks.YOP.high": "2012",
"Records.Marks.YOP.sch": "abc",
},
{
"Records.Name": "Stu2",
"Records.Result": "Pass",
"Records.Marks.Sub1": "33",
"Records.Marks.Sub2": "33",
"Records.Marks.YOP.prim": "210",
"Records.Marks.YOP.sch": "def",
"Records.Marks.YOP.high": "999",
"Records.Marks.YOP.sch": "abc"
}
from flatten_json import flatten
# Flatten the first element of the ``json`` list (defined in the input section
# that follows) using ``flatten`` from the ``flatten_json`` package.
# NOTE(review): the variable name ``json`` shadows the standard-library module
# of the same name if both are used in one script.
records = flatten(json[0])
输入:
json = [{
"Records": [
{
"Name": "Student1",
"Result": "Pass",
"Marks": [
{
"Sub1": "50",
"Sub2": "40",
"YOP": [
{
"prim": "2010",
"sch": "abc"
},
{
"prim": "2010",
"sch": "abc"
}
]
}
]
},
{
"Name": "Stu2",
"Result": "Pass",
"Marks": [
{
"Sub1": "33",
"Sub2": "33",
"YOP": [
{
"prim": "2010",
"sch": "def"
},
{
"high": "2010",
"sch": "abc"
}
]
}
]
}
]
}]
Output:
{'Records_0_Name': 'Student1',
'Records_0_Result': 'Pass',
'Records_0_Marks_0_Sub1': '50',
'Records_0_Marks_0_Sub2': '40',
'Records_0_Marks_0_YOP_0_prim': '2010',
'Records_0_Marks_0_YOP_0_sch': 'abc',
'Records_0_Marks_0_YOP_1_prim': '2010',
'Records_0_Marks_0_YOP_1_sch': 'abc',
'Records_1_Name': 'Stu2',
'Records_1_Result': 'Pass',
'Records_1_Marks_0_Sub1': '33',
'Records_1_Marks_0_Sub2': '33',
'Records_1_Marks_0_YOP_0_prim': '2010',
'Records_1_Marks_0_YOP_0_sch': 'def',
'Records_1_Marks_0_YOP_1_high': '2010',
'Records_1_Marks_0_YOP_1_sch': 'abc'}
更新:
您正在寻找的结果:
# Flatten every record separately so each student becomes its own flat dict.
# The separator is written with plain ASCII quotes: typographic quotes (“ ”)
# are not valid Python string delimiters and make this line a syntax error.
# NOTE(review): the sample output listed after this line shows "_"-joined keys,
# presumably produced with the default separator; with "." the keys would be
# dot-joined instead. Confirm which form is wanted.
records = [flatten(record, ".") for record in json[0]['Records']]
Output:
[{'Name': 'Student1',
'Result': 'Pass',
'Marks_0_Sub1': '50',
'Marks_0_Sub2': '40',
'Marks_0_YOP_0_prim': '2010',
'Marks_0_YOP_0_sch': 'abc',
'Marks_0_YOP_1_prim': '2010',
'Marks_0_YOP_1_sch': 'abc'},
{'Name': 'Stu2',
'Result': 'Pass',
'Marks_0_Sub1': '33',
'Marks_0_Sub2': '33',
'Marks_0_YOP_0_prim': '2010',
'Marks_0_YOP_0_sch': 'def',
'Marks_0_YOP_1_high': '2010',
'Marks_0_YOP_1_sch': 'abc'}]
import pandas as pd
tree= {
"Records": [
{
"Name": "Student1",
"Result": "Pass",
"Marks": [
{
"Sub1": "50",
"Sub2": "40",
"YOP": [
{
"prim": "2010",
"sch": "abc"
},
{
"prim": "2010",
"sch": "abc"
}
]
}
]
},
{
"Name": "Stu2",
"Result": "Pass",
"Marks": [
{
"Sub1": "33",
"Sub2": "33",
"YOP": [
{
"prim": "2010",
"sch": "def"
},
{
"high": "2010",
"sch": "abc"
}
]
}
]
}
]
}
import pandas as pd
def traverse_parser_dfs(master_tree):
    """Flatten a nested dict/list tree into a pandas DataFrame, depth first.

    String values become columns named ``<prefix>_<key>``; the prefix starts
    as ``"node"`` and gains one ``_<key>`` segment per nesting level. Values
    of nested dicts are added to the row currently being built. Every element
    of a list starts from a copy of the columns collected so far, and a row
    is emitted for each visited element that contains no further list.

    Only ``str`` leaf values are collected; other scalar types are ignored.

    :param master_tree: The nested structure (typically parsed JSON) to flatten
    :return: A DataFrame with one row per emitted leaf record; ``@`` and ``#``
        in column names are replaced by ``_``
    """
    flatten_tree_node = []

    def _process_leaves(tree, prefix: str = "node",
                        tree_node: "dict | None" = None, update: bool = True):
        # A fresh dict per top-level call instead of a shared mutable default.
        if tree_node is None:
            tree_node = {}
        is_nested = False
        if isinstance(tree, dict):
            # First pass: plain strings and nested dicts extend the current row.
            for key, value in tree.items():
                if isinstance(value, str):
                    tree_node[prefix + "_" + key] = value
                elif isinstance(value, dict):
                    # Build the child prefix locally: mutating ``prefix`` here
                    # would leak this key into the names of later siblings.
                    _process_leaves(value, prefix=prefix + "_" + key,
                                    tree_node=tree_node, update=False)
            # Second pass: lists fan out, one branch per element, each starting
            # from a copy of everything gathered in the first pass.
            for key, value in tree.items():
                if isinstance(value, list):
                    is_nested = True
                    child_prefix = prefix + "_" + key
                    for leave in value:
                        _process_leaves(leave, prefix=child_prefix,
                                        tree_node=tree_node.copy())
        # A branch that fans out emits its rows through its list elements;
        # nested-dict calls (update=False) never emit a row of their own.
        if not is_nested and update:
            flatten_tree_node.append(tree_node)

    _process_leaves(master_tree)
    df = pd.DataFrame(flatten_tree_node)
    # Replace "@" and "#" in column names (presumably xmltodict-style
    # attribute/text markers; confirm against the real input).
    return df.rename(
        columns=lambda name: name.replace("@", "_").replace("#", "_")
    )
print(traverse_parser_dfs(tree))
node_Records_Name node_Records_Result node_Records_Marks_Sub1 ... node_Records_Marks_YOP_prim node_Records_Marks_YOP_sch node_Records_Marks_YOP_high
0 Student1 Pass 50 ... 2010 abc NaN
1 Student1 Pass 50 ... 2010 abc NaN
2 Stu2 Pass 33 ... 2010 def NaN
3 Stu2 Pass 33 ... NaN abc 2010
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.