I am trying to convert multi-level nested JSON into a flat structure. Using the built-in functions in pandas and a few solutions available on Stack Overflow, I was only able to normalize up to the first level; otherwise the flattening either produces unwanted results or, after I modify it, still does not give the desired output. Any insights are welcome.
Sample JSON:
{
"Records": [
{
"Name": "Student1",
"Result": "Pass",
"Marks": [
{
"Sub1": "50",
"Sub2": "40",
"YOP": [
{
"prim": "2010",
"sch": "abc"
},
{
"prim": "2010",
"sch": "abc"
}
]
}
]
},
{
"Name": "Stu2",
"Result": "Pass",
"Marks": [
{
"Sub1": "33",
"Sub2": "33",
"YOP": [
{
"prim": "2010",
"sch": "def"
},
{
"high": "2010",
"sch": "abc"
}
]
}
]
}
]
}
Current Code
import json
from itertools import chain, starmap
from itertools import islice

try:
    # The ABC aliases in `collections` were removed in Python 3.10.
    from collections.abc import MutableMapping
except ImportError:  # very old interpreters without collections.abc
    from collections import MutableMapping

from pandas.io.json import json_normalize
# Module-level switch: when true, flatten() prints a trace of every key it visits.
crumbs = True


def flatten(dictionary, parent_key=False, separator='.'):
    """
    Turn a nested dictionary into a flattened dictionary.

    Keys of nested mappings and indexes of list elements are appended to the
    parent key, joined with ``separator``. An empty mapping or an empty list
    is kept as a single entry whose value is ``None``.

    :param dictionary: The dictionary to flatten
    :param parent_key: The string to prepend to dictionary's keys
        (any falsy value means "no prefix")
    :param separator: The string used to separate flattened keys
    :return: A flattened dictionary
    """
    items = []
    for key, value in dictionary.items():
        if crumbs: print('checking:',key)
        # str(key) lets non-string keys be joined to a prefix instead of raising.
        new_key = str(parent_key) + separator + str(key) if parent_key else key
        if isinstance(value, MutableMapping):
            if crumbs: print(new_key,': dict found')
            if not value:
                if crumbs: print('Adding key-value pair:',new_key,None)
                items.append((new_key, None))
            else:
                items.extend(flatten(value, new_key, separator).items())
        elif isinstance(value, list):
            if crumbs: print(new_key,': list found')
            if value:
                # Treat the list as a mapping from index to element. The
                # separator must be forwarded here, otherwise everything
                # below a list falls back to the default '.'.
                indexed = {str(index): element for index, element in enumerate(value)}
                items.extend(flatten(indexed, new_key, separator).items())
            else:
                if crumbs: print('Adding key-value pair:',new_key,None)
                items.append((new_key, None))
        else:
            if crumbs: print('Adding key-value pair:',new_key,value)
            items.append((new_key, value))
    return dict(items)
def main():
    """Load ``aaa.json``, flatten it with :func:`flatten` and print the result."""
    # JSON text is UTF-8 by specification, so decode it explicitly instead of
    # relying on the platform's default locale encoding.
    with open("aaa.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    print(type(data))
    flat = flatten(data)
    print(flat)


if __name__ == '__main__':
    main()
{
"Records.0.Name": "Student1",
"Records.0.Result": "Pass",
"Records.0.Marks.0.Sub1": "50",
"Records.0.Marks.0.Sub2": "40",
"Records.0.Marks.0.YOP.0.prim": "2010",
"Records.0.Marks.0.YOP.0.sch": "abc",
"Records.0.Marks.0.YOP.1.high": "2012",
"Records.0.Marks.0.YOP.1.sch": "abc",
"Records.1.Name": "Stu2",
"Records.1.Result": "Pass",
"Records.1.Marks.0.Sub1": "33",
"Records.1.Marks.0.Sub2": "33",
"Records.1.Marks.0.YOP.0.prim": "210",
"Records.1.Marks.0.YOP.0.sch": "def",
"Records.1.Marks.0.YOP.1.high": "999",
"Records.1.Marks.0.YOP.1.sch": "abc"
}
With this code, any idea how to remove the numbers (the list indexes) from the keys and split the result into one flat dictionary per record?
End Expectation
{
"Records.Name": "Student1",
"Records.Result": "Pass",
"Records.Marks.Sub1": "50",
"Records.Marks.Sub2": "40",
"Records.Marks.YOP.prim": "2010",
"Records.Marks.YOP.sch": "abc",
"Records.Marks.YOP.high": "2012",
"Records.Marks.YOP.sch": "abc",
},
{
"Records.Name": "Stu2",
"Records.Result": "Pass",
"Records.Marks.Sub1": "33",
"Records.Marks.Sub2": "33",
"Records.Marks.YOP.prim": "210",
"Records.Marks.YOP.sch": "def",
"Records.Marks.YOP.high": "999",
"Records.Marks.YOP.sch": "abc"
}
# `flatten` here is imported from the third-party `flatten_json` package.
from flatten_json import flatten
# NOTE: `json` is the list literal shown under "Input:" below; the name shadows
# the standard-library `json` module. Its first element is flattened here.
records = flatten(json[0])
Input:
# Sample input: a one-element list wrapping the "Records" document.
# NOTE: the variable name `json` shadows the standard-library module of the same name.
json = [
    {
        "Records": [
            {
                "Name": "Student1",
                "Result": "Pass",
                "Marks": [
                    {
                        "Sub1": "50",
                        "Sub2": "40",
                        "YOP": [
                            {"prim": "2010", "sch": "abc"},
                            {"prim": "2010", "sch": "abc"},
                        ],
                    },
                ],
            },
            {
                "Name": "Stu2",
                "Result": "Pass",
                "Marks": [
                    {
                        "Sub1": "33",
                        "Sub2": "33",
                        "YOP": [
                            {"prim": "2010", "sch": "def"},
                            {"high": "2010", "sch": "abc"},
                        ],
                    },
                ],
            },
        ],
    },
]
Output:
{'Records_0_Name': 'Student1',
'Records_0_Result': 'Pass',
'Records_0_Marks_0_Sub1': '50',
'Records_0_Marks_0_Sub2': '40',
'Records_0_Marks_0_YOP_0_prim': '2010',
'Records_0_Marks_0_YOP_0_sch': 'abc',
'Records_0_Marks_0_YOP_1_prim': '2010',
'Records_0_Marks_0_YOP_1_sch': 'abc',
'Records_1_Name': 'Stu2',
'Records_1_Result': 'Pass',
'Records_1_Marks_0_Sub1': '33',
'Records_1_Marks_0_Sub2': '33',
'Records_1_Marks_0_YOP_0_prim': '2010',
'Records_1_Marks_0_YOP_0_sch': 'def',
'Records_1_Marks_0_YOP_1_high': '2010',
'Records_1_Marks_0_YOP_1_sch': 'abc'}
Update:
The result you were looking for:
# Flatten each record on its own, using "." as the key separator.
records = [flatten(record, ".") for record in json[0]['Records']]
Output:
[{'Name': 'Student1',
'Result': 'Pass',
'Marks.0.Sub1': '50',
'Marks.0.Sub2': '40',
'Marks.0.YOP.0.prim': '2010',
'Marks.0.YOP.0.sch': 'abc',
'Marks.0.YOP.1.prim': '2010',
'Marks.0.YOP.1.sch': 'abc'},
{'Name': 'Stu2',
'Result': 'Pass',
'Marks.0.Sub1': '33',
'Marks.0.Sub2': '33',
'Marks.0.YOP.0.prim': '2010',
'Marks.0.YOP.0.sch': 'def',
'Marks.0.YOP.1.high': '2010',
'Marks.0.YOP.1.sch': 'abc'}]
import pandas as pd
# Sample document: two student records, each with nested "Marks" and "YOP" lists.
tree = {
    "Records": [
        {
            "Name": "Student1",
            "Result": "Pass",
            "Marks": [
                {
                    "Sub1": "50",
                    "Sub2": "40",
                    "YOP": [
                        {"prim": "2010", "sch": "abc"},
                        {"prim": "2010", "sch": "abc"},
                    ],
                },
            ],
        },
        {
            "Name": "Stu2",
            "Result": "Pass",
            "Marks": [
                {
                    "Sub1": "33",
                    "Sub2": "33",
                    "YOP": [
                        {"prim": "2010", "sch": "def"},
                        {"high": "2010", "sch": "abc"},
                    ],
                },
            ],
        },
    ],
}
import pandas as pd
def traverse_parser_dfs(master_tree):
    """Flatten a nested dict/list tree into a DataFrame, depth first.

    Scalar values become columns named after their path from the root
    (``node_<key>_<key>...``). Nested dicts extend the current row. Each dict
    found inside a list starts from a copy of the row built so far, so the
    innermost list elements each produce one output row that repeats the
    values of their ancestors.

    Known limitation: a list that is empty or holds no dicts produces no rows
    for that branch.

    :param master_tree: the root dict to flatten
    :return: a ``pandas.DataFrame`` with one row per emitted leaf record;
        ``@`` and ``#`` in column names are replaced by ``_``
    """
    flatten_tree_node = []

    def _process_leaves(tree, prefix: str = "node", tree_node=None, update: bool = True):
        # A fresh row per call instead of a shared mutable default argument.
        if tree_node is None:
            tree_node = {}
        if not isinstance(tree, dict):
            return
        is_nested = False
        # First pass: scalars and nested dicts fill in the current row.
        for key, value in tree.items():
            if isinstance(value, list):
                continue
            # Build the child name per key; extending `prefix` in place would
            # leak this key's name into the columns of later siblings.
            child_prefix = prefix + "_" + str(key)
            if isinstance(value, dict):
                _process_leaves(value, prefix=child_prefix, tree_node=tree_node, update=False)
            else:
                # Any scalar is a leaf, not only strings (numbers, booleans, null).
                tree_node[child_prefix] = value
        # Second pass: lists fan out, each element continuing from a copy of
        # the row so that siblings do not overwrite each other.
        for key, value in tree.items():
            if isinstance(value, list):
                is_nested = True
                child_prefix = prefix + "_" + str(key)
                for leaf in value:
                    _process_leaves(leaf, prefix=child_prefix, tree_node=tree_node.copy())
        # Only the innermost level (no further lists) emits the finished row.
        if not is_nested and update:
            flatten_tree_node.append(tree_node)

    _process_leaves(master_tree)
    df = pd.DataFrame(flatten_tree_node)
    # Plain string replacement; avoids the `.str` accessor, which rejects
    # column indexes that do not hold strings (e.g. when no rows were built).
    df.columns = [str(col).replace("@", "_").replace("#", "_") for col in df.columns]
    return df
# Flatten the sample `tree` defined above and print the resulting DataFrame.
print(traverse_parser_dfs(tree))
node_Records_Name node_Records_Result node_Records_Marks_Sub1 ... node_Records_Marks_YOP_prim node_Records_Marks_YOP_sch node_Records_Marks_YOP_high
0 Student1 Pass 50 ... 2010 abc NaN
1 Student1 Pass 50 ... 2010 abc NaN
2 Stu2 Pass 33 ... 2010 def NaN
3 Stu2 Pass 33 ... NaN abc 2010
The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.