简体   繁体   English

Python 中的多嵌套 Json 到平面 Json

[英]Multi Nested Json to Flat Json in Python

I am trying to convert multi-level nested JSON into a flat structure. Using the built-in function in pandas and a few solutions available on Stack Overflow, I was only able to normalize up to the first level; the flattening either creates unwanted results, or my modifications to it do not give the desired output. Any insights are welcome.我正在尝试将多层嵌套的 JSON 转换为扁平结构。使用 pandas 的内置函数以及 Stack Overflow 上的一些解决方案,我只能规范化到第一层;扁平化要么产生不需要的结果,要么我对它的修改得不到期望的输出。欢迎任何见解。

Sample JSON:示例 JSON:


{
  "Records": [
    {
      "Name": "Student1",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "50",
          "Sub2": "40",
          "YOP": [
            {
              "prim": "2010",
              "sch": "abc"
            },
            {
              "prim": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    },
    {
      "Name": "Stu2",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "33",
          "Sub2": "33",
          "YOP": [
            {
              "prim": "2010",
              "sch": "def"
            },
            {
              "high": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    }
  ]
}

Current Code当前代码

import json
from itertools import chain, starmap
from itertools import islice

try:
    # MutableMapping lives in collections.abc; the alias in `collections`
    # was removed in Python 3.10.
    from collections.abc import MutableMapping
except ImportError:
    from collections import MutableMapping

from pandas.io.json import json_normalize
crumbs = True


def flatten(dictionary, parent_key=False, separator='.'):
    """
    Turn a nested dictionary into a flattened dictionary.

    Nested mappings contribute their keys joined with ``separator``; list
    elements contribute their position (as a string) as one key segment.
    An empty mapping or an empty list is kept as a single entry whose value
    is ``None``. While the module-level ``crumbs`` flag is true, every step
    is traced with ``print``.

    :param dictionary: The dictionary to flatten
    :param parent_key: The string to prepend to dictionary's keys
        (any falsy value means "no prefix")
    :param separator: The string used to separate flattened keys
    :return: A flattened dictionary
    """
    items = []
    for key, value in dictionary.items():
        if crumbs:
            print('checking:', key)
        new_key = str(parent_key) + separator + key if parent_key else key

        if isinstance(value, MutableMapping):
            if crumbs:
                print(new_key, ': dict found')
            if value:
                items.extend(flatten(value, new_key, separator).items())
            else:
                if crumbs:
                    print('Adding key-value pair:', new_key, None)
                items.append((new_key, None))
        elif isinstance(value, list):
            if crumbs:
                print(new_key, ': list found')
            if value:
                for index, element in enumerate(value):
                    # Forward the caller's separator; without it, keys below
                    # a list silently fall back to the default ".".
                    items.extend(
                        flatten({str(index): element}, new_key, separator).items()
                    )
            else:
                if crumbs:
                    print('Adding key-value pair:', new_key, None)
                items.append((new_key, None))
        else:
            if crumbs:
                print('Adding key-value pair:', new_key, value)
            items.append((new_key, value))
    return dict(items)

    
def main():
    """Load ``aaa.json``, flatten it and print the result.

    Prints the type of the parsed document first, then the flattened
    dictionary returned by ``flatten``.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale's
    # default encoding when reading it.
    with open("aaa.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    print(type(data))
    flat = flatten(data)
    print(flat)


if __name__ == '__main__':
    main()

Output输出

{
  "Records.0.Name": "Student1",
  "Records.0.Result": "Pass",
  "Records.0.Marks.0.Sub1": "50",
  "Records.0.Marks.0.Sub2": "40",
  "Records.0.Marks.0.YOP.0.prim": "2010",
  "Records.0.Marks.0.YOP.0.sch": "abc",
  "Records.0.Marks.0.YOP.1.high": "2012",
  "Records.0.Marks.0.YOP.1.sch": "abc",
  "Records.1.Name": "Stu2",
  "Records.1.Result": "Pass",
  "Records.1.Marks.0.Sub1": "33",
  "Records.1.Marks.0.Sub2": "33",
  "Records.1.Marks.0.YOP.0.prim": "210",
  "Records.1.Marks.0.YOP.0.sch": "def",
  "Records.1.Marks.0.YOP.1.high": "999",
  "Records.1.Marks.0.YOP.1.sch": "abc"
}

With this code, any idea how to remove the numeric list indices from the keys and split the output into one flat dictionary per record?使用此代码,如何去掉键中的数字索引,并按每条记录拆分成单独的扁平字典?

Expected result预期结果

{
  "Records.Name": "Student1",
  "Records.Result": "Pass",
  "Records.Marks.Sub1": "50",
  "Records.Marks.Sub2": "40",
  "Records.Marks.YOP.prim": "2010",
  "Records.Marks.YOP.sch": "abc",
  "Records.Marks.YOP.high": "2012",
  "Records.Marks.YOP.sch": "abc",
},

{
  "Records.Name": "Stu2",
  "Records.Result": "Pass",
  "Records.Marks.Sub1": "33",
  "Records.Marks.Sub2": "33",
  "Records.Marks.YOP.prim": "210",
  "Records.Marks.YOP.sch": "def",
  "Records.Marks.YOP.high": "999",
  "Records.Marks.YOP.sch": "abc"
}

from flatten_json import flatten

# NOTE: `json` here is the list literal shown under "Input" (not the stdlib
# module), and `flatten` is the one imported from the flatten_json package.
records = flatten(json[0])

Input:输入:

# Sample input: a one-element list wrapping the document, so json[0] is the
# dict holding the "Records" list.
# NOTE: this name shadows the stdlib `json` module within this snippet.
json = [{
  "Records": [
    {
      "Name": "Student1",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "50",
          "Sub2": "40",
          "YOP": [
            {
              "prim": "2010",
              "sch": "abc"
            },
            {
              "prim": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    },
    {
      "Name": "Stu2",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "33",
          "Sub2": "33",
          "YOP": [
            {
              "prim": "2010",
              "sch": "def"
            },
            {
              "high": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    }
  ]
}]

Output:输出:

{'Records_0_Name': 'Student1',
 'Records_0_Result': 'Pass',
 'Records_0_Marks_0_Sub1': '50',
 'Records_0_Marks_0_Sub2': '40',
 'Records_0_Marks_0_YOP_0_prim': '2010',
 'Records_0_Marks_0_YOP_0_sch': 'abc',
 'Records_0_Marks_0_YOP_1_prim': '2010',
 'Records_0_Marks_0_YOP_1_sch': 'abc',
 'Records_1_Name': 'Stu2',
 'Records_1_Result': 'Pass',
 'Records_1_Marks_0_Sub1': '33',
 'Records_1_Marks_0_Sub2': '33',
 'Records_1_Marks_0_YOP_0_prim': '2010',
 'Records_1_Marks_0_YOP_0_sch': 'def',
 'Records_1_Marks_0_YOP_1_high': '2010',
 'Records_1_Marks_0_YOP_1_sch': 'abc'}

Update:更新:

The result you were looking for:您正在寻找的结果:

# Flatten each record on its own so the result is one flat dict per record;
# "." is passed as flatten_json's key separator.
records = [flatten(record, ".") for record in json[0]['Records']]

Output:输出:

[{'Name': 'Student1',
  'Result': 'Pass',
  'Marks_0_Sub1': '50',
  'Marks_0_Sub2': '40',
  'Marks_0_YOP_0_prim': '2010',
  'Marks_0_YOP_0_sch': 'abc',
  'Marks_0_YOP_1_prim': '2010',
  'Marks_0_YOP_1_sch': 'abc'},
 {'Name': 'Stu2',
  'Result': 'Pass',
  'Marks_0_Sub1': '33',
  'Marks_0_Sub2': '33',
  'Marks_0_YOP_0_prim': '2010',
  'Marks_0_YOP_0_sch': 'def',
  'Marks_0_YOP_1_high': '2010',
  'Marks_0_YOP_1_sch': 'abc'}]
import pandas as pd
# Sample document passed to traverse_parser_dfs further down: a "Records"
# list whose entries nest a "Marks" list, which in turn nests a "YOP" list.
tree= {
  "Records": [
    {
      "Name": "Student1",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "50",
          "Sub2": "40",
          "YOP": [
            {
              "prim": "2010",
              "sch": "abc"
            },
            {
              "prim": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    },
    {
      "Name": "Stu2",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "33",
          "Sub2": "33",
          "YOP": [
            {
              "prim": "2010",
              "sch": "def"
            },
            {
              "high": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    }
  ]
}


import pandas as pd


def traverse_parser_dfs(master_tree):
    """Flatten a nested dict/list tree into a DataFrame, one row per leaf path.

    String values become columns named ``node_<key>_<key>...`` after the
    chain of keys leading to them. Values inside nested dicts are merged into
    the same row. Every list multiplies rows: each dict element of a list
    gets its own copy of the row built so far, and a row is emitted once a
    dict with no further lists is reached. Values that are neither ``str``,
    ``dict`` nor ``list`` are ignored, as are non-dict list elements.

    :param master_tree: The nested structure to flatten (normally a dict)
    :return: A ``pandas.DataFrame`` with "@" and "#" in column names
        replaced by "_"
    """
    rows = []

    def _collect_fields(node, prefix, row, pending_lists):
        """Copy string fields of ``node`` and its nested dicts into ``row``; queue lists."""
        for key, value in node.items():
            # Build the child prefix locally: mutating ``prefix`` would leak
            # this key's name into the column names of its later siblings.
            child_prefix = prefix + "_" + key
            # isinstance (not type ==) so subclasses such as OrderedDict work.
            if isinstance(value, str):
                row[child_prefix] = value
            elif isinstance(value, dict):
                _collect_fields(value, child_prefix, row, pending_lists)
            elif isinstance(value, list):
                pending_lists.append((child_prefix, value))

    def _process_leaves(tree, prefix="node", tree_node=None):
        """Emit one row for ``tree``, or recurse into each of its list elements."""
        if not isinstance(tree, dict):
            return
        row = {} if tree_node is None else tree_node
        pending_lists = []
        _collect_fields(tree, prefix, row, pending_lists)
        if not pending_lists:
            rows.append(row)
            return
        # Lists are expanded only after all scalar fields are collected, so
        # every child row carries the complete set of parent fields.
        for child_prefix, elements in pending_lists:
            for element in elements:
                _process_leaves(element, child_prefix, row.copy())

    _process_leaves(master_tree)
    df = pd.DataFrame(rows)
    # A plain list comprehension also works when the frame has no columns,
    # where the ``.str`` accessor may be unavailable.
    df.columns = [name.replace("@", "_").replace("#", "_") for name in df.columns]
    return df


# Print the flattened table built from the sample `tree` defined above.
print(traverse_parser_dfs(tree))

  node_Records_Name node_Records_Result node_Records_Marks_Sub1  ... node_Records_Marks_YOP_prim node_Records_Marks_YOP_sch node_Records_Marks_YOP_high
0          Student1                Pass                      50  ...                        2010                        abc                         NaN
1          Student1                Pass                      50  ...                        2010                        abc                         NaN
2              Stu2                Pass                      33  ...                        2010                        def                         NaN
3              Stu2                Pass                      33  ...                         NaN                        abc                        2010

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM