简体   繁体   中英

Get elements from nested list inside JSON file using python

I am reading a JSON file and trying to pull the following information from each log entry and store it in an Excel sheet:

["name","TOM DOE"]
["value","132"]

My Python code is able to pull a couple of other required fields from the JSON. However, since the JSON contains multiple nested lists and dictionaries, I am unable to pull the two items above and store them. Can anyone please help me with the Python code?

This is my python code:

import json
import pandas
class ConvertToExcel:
    """Flatten the ``log`` entries of a JSON export into an Excel sheet."""

    def Process(self):
        """Read the JSON file and write one spreadsheet row per log entry.

        The JSON document is iterated as a list whose items each carry a
        ``log`` list.  Every log entry must provide ``code``, ``message``
        (with ``text``), ``refs`` and ``level``.  A missing ``cid`` inside
        ``message`` falls back to ``0`` and a missing ``href`` inside a ref
        falls back to ``''``.  The rows are written to ``output.xlsx``.

        Raises:
            OSError: if the input file cannot be opened.
            KeyError: if a log entry lacks one of the required keys.
        """
        with open('C:/Users/Desktop/SampleTestFiles/new17.json') as json_file:
            dataarray = json.load(json_file)

        data1 = []
        # ``logcount`` is the 1-based position of the outer item, ``i`` the
        # 0-based position of the entry inside its ``log`` list.
        for logcount, data in enumerate(dataarray, start=1):
            for i, entry in enumerate(data['log']):
                code = entry['code']
                message = entry['message']
                try:
                    cid = message['cid']
                except (KeyError, TypeError):
                    cid = 0
                text = message['text']
                refs = entry['refs']

                # Reset per entry so that an empty ``refs`` list neither
                # raises NameError nor reuses the href of the previous entry.
                hrefs = ''
                for ref in refs:
                    # When there are several refs, the last one wins.
                    try:
                        hrefs = ref['href']
                    except (KeyError, TypeError):
                        hrefs = ''
                # Both columns are read from the same 'href' key.
                hrefsFacts = hrefs

                # NOTE: placeholder value; nothing is extracted from the
                # ref's 'properties' list here.  In the sample JSON,
                # 'properties' is a sibling key of 'href' inside each ref,
                # not a child of it.
                hrefspropertiesvalue = 'a'
                level = entry['level']
                data1.append((logcount, i, code, message, cid, text, refs,
                              hrefs, hrefsFacts, hrefspropertiesvalue, level))

        pandas.DataFrame(data1, columns=['Log', 'Innerlog', 'code', 'message','cid','text','refs','hrefs','hrefsFacts','hrefspropertiesvalue', 'level']).to_excel("output.xlsx")

# Run the conversion only when this file is executed as a script, so that
# importing the module does not trigger file I/O.
if __name__ == "__main__":
    converter = ConvertToExcel()
    converter.Process()

and, JSON:

[{ "log": [
   {
   "code": "nikv.F1.all.1",
   "message": {
    "cid": "61785360",
    "filing_url": "C:\\Users\\farizaleta\\Desktop\\test-428-2016Q4F1.abcd",
    "severity": "error",
    "text": "[nikv.F1.all.1] The values of 6,075,786 for the elements nikv:OtherChargestested is duplicated in the filing 2 times.\n\nElement : nikv:OtherChargestested\nPeriod : 2016-01-01 to 2016-12-31\n\n\nRule Id:nikv.F1.all.1 - test-428-2016Q4F1.abcd 4122"
   },
   "refs": [
    {
     "href": "test-428-2016Q4F1.abcd#f-743",
     "sourceLine": 4122,
     "properties": [
      [
       "label",
       "Other charges, tested"
      ],
      [
       "namespace",
       "http://nikv.com/form/2002-01-01/nikv"
      ],
      [
       "name",
       "TOM DOE"
      ],
      [
       "QName",
       "nikv:OtherChargestested"
      ],
      [
       "contextRef",
       "c-01",
       [
        [
         "entity",
         "C002089",
         [
          [
           "scheme",
           "http://tested.com/entity/identification/scheme"
          ]
         ]
        ],
        [
         "startDate",
         "2016-01-01"
        ],
        [
         "endDate",
         "2016-12-31"
        ],
        [
         "dimensions",
         "(1)",
         [
          [
           "nikv:OfficerAxis",
           "<nikv:OfficerDomain>0-1</nikv:OfficerDomain>\n\t\t\t\t\n"
          ]
         ]
        ]
       ]
      ],
      [
       "unitRef",
       "u-02",
       [
        [
         "measure",
         "iso4217"
        ]
       ]
      ],
      [
       "decimals",
       "INF"
      ],
      [
       "precision",
       "None"
      ],
      [
       "xsi:nil",
       "false"
      ],
      [
       "value",
       "132"
      ]
     ],
     "objectId": "91269"
    }
   ],
   "level": "error"
  }]
  }]

The highlighted columns below need to be populated in the Excel output: (image: enter image description here)

You can loop over the properties like this:

for type, val, *_ in data['log'][i]['refs'][k]['properties']:

*_ ignores any extra elements in the properties sublists.

Then you can test whether type is name or value and set the appropriate variables.

    def Process(self):
        """Read the JSON file and write one spreadsheet row per log entry.

        Besides the basic fields (``code``, ``message``, ``cid``, ``text``,
        ``refs``, ``href`` and ``level``), the ``name`` and ``value`` items
        are looked up in each ref's ``properties`` list.  That list holds
        sublists whose first element is the property type and whose second
        element is its value; any further elements are ignored.

        A missing ``cid`` falls back to ``0``; a missing ``href``, ``name``
        or ``value`` falls back to ``''``.  When an entry has several refs,
        the last ref providing a field wins.  The rows are written to
        ``output.xlsx``.

        Raises:
            OSError: if the input file cannot be opened.
            KeyError: if a log entry lacks ``code``, ``message``, ``text``,
                ``refs`` or ``level``.
        """
        with open('C:/Users/Desktop/SampleTestFiles/new17.json') as json_file:
            dataarray = json.load(json_file)

        data1 = []
        # ``logcount`` is the 1-based position of the outer item, ``i`` the
        # 0-based position of the entry inside its ``log`` list.
        for logcount, data in enumerate(dataarray, start=1):
            for i, x in enumerate(data['log']):
                code = x['code']
                message = x['message']
                try:
                    cid = message['cid']
                except (KeyError, TypeError):
                    cid = 0
                text = message['text']
                refs = x['refs']

                # Reset per entry so that an empty ``refs`` list neither
                # raises NameError nor reuses values of the previous entry.
                hrefs = ''
                name = ''
                value = ''
                for ref in refs:
                    try:
                        hrefs = ref['href']
                    except (KeyError, TypeError):
                        hrefs = ''
                    try:
                        properties = ref['properties']
                    except (KeyError, TypeError):
                        properties = []

                    # ``*_`` swallows the optional nested details that some
                    # property sublists carry after the first two elements.
                    for prop_type, val, *_ in properties:
                        if prop_type == 'name':
                            name = val
                        elif prop_type == 'value':
                            value = val
                # Both columns are read from the same 'href' key.
                hrefsFacts = hrefs

                # NOTE: placeholder column kept for output compatibility.
                hrefspropertiesvalue = 'a'
                level = x['level']
                data1.append((logcount, i, code, message, cid, text, refs,
                              hrefs, hrefsFacts, hrefspropertiesvalue, level,
                              name, value))

        pandas.DataFrame(data1, columns=['Log', 'Innerlog', 'code', 'message','cid','text','refs','hrefs','hrefsFacts','hrefspropertiesvalue', 'level', 'name', 'value']).to_excel("output.xlsx")

I've also simplified all the loops to use for <variable> in <list> instead of for <indexvariable> in range(len(<list>)) .

The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM