I am reading a JSON file and trying to pull the information below from each log
entry and store it in an Excel sheet.
["name","TOM DOE"]
["value","132"]
My Python code is able to pull a couple of the other required fields from the JSON. However, since the JSON includes multiple nested lists and dictionaries, I am unable to pull the two values above and store them. Can anyone please help me with the Python code?
This is my python code:
import json
import pandas
class ConvertToExcel:
    """Flatten the log entries of a JSON report into rows of an Excel sheet."""

    def Process(self):
        """Read the JSON report and write one spreadsheet row per log entry.

        The report is read from a fixed path and is expected to be a list of
        objects, each carrying a ``log`` list. Every entry of every ``log``
        list becomes one row; the rows are written to ``output.xlsx`` in the
        current working directory.

        Raises:
            OSError: if the input file cannot be opened.
            KeyError: if an entry lacks ``code``, ``message``, ``text``,
                ``refs`` or ``level``.
        """
        with open('C:/Users/Desktop/SampleTestFiles/new17.json') as json_file:
            dataarray = json.load(json_file)

        data1 = []
        # 'Log' is a 1-based counter of the outer objects, 'Innerlog' the
        # 0-based position of the entry inside that object's 'log' list.
        for logcount, data in enumerate(dataarray, start=1):
            for i, entry in enumerate(data['log']):
                code = entry['code']
                message = entry['message']
                # 'cid' is optional; 0 marks a missing value.
                cid = message.get('cid', 0)
                text = message['text']
                refs = entry['refs']

                # Defaults so that an entry with an empty 'refs' list still
                # produces a row instead of failing on an unbound name.
                hrefs = ''
                hrefsFacts = ''
                for ref in refs:
                    # When there are several refs, the last one wins.
                    hrefs = ref.get('href', '')
                    hrefsFacts = hrefs

                # Unfinished attempt at reading the nested 'properties' list,
                # kept for reference. NOTE: in the sample JSON 'href' is a
                # plain string and 'properties' is a sibling key of 'href'.
                # print(type(data['log'][i]['refs'][k]['href']['properties']))
                # res1 = 'name' in chain(*data['log'][i]['refs'][k]['href']['properties'])
                # for elem in len(data['log'][i]['refs'][k]['href']['properties']):
                # for item in elem:
                # if(item == 'val'):
                # hrefspropertiesvalue = item
                # else:
                # pass
                hrefspropertiesvalue = 'a'  # placeholder until the value is extracted
                level = entry['level']
                data1.append((logcount, i, code, message, cid, text, refs,
                              hrefs, hrefsFacts, hrefspropertiesvalue, level))

        columns = ['Log', 'Innerlog', 'code', 'message', 'cid', 'text', 'refs',
                   'hrefs', 'hrefsFacts', 'hrefspropertiesvalue', 'level']
        pandas.DataFrame(data1, columns=columns).to_excel("output.xlsx")
# Build the converter and run the JSON-to-Excel export.
A = ConvertToExcel()
A.Process()
And this is the JSON:
[{ "log": [
{
"code": "nikv.F1.all.1",
"message": {
"cid": "61785360",
"filing_url": "C:\\Users\\farizaleta\\Desktop\\test-428-2016Q4F1.abcd",
"severity": "error",
"text": "[nikv.F1.all.1] The values of 6,075,786 for the elements nikv:OtherChargestested is duplicated in the filing 2 times.\n\nElement : nikv:OtherChargestested\nPeriod : 2016-01-01 to 2016-12-31\n\n\nRule Id:nikv.F1.all.1 - test-428-2016Q4F1.abcd 4122"
},
"refs": [
{
"href": "test-428-2016Q4F1.abcd#f-743",
"sourceLine": 4122,
"properties": [
[
"label",
"Other charges, tested"
],
[
"namespace",
"http://nikv.com/form/2002-01-01/nikv"
],
[
"name",
"TOM DOE"
],
[
"QName",
"nikv:OtherChargestested"
],
[
"contextRef",
"c-01",
[
[
"entity",
"C002089",
[
[
"scheme",
"http://tested.com/entity/identification/scheme"
]
]
],
[
"startDate",
"2016-01-01"
],
[
"endDate",
"2016-12-31"
],
[
"dimensions",
"(1)",
[
[
"nikv:OfficerAxis",
"<nikv:OfficerDomain>0-1</nikv:OfficerDomain>\n\t\t\t\t\n"
]
]
]
]
],
[
"unitRef",
"u-02",
[
[
"measure",
"iso4217"
]
]
],
[
"decimals",
"INF"
],
[
"precision",
"None"
],
[
"xsi:nil",
"false"
],
[
"value",
"132"
]
],
"objectId": "91269"
}
],
"level": "error"
}]
}]
The highlighted columns below need to be populated in the Excel output:
You can loop over the properties like this:
for type, val, *_ in data['log'][i]['refs'][k]['properties']:
Here `*_` ignores any extra elements in the properties sublists. Note that `properties` is a sibling of `href` inside each ref, not a key under it.
Then you can test whether `type` is `'name'` or `'value'` and set the appropriate variables.
def Process(self):
    """Export every log entry of the JSON report as a row in ``output.xlsx``.

    The report is read from a fixed path and is expected to be a list of
    objects, each carrying a ``log`` list. Besides the top-level fields of
    each entry, the ``name`` and ``value`` pairs are pulled out of the
    ``properties`` list found on the entry's refs.

    Raises:
        OSError: if the input file cannot be opened.
        KeyError: if an entry lacks ``code``, ``message``, ``text``,
            ``refs`` or ``level``.
    """
    with open('C:/Users/Desktop/SampleTestFiles/new17.json') as json_file:
        dataarray = json.load(json_file)

    data1 = []
    # 'Log' is a 1-based counter of the outer objects, 'Innerlog' the
    # 0-based position of the entry inside that object's 'log' list.
    for logcount, data in enumerate(dataarray, start=1):
        for i, x in enumerate(data['log']):
            code = x['code']
            message = x['message']
            # 'cid' is optional; 0 marks a missing value.
            cid = message.get('cid', 0)
            text = message['text']
            refs = x['refs']

            # Defaults so that an entry with an empty 'refs' list still
            # produces a row instead of failing on an unbound name.
            hrefs = ''
            hrefsFacts = ''
            name = ''
            value = ''
            for ref in refs:
                # When there are several refs, the last one wins.
                hrefs = ref.get('href', '')
                hrefsFacts = hrefs
                # Each property is a list [key, value, *extras]; the extras
                # (nested context/unit details) are not needed here.
                for prop_key, prop_val, *_ in ref.get('properties', []):
                    if prop_key == 'name':
                        name = prop_val
                    elif prop_key == 'value':
                        value = prop_val

            hrefspropertiesvalue = 'a'  # placeholder column kept from the original sheet
            level = x['level']
            data1.append((logcount, i, code, message, cid, text, refs, hrefs,
                          hrefsFacts, hrefspropertiesvalue, level, name, value))

    columns = ['Log', 'Innerlog', 'code', 'message', 'cid', 'text', 'refs',
               'hrefs', 'hrefsFacts', 'hrefspropertiesvalue', 'level',
               'name', 'value']
    pandas.DataFrame(data1, columns=columns).to_excel("output.xlsx")
I've also simplified all the loops to use `for <variable> in <list>` instead of `for <indexvariable> in range(len(<list>))`.
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.