[英]Complex Python JSON object to custom dictionary conversion
I have the following JSON object - 我有以下JSON对象 -
{
"Resource": [
{
"@name": "Bravo",
"@signature": "h#Bravo",
"@type": "ESX_5.x",
"@typeDisplayName": "ESX Server",
"PerfList": {
"@attrId": "cpuUsage",
"@attrName": "Usage",
"Data": [
{
"@data": "26.00",
"@end": "01:05:00",
"@interval": "60",
"@start": "01:04:00"
},
{
"@data": "24.00",
"@end": "01:04:00",
"@interval": "60",
"@start": "01:03:00"
},
{
"@data": "36.00",
"@end": "01:03:00",
"@interval": "60",
"@start": "01:02:00"
},
{
"@data": "38.00",
"@end": "01:02:00",
"@interval": "60",
"@start": "01:01:00"
},
{
"@data": "37.00",
"@end": "01:01:00",
"@interval": "60",
"@start": "01:00:00"
}
]
},
"Resource": [
{
"@name": "Tango",
"@signature": "vm#Tango",
"@type": "vm",
"@typeDisplayName": "Virtual Machine",
"PerfList": {
"@attrId": "cpuUsage",
"@attrName": "Usage",
"Data": {
"@data": "12.00",
"@end": "04:05:00",
"@interval": "60",
"@start": "04:04:00"
}
}
},
{
"@name": "Charlie",
"@signature": "vm#Charlie",
"@type": "vm",
"@typeDisplayName": "Virtual Machine",
"PerfList": {
"@attrId": "cpuUsage",
"@attrName": "Usage",
"Data": [
{
"@data": "12.00",
"@end": "04:20:00",
"@interval": "60",
"@start": "04:19:00"
},
{
"@data": "12.00",
"@end": "04:19:00",
"@interval": "60",
"@start": "04:18:00"
}
]
}
}
]
},
{
"@name": "Alpha",
"@signature": "h#Alpha",
"@type": "ESX_5.x",
"@typeDisplayName": "ESX Server",
"PerfList": [
{
"@attrId": "cpuUsage",
"@attrName": "Usage",
"Data": {
"@data": "9",
"@end": "06:10:00",
"@interval": "60",
"@start": "06:09:00"
}
},
{
"@attrId": "cpuUsagemhz",
"@attrName": "Usage MHz",
"Data": {
"@data": "479",
"@end": "06:10:00",
"@interval": "60",
"@start": "06:09:00"
}
}
]
}
]
}
I am looking for some JSON Traversal to reach all the keys and convert above to the following expected python dictionary - 我正在寻找一些JSON遍历来达到所有键并将上面的转换为以下预期的python字典 -
d = { 'ESX_5.x' :
{
'Bravo' :
{
"@typeDisplayName" : "ESX Server",
"@signature" : "h#Bravo",
"cpuUsage" :
{
"from_01:04:00_to_01:05:00" : 26.00,
"from_01:03:00_to_01:04:00" : 24.00,
"from_01:02:00_to_01:03:00" : 36.00,
"from_01:01:00_to_01:02:00" : 38.00,
"from_01:00:00_to_01:01:00" : 37.00,
"interval" : 60
},
"vm" :
{
"Tango" :
{
"@typeDisplayName" : "Virtual Machine",
"@signature" : "vm#Tango",
"cpuUsage" :
{
"from_04:04:00_to_04:05:00" : 12.00,
"interval" : 60
}
},
"Charlie" :
{
"@typeDisplayName" : "Virtual Machine",
"@signature": "vm#Charlie",
"cpuUsage" :
{
"from_04:19:00_to_04:20:00" : "12.00",
"from_04:18:00_to_04:19:00" : "12.00",
"@interval": "60",
}
}
},
},
'Alpha' :
{
"@typeDisplayName" : "ESX Server",
"@signature" : "h#Alpha",
"cpuUsage" :
{
"from_06:09:00_to_06:10:00" : 9,
"@interval": "60"
},
"cpuUsagemhz" :
{
"from_06:09:00_to_06:10:00" : 479,
"@interval": "60"
}
}
}
}
I need recursive functions that fetch the Resource, PerfList and Data entries and build the customized dictionary. 我需要递归函数来获取Resource、PerfList和Data条目,并构建自定义字典。
There may be typos/syntax errors in the hand-written expected dictionary... 手写的预期字典中可能存在拼写错误/语法错误……
HERE IS MY CODE SO FAR -- it fails, however, for N levels of nested Resources. 这是我目前的代码——但是对于N层嵌套的Resource,它会失败。
import json
class MQLPrettyPrint(object):
    """Load a JSON performance report and regroup its nested resources.

    The report is expected to look like
    ``{"Response": {"Results": {"Resource": [...]}}}``.  Every resource may
    carry a ``PerfList`` (one object or a list of objects), each of which
    carries ``Data`` (one row or a list of rows), and may itself contain
    further ``Resource`` entries nested to any depth.
    """

    KEY_RESPONSE = 'Response'
    KEY_RESULTS = 'Results'
    KEY_RESOURCE = 'Resource'

    # Several methods keep the historical parameter name ``dict``, which hides
    # the builtin inside their bodies; this alias keeps the real type reachable.
    _DICT_TYPE = dict

    def __init__(self, file=None):
        """Read and parse ``file`` (a path to a JSON document) immediately."""
        self._json_file = file
        self._json_data = self.read_json_file()
        self._json_dict = self.json_to_dict()

    def json_file(self):
        """Return the path the report was loaded from."""
        return self._json_file

    def read_json_file(self):
        """Return the raw text of the report file.

        Any error raised while opening or reading the file propagates to the
        caller; the ``with`` block guarantees the handle is closed either way.
        """
        with open(self._json_file, "r") as handle:
            return handle.read()

    def json_to_dict(self):
        """Parse the raw report text into Python objects."""
        return json.loads(self._json_data)

    def json_data(self):
        """Return the raw report text."""
        return self._json_data

    def json_dict(self):
        """Return the parsed report."""
        return self._json_dict

    def json2mql(self):
        """Return the regrouped resources of the report.

        Returns ``None`` when the report has no ``Response`` entry.
        """
        if self.KEY_RESPONSE in self._json_dict:
            return self.fetch_response(self._json_dict[self.KEY_RESPONSE])
        return None

    def fetch_response(self, dict):
        """Return the regrouped resources found under ``Results``, or None."""
        if self.KEY_RESULTS in dict:
            return self.fetch_results(dict[self.KEY_RESULTS])
        return None

    def fetch_results(self, dict):
        """Return the regrouped resources found under ``Resource``, or None."""
        if self.KEY_RESOURCE in dict:
            return self.fetch_resource(dict[self.KEY_RESOURCE])
        return None

    def fetch_resource(self, resources, dict=None):
        """Group resources as ``{type: {name: entry}}``, recursing into children.

        ``resources`` is a single resource object or a (possibly nested) list
        of them.  Each entry holds the resource header, a ``'PerfList'`` key
        with the output of :meth:`fetch_perf_list` when the resource has one,
        and the nested resources grouped by their own type.

        ``dict`` is an optional accumulator to add to; a fresh one is created
        per call when omitted, so separate calls never share state.
        Values that are neither a list nor a mapping are printed and skipped.
        """
        grouped = {} if dict is None else dict
        if isinstance(resources, list):
            for resource in resources:
                self.fetch_resource(resource, grouped)
        elif isinstance(resources, self._DICT_TYPE):
            entry = self.fetch_resource_header(resources)
            if 'PerfList' in resources:
                entry['PerfList'] = self.fetch_perf_list(resources)
            if self.KEY_RESOURCE in resources:
                # Children are folded into their parent's entry, which is what
                # lets the nesting go arbitrarily deep.
                self.fetch_resource(resources[self.KEY_RESOURCE], entry)
            grouped.setdefault(entry['@type'], {})[entry['@name']] = entry
        else:
            print(resources)
        return grouped

    @staticmethod
    def fetch_resource_header(resource):
        """Return the four identifying attributes of ``resource`` as a new dict.

        Raises ``KeyError`` when one of the attributes is missing.
        """
        return {
            '@name': resource['@name'],
            '@signature': resource['@signature'],
            '@type': resource['@type'],
            '@typeDisplayName': resource['@typeDisplayName'],
        }

    # The method was originally defined under this misspelled name; keep it
    # as an alias so existing references still resolve.
    fetch_resouce_header = fetch_resource_header

    def fetch_perf_list(self, resource, perfDict=None):
        """Return ``{attrId: header + {'Data': rows}}`` for ``resource['PerfList']``.

        ``PerfList`` may be one object or a list of objects.  Entries sharing
        an ``@attrId`` have their rows merged.  ``perfDict`` is an optional
        accumulator; a fresh one is created when omitted.
        Raises ``KeyError`` when ``resource`` has no ``PerfList``.
        """
        if perfDict is None:
            perfDict = {}
        perf_entries = resource['PerfList']
        if isinstance(perf_entries, dict):
            perf_entries = [perf_entries]
        if not isinstance(perf_entries, list):
            print(perf_entries)
            return perfDict
        for perf in perf_entries:
            header = self.fetch_perf_header(perf)
            rows = self.fetch_data(perf)
            key = header['@attrId']
            if key not in perfDict:
                header['Data'] = rows
                perfDict[key] = header
            else:
                perfDict[key].setdefault('Data', {}).update(rows)
        return perfDict

    def fetch_perf_header(self, perfDict):
        """Return the ``@attrId`` / ``@attrName`` pair of one PerfList entry."""
        return {
            '@attrId': perfDict['@attrId'],
            '@attrName': perfDict['@attrName'],
        }

    def fetch_data(self, perfDict, dataDict=None):
        """Return the rows of ``perfDict['Data']`` keyed by ``"<start>_<end>"``.

        ``Data`` may be one row or a list of rows.  ``dataDict`` is an optional
        accumulator; a fresh one is created when omitted.
        Raises ``KeyError`` when ``Data``, ``@start`` or ``@end`` is missing.
        """
        if dataDict is None:
            dataDict = {}
        rows = perfDict['Data']
        if isinstance(rows, dict):
            rows = [rows]
        if not isinstance(rows, list):
            print(rows)
            return dataDict
        for row in rows:
            key = "%s_%s" % (row['@start'], row['@end'])
            dataDict[key] = row
        return dataDict
Sometimes when operating on nested structures using recursive functions, it's easier to think in terms of a walking function and an operation function. 有时,在使用递归函数操作嵌套结构时,从"遍历函数"和"操作函数"的角度来思考会更容易。 So we want to target all the dicts contained in the json structure and perform a transformation operation on them.
因此,我们希望定位json结构中包含的所有dicts并对它们执行转换操作。
Transforming a structure in-place, instead of recreating a new one, is significantly easier when dealing with nested structures. 在处理嵌套结构时,就地转换结构比重新创建新结构要容易得多。 The more difficult approach of constructing nested dicts from that json structure involves being able to address the specific json elements, place them at correct depth and branch of the new structure;
从json结构构造嵌套dicts的更困难的方法是能够处理特定的json元素,将它们放置在新结构的正确深度和分支处; this involves two parallel walking operations.
这涉及两个平行的步行操作。
One thing to be mindful of though, is modifying the nested structure while walking across it as a transformation operation may change a list that the walking function is currently iterating on. 但要注意的一件事是,在遍历它时修改嵌套结构,因为转换操作可能会更改行走函数当前正在迭代的列表。 In this instance, only children (not siblings) are modified first before walking on the lower branches.
在这种情况下,只有孩子(而不是兄弟姐妹)在走在较低的树枝上之前才会被修改。
from copy import deepcopy
import json
from pprint import pprint
from StringIO import StringIO
# Sample payload used by the demo below. Resource entries nest inside other
# resources, and both "PerfList" and "Data" appear once as a single object
# and once as a list of objects.
json_str = \
'''
{
"Resource": [
{
"@name": "Bravo",
"@signature": "h#Bravo",
"@type": "ESX_5.x",
"@typeDisplayName": "ESX Server",
"PerfList": {
"@attrId": "cpuUsage",
"@attrName": "Usage",
"Data": [
{
"@data": "26.00",
"@end": "01:05:00",
"@interval": "60",
"@start": "01:04:00"
},
{
"@data": "24.00",
"@end": "01:04:00",
"@interval": "60",
"@start": "01:03:00"
},
{
"@data": "36.00",
"@end": "01:03:00",
"@interval": "60",
"@start": "01:02:00"
},
{
"@data": "38.00",
"@end": "01:02:00",
"@interval": "60",
"@start": "01:01:00"
},
{
"@data": "37.00",
"@end": "01:01:00",
"@interval": "60",
"@start": "01:00:00"
}
]
},
"Resource": [
{
"@name": "Tango",
"@signature": "vm#Tango",
"@type": "vm",
"@typeDisplayName": "Virtual Machine",
"PerfList": {
"@attrId": "cpuUsage",
"@attrName": "Usage",
"Data": {
"@data": "12.00",
"@end": "04:05:00",
"@interval": "60",
"@start": "04:04:00"
}
}
},
{
"@name": "Charlie",
"@signature": "vm#Charlie",
"@type": "vm",
"@typeDisplayName": "Virtual Machine",
"PerfList": {
"@attrId": "cpuUsage",
"@attrName": "Usage",
"Data": [
{
"@data": "12.00",
"@end": "04:20:00",
"@interval": "60",
"@start": "04:19:00"
},
{
"@data": "12.00",
"@end": "04:19:00",
"@interval": "60",
"@start": "04:18:00"
}
]
}
}
]
},
{
"@name": "Alpha",
"@signature": "h#Alpha",
"@type": "ESX_5.x",
"@typeDisplayName": "ESX Server",
"PerfList": [
{
"@attrId": "cpuUsage",
"@attrName": "Usage",
"Data": {
"@data": "9",
"@end": "06:10:00",
"@interval": "60",
"@start": "06:09:00"
}
},
{
"@attrId": "cpuUsagemhz",
"@attrName": "Usage MHz",
"Data": {
"@data": "479",
"@end": "06:10:00",
"@interval": "60",
"@start": "06:09:00"
}
}
]
}
]
}
'''
def walk_fun_lim(ilist, func=None):
    '''
    Recursively walk a nested list and dict structure, running func on all dicts.

    func receives a list of dicts and may mutate them in place. It is called
    once with all dicts that sit directly in a list, and once more with each
    dict on its own, before that dict's values are walked.

    Returns the truthy items func returned, with the results of each nested
    level wrapped in their own sub-list. Returns [] when func is not callable.
    A func that returns None is treated as having returned nothing.
    '''
    def _truthy(items):
        # Materialise as a list so the result is the same on Python 2 and 3
        # (filter() is lazy on 3), and tolerate a func that returns None.
        return [item for item in (items or []) if item]

    def walk_fun_lim_helper(node, func, count=0):
        collected = []
        if isinstance(node, list):
            collected += _truthy(func([d for d in node if isinstance(d, dict)]))
            children = node
        elif isinstance(node, dict):
            collected += _truthy(func([node]))
            # Snapshot the values only after func has run: func may have added
            # or removed keys on this dict.
            children = list(node.values())
        else:
            children = []
        for child in children:
            collected += _truthy(walk_fun_lim_helper(child, func, count + 1))
        # Nested levels are wrapped so the caller can tell the depths apart;
        # empty wrappers are dropped by _truthy one level up.
        return [collected] if count != 0 else collected

    if callable(func):
        return walk_fun_lim_helper(ilist, func)
    return []
def transformers_robots_in_disguise(x):
    '''
    Rewrite every dict in x in place and return [].

    For each dict:
    - "PerfList" (one entry or a list) is removed; each entry becomes a key
      named after its "@attrId", mapping "from_<start>_to_<end>" to the row's
      "@data", plus "@interval" taken from the last row.
    - "Resource" (one entry or a list) is removed; each child is re-attached
      as idict[<@type>][<@name>], with those two keys popped from the child.

    Dicts with neither key are left untouched, so calling this twice on the
    same dict is harmless. Raises KeyError when a PerfList entry lacks
    "@attrId", a row lacks "@start"/"@end"/"@data"/"@interval", or a child
    lacks "@type"/"@name".
    '''
    def as_list(value):
        # The payload uses a bare object where a list would hold one item.
        return value if isinstance(value, list) else [value]

    for idict in x:
        for perf in as_list(idict.pop("PerfList", [])):
            attr_id = perf.pop("@attrId")
            samples = {}
            for row in as_list(perf.pop("Data", [])):
                samples["from_%(@start)s_to_%(@end)s" % row] = row["@data"]
                samples["@interval"] = row["@interval"]
            idict[attr_id] = samples
        for child in as_list(idict.pop("Resource", [])):
            child_type = child.pop("@type")
            child_name = child.pop("@name")
            idict.setdefault(child_type, {})[child_name] = child
    return []
# Parse the sample payload. json.loads accepts the string directly, so no
# file-like wrapper is needed.
json_data = json.loads(json_str)
# Transform a copy: the transformation rewrites the structure in place and
# the parsed original is kept intact for comparison.
data_copy = deepcopy(json_data)
walk_fun_lim(data_copy, transformers_robots_in_disguise)
pprint(data_copy)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.