复杂的Python JSON对象到自定义字典转换

Question

I have the following JSON object: 我有以下 JSON 对象：

{
    "Resource": [
        {
            "@name": "Bravo",
            "@signature": "h#Bravo",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": {
                "@attrId": "cpuUsage",
                "@attrName": "Usage",
                "Data": [
                    {
                        "@data": "26.00",
                        "@end": "01:05:00",
                        "@interval": "60",
                        "@start": "01:04:00"
                    },
                    {
                        "@data": "24.00",
                        "@end": "01:04:00",
                        "@interval": "60",
                        "@start": "01:03:00"
                    },
                    {
                        "@data": "36.00",
                        "@end": "01:03:00",
                        "@interval": "60",
                        "@start": "01:02:00"
                    },
                    {
                        "@data": "38.00",
                        "@end": "01:02:00",
                        "@interval": "60",
                        "@start": "01:01:00"
                    },
                    {
                        "@data": "37.00",
                        "@end": "01:01:00",
                        "@interval": "60",
                        "@start": "01:00:00"
                    }
                ]
            },
            "Resource": [
                {
                    "@name": "Tango",
                    "@signature": "vm#Tango",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": {
                            "@data": "12.00",
                            "@end": "04:05:00",
                            "@interval": "60",
                            "@start": "04:04:00"
                        }
                    }
                },
                {
                    "@name": "Charlie",
                    "@signature": "vm#Charlie",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": [
                            {
                                "@data": "12.00",
                                "@end": "04:20:00",
                                "@interval": "60",
                                "@start": "04:19:00"
                            },
                            {
                                "@data": "12.00",
                                "@end": "04:19:00",
                                "@interval": "60",
                                "@start": "04:18:00"
                            }
                        ]
                    }
                }
            ]
        },
        {
            "@name": "Alpha",
            "@signature": "h#Alpha",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": [
                {
                    "@attrId": "cpuUsage",
                    "@attrName": "Usage",
                    "Data": {
                        "@data": "9",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                },
                {
                    "@attrId": "cpuUsagemhz",
                    "@attrName": "Usage MHz",
                    "Data": {
                        "@data": "479",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                }
            ]
        }
    ]
}

I am looking for a JSON traversal that visits every key and converts the object above into the following expected Python dictionary: 我正在寻找一种 JSON 遍历方法，能够访问所有的键，并将上面的对象转换为以下预期的 Python 字典：

# Hand-written sketch of the desired result: the outer key is a resource's
# "@type", the next level is its "@name", and each entry holds the remaining
# "@..." attributes, one dict per PerfList "@attrId" (keyed by
# "from_<@start>_to_<@end>"), and any nested resources grouped by their "@type".
# NOTE(review): the sketch is not internally consistent -- some entries use
# "interval" with a number, others "@interval" with a string, and the data
# values are numbers in some places and strings in others.
d = { 'ESX_5.x' : 
        { 
            'Bravo' :
                {
                    "@typeDisplayName" : "ESX Server",
                    "@signature" : "h#Bravo",
                    "cpuUsage" :
                        {
                            "from_01:04:00_to_01:05:00" : 26.00,
                            "from_01:03:00_to_01:04:00" : 24.00,
                            "from_01:02:00_to_01:03:00" : 36.00,
                            "from_01:01:00_to_01:02:00" : 38.00,
                            "from_01:00:00_to_01:01:00" : 37.00,
                            "interval" : 60
                        },
                    "vm" :
                        {
                            "Tango" :
                                {
                                    "@typeDisplayName" : "Virtual Machine",
                                    "@signature" : "vm#Tango",
                                    "cpuUsage" :
                                        {
                                            "from_04:04:00_to_04:05:00" : 12.00,
                                            "interval" : 60
                                        }
                                },
                            "Charlie" :
                                {
                                    "@typeDisplayName" : "Virtual Machine",
                                    "@signature": "vm#Charlie",
                                    "cpuUsage" : 
                                        {
                                            "from_04:19:00_to_04:20:00" : "12.00",
                                            "from_04:18:00_to_04:19:00" : "12.00",
                                            "@interval": "60",
                                        }
                                }
                        },
                },
            'Alpha' :
                {
                    "@typeDisplayName" : "ESX Server",
                    "@signature" : "h#Alpha",
                    "cpuUsage" :
                        {
                            "from_06:09:00_to_06:10:00" : 9,
                            "@interval": "60"
                        },
                    "cpuUsagemhz" :
                        {
                            "from_06:09:00_to_06:10:00" : 479,
                            "@interval": "60"
                        }
                }
        }
    }

I need recursive functions that fetch every Resource, PerfList and Data entry and build the customized dictionary. 需要用递归函数来获取 Resource、PerfList 和 Data，并构建自定义字典。

There may be typos or syntax errors in the hand-written expected dictionary above. 上面手工编写的预期字典中可能存在拼写错误或语法错误。

Here is my code so far. It fails, however, when Resources are nested to an arbitrary depth N. 以下是我目前的代码，但它在处理 N 层嵌套的 Resource 时会失败。

import json

class MQLPrettyPrint(object):
    """Load an MQL JSON result file and collect its nested ``Resource`` entries.

    The parsed document is expected to look like
    ``{"Response": {"Results": {"Resource": ...}}}``.  A ``Resource`` value is
    either one resource dict or a list of them; every resource may carry a
    ``PerfList`` (one perf dict or a list of them) and further nested
    ``Resource`` entries, to any depth.
    """

    KEY_RESPONSE = 'Response'
    KEY_RESULTS = 'Results'
    KEY_RESOURCE = 'Resource'
    KEY_PERF_LIST = 'PerfList'
    KEY_DATA = 'Data'

    # Attributes copied verbatim from every resource into its header.
    HEADER_KEYS = ('@name', '@signature', '@type', '@typeDisplayName')

    # The builtin type, captured at class-definition time: several methods keep
    # the historical parameter name ``dict`` and so cannot see the builtin.
    _DICT_TYPE = dict

    def __init__(self, file=None):
        """Read and parse the JSON document stored at path *file*."""
        self._json_file = file
        self._json_data = self.read_json_file()
        self._json_dict = self.json_to_dict()

    def json_file(self):
        """Return the path the document was loaded from."""
        return self._json_file

    def read_json_file(self):
        """Return the raw text of the JSON file.

        Errors from ``open``/``read`` propagate to the caller; the ``with``
        block guarantees the handle is closed even when reading fails.
        """
        with open(self._json_file, "r") as handle:
            return handle.read()

    def json_to_dict(self):
        """Parse the raw JSON text into Python objects."""
        return json.loads(self._json_data)

    def json_data(self):
        """Return the raw JSON text."""
        return self._json_data

    def json_dict(self):
        """Return the parsed JSON document."""
        return self._json_dict

    def json2mql(self):
        """Collect the resources below the top-level ``Response`` key.

        Returns the dict built by ``fetch_resource`` or ``{}`` when the
        document has no ``Response`` key.
        """
        if self.KEY_RESPONSE in self._json_dict:
            return self.fetch_response(self._json_dict[self.KEY_RESPONSE])
        return {}

    def fetch_response(self, dict):
        """Collect the resources below the ``Results`` key of a response."""
        if self.KEY_RESULTS in dict:
            return self.fetch_results(dict[self.KEY_RESULTS])
        return {}

    def fetch_results(self, dict):
        """Collect the resources below the ``Resource`` key of a result set."""
        if self.KEY_RESOURCE in dict:
            return self.fetch_resource(dict[self.KEY_RESOURCE])
        return {}

    def fetch_resource(self, resources, dict=None):
        """Recursively collect one resource or a list of resources.

        resources -- a resource dict or a (possibly nested) list of them.
        dict      -- optional accumulator to add to; a fresh one is created
                     when omitted, so separate calls never share state.

        Returns the accumulator, shaped ``{@type: {@name: entry}}``.  Each
        entry holds the resource header plus, when present in the input,
        ``'PerfList'`` (see ``fetch_perf_list``) and ``'Resource'`` (the
        nested resources in this same shape).  Values that are neither a
        list nor a dict are printed and skipped.
        """
        collected = {} if dict is None else dict
        if isinstance(resources, list):
            for resource in resources:
                self.fetch_resource(resource, collected)
        elif isinstance(resources, self._DICT_TYPE):
            entry = self.fetch_resource_header(resources)
            if self.KEY_PERF_LIST in resources:
                entry[self.KEY_PERF_LIST] = self.fetch_perf_list(resources)
            if self.KEY_RESOURCE in resources:
                # Nested resources get their own accumulator so they end up
                # under their parent rather than next to it.
                entry[self.KEY_RESOURCE] = self.fetch_resource(
                    resources[self.KEY_RESOURCE])
            by_name = collected.setdefault(entry['@type'], {})
            by_name[entry['@name']] = entry
        else:
            print(resources)
        return collected

    @staticmethod
    def fetch_resouce_header(resource):
        """Return the header attributes of *resource* as a new dict.

        The misspelled name is kept for backward compatibility; new code
        should call ``fetch_resource_header``.  Raises ``KeyError`` when one
        of the header attributes is missing.
        """
        return {key: resource[key] for key in MQLPrettyPrint.HEADER_KEYS}

    def fetch_resource_header(self, resource):
        """Return ``@name``, ``@signature``, ``@type`` and ``@typeDisplayName``."""
        return self.fetch_resouce_header(resource)

    def fetch_perf_list(self, resource, perfDict=None):
        """Collect the ``PerfList`` of *resource*, keyed by ``@attrId``.

        resource -- a resource dict; ``KeyError`` is raised when it has no
                    ``PerfList`` key.
        perfDict -- optional accumulator; a fresh dict is used when omitted.

        Returns ``{@attrId: {'@attrId', '@attrName', 'Data': {...}}}`` where
        ``'Data'`` is the mapping produced by ``fetch_data``.  Perf entries
        sharing an ``@attrId`` have their data merged.  Unexpected values are
        printed and skipped.
        """
        collected = {} if perfDict is None else perfDict
        perf_lists = resource[self.KEY_PERF_LIST]
        if isinstance(perf_lists, dict):
            # A single perf entry is handled like a one-element list.
            perf_lists = [perf_lists]
        if not isinstance(perf_lists, list):
            print(perf_lists)
            return collected
        for perf in perf_lists:
            if not isinstance(perf, dict):
                print(perf)
                continue
            header = self.fetch_perf_header(perf)
            slot = collected.setdefault(header['@attrId'], header)
            slot.setdefault(self.KEY_DATA, {}).update(self.fetch_data(perf))
        return collected

    def fetch_perf_header(self, perfDict):
        """Return the ``@attrId`` and ``@attrName`` of a perf entry."""
        return {'@attrId': perfDict['@attrId'],
                '@attrName': perfDict['@attrName']}

    def fetch_data(self, perfDict, dataDict=None):
        """Index the ``Data`` points of a perf entry by their time window.

        perfDict -- a perf dict; ``KeyError`` is raised when it has no
                    ``Data`` key.
        dataDict -- optional accumulator; a fresh dict is used when omitted.

        Returns ``{"<@start>_<@end>": data_point}``.  ``Data`` may be a
        single point or a list of points; anything else is printed and
        skipped.
        """
        collected = {} if dataDict is None else dataDict
        points = perfDict[self.KEY_DATA]
        if isinstance(points, dict):
            points = [points]
        if not isinstance(points, list):
            print(points)
            return collected
        for point in points:
            key = "%s_%s" % (point['@start'], point['@end'])
            collected[key] = point
        return collected

Answer 1

Sometimes, when operating on nested structures using recursive functions, it's easier to think in terms of a walking function and an operation function. 有时，在用递归函数处理嵌套结构时，把问题拆分为“遍历函数”和“操作函数”来思考会更容易。 So we want to target all the dicts contained in the JSON structure and perform a transformation operation on them. 因此，我们要定位 JSON 结构中包含的所有 dict，并对它们执行转换操作。

Transforming a structure in place, instead of building a new one, is significantly easier when dealing with nested data. 在处理嵌套数据时，就地转换结构比重新构建一个新结构要容易得多。 The more difficult approach of constructing new nested dicts from that JSON structure requires being able to address the specific JSON elements and place them at the correct depth and branch of the new structure; 从该 JSON 结构构造新的嵌套 dict 这种更困难的做法，需要能够定位具体的 JSON 元素，并把它们放到新结构中正确的深度和分支上； this involves two parallel walking operations. 这涉及两个并行的遍历操作。

One thing to be mindful of, though, is modifying the nested structure while walking across it, as a transformation operation may change a list that the walking function is currently iterating over. 不过需要注意的一点是：在遍历嵌套结构的同时修改它，转换操作可能会改变遍历函数当前正在迭代的列表。 In this instance, only children (not siblings) are modified, and that happens before the walk descends into the lower branches. 在本例中，只有子节点（而非兄弟节点）会被修改，而且修改发生在继续向下层分支遍历之前。

from copy import deepcopy
import json
from pprint import pprint
from StringIO import StringIO

# Sample input document as JSON text.  It exercises every shape the walker has
# to handle: "PerfList" and "Data" each appear both as a single object and as
# a list, and the "Bravo" resource contains nested "Resource" entries.
json_str = \
'''
{
    "Resource": [
        {
            "@name": "Bravo",
            "@signature": "h#Bravo",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": {
                "@attrId": "cpuUsage",
                "@attrName": "Usage",
                "Data": [
                    {
                        "@data": "26.00",
                        "@end": "01:05:00",
                        "@interval": "60",
                        "@start": "01:04:00"
                    },
                    {
                        "@data": "24.00",
                        "@end": "01:04:00",
                        "@interval": "60",
                        "@start": "01:03:00"
                    },
                    {
                        "@data": "36.00",
                        "@end": "01:03:00",
                        "@interval": "60",
                        "@start": "01:02:00"
                    },
                    {
                        "@data": "38.00",
                        "@end": "01:02:00",
                        "@interval": "60",
                        "@start": "01:01:00"
                    },
                    {
                        "@data": "37.00",
                        "@end": "01:01:00",
                        "@interval": "60",
                        "@start": "01:00:00"
                    }
                ]
            },
            "Resource": [
                {
                    "@name": "Tango",
                    "@signature": "vm#Tango",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": {
                            "@data": "12.00",
                            "@end": "04:05:00",
                            "@interval": "60",
                            "@start": "04:04:00"
                        }
                    }
                },
                {
                    "@name": "Charlie",
                    "@signature": "vm#Charlie",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": [
                            {
                                "@data": "12.00",
                                "@end": "04:20:00",
                                "@interval": "60",
                                "@start": "04:19:00"
                            },
                            {
                                "@data": "12.00",
                                "@end": "04:19:00",
                                "@interval": "60",
                                "@start": "04:18:00"
                            }
                        ]
                    }
                }
            ]
        },
        {
            "@name": "Alpha",
            "@signature": "h#Alpha",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": [
                {
                    "@attrId": "cpuUsage",
                    "@attrName": "Usage",
                    "Data": {
                        "@data": "9",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                },
                {
                    "@attrId": "cpuUsagemhz",
                    "@attrName": "Usage MHz",
                    "Data": {
                        "@data": "479",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                }
            ]
        }
    ]
}
'''

def walk_fun_lim(ilist, func=None):
    '''
    Walk a nested list/dict structure depth-first, applying func to its dicts.

    func is called with a list of dicts and must return an iterable.  For a
    list node it receives every dict element of that list in one call; for a
    dict node it receives a one-element list holding that dict.  A node is
    handed to func before the walk descends into its children.

    Truthy items returned by func are collected in call order.  Whatever is
    collected below a child is appended as one nested sub-list, and children
    that yield nothing are left out.

    Returns the collected results, or [] when func is None or not callable.
    '''
    if func is None or not hasattr(func, "__call__"):
        return []

    def gather(node):
        # Results for this node first, then one nested list per child that
        # produced anything.
        if isinstance(node, list):
            dict_items = [item for item in node if isinstance(item, dict)]
            collected = [hit for hit in func(dict_items) if hit]
            for item in node:
                below = gather(item)
                if below:
                    collected.append(below)
            return collected
        if isinstance(node, dict):
            collected = [hit for hit in func([node]) if hit]
            for key in node:
                below = gather(node[key])
                if below:
                    collected.append(below)
            return collected
        # Scalars are leaves: nothing to call func on, nothing to descend into.
        return []

    return gather(ilist)

def transformers_robots_in_disguise(x):
    '''
    Rewrite every dict in x in place into the compact keyed layout.

    For each dict:
      * "PerfList" (one entry or a list) is removed; each entry is stored
        under its "@attrId" as a dict mapping "from_<@start>_to_<@end>" to
        the point's "@data", plus "@interval" taken from the last point.
      * "Resource" (one entry or a list) is removed; each child is stored
        under dict[<child @type>][<child @name>], with those two attributes
        popped from the child.

    Always returns an empty list.
    '''
    def as_list(value):
        # A lone entry is treated like a one-element list.
        return value if isinstance(value, list) else [value]

    for node in x:
        for perf in as_list(node.pop("PerfList", [])):
            attr_id = perf.pop("@attrId")
            samples = {}
            for point in as_list(perf.pop("Data", [])):
                samples["from_%(@start)s_to_%(@end)s" % point] = point["@data"]
                samples["@interval"] = point["@interval"]
            node[attr_id] = samples

        for child in as_list(node.pop("Resource", [])):
            child_type = child.pop("@type")
            child_name = child.pop("@name")
            node.setdefault(child_type, {})[child_name] = child
    return []

# Parse the sample document directly from the string; json.loads needs no
# file-like wrapper around the text.
json_data = json.loads(json_str)
# Walk a deep copy so the freshly parsed json_data stays available unchanged
# while the copy is handed to the transformer.
data_copy = deepcopy(json_data)
walk_fun_lim(data_copy, transformers_robots_in_disguise)
pprint(data_copy)

复杂的Python JSON对象到自定义字典转换

问题描述

1 个解决方案

解决方案1
1 已采纳 2013-07-12 04:56:52

复杂的Python JSON对象到自定义字典转换

问题描述

1 个解决方案

解决方案1 1 已采纳 2013-07-12 04:56:52

解决方案1
1 已采纳 2013-07-12 04:56:52