简体   繁体   中英

Complex Python JSON object to custom dictionary conversion

I have the following JSON object:

{
    "Resource": [
        {
            "@name": "Bravo",
            "@signature": "h#Bravo",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": {
                "@attrId": "cpuUsage",
                "@attrName": "Usage",
                "Data": [
                    {
                        "@data": "26.00",
                        "@end": "01:05:00",
                        "@interval": "60",
                        "@start": "01:04:00"
                    },
                    {
                        "@data": "24.00",
                        "@end": "01:04:00",
                        "@interval": "60",
                        "@start": "01:03:00"
                    },
                    {
                        "@data": "36.00",
                        "@end": "01:03:00",
                        "@interval": "60",
                        "@start": "01:02:00"
                    },
                    {
                        "@data": "38.00",
                        "@end": "01:02:00",
                        "@interval": "60",
                        "@start": "01:01:00"
                    },
                    {
                        "@data": "37.00",
                        "@end": "01:01:00",
                        "@interval": "60",
                        "@start": "01:00:00"
                    }
                ]
            },
            "Resource": [
                {
                    "@name": "Tango",
                    "@signature": "vm#Tango",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": {
                            "@data": "12.00",
                            "@end": "04:05:00",
                            "@interval": "60",
                            "@start": "04:04:00"
                        }
                    }
                },
                {
                    "@name": "Charlie",
                    "@signature": "vm#Charlie",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": [
                            {
                                "@data": "12.00",
                                "@end": "04:20:00",
                                "@interval": "60",
                                "@start": "04:19:00"
                            },
                            {
                                "@data": "12.00",
                                "@end": "04:19:00",
                                "@interval": "60",
                                "@start": "04:18:00"
                            }
                        ]
                    }
                }
            ]
        },
        {
            "@name": "Alpha",
            "@signature": "h#Alpha",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": [
                {
                    "@attrId": "cpuUsage",
                    "@attrName": "Usage",
                    "Data": {
                        "@data": "9",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                },
                {
                    "@attrId": "cpuUsagemhz",
                    "@attrName": "Usage MHz",
                    "Data": {
                        "@data": "479",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                }
            ]
        }
    ]
}

I am looking for a way to traverse this JSON, reach all of its keys, and convert it into the following expected Python dictionary:

d = { 'ESX_5.x' : 
        { 
            'Bravo' :
                {
                    "@typeDisplayName" : "ESX Server",
                    "@signature" : "h#Bravo",
                    "cpuUsage" :
                        {
                            "from_01:04:00_to_01:05:00" : 26.00,
                            "from_01:03:00_to_01:04:00" : 24.00,
                            "from_01:02:00_to_01:03:00" : 36.00,
                            "from_01:01:00_to_01:02:00" : 38.00,
                            "from_01:00:00_to_01:01:00" : 37.00,
                            "interval" : 60
                        },
                    "vm" :
                        {
                            "Tango" :
                                {
                                    "@typeDisplayName" : "Virtual Machine",
                                    "@signature" : "vm#Tango",
                                    "cpuUsage" :
                                        {
                                            "from_04:04:00_to_04:05:00" : 12.00,
                                            "interval" : 60
                                        }
                                },
                            "Charlie" :
                                {
                                    "@typeDisplayName" : "Virtual Machine",
                                    "@signature": "vm#Charlie",
                                    "cpuUsage" : 
                                        {
                                            "from_04:19:00_to_04:20:00" : "12.00",
                                            "from_04:18:00_to_04:19:00" : "12.00",
                                            "@interval": "60",
                                        }
                                }
                        },
                },
            'Alpha' :
                {
                    "@typeDisplayName" : "ESX Server",
                    "@signature" : "h#Alpha",
                    "cpuUsage" :
                        {
                            "from_06:09:00_to_06:10:00" : 9,
                            "@interval": "60"
                        },
                    "cpuUsagemhz" :
                        {
                            "from_06:09:00_to_06:10:00" : 479,
                            "@interval": "60"
                        }
                }
        }
    }

I need recursive functions that fetch each Resource, its PerfList and its Data, and build the customized dictionary from them.

There may be typos or syntax errors in the hand-written expected dictionary above.

Here is my code so far. However, it fails when Resources are nested to an arbitrary depth.

import json

class MQLPrettyPrint(object):
    """Load an MQL JSON response and reshape its nested resources.

    The input is expected to look like
    ``{"Response": {"Results": {"Resource": ...}}}`` where ``Resource``,
    ``PerfList`` and ``Data`` may each be either a single object or a list
    of objects, and resources may contain further ``Resource`` children to
    any depth.

    ``json2mql`` returns a dictionary of the form::

        {<@type>: {<@name>: {"@signature": ...,
                             "@typeDisplayName": ...,
                             <@attrId>: {"@attrId": ..., "@attrName": ...,
                                         "Data": {"<start>_<end>": {...}}},
                             <child @type>: {<child @name>: {...}}}}}
    """
    KEY_RESPONSE = 'Response'
    KEY_RESULTS = 'Results'
    KEY_RESOURCE = 'Resource'

    # Several methods keep a parameter named ``dict`` for backward
    # compatibility, which hides the builtin inside them. This alias is
    # bound here, where the builtin is still visible, and is used for every
    # type check in the class.
    _DICT = dict

    def __init__(self, file=None):
        """Remember *file* and, when one is given, read and parse it."""
        self._json_file = file
        if file is None:
            # No file to load: start empty instead of failing inside open().
            self._json_data = ""
            self._json_dict = {}
        else:
            self._json_data = self.read_json_file()
            self._json_dict = self.json_to_dict()

    def json_file(self):
        """Return the path given to the constructor."""
        return self._json_file

    def read_json_file(self):
        """Return the raw text of the configured file.

        Errors from ``open``/``read`` propagate to the caller; the handle is
        closed in every case.
        """
        with open(self._json_file, "r") as handle:
            return handle.read()

    def json_to_dict(self):
        """Parse the raw text that was read from the file."""
        return json.loads(self._json_data)

    def json_data(self):
        """Return the raw JSON text."""
        return self._json_data

    def json_dict(self):
        """Return the parsed JSON document."""
        return self._json_dict

    def json2mql(self):
        """Convert the loaded document; return ``{}`` if it has no Response."""
        document = self._json_dict
        if isinstance(document, self._DICT) and self.KEY_RESPONSE in document:
            return self.fetch_response(document[self.KEY_RESPONSE])
        return {}

    def fetch_response(self, dict):
        """Convert the ``Results`` member of a response object."""
        if isinstance(dict, self._DICT) and self.KEY_RESULTS in dict:
            return self.fetch_results(dict[self.KEY_RESULTS])
        return {}

    def fetch_results(self, dict):
        """Convert the ``Resource`` member of a results object."""
        if isinstance(dict, self._DICT) and self.KEY_RESOURCE in dict:
            return self.fetch_resource(dict[self.KEY_RESOURCE])
        return {}

    def fetch_resource(self, resources, dict=None):
        """Recursively convert one resource or a list of resources.

        Each resource is stored as ``result[@type][@name]``. Its perf
        attributes are merged into that entry, and nested ``Resource``
        children are converted into the same entry, keyed by their own type.

        :param resources: a resource object or a list of resource objects.
        :param dict: optional dictionary to add the converted resources to;
            a fresh one is created when omitted.
        :returns: the dictionary the resources were added to.
        :raises KeyError: if a resource lacks one of its ``@`` attributes.
        """
        result = {} if dict is None else dict
        if isinstance(resources, list):
            for resource in resources:
                self.fetch_resource(resource, result)
        elif isinstance(resources, self._DICT):
            header = self.fetch_resource_header(resources)
            entry = {
                '@signature': header['@signature'],
                '@typeDisplayName': header['@typeDisplayName'],
            }
            entry.update(self.fetch_perf_list(resources))
            if self.KEY_RESOURCE in resources:
                # Children land inside this entry, grouped by their own type.
                self.fetch_resource(resources[self.KEY_RESOURCE], entry)
            result.setdefault(header['@type'], {})[header['@name']] = entry
        else:
            # Best effort: show values that are neither a list nor an object.
            print(resources)
        return result

    @staticmethod
    def fetch_resource_header(resource):
        """Return the four identifying ``@`` attributes of *resource*."""
        return {
            '@name': resource['@name'],
            '@signature': resource['@signature'],
            '@type': resource['@type'],
            '@typeDisplayName': resource['@typeDisplayName'],
        }

    # The method was originally defined under this misspelled name; keep it
    # available for any existing caller.
    fetch_resouce_header = fetch_resource_header

    def fetch_perf_list(self, resource, perfDict=None):
        """Collect the perf attributes of *resource*, keyed by ``@attrId``.

        :param resource: resource object; a missing ``PerfList`` yields no
            entries.
        :param perfDict: optional dictionary to merge into; a fresh one is
            created when omitted.
        :returns: ``{attrId: {"@attrId", "@attrName", "Data": {...}}}``.
        """
        result = {} if perfDict is None else perfDict
        perf_lists = resource.get('PerfList', [])
        if isinstance(perf_lists, self._DICT):
            perf_lists = [perf_lists]
        if not isinstance(perf_lists, list):
            print(perf_lists)
            return result
        for perf in perf_lists:
            if not isinstance(perf, self._DICT):
                print(perf)
                continue
            header = self.fetch_perf_header(perf)
            samples = self.fetch_data(perf)
            key = header['@attrId']
            if key in result:
                # Same attribute seen again: merge its samples.
                result[key].setdefault('Data', {}).update(samples)
            else:
                header['Data'] = samples
                result[key] = header
        return result

    def fetch_perf_header(self, perfDict):
        """Return the ``@attrId`` and ``@attrName`` of a perf entry."""
        return {'@attrId': perfDict['@attrId'],
                '@attrName': perfDict['@attrName']}

    def fetch_data(self, perfDict, dataDict=None):
        """Index the samples of a perf entry by ``"<start>_<end>"``.

        :param perfDict: perf entry whose ``Data`` is one sample object or a
            list of them; a missing ``Data`` yields no samples.
        :param dataDict: optional dictionary to add to; a fresh one is
            created when omitted.
        :returns: mapping from ``"<@start>_<@end>"`` to the sample object.
        """
        result = {} if dataDict is None else dataDict
        samples = perfDict.get('Data', [])
        if isinstance(samples, self._DICT):
            samples = [samples]
        if not isinstance(samples, list):
            print(samples)
            return result
        for sample in samples:
            if not isinstance(sample, self._DICT):
                print(sample)
                continue
            key = "%s_%s" % (sample['@start'], sample['@end'])
            result[key] = sample
        return result

Sometimes, when operating on nested structures with recursive functions, it's easier to think in terms of a walking function and an operation function. Here we want to target every dict contained in the JSON structure and perform a transformation operation on it.

Transforming a structure in-place, instead of recreating a new one, is significantly easier when dealing with nests. The more difficult approach of constructing nested dicts from that json structure involves being able to address the specific json elements, place them at correct depth and branch of the new structure; this involves two parallel walking operations.

One thing to be mindful of, though, is modifying the nested structure while walking across it: a transformation operation may change a list that the walking function is currently iterating over. In this instance, only children (not siblings) are modified, and they are modified before the walk descends into the lower branches.

from copy import deepcopy
import json
from pprint import pprint
from StringIO import StringIO

# Sample JSON document used as input for the transformation. In it,
# "PerfList" and "Data" appear both as a single object and as a list of
# objects, and one resource carries nested "Resource" children.
json_str = \
'''
{
    "Resource": [
        {
            "@name": "Bravo",
            "@signature": "h#Bravo",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": {
                "@attrId": "cpuUsage",
                "@attrName": "Usage",
                "Data": [
                    {
                        "@data": "26.00",
                        "@end": "01:05:00",
                        "@interval": "60",
                        "@start": "01:04:00"
                    },
                    {
                        "@data": "24.00",
                        "@end": "01:04:00",
                        "@interval": "60",
                        "@start": "01:03:00"
                    },
                    {
                        "@data": "36.00",
                        "@end": "01:03:00",
                        "@interval": "60",
                        "@start": "01:02:00"
                    },
                    {
                        "@data": "38.00",
                        "@end": "01:02:00",
                        "@interval": "60",
                        "@start": "01:01:00"
                    },
                    {
                        "@data": "37.00",
                        "@end": "01:01:00",
                        "@interval": "60",
                        "@start": "01:00:00"
                    }
                ]
            },
            "Resource": [
                {
                    "@name": "Tango",
                    "@signature": "vm#Tango",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": {
                            "@data": "12.00",
                            "@end": "04:05:00",
                            "@interval": "60",
                            "@start": "04:04:00"
                        }
                    }
                },
                {
                    "@name": "Charlie",
                    "@signature": "vm#Charlie",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": [
                            {
                                "@data": "12.00",
                                "@end": "04:20:00",
                                "@interval": "60",
                                "@start": "04:19:00"
                            },
                            {
                                "@data": "12.00",
                                "@end": "04:19:00",
                                "@interval": "60",
                                "@start": "04:18:00"
                            }
                        ]
                    }
                }
            ]
        },
        {
            "@name": "Alpha",
            "@signature": "h#Alpha",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": [
                {
                    "@attrId": "cpuUsage",
                    "@attrName": "Usage",
                    "Data": {
                        "@data": "9",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                },
                {
                    "@attrId": "cpuUsagemhz",
                    "@attrName": "Usage MHz",
                    "Data": {
                        "@data": "479",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                }
            ]
        }
    ]
}
'''

def walk_fun_lim(ilist, func=None):
    '''
    Walk a nested list/dict structure depth-first and apply func to its dicts.

    func is always called with a list of dicts and must return an iterable;
    falsy items in that iterable are discarded. For a list node, func gets
    all dicts that are direct elements of the list; for a dict node, func
    gets a one-element list holding that dict. The node's own results come
    first, followed by one nested list per child that produced anything.

    NOTE: a dict that is an element of a list is therefore passed to func
    twice, once together with its siblings and once on its own.

    Returns [] when func is None or not callable.
    '''
    if func is None or not hasattr(func, "__call__"):
        return []

    def keep_truthy(results):
        # Drop empty/falsy results so they do not clutter the output.
        return [item for item in results if item]

    def visit(node, depth):
        collected = []
        if isinstance(node, list):
            sibling_dicts = [elem for elem in node if isinstance(elem, dict)]
            collected.extend(keep_truthy(func(sibling_dicts)))
            children = node
        elif isinstance(node, dict):
            # func runs before the keys are iterated, so it may reshape
            # this dict without disturbing the walk.
            collected.extend(keep_truthy(func([node])))
            children = (node[key] for key in node)
        else:
            children = ()
        for child in children:
            collected.extend(keep_truthy(visit(child, depth + 1)))
        # Every level below the root is wrapped so nesting is preserved.
        return collected if depth == 0 else [collected]

    return visit(ilist, 0)

def transformers_robots_in_disguise(x):
    '''
    Reshape every dict in x in place and return an empty list.

    "PerfList" is removed and each of its entries is stored under its
    "@attrId" as a dict mapping "from_<start>_to_<end>" to the sample's
    "@data", plus the "@interval" of the last sample. "Resource" is removed
    and each child is stored under node[<@type>][<@name>], with "@type" and
    "@name" stripped from the child. A single object is accepted wherever a
    list is expected.
    '''
    for node in x:
        perf_entries = node.pop("PerfList", [])
        if not isinstance(perf_entries, list):
            perf_entries = [perf_entries]
        for perf in perf_entries:
            attr_id = perf.pop("@attrId")
            samples = perf.pop("Data", [])
            if not isinstance(samples, list):
                samples = [samples]
            flattened = {}
            for sample in samples:
                flattened["from_%(@start)s_to_%(@end)s" % sample] = sample["@data"]
                flattened["@interval"] = sample["@interval"]
            node[attr_id] = flattened

        children = node.pop("Resource", [])
        if not isinstance(children, list):
            children = [children]
        for child in children:
            child_type = child.pop("@type")
            child_name = child.pop("@name")
            # Group children by type, then by name.
            node.setdefault(child_type, {})[child_name] = child
    return []

# Parse the sample document straight from the string (no file-like wrapper
# needed), then transform a deep copy in place so that the freshly parsed
# ``json_data`` stays untouched for comparison.
json_data = json.loads(json_str)
data_copy = deepcopy(json_data)
walk_fun_lim(data_copy, transformers_robots_in_disguise)
pprint(data_copy)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM