简体   繁体   中英

Return a list of all variable names in a python nested dict/json document in dot notation

I'm looking for a function that operates on a python arbitrarily nested dict/array in JSON-esque format and returns a list of strings keying all the variable names it contains, to infinite depth. So, if the object is...

# Sample nested document used by the examples below: a plain string value
# under 'a', a nested dict under 'b', and a list of two dicts under 'd'.
x = {
    'a': 'meow',
    'b': {
        'c': 'asd'
    },
    'd': [
        {
            "e": "stuff",
            "f": 1
        },
        {
            "e": "more stuff",
            "f": 2
        }
    ]
}

mylist = f(x) would return...

>>> mylist
['a', 'b', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']
def dot_notation(obj, prefix=''):
    """Yield the path of every leaf value inside ``obj``.

    Dict keys are joined to the path so far with ``.`` and list positions
    are appended as ``[i]``.  Anything that is neither a dict nor a list is
    treated as a leaf and produces the path accumulated so far (``prefix``).
    Empty dicts and lists therefore produce nothing.
    """
    if isinstance(obj, dict):
        # Only insert the separator once there is something to separate from.
        stem = prefix + '.' if prefix else prefix
        children = ((stem + str(k), v) for k, v in obj.items())
    elif isinstance(obj, list):
        children = ((prefix + '[' + str(i) + ']', v) for i, v in enumerate(obj))
    else:
        yield prefix
        return

    for child_path, child in children:
        for res in dot_notation(child, child_path):
            yield res

Example:

>>> list(dot_notation(x))
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']

This is a fun one. I solved it using recursion.

def parse(d):
    """Return a list with the path of every leaf value in the dict ``d``.

    Nested dict keys are joined with ``.`` and list positions are written
    as ``[index]``, e.g. ``'d[0].e'``.
    """
    return parse_dict(d)


def parse_dict(d):
    """Return the leaf paths found below the dict ``d``, relative to ``d``.

    A key whose value is neither a dict nor a list is appended as-is.
    """
    items = []
    # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
    # ``iteritems()`` is Python 2 only and raises AttributeError on Python 3.
    for key, val in d.items():
        if isinstance(val, dict):
            # use dot notation for dicts
            items.extend('{}.{}'.format(key, sub) for sub in parse_dict(val))
        elif isinstance(val, list):
            # use bracket notation for lists
            items.extend('{}{}'.format(key, sub) for sub in parse_list(val))
        else:
            # just use the key for everything else
            items.append(key)
    return items


def parse_list(l):
    """Return the leaf paths found below the list ``l``, relative to ``l``.

    Every path starts with the ``[index]`` of the element it came from.
    """
    items = []
    for idx, val in enumerate(l):
        if isinstance(val, dict):
            items.extend('[{}].{}'.format(idx, sub) for sub in parse_dict(val))
        elif isinstance(val, list):
            items.extend('[{}]{}'.format(idx, sub) for sub in parse_list(val))
        else:
            # A leaf element is addressed by its position, not by its value.
            items.append('[{}]'.format(idx))
    return items

Here is my result:

>>> parse(x)
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']

EDIT

Here it is again using generators, because I liked the answer by Fj

def parse(d):
    """Return a list with the path of every leaf value in the dict ``d``.

    Nested dict keys are joined with ``.`` and list positions are written
    as ``[index]``, e.g. ``'d[0].e'``.
    """
    return list(parse_dict(d))


def parse_dict(d):
    """Yield the leaf paths found below the dict ``d``, relative to ``d``.

    A key whose value is neither a dict nor a list is yielded as-is.
    """
    # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
    # ``iteritems()`` is Python 2 only and raises AttributeError on Python 3.
    for key, val in d.items():
        if isinstance(val, dict):
            # use dot notation for dicts
            for item in parse_dict(val):
                yield '{}.{}'.format(key, item)
        elif isinstance(val, list):
            # use bracket notation
            for item in parse_list(val):
                yield '{}{}'.format(key, item)
        else:
            # lowest level - just use the key
            yield key


def parse_list(l):
    """Yield the leaf paths found below the list ``l``, relative to ``l``.

    Every path starts with the ``[index]`` of the element it came from.
    """
    for idx, val in enumerate(l):
        if isinstance(val, dict):
            for item in parse_dict(val):
                yield '[{}].{}'.format(idx, item)
        elif isinstance(val, list):
            for item in parse_list(val):
                yield '[{}]{}'.format(idx, item)
        else:
            # A leaf element is addressed by its position, not by its value.
            yield '[{}]'.format(idx)

The same result:

>>> parse(x)
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']

If the top level of your object can be a list (an array, in JSON terminology), your output format doesn't work: for example, given ["foo", "bar"] you'd logically return ['[0]', '[1]'], which probably isn't what you want. You can solve this by also passing in the name of the object, using a slight modification of Fj's answer:

def paths(container, name):
    """Yield the path of every leaf value inside ``container``.

    ``name`` is the label of ``container`` itself and prefixes every path.
    List elements are addressed as ``name[i]`` and dict entries as
    ``name.key``; any other value is a leaf and yields ``name`` unchanged.
    """
    is_list = isinstance(container, list)
    if not is_list and not isinstance(container, dict):
        # Leaf: the accumulated name is the complete path.
        yield name
        return

    if is_list:
        labelled = (("%s[%d]" % (name, pos), item)
                    for pos, item in enumerate(container))
    else:
        labelled = (("%s.%s" % (name, key), item)
                    for key, item in container.items())

    for child_name, item in labelled:
        for found in paths(item, child_name):
            yield found

Usage:

>>> list(paths(x, "x"))
['x.a', 'x.b.c', 'x.d[0].e', 'x.d[0].f', 'x.d[1].e', 'x.d[1].f']
>>> list(paths(["foo", "bar"], "array"))
['array[0]', 'array[1]']

Python 3.3 introduces a yield from syntax which makes this a little cleaner:

def paths(container, name):
    """Yield the path of every leaf value inside ``container``.

    ``name`` labels ``container`` itself.  Dict entries extend it to
    ``name.key``, list elements to ``name[i]``, and any other value ends
    the recursion by yielding ``name``.
    """
    if isinstance(container, dict):
        for key, value in container.items():
            entry_path = "%s.%s" % (name, key)
            yield from paths(value, entry_path)
        return

    if isinstance(container, list):
        for position, value in enumerate(container):
            slot_path = "%s[%d]" % (name, position)
            yield from paths(value, slot_path)
        return

    # Neither dict nor list: this is a leaf.
    yield name

Python 3.3+ only:

def f(x, parent=''):
    """Yield the path of every dict key reachable inside ``x``.

    Unlike a leaf-only walk, each key is yielded as soon as it is reached,
    before descending into its value, so intermediate containers appear in
    the output too.  List positions are written as ``[i]`` but are not
    yielded by themselves; values that are neither dicts nor lists produce
    nothing.
    """
    if isinstance(x, list):
        for idx, subx in enumerate(x):
            yield from f(subx, '{}[{}]'.format(parent, idx))
        return

    if not isinstance(x, dict):
        return

    # Only insert the separator once there is a parent path to separate from.
    stem = parent + '.' if parent else parent
    for key in x:
        here = stem + key
        yield here
        yield from f(x[key], here)

Python 2.x ~ Python 3.2

def f(x, parent=''):
    """Yield the path of every dict key reachable inside ``x``.

    Each key is yielded before its value is explored, so intermediate
    containers are reported as well as leaves.  List positions contribute
    ``[i]`` to the path but are not yielded on their own, and values that
    are neither dicts nor lists produce nothing.
    """
    if isinstance(x, list):
        for idx, subx in enumerate(x):
            slot = '{}[{}]'.format(parent, idx)
            for y in f(subx, slot):
                yield y
    elif isinstance(x, dict):
        # Only insert the separator once there is a parent path.
        stem = parent + '.' if parent else parent
        for key in x:
            here = stem + key
            yield here
            for y in f(x[key], here):
                yield y

>>> x = {'a': 'meow', 'b': {'c': 'asd'}, 'd':[{"e":"stuff","f":1}, {"e":"more stuff","f":2}]}
>>> list(f(x))
['d', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f', 'a', 'b', 'b.c']

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM