I have a use case where I need to traverse a dict (which could have strings, dicts and lists as nested values) and create a new one based on predefined mappings by my business team. My first implementation was straightforward when the requirements were:
My code looked something like this:
def recursively_transform(parent_keys='', current_key='', container=None):
    """Rebuild ``container`` recursively, transforming every string leaf.

    ``parent_keys`` is the colon-separated path accumulated so far and
    ``current_key`` is the key under which ``container`` was found.

    * ``None`` is returned unchanged.
    * A string is passed to ``do_something_and_return``; when ``current_key``
      is in ``mapping`` the result is also handed to ``populate``.
    * A mapping is rebuilt with its own class from its transformed children.
    * Any other iterable is rebuilt with its own class, with ``"[]"``
      appended to the path for its items.

    Raises ``Exception`` for any other kind of value.

    NOTE(review): relies on ``basestring`` and ``collections.Mapping``, so it
    presumably targets Python 2 -- confirm before running on Python 3.
    """
    if container is None:
        return None

    if isinstance(container, basestring):
        transformed = do_something_and_return(parent_keys, current_key, container)
        if current_key in mapping:
            populate(parent_keys + current_key, transformed)
        return transformed

    rebuild = container.__class__

    if isinstance(container, collections.Mapping):
        # Children of a mapping live one level deeper: add the separator,
        # except at the root where there is no parent path yet.
        prefix = ''.join([parent_keys, ":"]) if parent_keys else parent_keys
        return rebuild(
            (child_key, recursively_transform(prefix + child_key, child_key, container[child_key]))
            for child_key in container
            if key_required(prefix, current_key)
        )

    if isinstance(container, collections.Iterable):
        return rebuild(
            recursively_transform(parent_keys + "[]", current_key, item)
            for item in container
        )

    raise Exception("")
As you can see, within the method `do_something_and_return`, using the parameters `parent_keys` and `current_key`, I apply a transformation to the value and return the new one. The steps for each `parent_keys` plus `current_key` combination are specified in an external mappings database.
But now the requirement has changed to support complex transformations (no longer 1:1), i.e. the mappings database will specify the new path for each key. The target could be any structure. For example, key/value pairs may have to be flattened, often the reverse has to happen, and sometimes there is no direct correspondence between the source and target structures.
Examples,
key1:key2:[]:key3 => key2:[]:key3
key1:key2:[]:key4 => key2:[]:key5
which means that an input dict like this:
{key1:{key2:[{key3: "value3", key4: "value4"}, {key3:None}]}}
would become
{key2:[{key3:"value3_after_transformation", key5:"value4_after_transformation"}, {key3:None}]}
`:` is the separator in my descriptive language between a parent key and a child key, and `[]` indicates that the parent key has a list as its value.
I am unsure what the approach should be in this case. The only approach I can think of that handles all these cases is to traverse all the keys recursively and populate another global dict on the fly, checking for the existence of the target keys and populating them appropriately. But this is not easy when it comes to dealing with nested lists. Also, it doesn't sound as elegant as the solution I have above, which uses containers and their children. What is the best way to do this in a general and elegant manner?
Thank you!
OK, I succeeded. This passes your given test cases, but it is very long. It finds all possible paths for a given template and then populates a new dict according to the new paths.
import re
from collections.abc import Mapping
def prepare_path(path):
    """Split a concrete path string into a list of dict keys and list indexes.

    Segments are separated by ``:``; a bracketed segment such as ``[3]`` may
    also follow a key directly. Bracketed segments are converted to ``int``,
    every other segment stays a ``str``.

    >>> prepare_path("key1:key2:[0]:key3")
    ['key1', 'key2', 0, 'key3']

    Raises ``ValueError`` when a bracketed segment does not contain an
    integer (this includes an empty ``[]``).
    """
    tokens = re.findall(r"[^:]+?(?=\[|:|$)|\[\d*?\]", path)
    return [
        int(token[1:-1]) if token.startswith("[") and token.endswith("]") else token
        for token in tokens
    ]
def prepare_template(template):
    """Split a path template into keys, fixed indexes and list wildcards.

    The template uses the same syntax as a concrete path (segments separated
    by ``:``), plus the wildcard ``[]`` meaning "every item of this list".

    Returns a list whose elements are:

    * ``str`` -- a dict key,
    * ``int`` -- an explicit list index written as ``[n]``,
    * ``("ListIndex", k)`` -- the k-th wildcard (0-based, left to right).

    >>> prepare_template("a:[]:b:[]:c")
    ['a', ('ListIndex', 0), 'b', ('ListIndex', 1), 'c']
    """
    parts = re.findall(r"[^:]+?(?=\[|:|$)|\[\d*?\]", template)

    wildcard_count = 0
    for position, element in enumerate(parts):
        if element[0] == "[" and element[-1] == "]":
            if len(element) > 2:
                parts[position] = int(element[1:-1])
            else:
                parts[position] = ("ListIndex", wildcard_count)
                # Each wildcard needs its own ordinal; otherwise every
                # wildcard would resolve to the same (first) list index.
                wildcard_count += 1
    return parts
def fill_template(template, list_indexes):
    """Render a prepared template as a concrete path string.

    ``template`` is a list of ``str`` keys, ``int`` list indexes and
    ``("ListIndex", k)`` wildcard markers. Each wildcard is replaced by
    ``list_indexes[k]``; explicit integer indexes are rendered as ``[n]``.
    The segments are joined with ``:``.

    >>> fill_template(["key2", ("ListIndex", 0), "key3"], (1,))
    'key2:[1]:key3'
    """
    segments = []
    for element in template:
        if isinstance(element, tuple):
            segments.append(f"[{list_indexes[element[1]]}]")
        elif isinstance(element, int):
            # Explicit indexes are ints; str.join() would reject them as-is.
            segments.append(f"[{element}]")
        else:
            segments.append(element)
    return ":".join(segments)
def populate(result_dict, target_path, value):
    """Store ``value`` in ``result_dict`` at ``target_path``.

    ``target_path`` is a concrete path string; it is parsed with
    ``prepare_path``. Missing intermediate containers are created while
    walking the path: a dict when the following segment is a key, a list
    otherwise. Lists are padded with ``None`` up to the requested index.

    NOTE: when the final segment is a list index, ``value`` is appended to
    the list; the index itself is not used for placement.
    """
    steps = prepare_path(target_path)

    def empty_container_for(next_step):
        # A string segment addresses a dict, anything else a list.
        return {} if isinstance(next_step, str) else []

    node = result_dict
    # Pair every non-final segment with the one that follows it.
    for step, following in zip(steps, steps[1:]):
        if isinstance(step, str):
            if step not in node:
                node[step] = empty_container_for(following)
        elif isinstance(step, int):
            shortfall = step - len(node) + 1
            if shortfall > 0:
                node.extend([None] * shortfall)
            if node[step] is None:
                node[step] = empty_container_for(following)
        node = node[step]

    leaf = steps[-1]
    if isinstance(leaf, int):
        node.append(value)
    else:
        node[leaf] = value
def get_value(container, target_path):
    """Return the value found in ``container`` at ``target_path``.

    ``target_path`` is a concrete path string; it is parsed with
    ``prepare_path`` and each resulting key or index is applied in turn.
    Lookup errors from the underlying containers propagate unchanged.
    """
    node = container
    for step in prepare_path(target_path):
        node = node[step]
    return node
def transform(old_path, new_path, old_container, new_container, transform_value=lambda *args: ' '.join(args)):
    """Copy one value from ``old_container`` into ``new_container``.

    The value at ``old_path`` is read, passed through
    ``transform_value(old_path, new_path, value)`` and the result is stored
    at ``new_path`` in ``new_container``.

    The default ``transform_value`` joins its three arguments with spaces,
    so it only works when the value is a string.
    """
    populate(
        new_container,
        new_path,
        transform_value(old_path, new_path, get_value(old_container, old_path)),
    )
def get_all_paths(prepared_template, container):
    """List every concrete path in ``container`` matching the template.

    ``prepared_template`` is a list of ``str`` keys, ``int`` list indexes and
    ``("ListIndex", k)`` wildcard markers.

    Returns a list of ``(path, indexes)`` tuples. ``path`` is the concrete
    path with every segment followed by ``:`` (for example
    ``"key1:key2:[0]:key3:"``) and ``indexes`` is the tuple of list indexes
    chosen for the wildcards, in template order.

    A string key that cannot be followed -- because the key is absent or the
    current value is not a mapping -- contributes no paths.

    Raises ``ValueError`` when a wildcard meets a value that is not a list,
    or when the template contains an element of an unsupported type.
    """
    if not prepared_template:
        return [("", ())]

    key, *rest = prepared_template

    if isinstance(key, tuple):
        if not isinstance(container, list):
            raise ValueError(container, key)
        return [
            (f"[{i}]:" + path, (i,) + per)
            for i, child in enumerate(container)
            for path, per in get_all_paths(rest, child)
        ]

    if isinstance(key, str):
        # Check the type before using ``in``: on a string value ``in`` is a
        # substring test, and on None it raises TypeError.
        if not isinstance(container, Mapping) or key not in container:
            return []
        return [
            (f"{key}:" + path, per)
            for path, per in get_all_paths(rest, container[key])
        ]

    if isinstance(key, int):
        return [
            (f"[{key}]:" + path, per)
            for path, per in get_all_paths(rest, container[key])
        ]

    raise ValueError(f"unsupported template element: {key!r}")
def transform_all(old_template, new_template, old_container, new_container, transform_value=lambda op, np, value: value):
    """Apply one template mapping to every matching value.

    Every concrete path in ``old_container`` that matches ``old_template``
    is located; for each one, ``new_template`` is filled in with the list
    indexes of that match and the value is copied into ``new_container``
    through ``transform``.

    ``transform_value(old_path, new_path, value)`` produces the stored
    value; by default the value is copied unchanged.
    """
    target_template = prepare_template(new_template)
    source_template = prepare_template(old_template)
    for concrete_path, indexes in get_all_paths(source_template, old_container):
        transform(
            concrete_path,
            fill_template(target_template, indexes),
            old_container,
            new_container,
            transform_value,
        )
# Demo: apply the two example mappings from the question to a sample input.
input_dict = {
    "key1": {
        "key2": [
            {"key3": "value3", "key4": "value4"},
            {"key3": None},
        ],
    },
}
output_dict = {}

# key1:key2:[]:key3 => key2:[]:key3
transform_all("key1:key2:[]:key3", "key2:[]:key3", input_dict, output_dict)
# key1:key2:[]:key4 => key2:[]:key5
transform_all("key1:key2:[]:key4", "key2:[]:key5", input_dict, output_dict)

print(output_dict)
If you have any questions or other situations that fail, ask! These are fun challenges you are giving us.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.