[英]Python transform dict recursively based on predefined mappings
我有一個用例:我需要遍歷一個字典(其嵌套值可能是字符串、字典或列表),並根據業務團隊預先定義的映射創建一個新的字典。 當要求還只是 1:1 的轉換時,我的第一個實現是直截了當的:
我的代碼看起來像這樣:
def recursively_transform(parent_keys='', current_key='', container=None):
    """Rebuild ``container`` recursively, transforming every string leaf.

    ``parent_keys`` is the ":"-separated path of the enclosing keys (with
    "[]" appended for each list level) and ``current_key`` is the key under
    which ``container`` was found.

    String leaves are passed to ``do_something_and_return``; when
    ``current_key`` is in ``mapping`` the result is also handed to
    ``populate``. Mappings and other iterables are rebuilt using their own
    class. ``None`` is returned unchanged.

    NOTE(review): ``do_something_and_return``, ``mapping``, ``populate`` and
    ``key_required`` are defined outside this snippet; ``basestring`` and
    ``collections.Mapping`` imply a Python 2 interpreter -- confirm.

    Raises:
        Exception: if ``container`` is neither None, a string, a mapping nor
            an iterable.
    """
    if container is None:
        return None

    rebuild = container.__class__

    # Leaf value: transform it and optionally record it.
    if isinstance(container, basestring):
        transformed = do_something_and_return(parent_keys, current_key, container)
        if current_key in mapping:
            populate(parent_keys + current_key, transformed)
        return transformed

    # Nested mapping: extend the path with a separator and recurse per key.
    if isinstance(container, collections.Mapping):
        if parent_keys:
            parent_keys = ''.join([parent_keys, ":"])
        return rebuild(
            (name, recursively_transform(parent_keys + name, name, container[name]))
            for name in container
            if key_required(parent_keys, current_key)
        )

    # Any other iterable: mark the list level with "[]" and recurse per item.
    if isinstance(container, collections.Iterable):
        return rebuild(
            recursively_transform(parent_keys + "[]", current_key, item)
            for item in container
        )

    raise Exception("")
如您所見,在方法 do_something_and_return 中,我使用參數 parent_keys 和 current_key 對值進行一些轉換並返回新值。 每個 parent_keys 和 current_key 組合所對應的轉換步驟在外部映射數據庫中指定。
但是,現在要求已經改變為支持復雜的轉換(不再是 1:1)。 即,在我的映射數據庫中,將指定鍵的新路徑。 新路徑可以是任何結構。 例如,鍵/值對有時必須被平坦化,有時則必須反過來(變得更深層地嵌套),有時新舊結構之間不會有任何直接的對應關系。
例子,
key1:key2:[]:key3 => key2:[]:key3
key1:key2:[]:key4 => key2:[]:key5
這意味着輸入字典如下:
{key1:{key2:[{key3: "value3", key4: "value4"}, {key3:None}]}}
會成為
{key2:[{key3:"value3_after_transformation", key5:"value4_after_transformation"}, {key3:None}]}
其中 : 是我的描述語言中父鍵和子鍵之間的分隔符, [] 表示父鍵的值是一個列表。
我很困惑在這種情況下該方法應該是什么。 我能想到處理所有這些情況的唯一方法是遞歸遍歷所有鍵,然后通過檢查目標鍵的存在並適當地填充它們來動態填充另一個全局字典。 但是在處理嵌套列表時這並不容易。 此外,這聽起來不像我上面使用容器和他們的孩子一樣優雅的解決方案。 以一般方式和優雅方式執行此操作的最佳方法是什么?
謝謝!
好的,我成功了。 這通過了你給定的測試用例,但它很長。 它找到給定模板的所有可能路徑,然后根據新路徑填充新的dict
import re
def prepare_path(path):
    """Split a concrete path string into a list of dict keys and list indexes.

    Components are separated by ":"; a component written as "[N]" is a list
    index and is returned as the integer N, every other component is
    returned as a string key.

    Raises:
        ValueError: if a bracketed component does not contain an integer
            (for example the bare placeholder "[]").
    """
    tokens = re.findall(r"[^:]+?(?=\[|:|$)|\[\d*?\]", path)
    return [
        int(token[1:-1]) if token.startswith("[") and token.endswith("]") else token
        for token in tokens
    ]
def prepare_template(template):
    """Split a path template into keys, fixed indexes and list placeholders.

    Components are separated by ":". A component "[N]" becomes the integer
    N. An empty "[]" becomes ``("ListIndex", k)``, where k is the zero-based
    position of that placeholder among the template's placeholders, so that
    nested list levels can be told apart. Every other component stays a
    string key.

    Returns:
        A list mixing ``str`` keys, ``int`` indexes and ``("ListIndex", k)``
        tuples, in template order.
    """
    # split path template
    parts = re.findall(r"[^:]+?(?=\[|:|$)|\[\d*?\]", template)
    # prepare path template
    counter = 0
    for i, element in enumerate(parts):
        if element[0] == "[" and element[-1] == "]":
            if len(element) > 2:
                element = int(element[1:-1])
            else:
                element = ("ListIndex", counter)
                # Number each placeholder separately; without this every
                # "[]" would refer to the outermost list level.
                counter += 1
        parts[i] = element
    return parts
def fill_template(template, list_indexes):
    """Render a prepared template as a concrete ":"-separated path string.

    Args:
        template: sequence of ``str`` keys, ``int`` list indexes and
            ``("ListIndex", k)`` placeholder tuples.
        list_indexes: sequence of concrete indexes; placeholder k is
            replaced by ``list_indexes[k]``.

    Returns:
        The path string, with every list index written as "[N]".

    Raises:
        IndexError: if a placeholder refers to a position that
            ``list_indexes`` does not have.
    """
    out = []
    for element in template:
        if isinstance(element, tuple):
            element = f"[{list_indexes[element[1]]}]"
        elif isinstance(element, int):
            # Fixed indexes are stored as ints; str.join only accepts
            # strings, so write them back in bracket form.
            element = f"[{element}]"
        out.append(element)
    return ":".join(out)
def populate(result_dict, target_path, value):
    """Store ``value`` in ``result_dict`` at the location named by ``target_path``.

    The path is parsed with ``prepare_path``. Intermediate containers are
    created on demand: a dict when the following component is a string key,
    a list otherwise. Lists are padded with ``None`` up to the requested
    index.

    If the final component is a list index, the value is appended to the
    list (the index value itself is not used); otherwise it is assigned
    under the final key.
    """
    steps = prepare_path(target_path)
    node = result_dict

    for position, step in enumerate(steps[:-1]):
        follower = steps[position + 1]

        if isinstance(step, str):  # dict key
            needs_container = step not in node
        elif isinstance(step, int):  # list index
            shortfall = step - len(node) + 1
            if shortfall > 0:
                # Pad so that node[step] exists.
                node.extend([None] * shortfall)
            needs_container = node[step] is None
        else:
            needs_container = False

        if needs_container:
            # The type of the next component decides what to create here.
            node[step] = {} if isinstance(follower, str) else []

        node = node[step]

    leaf = steps[-1]
    if isinstance(leaf, int):
        node.append(value)
    else:
        node[leaf] = value
def get_value(container, target_path):
    """Return the value found in ``container`` by following ``target_path``.

    The path is parsed with ``prepare_path`` and each component (string key
    or integer index) is applied in turn as a subscript. Lookup errors from
    the subscripting propagate to the caller.
    """
    node = container
    for step in prepare_path(target_path):
        node = node[step]
    return node
def transform(old_path, new_path, old_container, new_container, transform_value=lambda *args: ' '.join(args)):
    """Copy one value from ``old_container`` to ``new_container``, transformed.

    The value at ``old_path`` is read with ``get_value``, passed through
    ``transform_value(old_path, new_path, value)`` and written to
    ``new_path`` with ``populate``. The default ``transform_value`` joins
    its three arguments with single spaces, so it only works for string
    values.
    """
    source_value = get_value(old_container, old_path)
    populate(
        new_container,
        new_path,
        transform_value(old_path, new_path, source_value),
    )
def get_all_paths(prepared_template, container):
    """List every concrete path in ``container`` that matches the template.

    Args:
        prepared_template: sequence of ``str`` keys, ``int`` indexes and
            placeholder tuples, as produced by ``prepare_template``.
        container: the nested structure to search.

    Returns:
        A list of ``(path, indexes)`` pairs. ``path`` is the concrete path
        string, each component followed by ":"; ``indexes`` is a tuple of
        the list positions chosen for the placeholders, outermost first.
        A string key missing from ``container`` yields no paths.

    Raises:
        ValueError: if a placeholder meets a non-list container, or a
            template component has an unsupported type.
    """
    if not prepared_template:
        return [("", ())]

    head, *tail = prepared_template

    # Placeholder: branch out over every element of the list.
    if isinstance(head, tuple):
        if not isinstance(container, list):
            raise ValueError(container, head)
        found = []
        for index, item in enumerate(container):
            for suffix, indexes in get_all_paths(tail, item):
                found.append((f"[{index}]:" + suffix, (index,) + indexes))
        return found

    # Fixed key or fixed index: descend into exactly one child.
    if isinstance(head, str):
        if head not in container:
            return []
        label = f"{head}:"
    elif isinstance(head, int):
        label = f"[{head}]:"
    else:
        raise ValueError

    return [
        (label + suffix, indexes)
        for suffix, indexes in get_all_paths(tail, container[head])
    ]
def transform_all(old_template, new_template, old_container, new_container, transform_value=lambda op, np, value: value):
    """Apply one template mapping to every matching path in ``old_container``.

    Both templates are parsed with ``prepare_template``. Each concrete path
    matching ``old_template`` is found with ``get_all_paths``; its list
    indexes are substituted into ``new_template`` with ``fill_template``,
    and the value is moved with ``transform``. The default
    ``transform_value`` returns the value unchanged.
    """
    target_parts = prepare_template(new_template)
    source_parts = prepare_template(old_template)
    for source_path, indexes in get_all_paths(source_parts, old_container):
        target_path = fill_template(target_parts, indexes)
        transform(source_path, target_path, old_container, new_container, transform_value)
# Demo: rebuild the example from the question using two template mappings.
input_dict = {"key1": {"key2": [{"key3": "value3", "key4": "value4"}, {"key3": None}]}}
output_dict = {}
for source_template, target_template in (
    ("key1:key2:[]:key3", "key2:[]:key3"),
    ("key1:key2:[]:key4", "key2:[]:key5"),
):
    transform_all(source_template, target_template, input_dict, output_dict)
print(output_dict)
如果您有任何問題或其他情況失敗,請詢問! 這些都是您給我們帶來的有趣挑戰。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.