[英]Comparing Python dictionaries and nested dictionaries
我知道已經有幾個類似的問題,但我的問題對我來說相當不同,也更困難。 我有兩個字典:
# Sample input from the question: d1 is the old dictionary, d2 the new one.
d1 = {
    'a': {
        'b': {'cs': 10},
        'd': {'cs': 20},
    },
}
d2 = {
    'a': {
        'b': {'cs': 30},
        'd': {'cs': 20},
    },
    'newa': {
        'q': {'cs': 50},
    },
}
即d1
有鍵'a'
,而d2
有鍵'a'
和'newa'
(換句話說, d1
是我的舊字典, d2
是我的新字典)。
我想迭代這些字典,如果鍵是相同的,檢查它的值(嵌套字典),例如當我在d2
找到鍵'a'
時,我將檢查是否有'b'
,如果是,檢查值'cs'
(從10
更改為30
),如果此值更改,我想打印它。
另一種情況是,我想從d2
獲取鍵'newa'
作為新添加的密鑰。
因此,在遍歷這 2 個 dicts 之后,這是預期的輸出:
"d2" has new key "newa"
Value of "cs" is changed from 10 to 30 of key "b" which is of key "a"
我有以下代碼,我嘗試了很多種循環,但它們都無法正常工作,而且多層循環本身也不是一個好的做法,因此我想知道是否可以用遞歸代碼得到預期的輸出。
# Attempt from the question: walk the top-level keys of both dictionaries and
# report which second-level keys they share. It only looks two levels deep.
# NOTE(review): the original indentation was lost; the `else` is assumed to
# belong to the top-level key comparison -- confirm against the original post.
for k, v in d1.items():
    for k1, v1 in d2.items():
        # Compare keys by value. Identity (`is`) only matches when both
        # dictionaries happen to hold the very same key object.
        if k == k1:
            print(k)
            for k2 in v:
                for k3 in v1:
                    if k2 == k3:
                        # Single formatted argument so Python 2 and Python 3
                        # print the same text.
                        print("%s sub key matched" % (k2,))
        else:
            print("sorry no match found")
使用遞歸比較 2 個字典:
已針對 Python 3 修改(同樣適用於 Python 2):
# Sample dictionaries used by the comparison calls below.
d1 = {
    'a': {
        'b': {'cs': 10},
        'd': {'cs': 20},
    },
}
d2 = {
    'a': {
        'b': {'cs': 30},
        'd': {'cs': 20},
    },
    'newa': {
        'q': {'cs': 50},
    },
}
def findDiff(d1, d2, path=""):
    """Print the differences found when walking d1 against d2.

    Only keys of d1 are visited, so keys that exist solely in d2 are not
    reported; call the function a second time with the arguments swapped to
    see those.

    Args:
        d1: dictionary whose keys drive the traversal.
        d2: dictionary compared against d1.
        path: " -> "-separated chain of parent keys; empty at the top level.

    Returns:
        None. Findings are written to standard output.
    """
    for k in d1:
        if k not in d2:
            print("%s%s as key not in d2\n" % ("%s: " % path if path else "", k))
            continue
        if isinstance(d1[k], dict) and isinstance(d2[k], dict):
            # Descend only when both sides are dictionaries; a dict on one side
            # only is reported below as a plain value change instead of
            # crashing inside the recursive call.
            child_path = "%s -> %s" % (path, k) if path else str(k)
            findDiff(d1[k], d2[k], child_path)
        elif d1[k] != d2[k]:
            # `elif` keeps a nested change from being reported a second time
            # at every enclosing level.
            result = [
                "%s: " % path,
                " - %s : %s" % (k, d1[k]),
                " + %s : %s" % (k, d2[k]),
            ]
            print("\n".join(result))
# Run the comparison in both directions so keys unique to either side show up.
for heading, left, right in (
    ("comparing d1 to d2:", d1, d2),
    ("comparing d2 to d1:", d2, d1),
):
    print(heading)
    findDiff(left, right)
Python 2 舊答案:
def findDiff(d1, d2, path=""):
    """Print the differences found when walking d1 against d2.

    Only keys of d1 are visited; keys present solely in d2 are not reported.

    Args:
        d1: dictionary whose keys drive the traversal.
        d2: dictionary compared against d1.
        path: "->"-separated chain of parent keys; empty at the top level.

    Returns:
        None. Findings are written to standard output.
    """
    for k in d1:
        if k not in d2:
            print("%s :" % (path,))
            print("%s as key not in d2 \n" % (k,))
        elif isinstance(d1[k], dict) and isinstance(d2[k], dict):
            # Build the child path in a separate variable: reassigning `path`
            # here would leak the current key into the path of every sibling
            # visited afterwards.
            child_path = str(k) if path == "" else "%s->%s" % (path, k)
            findDiff(d1[k], d2[k], child_path)
        elif d1[k] != d2[k]:
            # Also reached when only one side is a dictionary, so a type
            # change is reported instead of crashing in the recursion.
            print("%s :" % (path,))
            print(" -  %s  :  %s" % (k, d1[k]))
            print(" +  %s  :  %s" % (k, d2[k]))
輸出:
comparing d1 to d2:
a -> b:
- cs : 10
+ cs : 30
comparing d2 to d1:
a -> b:
- cs : 30
+ cs : 10
對 user3 的代碼稍作修改,使其更完善:
# Sample dictionaries: they differ in a top-level value, in nested values,
# in the type of one nested value ('qqq'), and d2 has an extra key ('newa').
d1 = {
    'as': 1,
    'a': {
        'b': {
            'cs': 10,
            'qqq': {'qwe': 1},
        },
        'd': {'csd': 30},
    },
}
d2 = {
    'as': 3,
    'a': {
        'b': {
            'cs': 30,
            'qqq': 123,
        },
        'd': {'csd': 20},
    },
    'newa': {
        'q': {'cs': 50},
    },
}
def compare_dictionaries(dict_1, dict_2, dict_1_name, dict_2_name, path=""):
    """Compare two dictionaries recursively to find non-matching elements.

    Args:
        dict_1: dictionary 1
        dict_2: dictionary 2
        dict_1_name: label used for dict_1 in the report
        dict_2_name: label used for dict_2 in the report
        path: "[key][key]..." prefix of the level being compared; empty at
            the top level.

    Returns:
        A string with one newline-terminated line per difference: missing
        keys of this level first, then differing values of this level, then
        everything reported by nested dictionaries. Empty when nothing
        differs.
    """
    key_errors = []
    value_errors = []
    nested_errors = []

    for k in dict_1:
        # (k,) keeps tuple keys from being unpacked by the % operator.
        key_path = path + "[%s]" % (k,)
        if k not in dict_2:
            key_errors.append(
                "Key %s%s not in %s\n" % (dict_1_name, key_path, dict_2_name)
            )
        elif isinstance(dict_1[k], dict) and isinstance(dict_2[k], dict):
            # Forward the caller's labels so nested lines use the same names
            # as top-level ones.
            nested_errors.append(
                compare_dictionaries(
                    dict_1[k], dict_2[k], dict_1_name, dict_2_name, key_path
                )
            )
        elif dict_1[k] != dict_2[k]:
            value_errors.append(
                "Value of %s%s (%s) not same as %s%s (%s)\n"
                % (dict_1_name, key_path, dict_1[k], dict_2_name, key_path, dict_2[k])
            )

    for k in dict_2:
        if k not in dict_1:
            key_errors.append(
                "Key %s%s not in %s\n"
                % (dict_2_name, path + "[%s]" % (k,), dict_1_name)
            )

    return "".join(key_errors + value_errors + nested_errors)
# Build the difference report for the sample dictionaries and display it.
# print() with a single argument behaves the same on Python 2 and Python 3.
a = compare_dictionaries(d1, d2, 'd1', 'd2')
print(a)
輸出:
Key d2[newa] not in d1
Value of d1[as] (1) not same as d2[as] (3)
Value of d1[a][b][cs] (10) not same as d2[a][b][cs] (30)
Value of d1[a][b][qqq] ({'qwe': 1}) not same as d2[a][b][qqq] (123)
Value of d1[a][d][csd] (30) not same as d2[a][d][csd] (20)
這應該提供您需要的有用功能:
對於 Python 2.7
def isDict(obj):
    """Return True when obj is a dict or an instance of a dict subclass."""
    return isinstance(obj, dict)


def containsKeyRec(vKey, vDict):
    """Return True when vKey is a key of vDict or of any dict nested in it."""
    for curKey in vDict:
        if curKey == vKey or (isDict(vDict[curKey]) and containsKeyRec(vKey, vDict[curKey])):
            return True
    return False


def getValueRec(vKey, vDict):
    """Return the value stored under the first occurrence of vKey.

    Keys are visited in iteration order; a nested dictionary is searched
    before the keys that follow it.

    Returns:
        The value found, or None when vKey does not occur anywhere. A stored
        None cannot be told apart from "not found"; use containsKeyRec when
        that distinction matters.
    """
    for curKey in vDict:
        if curKey == vKey:
            return vDict[curKey]
        # Test for presence rather than truthiness of the nested result so
        # that falsy stored values (0, "", {}) are still returned.
        if isDict(vDict[curKey]) and containsKeyRec(vKey, vDict[curKey]):
            return getValueRec(vKey, vDict[curKey])
    return None
# Sample dictionaries for the helper functions.
d1 = {'a': {'b': {'cs': 10}, 'd': {'cs': 20}}}
d2 = {'a': {'b': {'cs': 30}, 'd': {'cs': 20}}, 'newa': {'q': {'cs': 50}}}

# For every top-level key of d1, look it up anywhere inside d2 and compare.
# Each print() receives a single string, so the output is the same on
# Python 2.7 and Python 3.
for key in d1:
    if containsKeyRec(key, d2):
        print("dict d2 contains key: " + key)
        d2Value = getValueRec(key, d2)
        if d1[key] == d2Value:
            print("values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value))
        else:
            print("values are not equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value))
    else:
        print("dict d2 does not contain key: " + key)
對於 Python 3(或更高版本):
def id_dict(obj):
    """Return True when obj is a dict or an instance of a dict subclass."""
    return isinstance(obj, dict)


def contains_key_rec(v_key, v_dict):
    """Return True when v_key is a key of v_dict or of any dict nested in it."""
    for cur_key in v_dict:
        if cur_key == v_key:
            return True
        if id_dict(v_dict[cur_key]) and contains_key_rec(v_key, v_dict[cur_key]):
            return True
    return False


def get_value_rec(v_key, v_dict):
    """Return the value stored under the first occurrence of v_key.

    Keys are visited in iteration order; a nested dictionary is searched
    before the keys that follow it.

    Returns:
        The value found, or None when v_key does not occur anywhere. A stored
        None cannot be told apart from "not found"; use contains_key_rec when
        that distinction matters.
    """
    for cur_key in v_dict:
        if cur_key == v_key:
            return v_dict[cur_key]
        # Test for presence rather than truthiness of the nested result so
        # that falsy stored values (0, "", {}) are still returned.
        if id_dict(v_dict[cur_key]) and contains_key_rec(v_key, v_dict[cur_key]):
            return get_value_rec(v_key, v_dict[cur_key])
    return None
# Sample dictionaries for the helper functions.
d1 = {
    'a': {'b': {'cs': 10}, 'd': {'cs': 20}},
}
d2 = {
    'a': {'b': {'cs': 30}, 'd': {'cs': 20}},
    'newa': {'q': {'cs': 50}},
}

# For every top-level key of d1, look it up anywhere inside d2 and compare.
for key in d1:
    if not contains_key_rec(key, d2):
        print("dict d2 does not contain key: " + key)
        continue

    d2_value = get_value_rec(key, d2)
    if d1[key] != d2_value:
        message = ("values are not equal:\n"
                   "list1: " + str(d1[key]) + "\n" +
                   "list2: " + str(d2_value))
    else:
        message = "values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2_value)
    print(message)
為什麼不使用 deepdiff 庫呢?
見: https://github.com/seperman/deepdiff
>>> from deepdiff import DeepDiff
>>> t1 = {1:1, 3:3, 4:4}
>>> t2 = {1:1, 3:3, 5:5, 6:6}
>>> ddiff = DeepDiff(t1, t2)
>>> print(ddiff)
{'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}}
當然它更強大,查看文檔了解更多。
對於 python 3 或更高版本,用於比較任何數據的代碼。
def do_compare(data1, data2, data1_name, data2_name, path=""):
    """Compare two values of any type and print every difference found.

    Dictionaries and lists are compared element by element through
    compare_dict and compare_list; any other pair is compared with `!=`.

    Args:
        data1: first value.
        data2: second value.
        data1_name: label used for data1 in the printed messages.
        data2_name: label used for data2 in the printed messages.
        path: "[key][index]..." prefix of the position being compared; empty
            for the top-level call.

    Returns:
        None. Differences are printed; identical top-level inputs are only
        logged at INFO level.
    """
    # Function-scope import: the snippet has no module-level import block.
    # NOTE(review): the original referenced an undefined `log` object; the
    # standard logging module is assumed to be what was meant.
    import logging

    # `not path` is checked first so the potentially deep equality test runs
    # only for the top-level call.
    if not path and data1 == data2:
        logging.getLogger(__name__).info("Both data have same content")
        return

    if isinstance(data1, dict) and isinstance(data2, dict):
        compare_dict(data1, data2, data1_name, data2_name, path)
    elif isinstance(data1, list) and isinstance(data2, list):
        compare_list(data1, data2, data1_name, data2_name, path)
    elif data1 != data2:
        value_err = "Value of %s%s (%s) not same as %s%s (%s)\n" \
            % (data1_name, path, data1, data2_name, path, data2)
        print(value_err)


def compare_dict(data1, data2, data1_name, data2_name, path):
    """Print keys missing on either side and compare the values of shared keys."""
    for k in data1:
        # (k,) keeps tuple keys from being unpacked by the % operator.
        key_path = path + "[%s]" % (k,)
        if k not in data2:
            key_err = "Key %s%s not in %s\n" % (data1_name, key_path, data2_name)
            print(key_err)
        else:
            do_compare(data1[k], data2[k], data1_name, data2_name, key_path)

    for k in data2:
        if k not in data1:
            key_err = "Key %s%s not in %s\n" \
                % (data2_name, path + "[%s]" % (k,), data1_name)
            print(key_err)


def compare_list(data1, data2, data1_name, data2_name, path):
    """Print a length mismatch, then compare the items both lists have in common."""
    data1_length = len(data1)
    data2_length = len(data2)
    if data1_length != data2_length:
        value_err = "No: of items in %s%s (%s) not same as %s%s (%s)\n" \
            % (data1_name, path, data1_length, data2_name, path, data2_length)
        print(value_err)

    # zip stops at the shorter list, so no IndexError has to be caught and an
    # IndexError raised inside a nested comparison is no longer hidden.
    for index, (item1, item2) in enumerate(zip(data1, data2)):
        do_compare(item1, item2, data1_name, data2_name, path + "[%s]" % index)
再補充一個功能更多的版本:
如果您如下定義deep_diff
函數並在@rkatkam 的示例中調用它,您將得到:
>>> deep_diff(d1, d2)
{'newa': (None, {'q': {'cs': 50}}), 'a': {'b': {'cs': (10, 30)}}}
下面是函數定義:
def deep_diff(x, y, parent_key=None, exclude_keys=(), epsilon_keys=()):
    """
    Take the deep diff of JSON-like dictionaries

    No warranties when keys, or values are None

    Args:
        x: first value (dict, list or scalar).
        y: second value.
        parent_key: key or list index under which x and y were found; None
            for the top-level call.
        exclude_keys: dictionary keys to ignore at every level.
        epsilon_keys: keys whose numeric values count as unchanged when each
            value is at least half of the other.

    Returns:
        None when nothing differs. Otherwise an (x, y) tuple for differing
        scalars or mismatched container types, a dictionary of per-key diffs
        for dicts, or a list of per-index diffs (None where equal) for lists.
        A key or index present on one side only is reported as (value, None)
        or (None, value).
    """
    # Function-scope import: the snippet has no module-level import block.
    from copy import deepcopy

    EPSILON = 0.5
    rho = 1 - EPSILON

    def _as_float(value):
        # Stand-in for the undefined float_or_None helper the original called.
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    if x == y:
        return None

    if parent_key in epsilon_keys:
        xfl, yfl = _as_float(x), _as_float(y)
        # NOTE: for two negative numbers the last two conditions can never
        # both hold, so only positive pairs are ever treated as close.
        if xfl and yfl and xfl * yfl >= 0 and rho * xfl <= yfl and rho * yfl <= xfl:
            return None

    same_container = isinstance(x, (list, dict)) and (
        isinstance(x, type(y)) or isinstance(y, type(x))
    )
    if not same_container:
        return x, y

    if isinstance(x, dict):
        d = type(x)()  # handles OrderedDict's as well
        # Walk the keys in dictionary order (not via set operations) so the
        # result has a reproducible key order.
        for k in x:
            if k not in y and k not in exclude_keys:
                d[k] = (deepcopy(x[k]), None)
        for k in y:
            if k not in x and k not in exclude_keys:
                d[k] = (None, deepcopy(y[k]))
        for k in x:
            if k not in y or k in exclude_keys:
                continue
            next_d = deep_diff(
                x[k], y[k], parent_key=k, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
            )
            if next_d is not None:
                d[k] = next_d
        return d if d else None

    # assume a list:
    d = [None] * max(len(x), len(y))
    for i, (x_val, y_val) in enumerate(zip(x, y)):
        d[i] = deep_diff(
            x_val, y_val, parent_key=i, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
        )
    # At most one of these two loops runs: the tail of the longer list.
    for i in range(len(y), len(x)):
        d[i] = (x[i], None)
    for i in range(len(x), len(y)):
        d[i] = (None, y[i])
    return None if all(item is None for item in d) else d
添加非遞歸解決方案。
# Non Recursively traverses through a large nested dictionary
# Uses a queue of dicts_to_process to keep track of what needs to be traversed rather than using recursion.
# Slightly more complex than the recursive version, but arguably better as there is no risk of stack overflow from
# too many levels of recursion
def get_dict_diff_non_recursive(dict1, dict2):
    """Print the path of every key of dict1 whose value differs in dict2.

    The traversal keeps a list of pending (dict, dict, path) triples instead
    of recursing. Only keys of dict1 are visited, so keys that exist solely
    in dict2 are not reported.

    Args:
        dict1: dictionary whose keys drive the traversal.
        dict2: dictionary compared against dict1.

    Returns:
        None. One "difference at <path>" line is printed per differing key,
        with path components joined by "/". Dictionaries inside a list are
        reported under the path of the list itself (no index component).
    """
    dicts_to_process = [(dict1, dict2, "")]
    while dicts_to_process:
        d1, d2, current_path = dicts_to_process.pop()
        for key, value1 in d1.items():
            # A fresh variable per key: extending current_path in place would
            # leak this key into the paths of its siblings.
            key_path = f"{current_path}/{key}" if current_path else f"{key}"
            if key not in d2:
                print(f"difference at {key_path}")
                continue
            value2 = d2[key]
            if value1 == value2:
                continue  # equal values cannot contain nested differences
            print(f"difference at {key_path}")
            if isinstance(value1, dict) and isinstance(value2, dict):
                dicts_to_process.append((value1, value2, key_path))
            elif isinstance(value1, list) and isinstance(value2, list):
                # Pair the items positionally and descend only where both
                # sides hold a dictionary.
                dicts_to_process.extend(
                    (item1, item2, key_path)
                    for item1, item2 in zip(value1, value2)
                    if isinstance(item1, dict) and isinstance(item2, dict)
                )
我不喜歡我在許多線程中找到的許多答案......他們中的很多人建議使用非常強大的deepdiff
別誤會,它確實很強大,但它給不出我想要的輸出:我要的不只是一串差異字符串,也不是一個新構建的、看起來很奇怪的字典(其中的新鍵是從原始字典的嵌套鍵拼湊而來),而是一個保留原始鍵、只包含增量值的真正的字典。
我的用例是通過 MQTT 網絡發送更小的有效負載,如果沒有差異則什麼都不發送。
我的解決方案部分借鑒自此鏈接,但經過修改後只給出增量。 然後我遞歸地解析結果:如果某個值是嵌套字典,就為構建最終的 diff 字典再次調用diff_dict()
。 結果證明它比那里的許多示例簡單得多。 僅供參考,它不關心排序。
我的解決方案:
def diff_dict(d1, d2):
    """Return a dictionary holding only what d2 adds to or changes in d1.

    Keys keep their original names and nesting. Keys that were removed
    (present in d1 but not in d2) are not represented in the result.

    Args:
        d1: the original dictionary.
        d2: the new dictionary.

    Returns:
        A dictionary, in d2's key order, mapping each added or changed key to
        its new value; for a nested dictionary only the nested delta is kept.
        Empty when d2 adds and changes nothing.
    """
    deltas = {}
    for key, new_value in d2.items():
        if key not in d1:
            deltas[key] = (None, new_value)
        elif d1[key] != new_value:
            deltas[key] = (d1[key], new_value)
    return parse_deltas(deltas)


def parse_deltas(deltas: dict):
    """Reduce a {key: (old, new)} mapping to a {key: new-or-nested-delta} mapping.

    When both old and new are dictionaries the pair is diffed recursively and
    kept only if that diff is non-empty; otherwise the new value is taken
    as is.
    """
    res = {}
    for k, v in deltas.items():
        old_value, new_value = v[0], v[1]
        # Recurse only when both sides are dictionaries; a dictionary that
        # was replaced by another type is reported as a plain new value.
        if isinstance(old_value, dict) and isinstance(new_value, dict):
            tmp = diff_dict(old_value, new_value)
            if tmp:
                res[k] = tmp
        else:
            res[k] = new_value
    return res
例子:
# Example input: `new` changes some values of `original` (also inside the
# nested dictionaries), omits some keys, and adds the key 'test_added'.
original = {
    'int': 1,
    'float': 0.1000,
    'string': 'some string',
    'bool': True,
    'nested1': {
        'int': 2,
        'float': 0.2000,
        'string': 'some string2',
        'bool': True,
        'nested2': {
            'string': 'some string3',
        },
    },
}
new = {
    'int': 2,
    'string': 'some string',
    'nested1': {
        'int': 2,
        'float': 0.5000,
        'string': 'new string',
        'bool': False,
        'nested2': {
            'string': 'new string nested 2 time',
        },
    },
    'test_added': 'added_val',
}

# Show the computed delta between the two dictionaries.
delta = diff_dict(original, new)
print(delta)
輸出:
{'int': 2, 'nested1': {'string': 'new string', 'nested2': {'string': 'new string nested 2 time'}, 'bool': False, 'float': 0.5}, 'test_added': 'added_val'}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.