I know there are several similar questions out there, but my question is quite different and difficult for me. I have two dictionaries:
d1 = {'a': {'b': {'cs': 10}, 'd': {'cs': 20}}}
d2 = {'a': {'b': {'cs': 30}, 'd': {'cs': 20}}, 'newa': {'q': {'cs': 50}}}
ie d1
has key 'a'
, and d2
has keys 'a'
and 'newa'
(in other words d1
is my old dict and d2
is my new dict).
I want to iterate over these dictionaries such that, if the key is same check for its value (nested dict), eg when I find key 'a'
in d2
, I will check whether there is 'b'
, if yes check value of 'cs'
(changed from 10
to 30
), if this value is changed I want to print it.
Another case is, I want to get key 'newa'
from d2
as the newly added key.
Hence, after iterating through these 2 dicts, this is the expected output:
"d2" has new key "newa"
Value of "cs" is changed from 10 to 30 of key "b" which is of key "a"
I have the following code with me, I am trying with many loops which are not working though, but is not a good option too, hence I am looking to find whether I can get expected output with a recursive piece of code.
for k, v in d1.iteritems():
for k1, v1 in d2.iteritems():
if k is k1:
print k
for k2 in v:
for k3 in v1:
if k2 is k3:
print k2, "sub key matched"
else:
print "sorry no match found"
comparing 2 dictionaries using recursion:
Edited for python 3 (works for python 2 as well):
d1= {'a':{'b':{'cs':10},'d':{'cs':20}}}
d2= {'a':{'b':{'cs':30} ,'d':{'cs':20}},'newa':{'q':{'cs':50}}}
def findDiff(d1, d2, path=""):
for k in d1:
if k in d2:
if type(d1[k]) is dict:
findDiff(d1[k],d2[k], "%s -> %s" % (path, k) if path else k)
if d1[k] != d2[k]:
result = [ "%s: " % path, " - %s : %s" % (k, d1[k]) , " + %s : %s" % (k, d2[k])]
print("\n".join(result))
else:
print ("%s%s as key not in d2\n" % ("%s: " % path if path else "", k))
print("comparing d1 to d2:")
findDiff(d1,d2)
print("comparing d2 to d1:")
findDiff(d2,d1)
Python 2 old answer:
def findDiff(d1, d2, path=""):
for k in d1:
if (k not in d2):
print (path, ":")
print (k + " as key not in d2", "\n")
else:
if type(d1[k]) is dict:
if path == "":
path = k
else:
path = path + "->" + k
findDiff(d1[k],d2[k], path)
else:
if d1[k] != d2[k]:
print (path, ":")
print (" - ", k," : ", d1[k])
print (" + ", k," : ", d2[k])
Output:
comparing d1 to d2:
a -> b:
- cs : 10
+ cs : 30
comparing d2 to d1:
a -> b:
- cs : 30
+ cs : 10
Modified user3's code to make it even better
d1= {'as': 1, 'a':
{'b':
{'cs':10,
'qqq': {'qwe':1}
},
'd': {'csd':30}
}
}
d2= {'as': 3, 'a':
{'b':
{'cs':30,
'qqq': 123
},
'd':{'csd':20}
},
'newa':
{'q':
{'cs':50}
}
}
def compare_dictionaries(dict_1, dict_2, dict_1_name, dict_2_name, path=""):
"""Compare two dictionaries recursively to find non mathcing elements
Args:
dict_1: dictionary 1
dict_2: dictionary 2
Returns:
"""
err = ''
key_err = ''
value_err = ''
old_path = path
for k in dict_1.keys():
path = old_path + "[%s]" % k
if not dict_2.has_key(k):
key_err += "Key %s%s not in %s\n" % (dict_2_name, path, dict_2_name)
else:
if isinstance(dict_1[k], dict) and isinstance(dict_2[k], dict):
err += compare_dictionaries(dict_1[k],dict_2[k],'d1','d2', path)
else:
if dict_1[k] != dict_2[k]:
value_err += "Value of %s%s (%s) not same as %s%s (%s)\n"\
% (dict_1_name, path, dict_1[k], dict_2_name, path, dict_2[k])
for k in dict_2.keys():
path = old_path + "[%s]" % k
if not dict_1.has_key(k):
key_err += "Key %s%s not in %s\n" % (dict_2_name, path, dict_1_name)
return key_err + value_err + err
a = compare_dictionaries(d1,d2,'d1','d2')
print a
Output:
Key d2[newa] not in d1
Value of d1[as] (1) not same as d2[as] (3)
Value of d1[a][b][cs] (10) not same as d2[a][b][cs] (30)
Value of d1[a][b][qqq] ({'qwe': 1}) not same as d2[a][b][qqq] (123)
Value of d1[a][d][csd] (30) not same as d2[a][d][csd] (20)
This should provide what you need with helpful functions:
For Python 2.7
def isDict(obj):
return obj.__class__.__name__ == 'dict'
def containsKeyRec(vKey, vDict):
for curKey in vDict:
if curKey == vKey or (isDict(vDict[curKey]) and containsKeyRec(vKey, vDict[curKey])):
return True
return False
def getValueRec(vKey, vDict):
for curKey in vDict:
if curKey == vKey:
return vDict[curKey]
elif isDict(vDict[curKey]) and getValueRec(vKey, vDict[curKey]):
return containsKeyRec(vKey, vDict[curKey])
return None
d1= {'a':{'b':{'cs':10},'d':{'cs':20}}}
d2= {'a':{'b':{'cs':30} ,'d':{'cs':20}},'newa':{'q':{'cs':50}}}
for key in d1:
if containsKeyRec(key, d2):
print "dict d2 contains key: " + key
d2Value = getValueRec(key, d2)
if d1[key] == d2Value:
print "values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value)
else:
print "values are not equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value)
else:
print "dict d2 does not contain key: " + key
For Python 3 (or higher):
def id_dict(obj):
return obj.__class__.__name__ == 'dict'
def contains_key_rec(v_key, v_dict):
for curKey in v_dict:
if curKey == v_key or (id_dict(v_dict[curKey]) and contains_key_rec(v_key, v_dict[curKey])):
return True
return False
def get_value_rec(v_key, v_dict):
for curKey in v_dict:
if curKey == v_key:
return v_dict[curKey]
elif id_dict(v_dict[curKey]) and get_value_rec(v_key, v_dict[curKey]):
return contains_key_rec(v_key, v_dict[curKey])
return None
d1 = {'a': {'b': {'cs': 10}, 'd': {'cs': 20}}}
d2 = {'a': {'b': {'cs': 30}, 'd': {'cs': 20}}, 'newa': {'q': {'cs': 50}}}
for key in d1:
if contains_key_rec(key, d2):
d2_value = get_value_rec(key, d2)
if d1[key] == d2_value:
print("values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2_value))
pass
else:
print("values are not equal:\n"
"list1: " + str(d1[key]) + "\n" +
"list2: " + str(d2_value))
else:
print("dict d2 does not contain key: " + key)
why not use deepdiff library.
see it at: https://github.com/seperman/deepdiff
>>> from deepdiff import DeepDiff
>>> t1 = {1:1, 3:3, 4:4}
>>> t2 = {1:1, 3:3, 5:5, 6:6}
>>> ddiff = DeepDiff(t1, t2)
>>> print(ddiff)
{'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}}
of course it is more powerful, check the doc for more.
For python 3 or higher, Code for comparing any data.
def do_compare(data1, data2, data1_name, data2_name, path=""):
if operator.eq(data1, data2) and not path:
log.info("Both data have same content")
else:
if isinstance(data1, dict) and isinstance(data2, dict):
compare_dict(data1, data2, data1_name, data2_name, path)
elif isinstance(data1, list) and isinstance(data2, list):
compare_list(data1, data2, data1_name, data2_name, path)
else:
if data1 != data2:
value_err = "Value of %s%s (%s) not same as %s%s (%s)\n"\
% (data1_name, path, data1, data2_name, path, data2)
print (value_err)
# findDiff(data1, data2)
def compare_dict(data1, data2, data1_name, data2_name, path):
old_path = path
for k in data1.keys():
path = old_path + "[%s]" % k
if k not in data2:
key_err = "Key %s%s not in %s\n" % (data1_name, path, data2_name)
print (key_err)
else:
do_compare(data1[k], data2[k], data1_name, data2_name, path)
for k in data2.keys():
path = old_path + "[%s]" % k
if k not in data1:
key_err = "Key %s%s not in %s\n" % (data2_name, path, data1_name)
print (key_err)
def compare_list(data1, data2, data1_name, data2_name, path):
data1_length = len(data1)
data2_length = len(data2)
old_path = path
if data1_length != data2_length:
value_err = "No: of items in %s%s (%s) not same as %s%s (%s)\n"\
% (data1_name, path, data1_length, data2_name, path, data2_length)
print (value_err)
for index, item in enumerate(data1):
path = old_path + "[%s]" % index
try:
do_compare(data1[index], data2[index], data1_name, data2_name, path)
except IndexError:
pass
Adding a version that adds some more capabilities:
If you define the deep_diff
function as below and call it on @rkatkam's example you'll get:
>>> deep_diff(d1, d2)
{'newa': (None, {'q': {'cs': 50}}), 'a': {'b': {'cs': (10, 30)}}}
Here's the function definition:
def deep_diff(x, y, parent_key=None, exclude_keys=[], epsilon_keys=[]):
"""
Take the deep diff of JSON-like dictionaries
No warranties when keys, or values are None
"""
# pylint: disable=unidiomatic-typecheck
EPSILON = 0.5
rho = 1 - EPSILON
if x == y:
return None
if parent_key in epsilon_keys:
xfl, yfl = float_or_None(x), float_or_None(y)
if xfl and yfl and xfl * yfl >= 0 and rho * xfl <= yfl and rho * yfl <= xfl:
return None
if not (isinstance(x, (list, dict)) and (isinstance(x, type(y)) or isinstance(y, type(x)))):
return x, y
if isinstance(x, dict):
d = type(x)() # handles OrderedDict's as well
for k in x.keys() ^ y.keys():
if k in exclude_keys:
continue
if k in x:
d[k] = (deepcopy(x[k]), None)
else:
d[k] = (None, deepcopy(y[k]))
for k in x.keys() & y.keys():
if k in exclude_keys:
continue
next_d = deep_diff(
x[k], y[k], parent_key=k, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
)
if next_d is None:
continue
d[k] = next_d
return d if d else None
# assume a list:
d = [None] * max(len(x), len(y))
flipped = False
if len(x) > len(y):
flipped = True
x, y = y, x
for i, x_val in enumerate(x):
d[i] = (
deep_diff(
y[i], x_val, parent_key=i, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
)
if flipped
else deep_diff(
x_val, y[i], parent_key=i, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
)
)
for i in range(len(x), len(y)):
d[i] = (y[i], None) if flipped else (None, y[i])
return None if all(map(lambda x: x is None, d)) else d
Adding a non-recursive solution.
# Non Recursively traverses through a large nested dictionary
# Uses a queue of dicts_to_process to keep track of what needs to be traversed rather than using recursion.
# Slightly more complex than the recursive version, but arguably better as there is no risk of stack overflow from
# too many levels of recursion
def get_dict_diff_non_recursive(dict1, dict2):
dicts_to_process=[(dict1,dict2,"")]
while dicts_to_process:
d1,d2,current_path = dicts_to_process.pop()
for key in d1.keys():
current_path = os.path.join(current_path, f"{key}")
#print(f"searching path {current_path}")
if key not in d2 or d1[key] != d2[key]:
print(f"difference at {current_path}")
if type(d1[key]) == dict:
dicts_to_process.append((d1[key],d2[key],current_path))
elif type(d1[key]) == list and d1[key] and type(d1[key][0]) == dict:
for i in range(len(d1[key])):
dicts_to_process.append((d1[key][i], d2[key][i],current_path))
I have not liked many of the answers I have found across many threads... A lot of them recommend using deepdiff
which is very powerful dont get me wrong but it just does not give me the output I was desiring which is not just a string of the diffs, or a newly built strange-looking dictionary with new keys collected from the nested keys of the original... but actually return a real dictionary with the original keys and delta values.
My use case for this is to send smaller payloads or none if there is no difference over an MQTT network.
The soluton I found is partially stolen from this link , however modified it to just give me the deltas. Then I recursively parse it, calling diff_dict()
again if its nested to build the final diff dictionary. It turned out to be much simpler than many examples out there. FYI it does not care about sorting.
My Solution:
def diff_dict(d1, d2):
d1_keys = set(d1.keys())
d2_keys = set(d2.keys())
shared_keys = d1_keys.intersection(d2_keys)
shared_deltas = {o: (d1[o], d2[o]) for o in shared_keys if d1[o] != d2[o]}
added_keys = d2_keys - d1_keys
added_deltas = {o: (None, d2[o]) for o in added_keys}
deltas = {**shared_deltas, **added_deltas}
return parse_deltas(deltas)
def parse_deltas(deltas: dict):
res = {}
for k, v in deltas.items():
if isinstance(v[0], dict):
tmp = diff_dict(v[0], v[1])
if tmp:
res[k] = tmp
else:
res[k] = v[1]
return res
Example:
original = {
'int': 1,
'float': 0.1000,
'string': 'some string',
'bool': True,
'nested1': {
'int': 2,
'float': 0.2000,
'string': 'some string2',
'bool': True,
'nested2': {
'string': 'some string3'
}
}
}
new = {
'int': 2,
'string': 'some string',
'nested1': {
'int': 2,
'float': 0.5000,
'string': 'new string',
'bool': False,
'nested2': {
'string': 'new string nested 2 time'
}
},
'test_added': 'added_val'
}
print(diff_dict(original, new))
Output:
{'int': 2, 'nested1': {'string': 'new string', 'nested2': {'string': 'new string nested 2 time'}, 'bool': False, 'float': 0.5}, 'test_added': 'added_val'}
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.