[英]Using python difflib to compare more than two files
我想通過相互比較並突出顯示差異,來概覽多台(3 台以上)計算機的 ldd 依賴項列表。例如,如果我有一個如下所示的 dict:
my_ldd_outputs = {
01:"<ldd_output>",
02:"<ldd_output>",
...
09:"<ldd_output>",
10:"<ldd_output>"
}
我希望 output 看起來像
<identical line 1>
<identical line 2>
<identical line 3>
<differing line 4> (computer 01 02)
<differing line 4> (computer 04 05 06 07)
<differing line 4> (computer 08 09 10)
<identical line 5>
<identical line 6>
...
我的第一種方法涉及 python difflib。我的想法是先得到一個數據結構,使上述 my_ldd_outputs 字典中所有的 ldd_output 列表(即用 \n 拆分後的結果)長度相同,並且凡是在另一個 ldd_output 字符串中存在、而在當前列表中缺失的行,都用一個佔位字符串補上。因此,如果兩個文件看起來像這樣:
# Example input: two ldd outputs whose lines are identical except that
# ldd_1 contains "<extra line 5>", which ldd_2 does not have.
ldd_1 = """
<identical line 1>
<identical line 2>
<differing line 3>
<identical line 4>
<extra line 5>
<identical line 6>
"""
ldd_2 = """
<identical line 1>
<identical line 2>
<differing line 3>
<identical line 4>
<identical line 6>
"""
我的目標是將這些文件存儲為
# Desired aligned form: ldd_2 carries a "<None>" placeholder at the position
# of the line it lacks, so both texts have the same number of lines.
ldd_1 = """
<identical line 1>
<identical line 2>
<differing line 3>
<identical line 4>
<extra line 5>
<identical line 6>
"""
ldd_2 = """
<identical line 1>
<identical line 2>
<differing line 3>
<identical line 4>
<None>
<identical line 6>
"""
最後只需遍歷轉換後文件的每一行(現在它們的行數都相同),逐行比較差異並忽略任何 <None> 條目,這樣就可以連續地打印出差異。
我創建了一個 function,它使用 python difflib,用 <None> 字符串填充在另一個文件中存在、而本文件缺少的行。但是,我不確定如何擴展此 function 以包含任意數量的差異(即同時比較兩個以上的文件):
def generate_diff(file_1, file_2):
    """Align two ldd outputs line by line, padding gaps with ``"<None>"``.

    Both inputs are normalised first (a 32-character hash-like path component
    followed by ``/`` and any parenthesised value are replaced by ``"<>"``),
    then compared with ``difflib.ndiff``. Lines present on only one side are
    paired with a ``"<None>"`` placeholder on the other side so that both
    returned lists have the same length and can be walked in lockstep.

    Args:
        file_1: First ldd output as a single string.
        file_2: Second ldd output as a single string.

    Returns:
        A tuple ``(list_1, list_2)`` of equally long lists of stripped lines.
        Blank lines are dropped.
    """
    # Imported locally so the function works regardless of which imports the
    # surrounding module happens to provide.
    import difflib
    import re

    placeholder = "<None>"

    def remove_hashvalues(text):
        # differing hashvalues from ldd can be ignored, we only care about
        # version and path
        return re.sub(r"([a-zA-Z0-9_.-]{32}\/|\([a-zA-Z0-9_.-]*\))", "<>", text)

    # Compare without line endings so a missing trailing newline on one side
    # does not show up as a spurious difference.
    lines_1 = remove_hashvalues(file_1).splitlines()
    lines_2 = remove_hashvalues(file_2).splitlines()

    list_1, list_2 = [], []
    lost, gained = [], []  # pending "- " / "+ " lines of the current change block

    def flush():
        # Pad the shorter side of the pending block, emit it and reset it.
        # Missing removals are padded at the end of ``lost``; missing
        # additions are padded at the front of ``gained``.
        if len(lost) < len(gained):
            lost.extend([placeholder] * (len(gained) - len(lost)))
        else:
            gained[:0] = [placeholder] * (len(lost) - len(gained))
        list_1.extend(lost)
        list_2.extend(gained)
        lost.clear()
        gained.clear()

    for entry in difflib.ndiff(lines_1, lines_2):
        tag, text = entry[:2], entry[2:].strip()
        # "? " lines are intraline hints, not content; blank lines are skipped.
        if tag == "? " or not text:
            continue
        if tag == "- ":
            # A removal after additions starts a new change block; flushing
            # here keeps each removal paired with the additions following it.
            if gained:
                flush()
            lost.append(text)
        elif tag == "+ ":
            gained.append(text)
        else:
            # Common line: close any pending block, then emit on both sides.
            flush()
            list_1.append(text)
            list_2.append(text)

    # The diff may end inside a change block.
    flush()
    return list_1, list_2
我還發現 這個工具可以比較多個文件,但不幸的是它不是用來比較代碼的。
編輯:我調整了@AyoubKaanich 的解決方案建議,以創建一個更簡化的版本來滿足我的需求:
from collections import defaultdict
import re
def transform(input):
    """Normalise one ldd output and return its lines in sorted order.

    A 32-character hash-like path component followed by ``/`` and any
    parenthesised value are replaced by ``"<>"``.

    Args:
        input: ldd output as a single string.

    Returns:
        The normalised lines, sorted.
    """
    # differing hashvalues can be ignored, we only care about version and path
    # (raw string: the pattern is unchanged, but "\/" and "\(" are invalid
    # escapes in a normal string literal)
    normalised = re.sub(r"([a-zA-Z0-9_.-]{32}\/|\([a-zA-Z0-9_.-]*\))", "<>", input)
    return sorted(normalised.splitlines())
def generate_diff(outputs: dict):
    """Merge several ldd outputs into one annotated, sorted list.

    Args:
        outputs: Maps a target label to that target's ldd output string.

    Returns:
        A list in sorted line order. A line found in every output is stored
        as a plain string. Any other line is stored as a tuple of the line
        wrapped in an ANSI colour sequence and the set of targets that
        contain it. Consecutive differing lines that share the same first
        space-separated token share a colour.
    """
    owners = defaultdict(set)
    for target, output in outputs.items():
        for raw in transform(output):
            owners[raw.strip()].add(target)

    total = len(outputs)
    report = []
    group_key = None
    palette_pos = 0
    for text in sorted(owners):
        holders = owners[text]
        if len(holders) == total:
            # A common line ends the current colour group.
            if group_key:
                group_key = None
            report.append(text)
            continue
        first_token = text.split(" ")[0]
        if first_token != group_key:
            # New dependency name: move on to the next colour.
            group_key = first_token
            palette_pos += 1
        report.append((f"\033[3{palette_pos%6+1}m{text}\033[0m", holders))
    return report
唯一的缺點是,這不適用於字符串在任意位置發生變化的差異,而這正是 difflib 擅長檢測的。但是,對於 ldd 的情況,由於依賴項名稱總是最先列出,因此按字母順序排序並取字符串的第一部分進行分組是可行的。
git 與 Git 不同。
# Stage the three ldd dump files (presumably one per machine).
git add ldd.tmp1 ldd.tmp2 ldd.tmp3
# Keep only the diff lines that contain '+' and write them to result.tmp.
# NOTE(review): plain `git diff` compares the working tree with the index, so
# right after `git add` its output is likely empty; `git diff --cached` may be
# what was intended - confirm.
git diff -- | grep + > result.tmp
見https://git-scm.com/docs/git-diff
還:
純 Python 解決方案,沒有庫或額外的依賴項。
注意:由於一些假設,此解決方案有效:
from collections import defaultdict
import re
def transform(input):
    """Normalise one ldd output and return its lines in sorted order.

    A 32-character hash-like path component followed by ``/`` and any
    parenthesised value are replaced by ``"<>"``.

    Args:
        input: ldd output as a single string.

    Returns:
        The normalised lines, sorted.
    """
    # differing hashvalues from ldd can be ignored, we only care about version and path
    # (raw string: the pattern is unchanged, but "\/" and "\(" are invalid
    # escapes in a normal string literal)
    normalised = re.sub(r"([a-zA-Z0-9_.-]{32}\/|\([a-zA-Z0-9_.-]*\))", "<>", input)
    return sorted(normalised.splitlines())
def generate_diff(outputs: dict, common_threshold = 0):
    """Print a merged, diff-style view of several ldd outputs.

    Every distinct normalised line is printed once, in sorted order:

    * prefixed with a space if all outputs contain it;
    * prefixed with ``- `` and followed by the targets that lack it, if at
      least ``common_threshold`` outputs contain it;
    * prefixed with ``+ `` and followed by the targets that contain it,
      otherwise.

    Target labels are listed in sorted order so the output is the same on
    every run; set iteration order is not.

    Args:
        outputs: Maps a target label to that target's ldd output string.
        common_threshold: how many outputs need to contain line to consider
            it common and mark outputs that do not have it as missing.
    """
    assert(common_threshold <= len(outputs))
    all_targets = set(outputs)
    mapping = defaultdict(set)
    for target, output in outputs.items():
        for line in transform(output):
            mapping[line].add(target)
    for line in sorted(mapping):
        found = mapping[line]
        if len(found) == len(all_targets):
            print(' ' + line)
        elif len(found) >= common_threshold:
            # Compare labels as strings so mixed key types still sort.
            missed_str = ",".join(sorted(map(str, all_targets - found)))
            print(f'- {line} ({missed_str})')
        else:
            added_str = ",".join(sorted(map(str, found)))
            print(f'+ {line} ({added_str})')
示例運行:
# Sample input: ldd outputs of five targets, keyed by the labels 'A' to 'E'.
my_ldd_outputs = {
'A': """
linux-vdso.so.1 (0x00007ffde4f09000)
libtinfo.so.6 => /lib/x86_64-linux-gnu/libtinfo.so.6 (0x00007fe0594f3000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fe0592cb000)
/lib64/ld-linux-x86-64.so.2 (0x00007fe059690000)
""",
'B': """
linux-vdso.so.1 (0x00007fff697b6000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f1c54045000)
/lib64/ld-linux-x86-64.so.2 (0x00007f1c54299000)
""",
'C': """
linux-vdso.so.1 (0x00007fffd61f9000)
libcrypto.so.3 => /lib/x86_64-linux-gnu/libcrypto.so.3 (0x00007f08a51a3000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f08a4f7b000)
/lib64/ld-linux-x86-64.so.2 (0x00007f08a5612000)
""",
'D': """
linux-vdso.so.1 (0x00007ffcf9ddd000)
libcrypto.so.3 => /lib/x86_64-linux-gnu/libcrypto.so.3 (0x00007fa2e381b000)
libselinux.so.1 => /lib/x86_64-linux-gnu/libselinux.so.1 (0x00007fa2e37ef000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fa2e35c7000)
libpcre2-8.so.0 => /lib/x86_64-linux-gnu/libpcre2-8.so.0 (0x00007fa2e3530000)
/lib64/ld-linux-x86-64.so.2 (0x00007fa2e3cd7000)
""",
'E': """
linux-vdso.so.1 (0x00007ffc2deab000)
libcrypto.so.3 => /lib/x86_64-linux-gnu/libcrypto.so.3 (0x00007f31fed91000)
libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 (0x00007f31fed75000)
libselinux.so.1 => /lib/x86_64-linux-gnu/libselinux.so.1 (0x00007f31fed49000)
libgssapi_krb5.so.2 => /lib/x86_64-linux-gnu/libgssapi_krb5.so.2 (0x00007f31fecf5000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f31feacd000)
libpcre2-8.so.0 => /lib/x86_64-linux-gnu/libpcre2-8.so.0 (0x00007f31fea34000)
/lib64/ld-linux-x86-64.so.2 (0x00007f31ff2af000)
libkrb5.so.3 => /lib/x86_64-linux-gnu/libkrb5.so.3 (0x00007f31fe969000)
libk5crypto.so.3 => /lib/x86_64-linux-gnu/libk5crypto.so.3 (0x00007f31fe93a000)
libcom_err.so.2 => /lib/x86_64-linux-gnu/libcom_err.so.2 (0x00007f31fe934000)
libkrb5support.so.0 => /lib/x86_64-linux-gnu/libkrb5support.so.0 (0x00007f31fe926000)
libkeyutils.so.1 => /lib/x86_64-linux-gnu/libkeyutils.so.1 (0x00007f31fe91f000)
libresolv.so.2 => /lib/x86_64-linux-gnu/libresolv.so.2 (0x00007f31fe909000)
"""
}
# With a threshold of 2, a line found in at least two (but not all) outputs is
# printed with '-' plus the targets lacking it; a line found in fewer than two
# outputs is printed with '+' plus the targets containing it.
generate_diff(my_ldd_outputs, 2)
輸出
/lib64/ld-linux-x86-64.so.2 <>
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 <>
+ libcom_err.so.2 => /lib/x86_64-linux-gnu/libcom_err.so.2 <> (E)
- libcrypto.so.3 => /lib/x86_64-linux-gnu/libcrypto.so.3 <> (B,A)
+ libgssapi_krb5.so.2 => /lib/x86_64-linux-gnu/libgssapi_krb5.so.2 <> (E)
+ libk5crypto.so.3 => /lib/x86_64-linux-gnu/libk5crypto.so.3 <> (E)
+ libkeyutils.so.1 => /lib/x86_64-linux-gnu/libkeyutils.so.1 <> (E)
+ libkrb5.so.3 => /lib/x86_64-linux-gnu/libkrb5.so.3 <> (E)
+ libkrb5support.so.0 => /lib/x86_64-linux-gnu/libkrb5support.so.0 <> (E)
- libpcre2-8.so.0 => /lib/x86_64-linux-gnu/libpcre2-8.so.0 <> (C,B,A)
+ libresolv.so.2 => /lib/x86_64-linux-gnu/libresolv.so.2 <> (E)
- libselinux.so.1 => /lib/x86_64-linux-gnu/libselinux.so.1 <> (C,B,A)
+ libtinfo.so.6 => /lib/x86_64-linux-gnu/libtinfo.so.6 <> (A)
+ libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 <> (E)
linux-vdso.so.1 <>
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.