[英]Find the longest repeated subsequence of max length N in Python
我有一些表示事件順序的字符串:
s1 = 'A->B->E->D->A->C->B->D'
s2= 'A->B->C->A->B'
s3 = 'A->B->A'
在每個字符串中,我想找到最大長度為 N 的所有重復模式。
import itertools
def find_all_comb(event_list, max_events):
    """Return every distinct '->'-joined combination of 1..max_events events.

    Args:
        event_list: Sequence of event names, in their original order.
        max_events: Largest combination size to generate (inclusive).

    Returns:
        A list of strings grouped by combination size (all size-1
        combinations first, then size-2, and so on). Within one size,
        duplicates are dropped and the order in which
        ``itertools.combinations`` first produced each string is kept.
    """
    all_combs = []
    for size in range(1, max_events + 1):
        joined = (
            '->'.join(combo)
            for combo in itertools.combinations(event_list, size)
        )
        # dict.fromkeys de-duplicates while keeping a deterministic order,
        # unlike a set whose iteration order depends on string hashing.
        all_combs.extend(dict.fromkeys(joined))
    return all_combs
def find_repeating_patterns(x):
    """Return the repeating patterns of an event string that survive the
    repeated "has a shorter related pattern" filter.

    Args:
        x: Event string whose events are separated by ``'->'``.

    Returns:
        The last non-empty list produced by the filtering loop below, or an
        empty list when the very first filter pass removes everything.

    Known limitation (this is the behaviour the question is about): when no
    repeating pattern is strictly longer than another one -- e.g.
    ``'A->B->A'``, where only ``'A'`` repeats -- the first filter pass
    discards every pattern and ``[]`` is returned.
    """
    def keep_extended(patterns):
        # Keep a pattern only if some strictly shorter pattern in the same
        # list uses a subset of its characters. set() of a str is its set of
        # characters, so the separator characters '-' and '>' take part too.
        return [
            s for s in patterns
            if any(set(s).issuperset(set(i)) and len(s) > len(i)
                   for i in patterns)
        ]

    split_events = x.split("->")
    # The size cap is half the character length of x, not half the number
    # of events.
    all_combs = find_all_comb(split_events, len(x) // 2)

    repeating_patterns = []
    for comb in all_combs:
        # `p in comb` is a substring test against the joined combination.
        c_split_event = [p for p in split_events if p in comb]
        # str.count counts non-overlapping occurrences.
        if '->'.join(c_split_event).count(comb) > 1:
            repeating_patterns.append(comb)

    output_list = []
    longest_repeating_patterns = keep_extended(repeating_patterns)
    while output_list != longest_repeating_patterns:
        if not longest_repeating_patterns:
            break
        output_list = longest_repeating_patterns.copy()
        longest_repeating_patterns = keep_extended(longest_repeating_patterns)
    return output_list
對於 s1,它返回正確的模式 [A,B,D];對於 s2,它返回 [A,B]。對於 s3,它應該返回 [A],但它返回一個空列表。這是因為這一行:
[s for s in repeating_patterns if any(set(s).issuperset(set(i)) and len(s) > len(i) for i in repeating_patterns)]
這一行要求 len(s) > len(i),因此當沒有任何重復模式比另一個模式更長時(例如 s3 中只有 [A] 重復),所有模式都會被篩掉。
我如何能夠在這里同時捕獲這兩種情況?
這是一個更簡單、更有效的解決方案:
def longest_subsequence(events, limit, sep='->'):
    """Find the longest repeated subsequences of at most ``limit`` events.

    A subsequence is reported when it is found at a set of positions that
    shares no index with the positions where the same subsequence was first
    seen, so an occurrence never overlaps itself.

    Args:
        events: Event string, e.g. ``'A->B->C->A->B'``.
        limit: Maximum number of events in a reported pattern.
        sep: Separator between events, used both to split ``events`` and to
            join each reported pattern.

    Returns:
        A list of the repeated patterns of the greatest length that produced
        any match, each joined with ``sep``, in the order they were detected.
        Empty when nothing repeats.
    """
    indexed = list(enumerate(events.split(sep)))
    # Two index-disjoint occurrences of an n-event pattern need 2 * n
    # positions, so longer candidates can never match and are skipped.
    longest_possible = min(limit, len(indexed) // 2)

    output = {}  # pattern tuple -> joined string, in detection order
    seen = {}    # pattern tuple -> index set of its first occurrence
    for n in range(longest_possible, 0, -1):
        for combination in itertools.combinations(indexed, n):
            indexes, key = zip(*combination)
            first_indexes = seen.get(key)
            if first_indexes is None:
                seen[key] = set(indexes)
            elif key not in output and first_indexes.isdisjoint(indexes):
                output[key] = sep.join(key)
        # Longest lengths are tried first; stop at the first length that
        # yields any repeated pattern.
        if output:
            break
    return list(output.values())
這會先查看最長的候選長度,一旦找到匹配就提前終止。它會保存每個子序列首次出現時的索引,並將其與當前候選的索引進行比較,從而排除與自身重疊的重復子序列。
演示:
# Sample event strings, including a single event and the empty string.
samples = (
    'A->B->E->D->A->C->B->D',
    'A->B->C->A->B',
    'A->B->A',
    'A->B->E->D->A->C->B->E->D->A',
    'B->B->B->C->C',
    'A->B->A->B->C->C',
    'A',
    '',
)

# Run every sample with a maximum pattern length of 4 and print the
# 1-based sample number, the input string and the patterns found.
for index, sample in enumerate(samples, 1):
    result = longest_subsequence(sample, 4)
    print('(%s) %r\n%s\n' % (index, sample, result))
輸出:
(1) 'A->B->E->D->A->C->B->D'
['A->B->D']
(2) 'A->B->C->A->B'
['A->B']
(3) 'A->B->A'
['A']
(4) 'A->B->E->D->A->C->B->E->D->A'
['A->B->E->D', 'B->E->D->A']
(5) 'B->B->B->C->C'
['B->C']
(6) 'A->B->A->B->C->C'
['A->B->C']
(7) 'A'
[]
(8) ''
[]
一種解決方法是添加一行額外的代碼:把 repeating_patterns 中不屬於 output_list 裡任何序列的子序列的模式,也添加到 output_list 中。
import itertools
def find_all_comb(event_list, max_events):
    """Return every distinct '->'-joined combination of 1..max_events events.

    Args:
        event_list: Sequence of event names, in their original order.
        max_events: Largest combination size to generate (inclusive).

    Returns:
        A list of strings grouped by combination size (all size-1
        combinations first, then size-2, and so on). Within one size,
        duplicates are dropped and the order in which
        ``itertools.combinations`` first produced each string is kept.
    """
    all_combs = []
    for size in range(1, max_events + 1):
        joined = (
            '->'.join(combo)
            for combo in itertools.combinations(event_list, size)
        )
        # dict.fromkeys de-duplicates while keeping a deterministic order,
        # unlike a set whose iteration order depends on string hashing.
        all_combs.extend(dict.fromkeys(joined))
    return all_combs
def find_repeating_patterns(x):
    """Return the repeating patterns of an event string, keeping patterns
    that are not covered by any pattern chosen by the filtering loop.

    Args:
        x: Event string whose events are separated by ``'->'``.

    Returns:
        The last non-empty list produced by the filtering loop, extended with
        every repeating pattern whose characters are not all contained in one
        of those results. For ``'A->B->A'`` this yields ``['A->'.rstrip('->')]``
        i.e. ``['A']``, because the loop result is empty and ``'A'`` is the
        only repeating pattern.
    """
    def keep_extended(patterns):
        # Keep a pattern only if some strictly shorter pattern in the same
        # list uses a subset of its characters. set() of a str is its set of
        # characters, so the separator characters '-' and '>' take part too.
        return [
            s for s in patterns
            if any(set(s).issuperset(set(i)) and len(s) > len(i)
                   for i in patterns)
        ]

    split_events = x.split("->")
    # The size cap is half the character length of x, not half the number
    # of events.
    all_combs = find_all_comb(split_events, len(x) // 2)

    repeating_patterns = []
    for comb in all_combs:
        # `p in comb` is a substring test against the joined combination.
        c_split_event = [p for p in split_events if p in comb]
        # str.count counts non-overlapping occurrences.
        if '->'.join(c_split_event).count(comb) > 1:
            repeating_patterns.append(comb)

    output_list = []
    longest_repeating_patterns = keep_extended(repeating_patterns)
    while output_list != longest_repeating_patterns:
        if not longest_repeating_patterns:
            break
        output_list = longest_repeating_patterns.copy()
        longest_repeating_patterns = keep_extended(longest_repeating_patterns)

    # ADDED LINE FOR SOLUTION: also keep every repeating pattern whose
    # character set is not contained in any pattern selected above. The list
    # is fully built before extend() runs, so output_list is not mutated
    # while it is being scanned.
    output_list.extend([
        s for s in repeating_patterns
        if not any(set(i).issuperset(set(s)) for i in output_list)
    ])
    return output_list
# Sample inputs from the question.
s1 = 'A->B->E->D->A->C->B->D'
s2 = 'A->B->C->A->B'
s3 = 'A->B->A'

# Outputs below are the ones reported in the answer.
print(find_repeating_patterns(s1))
# output: ['A->B->D']
print(find_repeating_patterns(s2))
# output: ['A->B']
print(find_repeating_patterns(s3))
# output: ['A']
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.