[英]I want to find the most frequent sequence of symbols in a list of lists
我想从"列表的列表"中找出出现次数最多的符号序列
# Labels addressed by column position: readable() maps the index of the largest
# value in a score row to the label stored at the same index here.
# The 35 labels are the digits 0-9 followed by the capital letters A-Z with
# no "Q" entry.
CATEGORIES = list("0123456789" "ABCDEFGHIJKLMNOPRSTUVWXYZ")
KR8877J = [[0.002,0.006,0.004,0.045,0.002,0.017,0.006,0.077,0.001,0.035,0.042,0.005,0.004,0.039,0.001,0.002,0.001,0.008,0.058,0.352,0.002,0.007,0.017,0.004,0.007,0.007,0.007,0.004,0.005,0.009,0.089,0.036,0.053,0.041,0.004],[0.003,0.007,0.005,0.075,0.001,0.020,0.006,0.044,0.002,0.035,0.026,0.004,0.004,0.033,0.001,0.001,0.003,0.008,0.049,0.360,0.002,0.007,0.021,0.005,0.009,0.003,0.008,0.007,0.003,0.014,0.092,0.048,0.058,0.031,0.004],[0.002,0.000,0.025,0.012,0.006,0.002,0.001,0.627,0.006,0.021,0.022,0.008,0.004,0.006,0.004,0.033,0.000,0.006,0.011,0.009,0.002,0.002,0.009,0.000,0.002,0.040,0.007,0.005,0.015,0.000,0.035,0.001,0.008,0.015,0.053],[0.056,0.008,0.023,0.038,0.015,0.007,0.050,0.006,0.412,0.004,0.005,0.027,0.011,0.005,0.021,0.007,0.073,0.024,0.012,0.005,0.013,0.005,0.027,0.003,0.015,0.001,0.005,0.074,0.002,0.022,0.005,0.011,0.002,0.001,0.006],[0.025,0.011,0.025,0.034,0.018,0.027,0.090,0.008,0.258,0.006,0.007,0.026,0.016,0.008,0.026,0.011,0.079,0.030,0.026,0.008,0.018,0.011,0.033,0.003,0.016,0.001,0.003,0.106,0.004,0.021,0.012,0.013,0.003,0.005,0.014],[0.048,0.027,0.019,0.002,0.028,0.002,0.008,0.017,0.041,0.014,0.012,0.022,0.031,0.005,0.045,0.100,0.004,0.031,0.033,0.002,0.029,0.006,0.021,0.032,0.008,0.038,0.317,0.007,0.017,0.004,0.018,0.005,0.003,0.004,0.002],[0.013,0.002,0.002,0.000,0.164,0.001,0.060,0.004,0.006,0.002,0.018,0.003,0.035,0.002,0.008,0.008,0.001,0.008,0.028,0.005,0.383,0.013,0.063,0.010,0.004,0.002,0.014,0.016,0.002,0.005,0.048,0.011,0.028,0.017,0.012]]
KR8877J_1 = [[0.004,some data]]
KR8877J_2 = [[0.002,some data]]
KR8877J_3 = [[some data]]
KR8877J_4 = [[0.006,some data,0.008]]
KR8877J_5 = [[some data]]
KR8877J_6 = [[some data]]
def readable(x):
    """Print the best-scoring category label of every row, last row first.

    For each row of ``x`` the position of its largest value is looked up in
    ``CATEGORIES``. The collected labels are then reversed and printed as a
    list. Nothing is returned.

    Args:
        x: Iterable of score rows. When a row contains its largest value more
            than once, the earliest position is used.

    Raises:
        ValueError: If a row is empty (``max`` of an empty sequence).
        IndexError: If the winning position has no entry in ``CATEGORIES``.
    """
    tag = [
        CATEGORIES[max(enumerate(row), key=lambda pair: pair[1])[0]]
        for row in x
    ]
    # NOTE(review): the snippet was posted without indentation. Reversing once,
    # after all rows are collected, is the reading that produces the
    # ['K', 'R', '8', '8', '7', 'J', 'J'] output quoted in the post.
    tag.reverse()
    print(tag)
# Print the decoded label sequence of every score matrix in turn.
for i in (KR8877J, KR8877J_1, KR8877J_2, KR8877J_3, KR8877J_4, KR8877J_5, KR8877J_6):
    readable(i)
def compare_bitwise(a, b):
    """Return True when ``a`` and ``b`` have at least one element in common.

    Despite the name, no bitwise arithmetic is done on the inputs themselves:
    both arguments are converted to sets and the sets are intersected.

    Args:
        a: Iterable of hashable items.
        b: Iterable of hashable items.

    Returns:
        bool: True if some item occurs in both iterables, otherwise False.

    Raises:
        TypeError: If either iterable contains an unhashable item (for example
            a nested list), because such items cannot be stored in a set.
    """
    # A non-empty intersection is truthy, so no if/else is needed.
    return bool(set(a) & set(b))
# NOTE: kept exactly as posted, because this is the line the question is about.
# `i+=1` is an augmented assignment statement, not an expression, so it cannot
# be passed as a call argument; Python rejects the line with a SyntaxError.
for i in (KR8877J, KR8877J_1,KR8877J_2,KR8877J_3,KR8877J_4,KR8877J_5,KR8877J_6):
print(compare_bitwise(i, i+=1))
迭代的问题在这里: print(compare_bitwise(i, i+=1))
我只在第一个列表中给出了数据示例,因为其余列表基本相同,只是其中一些的输出是 ['K', 'R', '8', '8', 'J', '7', 'J'],而不是 ['K', 'R', '8', '8', '7', 'J', 'J']
由于没有 some data 的具体示例,很难判断预期的结果。无论如何,对于 print(compare_bitwise(i, i+=1)) 这一行:如果您是想把每个 KR8877J_... 序列与它的下一个序列进行比较(即 KR8877J_1 与 KR8877J_2、KR8877J_2 与 KR8877J_3,依此类推),那么改动最小的做法是:把这些序列放进一个列表或元组,然后按索引逐对访问。
# Gather the sequences so each one can be paired with its successor by index.
seq = (KR8877J_1, KR8877J_2, KR8877J_3, KR8877J_4, KR8877J_5, KR8877J_6)
# Stop one short of the end: the last sequence has no successor to compare to.
for i in range(len(seq) - 1):
    print(compare_bitwise(seq[i], seq[i + 1]))
不过在 Python 中,用 range() 和 len() 来做索引既不够地道,也不够高效。更好的做法是对 seq 和 seq[1:] 使用 zip():
seq = (KR8877J_1, KR8877J_2, KR8877J_3, KR8877J_4, KR8877J_5, KR8877J_6)
# zip() stops at the shorter input, so pairing seq with seq[1:] yields each
# sequence together with the one that follows it.
for i, j in zip(seq, seq[1:]):
    print(compare_bitwise(i, j))
编辑:我不完全确定您的意思,但您可以用 tag_tup = tuple(tag) 把标签列表转换为元组,并让 readable() 返回(return)这个值,而不只是打印它;或者直接 return tag。然后把这个元组用作 collections.Counter 的键:先执行 counter = collections.Counter(),再在调用 readable(i) 的循环内,用得到的 tag_tup 执行 counter.update({tag_tup: 1})。
def readable(x):
    """Decode score rows into category labels, print them and return them.

    For each row of ``x`` the position of its largest value selects a label
    from ``CATEGORIES``. The labels are reversed (last row first), printed,
    and returned so callers can use them, e.g. as a tuple key for counting.

    Args:
        x: Iterable of score rows. When a row contains its largest value more
            than once, the earliest position is used.

    Returns:
        list: The selected labels in reversed row order.

    Raises:
        ValueError: If a row is empty (``max`` of an empty sequence).
        IndexError: If the winning position has no entry in ``CATEGORIES``.
    """
    tag = []
    for lst in x:
        # Pair every score with its position, then keep the position of the
        # highest score.
        index, _ = max(enumerate(lst), key=lambda pair: pair[1])
        tag.append(CATEGORIES[index])
    tag.reverse()
    print(tag)
    return tag
import collections

# Tally how many times each decoded label sequence occurs.
counter = collections.Counter()
for i in (KR8877J, KR8877J_1, KR8877J_2, KR8877J_3, KR8877J_4, KR8877J_5, KR8877J_6):
    # Lists are unhashable, so the labels are frozen into a tuple before being
    # used as a Counter key.
    tag_tup = tuple(readable(i))
    counter.update({tag_tup: 1})
# NOTE(review): the snippet was posted without indentation; printing once after
# the loop (the final tally) is assumed here.
print(counter)
请参阅 collections.Counter 的文档(见上方链接),然后用 most_common() 取得出现次数最多的组合。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.