[英]Python3 add colour to specific outputted words from lists in a sentence
下面的代码会逐行检查一个文本文件,看句子中是否包含词典文件里的单词;如果包含,再检查该行是否还包含二级列表中的单词。如果某一行同时满足这两个条件,就打印该行。
我想做的是给词典中找到的单词和在名为 CategoryGA 的二级列表中找到的单词分别设置颜色(例如红色和蓝色),目的是在打印输出中轻松识别每个命中的单词来自哪个列表。
import re
import collections
from collections import defaultdict
from collections import Counter
import sys
from Categories.GainingAccess import GA
# Path of the chat log that readfile() and LanguageDetect() scan.
# NOTE(review): the '#' run looks like a redacted placeholder - confirm.
Chatpath = "########/Chat1.txt"
Chatfile = Chatpath
# Path of the lexicon file read by readlex() and LanguageDetect().
# NOTE(review): the '#' run looks like a redacted placeholder - confirm.
lpath = 'Lexicons/######.txt'
lfile = lpath
# Secondary word list, taken from Categories.GainingAccess.
CategoryGA = GA
# Category words found so far; readfile() appends to this list.
Hits = []
# Disabled snippet kept as a bare string literal, so it never executes.
"""
text_file = open(path, "r")
lines = text_file.read().split()
c = Counter(lines)
for i, j in c.most_common(50):
print(i, j)
"""
# class LanguageModelling:
def readfile(chat_path=None, categories=None, hits=None):
    """Report every category word found in the chat log, then print totals.

    For each line of the chat file, every word of ``categories`` that occurs
    in the line (plain substring test on the raw line) is appended to
    ``hits`` and reported together with the line number and the line text
    with ``<...>`` markup removed.  Afterwards the number of occurrences of
    each word in ``hits`` is printed as ``word : count``.

    Args:
        chat_path: Path of the chat log; defaults to the module-level
            ``Chatfile``.
        categories: Iterable of words to look for; defaults to the
            module-level ``CategoryGA``.
        hits: List that collects the matched words; defaults to the
            module-level ``Hits``, so repeated calls keep accumulating.

    Raises:
        OSError: If the chat file cannot be opened.
    """
    chat_path = Chatfile if chat_path is None else chat_path
    categories = CategoryGA if categories is None else categories
    hits = Hits if hits is None else hits

    tag_pattern = re.compile('<.*?>')

    # The ``with`` block closes the file; no explicit close() is needed.
    with open(chat_path) as file_read:
        # Line numbers are reported 1-based, the way editors show them.
        for line_num, line in enumerate(file_read, start=1):
            matched = [word for word in categories if word in line]
            if not matched:
                continue
            # Strip the markup once per line rather than once per hit.
            cleansed = tag_pattern.sub('', line)
            for word in matched:
                hits.append(word)
                print('%s appeared on Line %d : %s'
                      % (word, line_num, cleansed))

    for key, value in Counter(hits).items():
        print(key, ':', value)
def readlex(lex_path=None):
    """Print every lexicon entry with its leading statistics removed.

    Each lexicon line is expected to look like ``31547 4.7072% word``; the
    leading count and percentage columns are stripped and the remainder of
    the line is printed.  Lines that do not start with that prefix are
    printed unchanged.

    Args:
        lex_path: Path of the lexicon file; defaults to the module-level
            ``lfile``.

    Raises:
        OSError: If the lexicon file cannot be opened.
    """
    lex_path = lfile if lex_path is None else lex_path

    # Compiled once, outside the loop: "<count> <percent>% " at line start.
    prefix = re.compile(r'^\d+\s+\d+\.\d+%\s*')

    # The ``with`` block closes the file; no explicit close() is needed.
    with open(lex_path) as l_read:
        for line in l_read:
            print(prefix.sub('', line))
def LanguageDetect(chat_path=None, lex_path=None, categories=None):
    """Print chat lines containing a lexicon word and a category word.

    A chat line is printed (with ``<...>`` markup removed) when it contains
    at least one lexicon word and at least one category word.  In the
    printed text lexicon words are shown in bold red and category words on
    a blue background, so the origin of every hit is visible.

    Matching is a case-sensitive substring test on the raw line, as in the
    original condition.  A term that only occurs inside stripped markup
    therefore selects the line but is not highlighted.

    Args:
        chat_path: Path of the chat log; defaults to the module-level
            ``Chatfile``.
        lex_path: Path of the lexicon file, one ``count percent% word``
            entry per line; defaults to the module-level ``lfile``.
        categories: Iterable of category words; defaults to the
            module-level ``CategoryGA``.

    Raises:
        OSError: If either file cannot be opened.
    """
    chat_path = Chatfile if chat_path is None else chat_path
    lex_path = lfile if lex_path is None else lex_path
    categories = CategoryGA if categories is None else categories

    # Lexicon lines carry "<count> <percent>% " before the word and a
    # trailing newline; both must go or no entry can ever match a chat line.
    lex_prefix = re.compile(r'^\d+\s+\d+\.\d+%\s*')
    with open(lex_path) as l_read:
        lex_words = [lex_prefix.sub('', raw).strip() for raw in l_read]
    # An empty string is "in" every line, so blank entries are dropped.
    lex_words = [word for word in lex_words if word]
    cat_words = [word for word in categories if word]

    # Colour template per term; a term present in both lists is shown as a
    # category word.
    styles = {word: '\033[1;31m{}\033[1;m' for word in lex_words}
    styles.update((word, '\033[1;44m{}\033[1;m') for word in cat_words)

    # One alternation, longest term first, so that a single pass colours
    # every hit and never touches escape codes inserted for earlier hits.
    pattern = re.compile('|'.join(
        re.escape(term) for term in sorted(styles, key=len, reverse=True)))

    def _paint(match):
        term = match.group(0)
        return styles[term].format(term)

    tag_pattern = re.compile('<.*?>')

    # The ``with`` block closes the file; no explicit close() is needed.
    with open(chat_path) as c_read:
        for line in c_read:
            has_lex = any(word in line for word in lex_words)
            has_cat = any(word in line for word in cat_words)
            if not (has_lex and has_cat):
                continue
            Cleanse = tag_pattern.sub('', line)
            print(pattern.sub(_paint, Cleanse))
# Script entry: only LanguageDetect() runs; the other two calls are disabled.
#readfile()
LanguageDetect()
# readlex()
这是我的完整代码,问题出在 LanguageDetect 方法中:我目前通过给 lex_word 和 cat_word 变量重新赋值来上色的做法不起作用,坦率地说,我不知道下一步该尝试什么。
词汇:
31547 4.7072% i
25109 3.7466% u
20275 3.0253% you
10992 1.6401% me
9490 1.4160% do
7681 1.1461% like
6293 0.9390% want
6225 0.9288% my
5459 0.8145% have
5141 0.7671% your
5103 0.7614% lol
4857 0.7247% can
然后在readlex方法中,我使用:
r = re.compile(r'^\d+\s+\d+\.\d+%\s*')
l_Cleanse = r.sub('', line)
来删除每行中单词/字符之前的所有内容。我认为这可能是我无法给词典单词上色的主要原因,但不确定该如何解决。
我认为您的问题出在处理每一行数据的方式上,不过也可能是我没有完全理解您的问题。
下面的代码应该可以解决问题:
# Demo: highlight lexicon words (bold red) and category words (blue
# background) in a line that contains at least one of each.
lex_content = ['aaa', 'xxx']
CategoryGA = ['ccc', 'ddd']
line = 'abc aaa bbb ccc'

# Collect the hits first so that every word is coloured exactly once, no
# matter how many (lexicon, category) pairs it takes part in.
lex_hits = [word for word in lex_content if word and word in line]
cat_hits = [word for word in CategoryGA if word and word in line]

if lex_hits and cat_hits:
    for lex_word in lex_hits:
        for cat_word in cat_hits:
            print(lex_word, cat_word)

    # A word present in both lists is shown as a category word.
    colours = {word: '\033[1;31m' for word in lex_hits}
    colours.update((word, '\033[1;44m') for word in cat_hits)

    # A single regex pass (longest term first) cannot match inside escape
    # codes inserted for an earlier hit, unlike chained str.replace calls.
    pattern = re.compile('|'.join(
        re.escape(word) for word in sorted(colours, key=len, reverse=True)))
    line = pattern.sub(
        lambda hit: colours[hit.group(0)] + hit.group(0) + '\033[1;m', line)
    print(line)
给出输出:
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.