can you help me with this error?
def get_db():
    """Fetch every article from the crawling_sm table and sentence-tokenize it.

    Note: ``cursor.fetchall()`` returns a list of 1-tuples (one tuple per
    row) even for a single-column SELECT, e.g. ``[('a',), ('b',)]``.
    Passing the tuple itself to ``sent_tokenize`` raises
    ``TypeError: expected string or bytes-like object`` — the article text
    must be unpacked with ``row[0]`` first.
    """
    mydb = mysql.connector.connect(
        host="localhost",
        user="root",
        password="",
        database="cosmos",
    )
    try:
        mycursor = mydb.cursor()
        sql_select = "SELECT article FROM crawling_sm"
        mycursor.execute(sql_select)
        data = mycursor.fetchall()
        for row in data:
            # row is a 1-tuple like ('article text',); tokenize its first element.
            text_sents = sent_tokenize(row[0])
            print(text_sents)
        mycursor.close()
    finally:
        # Always release the connection, even if tokenization fails.
        mydb.close()


def process_text(text_article):
    """Clean a list of sentences and build the per-document info structure.

    ``text_article`` is expected to be an iterable of sentence strings
    (e.g. the output of ``sent_tokenize``) — TODO confirm against callers.
    """
    text_sents = text_article
    text_sents_clean = [remove_string_special_characters(s) for s in text_sents]  # if s.istitle() == False]
    doc_info = get_doc(text_sents_clean)
    #freqDict_list = create_freq_dict(text_sents_clean)
    #TF_scores = computeTF(doc_info, freqDict_list)
    print(text_sents)


get_db()
The problem is that the article I selected from the database cannot be split into sentences. I am trying to use sent_tokenize, but it raises an error: "expected a string or bytes-like object".
the error message:
File "C:\\Users\\HP Laptop\\Anaconda3\\lib\\site-packages\\nltk\\tokenize\\punkt.py", line 1295, in _slices_from_text for match in self._lang_vars.period_context_re().finditer(text):
TypeError: expected string or bytes-like object
The problem is that
data = mycursor.fetchall()
returns a list of tuples, even when the query returns a single column.
So rather than returning something like
['a', 'b', 'c', 'd', 'e', 'f']
it returns
[('a',), ('b',), ('c',), ('d',), ('e',), ('f',)]
The solution is to pass the first element of each tuple to the sent_tokenize
function.
for row in data:
text_sents = sent_tokenize(row[0])
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.