I have tried importing `precision` and every related library, but I still cannot get it to work. These are my imports:
# Standard library
import collections
import os  # needed for os.path.join below
import string
from random import shuffle

# Third-party
import pandas as pd

# NLTK
import nltk
import nltk.metrics
from nltk import NaiveBayesClassifier, classify
from nltk import precision  # rebound below by the nltk.metrics.scores import
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tag import pos_tag
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.metrics.scores import (accuracy, precision, recall, f_measure,
                                 log_likelihood, approxrand)
The code below is what I use for my implementation:
# Load the labeled reviews and split them into positive / negative buckets.
import os  # `os` is used below but was never imported at the top of the file

path = os.path.join('c:' + os.sep, 'Users', 'User', 'Documents', 'Reviews_Labeled.csv')
df = pd.read_csv(path)

# Column 0 holds the review text, column 1 the label; anything that is not
# exactly 'Positive' is treated as negative.
positive = []
negative = []
for _, row in df.iterrows():
    if row.iloc[1] == 'Positive':
        positive.append(row.iloc[0])
    else:
        negative.append(row.iloc[0])

# Tokenize each review into a list of word tokens.
positive_tokens = [word_tokenize(review) for review in positive]
negative_tokens = [word_tokenize(review) for review in negative]
def is_clean(word: str,
             _stopwords: frozenset = frozenset(stopwords.words('english'))) -> bool:
    """Return True if *word* is worth keeping as a feature token.

    A token is dropped when it is a punctuation character, a number, or an
    English stopword.  The stopword set is built once at definition time
    (default-argument cache): the original re-read the corpus list on every
    call and did an O(n) list scan per token.

    The stopword comparison is done in lower case — the NLTK stopword list
    is all lower case, so without .lower() capitalized words like "The"
    slipped through the filter in the original.
    """
    if word in string.punctuation:
        return False
    if word.isnumeric():
        return False
    if word.lower() in _stopwords:
        return False
    return True
def clean_tokens(tokens: list) -> list:
    """Lower-case *tokens* and drop punctuation, numbers, and stopwords.

    Lower-casing happens *before* filtering (the original filtered first,
    then lowered) so the stopword check in is_clean sees exactly the casing
    that ends up in the output.
    """
    lowered = (token.lower() for token in tokens)
    return [token for token in lowered if is_clean(token)]
# Apply the punctuation/number/stopword filter to every tokenized review.
positive_tokens_cleaned = [clean_tokens(tokens) for tokens in positive_tokens]
negative_tokens_cleaned = [clean_tokens(tokens) for tokens in negative_tokens]
# Single shared lemmatizer instance used by lemmatize() below.
lemmatizer = WordNetLemmatizer()
def lemmatize(word: str, tag: str):
    """Lemmatize *word* using a WordNet POS derived from its Penn Treebank *tag*.

    Tags starting with 'NN' map to noun, 'VB' to verb; everything else is
    treated as an adjective.
    """
    prefix_to_pos = (('NN', 'n'), ('VB', 'v'))
    wordnet_pos = 'a'  # fallback: treat all remaining tags as adjectives
    for prefix, pos in prefix_to_pos:
        if tag.startswith(prefix):
            wordnet_pos = pos
            break
    return lemmatizer.lemmatize(word, wordnet_pos)
def lemmatize_tokens(tokens: list):
    """POS-tag *tokens* and lemmatize each word according to its tag."""
    normalized = []
    for word, tag in pos_tag(tokens):
        normalized.append(lemmatize(word, tag))
    return normalized
# Lemmatize every cleaned review so different inflections collapse to one token.
positive_tokens_normalized = [lemmatize_tokens(tokens) for tokens in positive_tokens_cleaned]
negative_tokens_normalized = [lemmatize_tokens(tokens) for tokens in negative_tokens_cleaned]
# BUG in the original: `positive_dataset` / `negative_dataset` were never
# built, so the `+` below raised a NameError.  NLTK's NaiveBayesClassifier
# expects (feature_dict, label) pairs, so convert each normalized token list
# into a bag-of-words feature dict first.  Labels match the CSV check above
# ('Positive'; everything else was bucketed as negative).
positive_dataset = [({token: True for token in tokens}, 'Positive')
                    for tokens in positive_tokens_normalized]
negative_dataset = [({token: True for token in tokens}, 'Negative')
                    for tokens in negative_tokens_normalized]

dataset = positive_dataset + negative_dataset
shuffle(dataset)

# NOTE(review): 5126 is a hard-coded train/test split point for this
# particular CSV — recompute it if the data changes.
train_ds = dataset[:5126]
test_ds = dataset[5126:]

classifier = NaiveBayesClassifier.train(train_ds)
# Print the accuracy instead of discarding it: a bare expression at module
# level in a script produces no visible output.
print(classify.accuracy(classifier, test_ds))
When searching through other people's questions, I found that this is the way they compute the precision value and the other metric values:
# Group gold labels and predicted labels by test-example index, so the
# set-based metrics (precision/recall) can compare reference vs. prediction.
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
for index, (features, gold_label) in enumerate(test_ds):
    refsets[gold_label].add(index)
    predicted_label = classifier.classify(features)
    testsets[predicted_label].add(index)
This is the code I use to try to print the precision value:
# The labels stored in the dataset are 'Positive'/'Negative' (see the CSV
# loading above), not 'pos'/'neg' — with the wrong key both sets are empty
# and precision() returns None.  `precision` here is the name imported from
# nltk.metrics.scores at the top of the file.
print(precision(refsets['Positive'], testsets['Positive']))
This is the line that raised the error:
# NOTE(review): besides the module-path question, the labels in test_ds are
# 'Positive' (see the check against the CSV above), so refsets['pos'] is
# presumably empty here — verify the label keys as well.
print(nltk.metrics.precision(refsets['pos'],testsets['pos']))
This also gives the same error.
So how do I resolve the error message — what did I do wrong?
I had the same issue as you, and here is my solution:
# NOTE(review): this relies on nltk re-exporting precision at the top level;
# the labels used as keys ('pos') should also be checked against the dataset,
# whose labels appear to be 'Positive' — confirm before relying on the result.
print(nltk.precision(refsets['pos'],testsets['pos']))
I noticed that the error was an AttributeError, showing that the call was resolving to nltk.translate.metrics, which certainly does not have the precision function you want. Instead, the precision function you want is located in nltk.metrics.scores. The two modules serve very different purposes.
My guess is that the interpreter is having a hard time resolving the nltk.metrics.scores module with the call you made.
You can refer to the NLTK documentation for more detail on the implementation of nltk.metrics.scores: https://www.nltk.org/api/nltk.metrics.scores.html
nltk.scores.precision should also work.
The technical posts on this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.