I'm working on an n-gram language model. My specific question is fairly straight forward, as is my method compute_bigram
is not successfully calling another method, compute_unigram
. I want the former to call the latter.
If I instantiate an object of type, Model, comment out the line that calls compute_unigram, then call compute_unigram, followed by compute_bigram, no errors are raised.
However, if I only call compute_bigram, which should be calling compute_unigram, it raises a None type error
(meaning the method was not called.)
class DataLoader():
def __init__(self,data,train_ratio):
import operator
self.all_sents = [sent for sent in data.sents()]
num_ex = len(self.all_sents)
num_train = int(train_ratio * num_ex)
# processing vocab, adding special tokens to vocabulary
self.full_vocab = [word.lower() for word in set(data.words())]
self.vocab_count = {word:0 for word in self.full_vocab}
for word in data.words():
self.vocab_count[word.lower()] +=1
self.sorted_count = sorted(self.vocab_count.items(),key=operator.itemgetter(1),reverse=True)
self.refined_vocab = [word for (word,count) in self.sorted_count if count > 50]
self.refined_vocab.append('<ukn>')
self.refined_vocab.append('<s>')
self.refined_vocab.append('</s>')
# process sentences, replace unknown characters with <UKN>, add <S> and </S> to start/stop of sentences as appropriate
def process_sent(sent):
sent = [word.lower() for word in sent]
sent.insert(0,'<s>')
sent.append('</s>')
new_sent = [word if word in self.refined_vocab else '<ukn>' for word in sent]
return new_sent
self.sents = [process_sent(sent) for sent in self.all_sents]
# create training and test sets as necessary
self.train = [word.lower() for sent in self.sents[:num_train] for word in sent]
self.test = [word.lower() for sent in self.sents[num_train:] for word in sent]
return None
class Model(DataLoader):
def __init__(self,data,train_ratio,n,lambda_):
self.n = n
self.lambda_ = lambda_
super().__init__(data,train_ratio)
self.root = {word:0 for word in self.refined_vocab}
def compute_unigram(self):
self.unigrams = {word:0 for word in self.refined_vocab}
for i in self.train:
self.unigrams[i] +=1
word_count = 0
for w1,count in self.unigrams.items():
word_count += count
for w1, count in self.unigrams.items():
self.unigrams[w1] = count/word_count
return None
def compute_bigram(self):
import copy
self.unigrams = self.compute_unigram()
self.bigrams = {word:copy.deepcopy(self.root) for word in self.refined_vocab}
for idx, i in enumerate(self.train):
try:
pair = (i, self.train[idx+1])
self.bigrams[pair[0]][pair[1]] +=1
except:
break
for w1,w2_dict in self.bigrams.items():
word_count = 0
for w2, count in w2_dict.items():
word_count += count
for w2, count in w2_dict.items():
total_mass = self.lambda_ * len(self.bigrams[w1].values())
self.bigrams[w1][w2] = count/word_count + (total_mass * self.unigrams[w2])
return None
Can anyone explain what I should be doing to enable one method to successfully call the other?
self.unigrams = self.compute_unigram()
cleans self.unigrams
, as compute_unigram()
ends with return None
.
Remove the self.unigrams =
part in that call.
If I instantiate an object of type, Model, comment out the line that calls compute_unigram, then call compute_unigram, followed by compute_bigram, no errors are raised.
Yes, when you commented that line and called the method manually, the self.unigrams = None
step did not happen.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.