I need to read a text file, strip the unnecessary punctuation, lowercase the words and use binary search tree function to make a word binary search tree that consists of the words in the file.
We are asked to count the frequency of recurring words and asked for a total word count and total unique word count.
So far I've got the punctuation resolved, file read done, lowercase done, binary search tree basically done and I just need to figure out how to implement the "frequency" counter in the code.
My code is as follows:
class BSearchTree :
class _Node :
def __init__(self, word, left = None, right = None) :
self._word = word
self._count = 0
self._left = left
self._right = right
def __init__(self) :
self._root = None
self._wordc = 0
self._each = 0
def isEmpty(self) :
return self._root == None
def search(self, word) :
probe = self._root
while (probe != None) :
if word == probe._word :
return probe
if word < probe._value :
probe = probe._left
else :
probe = probe._right
return None
def insert(self, word) :
if self.isEmpty() :
self._root = self._Node(word)
self._root._freq += 1 <- is this correct?
return
parent = None #to keep track of parent
#we need above information to adjust
#link of parent of new node later
probe = self._root
while (probe != None) :
if word < probe._word : # go to left tree
parent = probe # before we go to child, save parent
probe = probe._left
elif word > probe._word : # go to right tree
parent = probe # before we go to child, save parent
probe = probe._right
if (word < parent._word) : #new value will be new left child
parent._left = self._Node(word)
else : #new value will be new right child
parent._right = self._Node(word)
cause formatting is killing me, this is the latter part of it.
class NotPresent(Exception) :
pass
def main():
t=BST()
file = open("sample.txt")
line = file.readline()
file.close()
#for word in line:
# t.insert(word)
# Line above crashes program because there are too many
# words to add. Lines on bottom tests BST class
t.insert('all')
t.insert('high')
t.insert('fly')
t.insert('can')
t.insert('boars')
#t.insert('all') <- how do i handle duplicates by making
t.inOrder() #extras add to the nodes frequency?
Thank you for helping/trying to help!
Firstly, it's better to initialize a Node
's _freq
by 1 than doing that in in BST
's insert()
(1 more: In python coding convention, white spaces in writing default argument values are not recommended.)
def __init__(self, word, left=None, right=None) :
self._word = word
self._freq = 1
self._left = left
self._right = right
and just add the last 3 lines:
probe = self._root
while (probe != None) :
if word < probe._word : # go to left tree
parent = probe # before we go to child, save parent
probe = probe._left
elif word > probe._word : # go to right tree
parent = probe # before we go to child, save parent
probe = probe._right
else:
probe._freq += 1
return
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.