简体   繁体   中英

Python coding for opening and saving data to a file

I am having an issue getting the train function to work correctly in python. I can not modify the def function. I am at the point where I need to get the second file to read lines one at a time for PosList and i need to match the value of movieWordCount[z] in OpenPos. If the file is there, then I am good to incrment column 2 by one of t hat line (segmented by a space). If it is not, then I need the else to append it to the file end. It does not work. It does not append the values if it is missing and I am not sure if it will find the value if it is there. I have been stuck getting thsi to work for two days.

Here is my code segment I am working with:

with open("PosList") as OpenPos:
    lines = OpenPos.readlines()
    print lines
    if movieWordCount[z] in lines:
        print "found"

    #Now use tokenize to split it apart by space and set to new array for me to call column2
    else:
        print "not found"
        lines.append(movieWordCount[z] + " 1" + "\n")

Here is my full code:

#!/usr/bin/python

#Import Counter
import collections
from collections import Counter
#Was already here but pickle is used for data input and export
import math, os, pickle, re

class Bayes_Classifier:

 def __init__(self, trainDirectory = "movie_reviews/"):

    #If file listing exists skip to train
    if os.path.isfile('iFileList'):
        print "file found"
        self.train()
        #self.classify()

    #If file listing does not exist skip to train
    if not os.path.isfile('iFileList'):
        print "no file"
        newfile = 'iFileList'
        tempList = set()
        subDir = './movie_reviews'
        for filenames in os.listdir(subDir):
            my_sub_path = os.path.join(os.sep,subDir,filenames)
            tempList.add(filenames)
            self.save("filenames", "try3")
        f = []
        for fFileObj in os.walk("movie_reviews/"):
            f.extend(fFileObj)
            break
        pickle.dump(f, open( "save.p", "wb" ))
        self.save(f, "try4")

        with open(newfile, 'wb') as fi:
            pickle.dump(tempList, fi)
            #print tempList

        self.train()
        #self.classify()

 def train(self):    
    '''Trains the Naive Bayes Sentiment Classifier.'''
    print "File ready for training"
    #Open iFileList to use as input for opening movie files
    x = 0
    OpenIFileList = open('iFileList','r')
    print "iFileList now Open"
    #Loop through the file
    for line in OpenIFileList:
        #print "Ready to read lines"
        #print "reading line " + line
        if x > 4:
            if x % 2 == 0:
                #print line
                s = line
                if '-' in s:
                    comp = s.split("'")
                    #print comp[2]
                    print comp[1] #This is What you need for t he movie file
                    compValue1 = comp[1]
                    #Determine Positive/Negative.
                    #compType is the variable I am storing it to.
                    compType = compValue1.split("-",2)[1]
                    #print compType  #Prints that middle value like 5 or 1
                    #  This will do the work based on the value.
                    if compType == '5':
                    #    print "you have a five"  #Confirms the loop I am in.
                        #If file does not exists create it
                        if not os.path.exists('PosList'):
                            print "no file"
                            file('PosList', 'w').close()
                        #Open file that needs to be reviewed for word count
                        compValue2 = "movie_reviews/" + compValue1
                        print compValue2  #Prints the directory and file path
                        OpenMovieList = open(compValue2,'r')
                        for commentLine in OpenMovieList:
                            commentPositive = commentLine.split(" ")
                            commentPositiveCounter = Counter(commentPositive)
                            #print commentPositiveCounter # " Comment Pos goes here"
                            #if commentLine != '' or commentLine != ' ':
                            #Get first word, second word, ....
                            if commentLine and (not commentLine.isspace()):
                                movieWordCount = self.tokenize(commentLine)
                                y = len(movieWordCount) #determines length of string
                                print y
                                z = 0
                                #print movieWordCount[0]  # Shows the zero position in the file.
                                while z < y:
                                    print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id

                                    with open("PosList") as OpenPos:
                                        lines = OpenPos.readlines()
                                        print lines
                                        if movieWordCount[z] in lines:
                                            print "found"
                                        else:
                                            print "not found"
                                            lines.append(movieWordCount)


                                    z = z + 1

                        #Close the files
                        OpenMovieList.close()
                        OpenPos.close()


        x += 1
        #for line2 in OpenIFileList.readlines():
        #for line in open('myfile','r').readlines():
            #do_something(line)

    #Save results
    #Close the File List
    OpenIFileList.close()



 def loadFile(self, sFilename):
    '''Given a file name, return the contents of the file as a string.'''

    f = open(sFilename, "r")
    sTxt = f.read()
    f.close()
    return sTxt

 def save(self, dObj, sFilename):
    '''Given an object and a file name, write the object to the file using pickle.'''

    f = open(sFilename, "w")
    p = pickle.Pickler(f)
    p.dump(dObj)
    f.close()

 def load(self, sFilename):
    '''Given a file name, load and return the object stored in the file.'''

    f = open(sFilename, "r")
    u = pickle.Unpickler(f)
    dObj = u.load()
    f.close()
    return dObj

 def tokenize(self, sText): 
    '''Given a string of text sText, returns a list of the individual tokens that 
    occur in that string (in order).'''

    lTokens = []
    sToken = ""
    for c in sText:
        if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-':
            sToken += c
        else:
            if sToken != "":
                lTokens.append(sToken)
                sToken = ""
            if c.strip() != "":
                lTokens.append(str(c.strip()))

    if sToken != "":
        lTokens.append(sToken)

    return lTokens

To open a file for writing, you can use

with open('PosList', 'w') as Open_Pos

As you are using the with form, you do not need to close the file; Python will do that for you at the end of the with-block.

So assuming that the way you add data to the lines variable is correct, you could remove the superfluous code OpenMovieList.close() and OpenPos.close() , and append 2 lines to your code:

with open("PosList") as OpenPos:
    lines = OpenPos.readlines()
    print lines
    if movieWordCount[z] in lines:
        print "found"
    else:
        print "not found"
        lines.append(movieWordCount)
with open("PosList", "w") as OpenPos:
    OpenPos.write(lines)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM