简体   繁体   中英

Keep getting this error, local variable not referenced before assignment (python)

I get there are many solutions on here, but I have literally gone through them all and still can't fix my problem. Im trying to print the rSquared for each graph but I am getting the error 'the local variable "m" referenced before assignment. Please help! I know the spacing is off on here. I have all of that right in my runner. Thanks!

def readData(fileName):


    hsGPA = []   #High School GPA
    mathSAT = []  #Math SAT scores
    crSAT = []  #Verbal SAT scores
    collegeGPA = []  #College GPA
    compGPA=[]  #ComputerScience GPA
    FullList=[] 
    inputFile = open(fileName, 'r', encoding = 'utf-8')

    for line in inputFile:
        FullList=line.split(',')
        hsGPA.append(float(FullList[0]))
        mathSAT.append(int(FullList[1]))
        crSAT.append(int(FullList[2]))
        collegeGPA.append(float(FullList[3]))
        compGPA.append(float(FullList[4]))
    return hsGPA, mathSAT, crSAT, collegeGPA, compGPA



def plotData(hsGPA, mathSAT, crSAT, collegeGPA,compGPA):

    GPA1 = []   #High School GPA
    Score1 = []  #Math SAT scores
    Score2= []  #Verbal SAT scores
    GPA2 = []  #College GPA
    GPA3=[]    #ComputerScience GPA 

    hsGPA, mathGPA, crSAT, collegeGPA,compGPA = readData('satFINAL.txt')
    pyplot.figure(1)

    pyplot.subplot(5,1,1)
    for line in range(len(hsGPA)):
        GPA1.append(line)
    pyplot.plot(GPA1,hsGPA)

    pyplot.subplot(5,1,2)
    for line in range(len(mathSAT)):
        Score1.append(line)
    pyplot.plot(Score1,mathSAT)

    pyplot.subplot(5,1,3)
    for line in range(len(crSAT)):
        Score2.append(line)       
    pyplot.plot(Score2,crSAT)

    pyplot.subplot(5,1,4)
    for line in range(len(collegeGPA)):
        GPA2.append(line)
    pyplot.plot(GPA2,collegeGPA)

    pyplot.subplot(5,1,5)
    for line in range(len(compGPA)):
        GPA3.append(line)
    pyplot.plot(GPA3,compGPA)

    pyplot.show()

def LinearRegression(xList, yList):
    '''
This function finds the constants in the y = mx+b, or linear regression
forumula

xList - a list of the x values
yList - a list of the y values
m - the slope f the line
b - where the line intercepts the y axis
'''

    n = len(xList)
    sumX = 0
    sumXX = 0
    sumXY = 0
    sumY = 0

    for index in range(n):
        sumX += xList[index]
        sumXY += xList[index] * yList[index]
        sumXX += xList[index]**2
        sumY += yList[index]
        #the components needed to find m and b

    m = (n*(sumXY - (sumX*sumY)))/(n*(sumXX - (sumX**2)))
    b = (sumY - (m*sumX))/n
    #actually implements formula

    return m, b


def plotRegression(x,y, xLabel, yLabel):


    pyplot.scatter(x,y)
    m,b = LinearRegression(x,y)
    minX = min(x)
    maxX = max(x)
    pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
    pyplot.xlabel(xLabel)
    pyplot.ylabel(yLabel)
    pyplot.show()




def rSquared(x,y):

    n = len(x)
    R=0
    sumS=0
    sumT=0
    sumY=0

    for index in range(n):
        a=(y[index]-((m*x[index])+b))**2
        sumS = sumS+a


    for index in range(len(y)):
        sumY = sumY= y[index]
        MeanY= sumY/(len(y))
        e=(y[index]-MeanY)**2
        sumT = sumT+e




    m,b= LinearRegression(x, y)

    RG=1-(sumS/sumT)



def main():
    data = readData('satFINAL.txt')
    print(data)
    plotData(*data)
    hsGPA, mathSAT, crSAT, collegeGPA,compGPA = data
    # added ScoreT calculation here
    ScoreT = [sum(x) for x in zip(mathSAT, crSAT)]
    plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
    plotRegression(mathSAT,collegeGPA, 'mathSAT' , 'collegeGPA')
    plotRegression(crSAT,collegeGPA, 'crSAT' , 'collegeGPA')
    plotRegression(ScoreT,collegeGPA, 'Math and CR SAT' , 'collegeGPA')
    plotRegression(mathSAT,crSAT, 'mathSAT', 'CR SAT')
    plotRegression(mathSAT,compGPA, 'mathSAT', 'CompGPA')
    plotRegression(hsGPA,compGPA, 'HsGPA', 'CompGPA')
    plotRegression(ScoreT,compGPA, 'SATscore ', 'CompGPA')
    print(rSquared(hsGPA,collegeGPA))





main()

It's very hard to tell - your indentation is messed up, and you've got an awful lot of code, and you haven't actually given the error trace (which would actually identify the line the error is on!) - but it looks like, in the definition of rSquared , you call a=(y[index]-((m*x[index])+b))**2 before assigning a value to m .


Edit : I went through and refactored a lot of your repeated code into loops; it is hopefully more readable now. I also cross-checked the linear_regression function against scipy.stats.linregress and got identical results; I have not verified r_squared , so you should check that.

import matplotlib.pyplot as plt

# column indices
HS, MATH, VERBAL, COLLEGE, COMPSCI = range(5)
# column labels
LABELS = ["High school GPA", "Math SAT", "Verbal SAT", "College GPA", "CompSci GPA"]
# column data types
DTYPES = [ float,             int,        int,          float,         float       ]

def read_columns(fname, encoding="utf-8", separator=",", dtypes=None):
    """
    Return columns of data from a file

    If dtypes is specified, convert each column to the given data type
    """
    # read rows from data file
    with open(fname, encoding=encoding) as inf:
        rows = [line.split(separator) for line in inf]
    # transpose to columns
    cols = zip(*rows)
    # apply data types
    if dtypes is not None:
        cols = [[dtype(cell) for cell in col] for dtype,col in zip(dtypes,cols)]
    return cols

def linear_regression(xs, ys):
    """
    Return the linear regression constants m,b
      in the least-squares best fit to y = m*x+b
    """
    # if you have SciPy you can use scipy.stats.linregress instead
    n = len(xs)
    xsum  = sum(xs)
    ysum  = sum(ys)
    xxsum = sum(x*x for x in xs)
    xysum = sum(x*y for x,y in zip(xs, ys))
    m = (n * xysum - xsum * ysum) / (n * xxsum - xsum * xsum)
    b = (ysum - m * xsum) / n
    return m, b    

def r_squared(xs, ys):
    m, b = linear_regression(xs, ys)
    ysum, n = sum(ys), len(ys)
    ymean = ysum / n
    ssum = sum((y - (m * x + b))**2 for x,y in zip(xs, ys))
    tsum = sum((y - ymean)**2 for y in ys)
    return 1 - ssum / tsum

def plot_regression(xs, xlabel, ys, ylabel):
    m, b = linear_regression(xs, ys)
    min_, max_ = min(xs), max(xs)
    plt.scatter(xs, ys)
    plt.plot([min_, max_], [m * min_ + b, m * max_ + b], "r")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

def main():
    # read data
    scores = read_columns("satFINAL.txt", dtypes=DTYPES)
    # add composite math-and-verbal score
    MATH_VERBAL = 5
    LABELS.append("Math+Verbal SAT")
    DTYPES.append(int)
    scores.append([math+verbal for math,verbal in zip(scores[MATH], scores[VERBAL])])

    # do raw score plots
    plt.figure(1)
    num_figs = len(LABELS)
    # draw subplots
    for fig, column, nums in zip(range(num_figs), LABELS, scores):
        plt.subplot(num_figs, 1, fig+1)
        plt.plot(range(len(nums)), nums)
        plt.xlabel(LABELS[fig])
    # display results
    plt.show()

    # do regression plots
    regressions = [
        (HS,          COLLEGE),
        (MATH,        COLLEGE),
        (VERBAL,      COLLEGE),
        (MATH_VERBAL, COLLEGE),
        (MATH,        VERBAL),
        (MATH,        COMPSCI),
        (HS,          COMPSCI),
        (MATH_VERBAL, COMPSCI)
    ]
    for x,y in regressions:
        print("r**2 for {} and {}: {}".format(LABELS[x], LABELS[y], r_squared(scores[x], scores[y])))
        plot_regression(scores[x], LABELS[x], scores[y], LABELS[y])

if __name__=="__main__":
    main()

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM