Python 中的多重線性回歸?

[英]Multiple Linear Regression in Python?

我正在嘗試對虛擬數據建立多元線性回歸,但我不斷收到溢出錯誤。 假設這是一個虛擬數據。

col1 col2 target 
0.18 0.89 109.85
1.0  0.26 155.72
0.92 0.11 137.66
0.07 0.37 76.17
0.85 0.16 139.75
0.99 0.41 162.6
0.87 0.47 151.77

0.49 0.18
0.57 0.83
0.56 0.64
0.76 0.18

這是我為實現多特徵線性回歸而編寫的代碼。 有人能告訴我,我的線性回歸(LINEAR REGRESSION)實現是否正確嗎? 如果它是正確的,那麼為什麼我會不斷收到溢出錯誤?

import numpy as np

def data():
    """Build the training and test arrays from the module-level datasets.

    Reads the globals ``train_data`` and ``test_data`` (not defined in this
    snippet -- presumably row-oriented tables like the one shown above;
    verify against the caller).

    Returns:
        A tuple ``(x_train, y_train, x_test)`` where ``x_train`` is every
        column of ``train_data`` except the last, ``y_train`` is the last
        column, and ``x_test`` is ``test_data`` converted to an array.
    """
    # prepare data
    table = np.array(train_data)
    features, targets = table[:, :-1], table[:, -1]
    return features, targets, np.array(test_data)

def normalize(y):
    """Min-max scale *y* into the range [0, 1].

    Args:
        y: Array-like of numbers (a NumPy array or a plain sequence).

    Returns:
        A float NumPy array with the same shape as *y* in which the smallest
        input maps to 0.0 and the largest to 1.0.  If every element of *y*
        is equal, an array of zeros is returned.

    Raises:
        ValueError: If *y* is empty (there is no minimum or maximum).
    """
    y = np.asarray(y, dtype=float)
    low = y.min()
    span = y.max() - low
    if span == 0:
        # Constant input: dividing would produce 0/0 = nan for every entry.
        return np.zeros_like(y)
    return (y - low) / span

def linear_regression(x_train, y_train, epochs=300, learning_rate=0.1):
    """Fit a multi-feature linear model with per-sample gradient descent.

    The targets are min-max scaled to [0, 1] before fitting, so the returned
    parameters predict the *scaled* target, not the original units.

    Each sample contributes one update that moves the parameters against the
    gradient of its squared error::

        error      = weights . x + intercept - y
        weights   -= learning_rate * error * x
        intercept -= learning_rate * error

    Without the learning rate and the minus sign the parameters grow on
    every step, which is what produces floating-point overflow.

    Args:
        x_train: 2-D array-like of shape (rows, columns) holding the features.
        y_train: 1-D array-like with one target per row of *x_train*.
        epochs: Number of full passes over the training rows.
        learning_rate: Step size of each update.  The per-sample update is
            stable while ``learning_rate * (x . x + 1) < 2`` for every row,
            so lower it (or rescale the features) for large feature values.

    Returns:
        A tuple ``(weights, intercept)``: a float array with one coefficient
        per feature column, and a float bias term.

    Raises:
        ValueError: If *y_train* is empty or *x_train* is not 2-D.
    """
    x_train = np.asarray(x_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    # Min-max scale the targets to [0, 1]; a constant target maps to zeros
    # instead of dividing by zero.
    y_low = y_train.min()
    y_span = y_train.max() - y_low
    if y_span == 0:
        y_scaled = np.zeros_like(y_train)
    else:
        y_scaled = (y_train - y_low) / y_span

    _, columns = x_train.shape
    weights = np.zeros(columns)
    intercept = 0.0
    for _ in range(epochs):
        for features, target in zip(x_train, y_scaled):
            # Residual of the current model on this one sample.
            error = weights.dot(features) + intercept - target
            weights -= learning_rate * error * features
            intercept -= learning_rate * error
    return weights, intercept

def predict(x_test, weights, intercept):
    """Evaluate the linear model on every row of *x_test*.

    Args:
        x_test: Iterable of feature rows.
        weights: Array of coefficients exposing ``.dot``.
        intercept: Bias term added to each dot product.

    Returns:
        A list with ``weights.dot(row) + intercept`` for each row, in order.
    """
    return [weights.dot(sample) + intercept for sample in x_test]

def main():
    """Train on the sample data and print one prediction per test row."""
    x_train, y_train, x_test = data()
    weights, intercept = linear_regression(x_train, y_train, epochs=300)
    y_pred = predict(x_test, weights, intercept)
    # The loop previously had no body, which is a syntax error; emit each
    # prediction on its own line.
    for prediction in y_pred:
        print(prediction)

# Run the demo only when executed as a script, not when imported.
if __name__=='__main__':
    main()



/srv/conda/lib/python3.6/site-packages/ipykernel_launcher.py:25: RuntimeWarning: overflow encountered in add

這是另一種方法:一個使用您的數據的 Python 3D 曲面擬合程序,包含 3D 散點圖、3D 曲面圖和等高線圖。 您應該能夠在三維空間中用滑鼠點擊並拖動來旋轉 3D 圖,以進行目視檢查。 這裡擬合的曲面是一個平面;由於程序直接給出了 RMSE 和 R 平方,而且您可以直接看到擬合出的曲面,因此不需要拆分測試集和訓練集, 只需用全部數據重新擬合即可。




import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import  Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt

# Figure size in pixels. The plot functions divide these by 100 and create
# the figure with dpi=100, so they correspond to an 8 x 6 inch figure.
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels

# Number of contour levels passed to contour() in ContourPlot
numberOfContourLines = 16

# Sample observations, one entry per row of the table:
# x, y, z = col1, col2, target
xData = numpy.array([0.18, 1.0, 0.92, 0.07, 0.85, 0.99, 0.87])
yData = numpy.array([0.89, 0.26, 0.11, 0.37, 0.16, 0.41, 0.47])
zData = numpy.array([109.85, 155.72, 137.66, 76.17, 139.75, 162.6, 151.77])

def func(data, a, b, c):
    """Evaluate the plane ``a*x + b*y + c``.

    Args:
        data: Indexable whose item 0 holds the x values and item 1 the y
            values; any further items are ignored.
        a: Coefficient applied to x.
        b: Coefficient applied to y.
        c: Constant offset.

    Returns:
        The plane value(s), with whatever shape x and y broadcast to.
    """
    x_vals, y_vals = data[0], data[1]
    plane = (a * x_vals) + (y_vals * b)
    return plane + c

def SurfacePlot(func, data, fittedParameters):
    """Show the fitted surface together with the raw data in a 3D figure.

    Args:
        func: Model callable, invoked as
            ``func(numpy.array([X, Y]), *fittedParameters)`` on a mesh grid.
        data: Indexable holding the x, y and z samples at positions 0, 1, 2.
        fittedParameters: Model parameters unpacked into *func*.

    The figure is displayed with ``plt.show()`` and then closed.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    # Create the 3D axes through the figure so they are attached to it;
    # recent Matplotlib no longer adds a bare Axes3D(f) to the figure.
    axes = f.add_subplot(111, projection='3d')

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    # 20 x 20 grid spanning the observed x/y range, used to draw the model
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)

    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)

    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    # Display before closing; otherwise the figure is discarded unseen.
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems

def ContourPlot(func, data, fittedParameters):
    """Show labelled contour lines of the fitted model over the x/y samples.

    Args:
        func: Model callable, invoked as
            ``func(numpy.array([X, Y]), *fittedParameters)`` on a mesh grid.
        data: Indexable holding the x and y samples at positions 0 and 1.
        fittedParameters: Model parameters unpacked into *func*.

    The figure is displayed with ``plt.show()`` and then closed.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_data = data[0]
    y_data = data[1]

    # 20 x 20 grid spanning the observed x/y range, used to draw the model
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)

    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    # Draw on the axes created above rather than on pyplot's implicit
    # "current axes", so the target of the contours is explicit.
    CS = axes.contour(X, Y, Z, numberOfContourLines, colors='k')
    axes.clabel(CS, inline=1, fontsize=10) # labels for contours

    # Display before closing; otherwise the figure is discarded unseen.
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems

def ScatterPlot(data):
    """Show the raw samples as an interactive 3D scatter plot.

    Args:
        data: Indexable holding the x, y and z samples at positions 0, 1, 2.

    The figure is displayed with ``plt.show()`` and then closed.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    # Create the 3D axes through the figure so they are attached to it;
    # recent Matplotlib no longer adds a bare Axes3D(f) to the figure.
    axes = f.add_subplot(111, projection='3d')
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    # Display before closing; otherwise the figure is discarded unseen.
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems

if __name__ == "__main__":

    data = [xData, yData, zData]

    initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example

    # least-squares fit of the plane z = a*x + b*y + c with scipy's curve_fit();
    # the model is linear in its parameters, curve_fit is simply the solver used
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    # raw data first, then the fitted plane in 3D and as contour lines
    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted prameters', fittedParameters)

    # func only reads data[0] and data[1], so passing the full list is fine
    modelPredictions = func(data, *fittedParameters)

    # signed residuals (prediction minus observation), despite the name
    absError = modelPredictions - zData

    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    # fraction of the target variance explained by the fit
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)


