简体   繁体   English

Python 中的多重线性回归?

[英]Multiple Linear Regression in Python?

I am trying to build a multiple linear regression on a dummy data and I keep getting overflow error.我正在尝试对虚拟数据建立多元线性回归,但我不断收到溢出错误。 Assume this as a dummy data.假设这是一个虚拟数据。

print(x_train)
col1 col2 target 
0.18 0.89 109.85
1.0  0.26 155.72
0.92 0.11 137.66
0.07 0.37 76.17
0.85 0.16 139.75
0.99 0.41 162.6
0.87 0.47 151.77

print(x_test)
0.49 0.18
0.57 0.83
0.56 0.64
0.76 0.18

This is the code I wrote for implementation of linear regression for multiple features.这是我为实现多个特征的线性回归而编写的代码。 Can anyone let me know if my implementation of LINEAR REGRESSION is correct?任何人都可以让我知道我的 LINEAR REGRESSION 实现是否正确? If it's correct then why am I keep getting overflow error.如果它是正确的,那么为什么我不断收到溢出错误。

import numpy as np

def data():
    # prepare data
    x_train = np.array(train_data)[:, :-1]
    y_train = np.array(train_data)[:, -1]
    x_test = np.array(test_data)
    return x_train, y_train, x_test

def normalize(y):
    return (y - y.min()) / (y.max() - y.min()) 

def linear_regression(x_train, y_train, epochs=300):
    y_train = normalize(y_train)
    rows, columns = x_train.shape
    weights = np.zeros((columns))
    intercept = 0
    for x in range(epochs):
        for i in range(len(x_train)):
            prev_weights = weights
            weights += intercept + prev_weights * x_train[i] - y_train[i]
            intercept += (intercept+(prev_weights*x_train[i])-y_train[i]).dot(x_train[i])
    return weights, intercept

def predict(x_test, weights, intercept):
    y_pred = []
    for i in range(len(x_test)):
        y_pred.append(weights.dot(x_test[i]) + intercept)
    return y_pred

def main():
    x_train, y_train, x_test = data()
    weights, intercept = linear_regression(x_train, y_train, epochs=300)
    y_pred = predict(x_test, weights, intercept)
    for i in y_pred:
        print(str(i))

if __name__=='__main__':
    main()

Results:结果:

-inf
-inf
-inf
-inf

/srv/conda/lib/python3.6/site-packages/ipykernel_launcher.py:25: RuntimeWarning: overflow encountered in add

Here is a different approach, a Python 3D surface fitter using your data with 3D scatter plot, 3D surface plot, and contour plot. Here is a different approach, a Python 3D surface fitter using your data with 3D scatter plot, 3D surface plot, and contour plot. You should be able to click-drag and rotate the 3D plots in 3-space for visual inspection.您应该能够在 3 空间中单击并旋转 3D 图以进行目视检查。 Here the fitted surface is a flat plane, and there is no need for test and train split as the RMSE and R-squared are given directly and you can see the surface.这里拟合的曲面是一个平面,不需要进行测试和训练拆分,因为直接给出了 RMSE 和 R 平方,您可以看到曲面。 Just re-fit with all data.只需重新拟合所有数据。

分散

表面

轮廓

import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import  Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt

graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels

# 3D contour plot lines
numberOfContourLines = 16

# x, y, z = col1, col2, target
xData = numpy.array([0.18, 1.0, 0.92, 0.07, 0.85, 0.99, 0.87])
yData = numpy.array([0.89, 0.26, 0.11, 0.37, 0.16, 0.41, 0.47])
zData = numpy.array([109.85, 155.72, 137.66, 76.17, 139.75, 162.6, 151.77])


def func(data, a, b, c):
    x = data[0]
    y = data[1]
    return (a * x) + (y * b) + c


def SurfacePlot(func, data, fittedParameters):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    matplotlib.pyplot.grid(True)
    axes = Axes3D(f)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)

    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)

    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems


def ContourPlot(func, data, fittedParameters):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)

    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
    matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems


def ScatterPlot(data):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    matplotlib.pyplot.grid(True)
    axes = Axes3D(f)
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems



if __name__ == "__main__":

    data = [xData, yData, zData]

    initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example

    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted prameters', fittedParameters)

    modelPredictions = func(data, *fittedParameters) 

    absError = modelPredictions - zData

    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM