如何修复 ValueError: x 和 y 必须是相同的大小？

Question

import numpy as np
import pandas as pd
import sklearn
from sklearn import linear_model
from sklearn.utils import shuffle
import matplotlib.pyplot as pyplot
import pickle
from matplotlib import style

# Script from the question: load the student data, repeatedly fit a linear
# regression on random train/test splits, pickle the best-scoring model, print
# its predictions and finally scatter-plot one feature against the final grade.
data = pd.read_csv("student-mat.csv", sep=";")


# NOTE(review): "G3" is listed twice in this selection (the answer below uses
# "G2" in the second position), so the resulting frame carries two columns
# that share the label "G3".
data = data[["G1", "G3", "G3", "studytime", "failures", "absences", "freetime"]]

predict = "G3"

# The second positional argument (1) is the axis, i.e. drop by column label.
X = np.array(data.drop([predict], 1))
# NOTE(review): with the duplicated label, data[predict] presumably selects
# both "G3" columns, making Y two-dimensional rather than a 1-D vector.
Y = np.array(data[predict])
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, Y, test_size = 0.1)

# Track the highest score seen so far; a model is saved only when it beats it.
best = 0
for _ in range(3000):
    # Fresh random 90/10 split on every iteration.
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, Y, test_size=0.1)

    linear = linear_model.LinearRegression()

    linear.fit(x_train, y_train)

    acc = linear.score(x_test, y_test)
    print(acc)

    if acc > best:
        best = acc
        # Overwrite the pickle with the new best model.
        with open("studentmodel.pickle", "wb") as f:
            pickle.dump(linear, f)

# Reload the best model from disk. The file handle opened here is never
# closed explicitly.
pickle_in = open("studentmodel.pickle", "rb")
linear = pickle.load(pickle_in)

print('Co:  \n', linear.coef_)
print('Intercept:  \n', linear.intercept_)

# x_test / y_test here are the ones left over from the last loop iteration.
predictions = linear.predict(x_test)
for x in range(len(predictions)):
    print(predictions[x], x_test[x], y_test[x])

p = 'G1'
style.use("ggplot")
# This is the call that raises the ValueError from the question: data[p] is a
# single column, while data["G3"] presumably returns both duplicated "G3"
# columns, so the x and y inputs do not have the same size.
pyplot.scatter(data[p],data["G3"])
pyplot.xlabel(p)
pyplot.ylabel("Final Grade")
pyplot.show()

错误：raise ValueError("x and y must be the same size")（x 和 y 必须是相同的大小）。谁能向我解释一下我做错了什么吗？我是编程新手，正在跟着教程学习。直到最后 5 行之前，所有内容都运行正常；但当我尝试绘制图表时，就出现了上面这个错误。只有把代码写成下面这样，才能画出图表：

# Workaround from the question: the same "G3" selection is passed as both x
# and y, so the sizes match and every point lies on the line y = x, which is
# why the plot shows only a straight line. The x-axis label still comes from
# p, which the script above set to 'G1'.
style.use("ggplot")
pyplot.scatter(data["G3"],data["G3"])
pyplot.xlabel(p)
pyplot.ylabel("Final Grade")
pyplot.show()

但这样画出来的图表上只有一条直线。

感谢您的任何帮助！

Answer 1

我已经使用这些数据运行了以下代码。

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
import pickle
from matplotlib import style

# Corrected script: load the student data, fit a linear regression on many
# random train/test splits, keep the best-scoring model on disk, print its
# predictions and scatter-plot one feature against the final grade.

# NOTE(review): the question loads this same file with sep=";" -- pass that
# argument here as well if your copy of the CSV is semicolon-delimited.
data = pd.read_csv("student-mat.csv")

# The question's column list named "G3" twice; the second entry is "G2" here.
# With a duplicated label, data["G3"] selects two columns instead of one,
# which is what makes the scatter call fail with
# "x and y must be the same size".
data = data[["G1", "G2", "G3", "studytime", "failures", "absences", "freetime"]]

predict = "G3"
print(data.head())
# Use the columns= keyword: passing the axis positionally, as in
# data.drop([predict], 1), is rejected by pandas 2.0 and later.
X = np.array(data.drop(columns=[predict]))
print(X)
y = np.array(data[predict])

# Start below any reachable score so the pickle is always written at least
# once; with a starting value of 0 nothing is saved when no split scores
# above zero, and the load below would then fail or read a stale file.
best = float("-inf")
for _ in range(3000):
    # Fresh random 90/10 split on every iteration.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

    linear = LinearRegression()
    linear.fit(X_train, y_train)

    acc = linear.score(X_test, y_test)
    print(acc)

    if acc > best:
        best = acc
        with open("studentmodel.pickle", "wb") as f:
            pickle.dump(linear, f)

# Reload the best model; the context manager closes the file handle.
with open("studentmodel.pickle", "rb") as pickle_in:
    linear = pickle.load(pickle_in)

print('Co:  \n', linear.coef_)
print('Intercept:  \n', linear.intercept_)

# X_test / y_test come from the final loop iteration, which is not
# necessarily the split on which the saved model achieved its best score.
predictions = linear.predict(X_test)
for predicted, features, actual in zip(predictions, X_test, y_test):
    print(predicted, features, actual)

p = 'G1'
style.use("ggplot")
# data[p] and data["G3"] are now both single columns of equal length.
plt.scatter(data[p], data["G3"])
plt.xlabel(p)
plt.ylabel("Final Grade")
plt.show()

这将产生以下图像。

如何修复 ValueError: x 和 y 必须是相同的大小？

问题描述

1 个解决方案

解决方案1
0 已采纳 2021-07-24 20:26:12

如何修复 ValueError: x 和 y 必须是相同的大小？

问题描述

1 个解决方案

解决方案1 0 已采纳 2021-07-24 20:26:12

解决方案1
0 已采纳 2021-07-24 20:26:12