[英]How do I fix "ValueError: x and y must be the same size"?
import numpy as np
import pandas as pd
import sklearn
from sklearn import linear_model
from sklearn.utils import shuffle
import matplotlib.pyplot as pyplot
import pickle
from matplotlib import style
# Question's script: fit a linear regression on student grades, pickle the
# best-scoring model, then scatter-plot one column against the final grade.
# NOTE(review): indentation was lost in this listing; the statements following
# the `for`, `if` and `with` headers are presumably their bodies.

# Load the dataset; the file is read as semicolon-separated.
data = pd.read_csv("student-mat.csv", sep=";")
# NOTE(review): "G3" is listed twice in this selection, so the resulting frame
# has two columns named "G3" and data["G3"] selects both of them. This is the
# likely cause of the "x and y must be the same size" error raised by the
# scatter call near the end of this script (the second entry was presumably
# meant to be "G2" -- confirm against the tutorial being followed).
data = data[["G1", "G3", "G3", "studytime", "failures", "absences", "freetime"]]
# Name of the label column to predict.
predict = "G3"
# Features: every selected column except the label (second argument 1 = column axis).
X = np.array(data.drop([predict], 1))
# Labels: the values selected by the label column name.
Y = np.array(data[predict])
# Hold out 10% of the rows for testing.
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, Y, test_size = 0.1)
# Best test score seen so far.
best = 0
# Re-split and re-train repeatedly, saving the model whenever the score improves.
for _ in range(3000):
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, Y, test_size=0.1)
linear = linear_model.LinearRegression()
linear.fit(x_train, y_train)
acc = linear.score(x_test, y_test)
print(acc)
if acc > best:
best = acc
with open("studentmodel.pickle", "wb") as f:
pickle.dump(linear, f)
# Reload the saved model from disk and print its fitted parameters.
pickle_in = open("studentmodel.pickle", "rb")
linear = pickle.load(pickle_in)
print('Co: \n', linear.coef_)
print('Intercept: \n', linear.intercept_)
# Print each prediction next to its input row and the actual label, using the
# test split left over from the last loop iteration.
predictions = linear.predict(x_test)
for x in range(len(predictions)):
print(predictions[x], x_test[x], y_test[x])
# Column plotted on the x-axis.
p = 'G1'
style.use("ggplot")
# This is the call the question reports as failing: data[p] is one column,
# while data["G3"] selects the two duplicated "G3" columns.
pyplot.scatter(data[p],data["G3"])
pyplot.xlabel(p)
pyplot.ylabel("Final Grade")
pyplot.show()
错误:raise ValueError("x and y must be the same size")。谁能向我解释一下我做错了什么吗?我是编程新手,正在跟着教程学习。最后 5 行之前的所有代码都运行正常,但当我尝试绘制图表时,就出现了这个错误:raise ValueError("x and y must be the same size")。只有把代码写成下面这样,才能画出图表:
# Workaround from the question: plot the "G3" selection against itself so both
# scatter arguments have the same size (every point then lies on the diagonal).
style.use("ggplot")
final_grade = data["G3"]
pyplot.scatter(final_grade, final_grade)
pyplot.xlabel(p)
pyplot.ylabel("Final Grade")
pyplot.show()
但这样画出来的图上只有一条直线。
感谢您的任何帮助!
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
import pickle
from matplotlib import style
# Train a linear regression on the student dataset, keep the best-scoring
# model on disk, then plot the first-period grade against the final grade.

# The file is semicolon-separated (the question's working load uses sep=";");
# with the default comma separator the column selection below would fail.
data = pd.read_csv("student-mat.csv", sep=";")

# The question listed "G3" twice here, which made data["G3"] select two
# columns and broke the scatter call; each column must appear exactly once.
data = data[["G1", "G2", "G3", "studytime", "failures", "absences", "freetime"]]
predict = "G3"
print(data.head())

# Features are every selected column except the label. The axis is named by
# keyword because the positional form drop(labels, 1) was removed in pandas 2.0.
X = np.array(data.drop(columns=[predict]))
print(X)
y = np.array(data[predict])

# Retrain on fresh random 90/10 splits and persist the model whenever its
# test score beats the best one seen so far.
best = 0
for _ in range(3000):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
    linear = LinearRegression()
    linear.fit(X_train, y_train)
    acc = linear.score(X_test, y_test)
    print(acc)
    if acc > best:
        best = acc
        with open("studentmodel.pickle", "wb") as f:
            pickle.dump(linear, f)

# Reload the best model; the context manager closes the file handle.
with open("studentmodel.pickle", "rb") as pickle_in:
    linear = pickle.load(pickle_in)

print('Co: \n', linear.coef_)
print('Intercept: \n', linear.intercept_)

# NOTE: X_test / y_test come from the final split of the loop, not from the
# split the saved model was scored on, so some of these rows may have been
# part of the saved model's training data.
predictions = linear.predict(X_test)
for predicted, features, actual in zip(predictions, X_test, y_test):
    print(predicted, features, actual)

# Scatter one feature column against the final grade. Both selections are
# single columns of equal length, so scatter accepts them.
p = 'G1'
style.use("ggplot")
plt.scatter(data[p], data["G3"])
plt.xlabel(p)
plt.ylabel("Final Grade")
plt.show()
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.