[英]Tensorflow Linear Regression predictions returning [nan]
我正在嘗試使用 TensorFlow 創建我的第一個線性回歸模型(不借助 Estimator),但在每次迭代中,cost 的值都只顯示為 NaN。我想是我哪裡做得不對,卻一直無法鎖定問題所在。有人可以幫我解決這個問題嗎?
我正在使用CA住房數據集
# Common imports
import math
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn import metrics
# Download the California housing training set and parse it into a DataFrame.
california_housing_dataframe = pd.read_csv(
    "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv",
    sep=",",
)
我要預測的是 median_house_value 列
# Columns 0-7 are the input features; column 8 is the regression target.
data_X, data_y = (
    california_housing_dataframe.iloc[:, :8],
    california_housing_dataframe.iloc[:, 8],
)

# Preview the first rows of each piece so the column split can be checked by eye.
print('Features (X):\n', data_X.head(), '\n')
print('Target (y):\n', data_y.head(), '\n')
創建訓練集和驗證集
from sklearn.model_selection import train_test_split
# Split features and target in ONE call so the same shuffled row selection is
# applied to both. Two separate calls only stay aligned as long as both repeat
# exactly the same random_state, which is easy to break by accident.
# Return order: X_train, X_validate, y_train, y_validate.
data_X_train, data_X_validate, data_y_train, data_y_validate = train_test_split(
    data_X, data_y, test_size=0.2, random_state=42
)
設置超參數和 TensorFlow 變量
# Hyperparameters for mini-batch gradient descent.
# NOTE(review): the feature columns are fed to the model unscaled; with this
# step size gradient descent may diverge -- consider standardizing the inputs.
learning_rate = 0.01
training_epochs = 1 # presumably the intended full-run value is 40 -- confirm
batch_size = 500 # presumably an alternative batch size of 50 -- confirm
# True division, so this is a float; it is not referenced again in the code shown here.
totalBatches = len(data_X_train)/batch_size
n, m = data_X_train.shape # n = rows in the 80% training split, m = 8 feature columns (target excluded)
print('n=', n, ', m=', m)
# Model parameters: an (m, 1) weight column drawn uniformly from [-1, 1) and a
# scalar bias drawn from a standard normal distribution.
W = tf.Variable(tf.random_uniform([m, 1], -1.0, 1.0, dtype = tf.float64), name="theta") # Random initialization
b = tf.Variable(np.random.randn(), name = "b", dtype = tf.float64)
# Placeholders fed per batch; the leading None lets the batch size vary.
# The target must be fed as a column of shape (batch, 1).
X = tf.placeholder(tf.float64, shape=(None, m), name="X")
y = tf.placeholder(tf.float64, shape=(None, 1), name="y")
print('X.shape :\n', X.shape, '\n')
print('y.shape :\n', y.shape, '\n')
print('b.shape :\n', b.shape, '\n')
print('Thetha.shape (W):\n', W.shape, '\n')
# Linear model: predictions = X @ W + b.
y_pred = tf.add(tf.matmul(X, W), b, name="predictions")
# Per-row residuals; any NaN in a fed batch propagates into every value below.
error = y_pred - y
# Mean squared error over the batch.
cost = tf.reduce_mean(tf.square(error), name="mse")
# Running this op applies one gradient-descent update to W and b.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Op that initializes every variable created above; run it once per session.
init = tf.global_variables_initializer()
現在,訓練模型時只會得到 NaN 值
def get_batch(X, y, batch_size):
    """Yield shuffled mini-batches of row-aligned ``(X_batch, y_batch)`` pairs.

    Rows are always selected by POSITION. For a pandas object ``obj[idx]``
    with an integer array is a label lookup, so after ``train_test_split``
    has shuffled the index it returns the wrong rows and NaN (or raises
    ``KeyError`` on newer pandas) for labels that are missing. Using
    ``.iloc`` for both X and y keeps features and targets aligned.

    Args:
        X: Feature rows; a pandas DataFrame or a 2-D NumPy array.
        y: One target per row of ``X``; a pandas Series or a NumPy array.
        batch_size: Desired rows per batch. The data is cut into
            ``len(X) // batch_size`` nearly equal parts, so batches may be
            slightly larger than ``batch_size``.

    Yields:
        ``(X_batch, y_batch)`` tuples of the same container types as the
        inputs. Every row appears in exactly one batch per call.
    """
    n_rows = len(X)
    if n_rows == 0:
        # Nothing to train on; yield no batches instead of crashing.
        return

    n_batches = n_rows // batch_size
    if n_batches == 0:
        # Fewer rows than batch_size: serve everything as a single batch
        # (np.array_split rejects zero sections).
        n_batches = 1

    rnd_idx = np.random.permutation(n_rows)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch = X.iloc[batch_idx, :] if hasattr(X, "iloc") else X[batch_idx, :]
        y_batch = y.iloc[batch_idx] if hasattr(y, "iloc") else y[batch_idx]
        yield X_batch, y_batch
# Op that initializes all TensorFlow variables; must run once in the session.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        for X_batch, y_batch in get_batch(data_X_train, data_y_train, batch_size):
            # The `y` placeholder is declared with shape (None, 1), so the
            # target values have to be fed as a column.
            y_batch = np.array(y_batch).reshape(-1, 1)
            # Build the feed once and reuse it for the update and the report.
            feed = {X: X_batch, y: y_batch}
            # One gradient-descent update on this batch.
            sess.run(optimizer, feed_dict=feed)
            # Re-evaluate on the same batch, after the update, for reporting.
            curr_y_pred, curr_error, curr_cost = sess.run([y_pred, error, cost], feed)
            print('Training... batch.shape: ', X_batch.shape,'curr_error:', curr_error)
結果看起來像
Training... batch.shape: (504, 8) curr_error: [[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
...
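您的問題來自 pd.read_csv(...) 函數。我將其替換為 NumPy 版本(我不熟悉 Pandas),結果運行得非常順利。(更準確地說:經 train_test_split 打亂之後,pandas Series 的索引標籤不再是 0..n-1,此時 y[batch_idx] 是按標籤而不是按位置取值,找不到的標籤會得到 NaN,較新版本的 pandas 則會直接拋出 KeyError;NumPy 數組一律按位置索引,所以不會有這個問題。)這是整個代碼段: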
import math
import numpy as np
import tensorflow as tf
from sklearn import metrics
# Parse the CSV into a 2-D NumPy array, skipping the header row.
california_housing_dataframe = np.genfromtxt(
    'https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv',
    delimiter=',',
    skip_header=1,
)

# Columns 0-7 are the input features; column 8 is the regression target.
data_X, data_y = (
    california_housing_dataframe[:, :8],
    california_housing_dataframe[:, 8],
)
from sklearn.model_selection import train_test_split
# Split features and target in ONE call so the same shuffled row selection is
# applied to both. Two separate calls only stay aligned as long as both repeat
# exactly the same random_state, which is easy to break by accident.
# Return order: X_train, X_validate, y_train, y_validate.
data_X_train, data_X_validate, data_y_train, data_y_validate = train_test_split(
    data_X, data_y, test_size=0.2, random_state=42
)
# Hyperparameters for mini-batch gradient descent.
# NOTE(review): the feature columns are fed to the model unscaled; with this
# step size gradient descent may diverge -- consider standardizing the inputs.
learning_rate = 0.01
training_epochs = 1 # presumably the intended full-run value is 40 -- confirm
batch_size = 500 # presumably an alternative batch size of 50 -- confirm
# True division, so this is a float; it is not referenced again in the code shown here.
totalBatches = len(data_X_train)/batch_size
n, m = data_X_train.shape # n = rows in the 80% training split, m = 8 feature columns (target excluded)
print('n=', n, ', m=', m)
# Model parameters: an (m, 1) weight column drawn uniformly from [-1, 1) and a
# scalar bias drawn from a standard normal distribution.
W = tf.Variable(tf.random_uniform([m, 1], -1.0, 1.0, dtype = tf.float64), name="theta") # Random initialization
b = tf.Variable(np.random.randn(), name = "b", dtype = tf.float64)
# Placeholders fed per batch; the leading None lets the batch size vary.
# The target must be fed as a column of shape (batch, 1).
X = tf.placeholder(tf.float64, shape=(None, m), name="X")
y = tf.placeholder(tf.float64, shape=(None, 1), name="y")
print('X.shape :\n', X.shape, '\n')
print('y.shape :\n', y.shape, '\n')
print('b.shape :\n', b.shape, '\n')
print('Thetha.shape (W):\n', W.shape, '\n')
# Linear model: predictions = X @ W + b.
y_pred = tf.add(tf.matmul(X, W), b, name="predictions")
# Per-row residuals between prediction and target.
error = y_pred - y
# Mean squared error over the batch.
cost = tf.reduce_mean(tf.square(error), name="mse")
# Running this op applies one gradient-descent update to W and b.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Op that initializes every variable created above; run it once per session.
init = tf.global_variables_initializer()
def get_batch(X, y, batch_size):
    """Yield shuffled mini-batches of row-aligned ``(X_batch, y_batch)`` pairs.

    Args:
        X: 2-D NumPy array of feature rows.
        y: NumPy array holding one target per row of ``X``.
        batch_size: Desired rows per batch. The data is cut into
            ``len(X) // batch_size`` nearly equal parts, so batches may be
            slightly larger than ``batch_size``.

    Yields:
        ``(X_batch, y_batch)`` tuples selected with the same positional
        indices. Every row appears in exactly one batch per call.
    """
    n_rows = len(X)
    if n_rows == 0:
        # Nothing to train on; yield no batches instead of crashing.
        return

    n_batches = n_rows // batch_size
    if n_batches == 0:
        # Fewer rows than batch_size: serve everything as a single batch
        # (np.array_split rejects zero sections).
        n_batches = 1

    rnd_idx = np.random.permutation(n_rows)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx, :], y[batch_idx]
        yield X_batch, y_batch
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        for X_batch, y_batch in get_batch(data_X_train, data_y_train, batch_size):
            # The `y` placeholder is declared with shape (None, 1), so the
            # target values have to be fed as a column.
            y_batch = np.array(y_batch).reshape(-1, 1)
            # Build the feed once and reuse it for the update and the report.
            feed = {X: X_batch, y: y_batch}
            # One gradient-descent update on this batch.
            sess.run(optimizer, feed_dict=feed)
            # Re-evaluate on the same batch, after the update, for reporting.
            curr_y_pred, curr_error, curr_cost = sess.run([y_pred, error, cost], feed)
            print('Training... batch.shape: ', X_batch.shape,'curr_error:', curr_error)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.