[英]Nan Loss when training Deep neural Recommender model using tensorflow
[英]Loss is always nan when training a deep learning model from tabular data
我正在嘗試從大約數千個具有 51 個數字特征和標記列的條目的數據集中訓練 model,例如:
在訓練 model 以預測 3 個標簽(候選、誤報、確認)時,損失始終為 nan,並且准確度很快就停在某個固定值上不再變化。 代碼如下:
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, RobustScaler
from sklearn.preprocessing import OrdinalEncoder
from tensorflow import optimizers
from tensorflow.python.keras.layers import Dense, Dropout, Normalization
from tensorflow.python.keras.models import Sequential, Model
def load_dataset(data_folder_csv):
    """Load a CSV file and split it into features and a label column.

    The first row of the file is treated as the header. Every column except
    the last is returned as a float feature matrix; the last column is
    returned unchanged as the target.

    Args:
        data_folder_csv: Path (or file-like object) accepted by
            ``pandas.read_csv``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a 2D float array with one row per
        record and ``y`` is a 2D array of shape ``(n_rows, 1)`` holding the
        raw label values.

    Raises:
        ValueError: If a feature value cannot be converted to ``float``.
    """
    # load the dataset as a pandas DataFrame
    data = pd.read_csv(data_folder_csv, header=0)
    # retrieve numpy array
    dataset = data.values
    # split into input (X) and output (y) variables
    X = dataset[:, :-1]
    y = dataset[:, -1]
    print(y)
    # format all fields as floats; the builtin ``float`` is used because the
    # ``np.float`` alias was removed from NumPy
    X = X.astype(float)
    # reshape the output variable to be one column (e.g. a 2D shape)
    y = y.reshape((len(y), 1))
    return X, y
# prepare input data (the code below uses RobustScaler, not a min/max scaler).
def prepare_inputs(X_train, X_test):
    """Scale the train and test feature matrices.

    Returns a ``(X_train_enc, X_test_enc)`` tuple.

    NOTE(review): ``RobustScaler().fit_transform(X_train)`` returns the
    transformed data rather than the scaler object, so ``oe`` is presumably
    an array and the ``oe.transform`` calls below would fail. The script that
    follows this definition assigns ``X_train``/``X_test`` directly instead
    of calling this function.
    """
    oe = RobustScaler().fit_transform(X_train)
    X_train_enc = oe.transform(X_train)
    X_test_enc = oe.transform(X_test)
    return X_train_enc, X_test_enc
# prepare target
def prepare_targets(y_train, y_test):
    """One-hot encode the train and test label columns.

    Returns a ``(y_train_enc, y_test_enc)`` tuple of dense arrays obtained by
    calling ``.toarray()`` on the encoder output.
    """
    le = LabelEncoder()
    ohe = OneHotEncoder()
    # NOTE(review): ``le`` is fitted twice (the second fit replaces the first)
    # and its output is never used below.
    le.fit(y_train)
    le.fit(y_test)
    # NOTE(review): ``fit_transform`` is called on both splits, so the encoder
    # is re-fitted on the test labels instead of reusing the training fit.
    y_train_enc = ohe.fit_transform(y_train).toarray()
    y_test_enc = ohe.fit_transform(y_test).toarray()
    return y_train_enc, y_test_enc
# Load the CSV and hold out 33% of the rows as a test split.
X, y = load_dataset("csv_ready.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
print('Train', X_train.shape, y_train.shape)
print('Test', X_test.shape, y_test.shape)
# NOTE(review): prepare_inputs is not called here; the "encoded" names are
# just aliases of the raw, unscaled arrays.
X_train_enc, X_test_enc = X_train, X_test
# NOTE(review): the print call below is missing its closing parenthesis.
print('Finished preparing inputs.'
# prepare output data
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)
# NOTE(review): norm_layer is adapted on X but never added to the model.
norm_layer = Normalization()
norm_layer.adapt(X)
# Three hidden Dense layers (128 -> 64 -> 32) with dropout after the first two.
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation="tanh", kernel_initializer='he_normal'))
model.add(Dropout(0.2))
# NOTE(review): input_dim is repeated on the non-first layers below.
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))
# NOTE(review): sigmoid output is paired with categorical_crossentropy for a
# 3-class one-hot target.
model.add(Dense(3, activation='sigmoid'))
opt = optimizers.Adam(lr=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.summary()
# Training and evaluation both use X_train / X_test, i.e. the unscaled inputs.
model.fit(X_train, y_train_enc, epochs=20, batch_size=128, verbose=1, use_multiprocessing=True)
_, accuracy = model.evaluate(X_test, y_test_enc, verbose=0)
print('Accuracy: %.2f' % (accuracy * 100))
我嘗試過增大/減小學習率、更換優化器、增減神經元和層的數量,以及調整批量大小,但似乎都無法讓 model 獲得好的結果。 我想我在這里遺漏了一些東西,但說不出具體是哪里出了問題。 結果示例:
EDIT2:也嘗試了l2正則化並且沒有做任何事情。
原因之一:檢查您的數據集是否含有 NaN 值。 NaN 值可能會在學習時導致 model 出現問題(例如損失變成 nan)。
您的代碼中的一些主要錯誤:
- 對具有 3 個神經元的 output 層使用了 sigmoid 激活 function 而不是 softmax。
- 編碼器同時對訓練集和測試集進行了擬合;應該只對訓練集使用 fit_transform,並且只對測試集使用 transform。
- 沒有對 X_train 和 X_test 使用 prepare_inputs function。
- 訓練 model 時應該使用 X_train_enc 而不是 X_train。
改用這個:
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.preprocessing import OrdinalEncoder
from tensorflow import optimizers
from tensorflow.python.keras.layers import Dense, Dropout, Normalization
from tensorflow.python.keras.models import Sequential, Model
def load_dataset(data_folder_csv):
    """Read a CSV file and return its features and labels as NumPy arrays.

    The file's first row is used as the header. All columns but the last are
    converted to floats and returned as the feature matrix; the last column
    is returned as-is as a single-column target array.

    Args:
        data_folder_csv: Path (or file-like object) accepted by
            ``pandas.read_csv``.

    Returns:
        A tuple ``(X, y)``: ``X`` is a 2D float array of features and ``y``
        is a 2D array of shape ``(n_rows, 1)`` with the raw label values.

    Raises:
        ValueError: If a feature value cannot be converted to ``float``.
    """
    # load the dataset as a pandas DataFrame
    data = pd.read_csv(data_folder_csv, header=0)
    # retrieve numpy array
    dataset = data.values
    # the label is the last column; keep it 2D (one column) for the encoders
    y = dataset[:, -1]
    print(y)
    y = y.reshape(-1, 1)
    # everything before the last column is a numeric feature; the builtin
    # ``float`` replaces the ``np.float`` alias that NumPy no longer provides
    X = dataset[:, :-1].astype(float)
    return X, y
# prepare input data using min/max scaler.
def prepare_inputs(X_train, X_test):
    """Min/max-scale the feature matrices using statistics from the train set.

    The scaler is fitted on ``X_train`` only and then applied to both splits.

    Returns:
        A tuple ``(X_train_enc, X_test_enc)`` of scaled arrays.
    """
    scaler = MinMaxScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)
# prepare target
def prepare_targets(y_train, y_test):
    """Turn label columns into dense one-hot arrays.

    Both encoders are fitted on the training labels only and then reused for
    the test labels.

    Args:
        y_train: Training labels, either 1D or a single-column 2D array.
        y_test: Test labels in the same layout as ``y_train``.

    Returns:
        A tuple ``(y_train_enc, y_test_enc)`` of dense one-hot arrays.

    Raises:
        ValueError: If ``y_test`` contains a label not present in ``y_train``.
    """
    le = LabelEncoder()
    ohe = OneHotEncoder()
    # LabelEncoder expects 1D input, so flatten the single-column labels.
    y_train = le.fit_transform(np.ravel(y_train))
    y_test = le.transform(np.ravel(y_test))
    # OneHotEncoder expects 2D input, so turn the integer codes back into a
    # single column before encoding.
    y_train_enc = ohe.fit_transform(y_train.reshape(-1, 1)).toarray()
    y_test_enc = ohe.transform(y_test.reshape(-1, 1)).toarray()
    return y_train_enc, y_test_enc
# Load the CSV and hold out 33% of the rows as a test split.
X, y = load_dataset("csv_ready.csv")
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1
)
print('Train', X_train.shape, y_train.shape)
print('Test', X_test.shape, y_test.shape)

# Scale the features (fitted on the training split only).
X_train_enc, X_test_enc = prepare_inputs(X_train, X_test)
print('Finished preparing inputs.')

# One-hot encode the labels.
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)

# Three 128-unit ReLU layers with dropout after the first two, followed by a
# 3-way softmax output.
model = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation="relu"),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# Train and evaluate on the scaled inputs.
model.fit(
    X_train_enc,
    y_train_enc,
    epochs=20,
    batch_size=32,
    verbose=1,
    use_multiprocessing=True,
)
_, accuracy = model.evaluate(X_test_enc, y_test_enc, verbose=0)
print('Accuracy: %.2f' % (accuracy * 100))
您想將 model 定義更改為:
# Only the first layer declares the input shape; later layers infer theirs.
model = Sequential([
    Dense(
        128,
        input_shape=X_train.shape[1:],
        activation="tanh",
        kernel_initializer='he_normal',
    ),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    # softmax so that the three class outputs sum to 1
    Dense(3, activation='softmax'),
])
您只需定義第一層的輸入形狀,Keras 將自動確定后續層的正確形狀。 在定義第一層的 input_shape 時不需要包含批量大小這一維,因此使用 input_shape=X_train.shape[1:]。
sigmoid 激活實際上也能運行(因為 output 會在 0 和 1 之間變化),但您真正想要的是 softmax 激活(它確保所有輸出之和為 1,這正是概率所要求的——發生某事的概率是 100%,而不是 sigmoid 最終可能給出的 120%)。
此外,您沒有在任何地方真正使用您的 LabelEncoder。 我想你的意思是這樣的:
def prepare_targets(y_train, y_test):
    """Label-encode and then one-hot encode the train and test labels.

    Args:
        y_train: Training labels, either 1D or a single-column 2D array.
        y_test: Test labels in the same layout as ``y_train``.

    Returns:
        A tuple ``(y_train_enc, y_test_enc)`` of dense one-hot arrays.

    Raises:
        ValueError: If ``y_test`` contains a label not present in ``y_train``.
    """
    le = LabelEncoder()
    ohe = OneHotEncoder()
    # teach the label encoder our labels (it expects 1D input, so flatten
    # the single-column label arrays first)
    y_train_flat = np.ravel(y_train)
    y_test_flat = np.ravel(y_test)
    le.fit(y_train_flat)
    # turn our strings into integers, then back into a single column because
    # the one-hot encoder expects 2D input
    y_train_transformed = le.transform(y_train_flat).reshape(-1, 1)
    y_test_transformed = le.transform(y_test_flat).reshape(-1, 1)
    # turn our integers into one-hot-encoded arrays
    y_train_enc = ohe.fit_transform(y_train_transformed).toarray()
    y_test_enc = ohe.transform(y_test_transformed).toarray()
    return y_train_enc, y_test_enc
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.