Hello I have some problem with my task. I try to train model but in vain. I see the error like this "Input contains NaN, infinity or a value too large for dtype('float32')." I think it can be connected with Mse function, bacause with MAE it works someho also with RMSE it works somehow (on the second epoch i have RMSE = 10.***). I can figure out what i do wrong
df = pd.read_csv('data.txt.zip', header=None)
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values
train_size = 463715
X_train = X[:train_size, :]
y_train = y[:train_size]
X_test = X[train_size:, :]
y_test = y[train_size:]
#ToTensor
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test)
train_ds = TensorDataset(X_train, y_train)
test_ds = TensorDataset(X_test, y_test)
val_num = 92743
train_num = 370972
train_ds, val_ds = random_split(train_ds, [train_num, val_num])
def accuracy(y_true, y_pred):
return r2_score(y_true, y_pred)
class BaselineModel(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(BaselineModel, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.linear1 = nn.Linear(90, 45)
self.linear2 = nn.Linear(45, 1)
self.linear3 = nn.Linear(45, 15)
self.linear4 = nn.Linear(15, 1)
self.batch = nn.BatchNorm2d(hidden_size)
self.relu = nn.ReLU()
self.lreku = nn.LeakyReLU()
self.elu = nn.ELU()
self.dropout = nn.Dropout(0.5)
def forward(self, x):
x = self.elu(self.linear1(x))
return self.linear2(x)
def training_step(self, criterion, batch):
x_train, y_train = batch
y_pred = self(x_train)
loss = (criterion(y_pred, y_train.unsqueeze(1)))
return loss
def validation_step(self, criterion, batch):
x_val, y_val = batch
y_pred = self(x_val)
loss = (criterion(y_pred, y_val.unsqueeze(1)))
acc = accuracy(y_val, y_pred)
return {'val_loss': loss, 'val_acc': acc}
def validation_epoch_end(self, y_pred):
batch_losses = [x['val_loss'] for x in y_pred]
epoch_loss = torch.stack(batch_losses).mean()
batch_accs = [x['val_acc'] for x in y_pred]
epoch_acc = np.mean(batch_accs)
#epoch_acc = torch.stack(batch_accs).mean()
return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
def epoch_end(self, epoch, result):
print(f"Epoch {epoch}, val_loss: {result['val_loss']}, val_acc: {result['val_acc']} ")
model = BaselineModel(input_size = 90, hidden_size = 45, output_size = 1)
def evaluate(model, criterion, val_loader):
with torch.no_grad():
y_pred = [model.validation_step(criterion, batch) for batch in val_loader]
return model.validation_epoch_end(y_pred)
def train(model, criterion, optimizer, train_loader, val_loader, lr, epochs):
history = []
for epoch in range(epochs):
for batch in train_loader:
optimizer.zero_grad()
loss = model.training_step(criterion, batch)
loss.backward()
optimizer.step()
result = evaluate(model, criterion, val_loader)
model.epoch_end(epoch, result)
history.append(result)
#return history
batch_size = 128
train_loader = DataLoader(train_ds, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(val_ds, batch_size = batch_size, shuffle = True)
lr = 0.05
optimizer = torch.optim.SGD(model.parameters(), lr, momentum = 0.9)
criterion = F.mse_loss
epochs = 10
train(model, criterion, optimizer, train_loader, val_loader, lr, epochs)
Yes, it is because of your loss of function. if the value of the loss function after some epoch becomes very small or very large then when you want to use it in backpropagation to train the model, you face this error. To handle that, you should use Early Stopping to Halt the Training. so you should implement Callback , Callbacks provide a way to execute code and interact with the training model process automatically.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.