繁体   English   中英

输入包含 NaN、无穷大或对于 dtype('float32') 来说太大的值。 PyTorch

[英]Input contains NaN, infinity or a value too large for dtype('float32'). PyTorch

您好,我的任务有问题。 我尝试训练模型但徒劳无功。 我看到这样的错误“输入包含 NaN、无穷大或对于 dtype('float32') 来说太大的值。” 我认为它可能与 MSE 损失函数有关,因为它与 MAE 一起工作正常,它也与 RMSE 一起工作正常(在第二个 epoch 我有 RMSE = 10.***)。 我无法弄清楚我做错了什么。

NaN 计数

在此处输入图像描述

# Load the dataset: column 0 is the regression target, columns 1.. are features.
df = pd.read_csv('data.txt.zip', header=None)
y = df.iloc[:, 0].to_numpy()
X = df.iloc[:, 1:].to_numpy()

# Fixed-index split: the first 463,715 rows form the training pool,
# the remainder is held out as the test set.
train_size = 463715
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Convert everything to float32 tensors, as the model expects.
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

创建张量数据集

# Wrap the tensors into (features, target) datasets.
train_ds = TensorDataset(X_train, y_train)
test_ds = TensorDataset(X_test, y_test)

# Train/validation sizes: 370,972 + 92,743 = 463,715 (the full training pool).
train_num = 370972
val_num = 92743

将训练数据分为训练数据和验证数据

train_ds, val_ds = random_split(train_ds, [train_num, val_num])

评估准确性

def accuracy(y_true, y_pred):
  """Return the R^2 score of predictions against targets.

  Accepts numpy arrays or torch tensors; tensors are detached and moved
  to the CPU first, since sklearn's r2_score cannot consume tensors that
  are still attached to the autograd graph or live on an accelerator.
  """
  if torch.is_tensor(y_true):
    y_true = y_true.detach().cpu().numpy()
  if torch.is_tensor(y_pred):
    y_pred = y_pred.detach().cpu().numpy()
  return r2_score(y_true, y_pred)

创建 Class

class BaselineModel(nn.Module):
  """Feed-forward regression baseline: Linear -> ELU -> Linear.

  Fix: the constructor previously stored input_size/hidden_size/output_size
  but hard-coded the layer widths (90/45/1); the layers now use the
  arguments, which is identical for the existing call site (90, 45, 1)
  and generalizes to other feature counts.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super(BaselineModel, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.linear2 = nn.Linear(hidden_size, output_size)
    # NOTE(review): everything below is never used by forward(); kept so the
    # parameter/state_dict layout stays compatible, but consider removing.
    self.linear3 = nn.Linear(hidden_size, 15)
    self.linear4 = nn.Linear(15, output_size)
    self.batch = nn.BatchNorm2d(hidden_size)  # NOTE(review): BatchNorm1d would match 2-D inputs
    self.relu = nn.ReLU()
    self.lreku = nn.LeakyReLU()
    self.elu = nn.ELU()
    self.dropout = nn.Dropout(0.5)

  def forward(self, x):
    # (batch, input_size) -> (batch, output_size)
    x = self.elu(self.linear1(x))
    return self.linear2(x)

  def training_step(self, criterion, batch):
    """Compute the training loss for one (inputs, targets) batch."""
    x_train, y_train = batch
    y_pred = self(x_train)
    # unsqueeze(1) aligns targets (N,) with predictions (N, 1); without it
    # MSE would broadcast to an (N, N) matrix and silently inflate the loss.
    loss = criterion(y_pred, y_train.unsqueeze(1))
    return loss

  def validation_step(self, criterion, batch):
    """Return {'val_loss', 'val_acc'} for one validation batch."""
    x_val, y_val = batch
    y_pred = self(x_val)
    loss = criterion(y_pred, y_val.unsqueeze(1))
    acc = accuracy(y_val, y_pred)
    return {'val_loss': loss, 'val_acc': acc}

  def validation_epoch_end(self, y_pred):
    """Aggregate per-batch validation results into epoch-level means."""
    batch_losses = [x['val_loss'] for x in y_pred]
    epoch_loss = torch.stack(batch_losses).mean()

    # val_acc values are plain floats (r2_score), so average with numpy.
    batch_accs = [x['val_acc'] for x in y_pred]
    epoch_acc = np.mean(batch_accs)

    return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

  def epoch_end(self, epoch, result):
    print(f"Epoch {epoch}, val_loss: {result['val_loss']}, val_acc: {result['val_acc']} ")

model = BaselineModel(input_size = 90, hidden_size = 45, output_size = 1)

评估

def evaluate(model, criterion, val_loader):
  """Run one full validation pass and return the aggregated metrics."""
  # Gradients are not needed during evaluation; no_grad saves memory and time.
  with torch.no_grad():
    outputs = []
    for batch in val_loader:
      outputs.append(model.validation_step(criterion, batch))
    return model.validation_epoch_end(outputs)

训练

def train(model, criterion, optimizer, train_loader, val_loader, lr, epochs):
  """Train the model for `epochs` epochs, validating after each one.

  Returns the list of per-epoch validation results. `lr` is unused here
  (the learning rate is already baked into `optimizer`); it is kept only
  for backward compatibility with existing callers.
  """
  history = []

  for epoch in range(epochs):

    # One optimization pass over the training data.
    for batch in train_loader:
      optimizer.zero_grad()
      loss = model.training_step(criterion, batch)
      loss.backward()
      optimizer.step()

    # Validate and log at the end of every epoch.
    result = evaluate(model, criterion, val_loader)
    model.epoch_end(epoch, result)
    history.append(result)
  # Bug fix: history was accumulated but the return was commented out,
  # silently discarding all recorded metrics.
  return history

创建 train_loader 和 val_loader

# Mini-batch size shared by both loaders.
batch_size = 128

train_loader = DataLoader(train_ds, batch_size = batch_size, shuffle = True)
# NOTE(review): shuffling the validation loader is unnecessary (metrics are
# averaged over the full pass) but does not change the results.
val_loader = DataLoader(val_ds, batch_size = batch_size, shuffle = True)

创建参数并训练

# NOTE(review): lr=0.05 with momentum=0.9 and raw MSE on unscaled targets is
# the likely cause of the reported NaN/inf error — the loss can diverge and
# poison the weights. Consider a smaller lr, gradient clipping, or scaling
# the features/targets — TODO confirm by logging the loss per batch.
lr = 0.05
optimizer = torch.optim.SGD(model.parameters(), lr, momentum = 0.9)
criterion = F.mse_loss
epochs = 10

train(model, criterion, optimizer, train_loader, val_loader, lr, epochs)

在此处输入图像描述

是的,这是因为您的损失函数(loss function)。 如果在某个 epoch 之后损失函数的值变得非常小或非常大,那么当您想在反向传播中使用它来训练模型时,您将面临此错误。 为了解决这个问题,您应该使用 Early Stopping 来停止训练。 所以你应该实现 Callback,Callbacks 提供了一种自动执行代码并与模型训练过程交互的方法。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM