
PyTorch minibatch training very slow

When training my model on the Adult Income dataset with minibatches, training is very slow regardless of whether I use PyTorch's DataLoader or a basic implementation of minibatch training. Is there a problem with my code, or is there another way to speed up training on the Adult Income dataset? I want to use one-hot encoding and cross-entropy loss + softmax. Do I have to use a different loss function or remove the softmax layer?

import pandas as pd
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import warnings

warnings.filterwarnings('ignore')
device = torch.device("cpu")


class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 12)
        self.layer2 = nn.Linear(12, 2)

    def forward(self, x):
        x = F.sigmoid(self.layer1(x))
        x = F.softmax(self.layer2(x))  # To check with the loss function
        return x


# load dataset
filename = './datasets/adult-all.csv'
dataframe = read_csv(filename, header=None, na_values='?')
# drop rows with missing
dataframe = dataframe.dropna()
# summarize the class distribution
target = dataframe.values[:, -1]
# split into inputs and outputs
last_ix = len(dataframe.columns) - 1
X_, y = dataframe.drop(last_ix, axis=1), dataframe[last_ix]
# select categorical and numerical features
cat_ix = X_.select_dtypes(include=['object', 'bool']).columns
num_ix = X_.select_dtypes(include=['int64', 'float64']).columns
# label encode the target variable to have the classes 0 and 1
y = LabelEncoder().fit_transform(y)
# one-hot encoding of categorical features
df_cat = pd.get_dummies(X_[cat_ix])
# binning of numerical features
x = X_.drop(columns=cat_ix, axis=1)
est = KBinsDiscretizer(n_bins=3, encode='onehot-dense', strategy='uniform')
df_num = est.fit_transform(x)
X = pd.concat([df_cat.reset_index(drop=True), pd.DataFrame(df_num).reset_index(drop=True)], axis=1)
# split training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_tr = Variable(torch.tensor(X_train.values, dtype=torch.float))
X_te = Variable(torch.tensor(X_test.values, dtype=torch.float))
y_tr = Variable(torch.tensor(y_train, dtype=torch.long))
y_te = Variable(torch.tensor(y_test, dtype=torch.long))


def binary_cross_entropy_one_hot(input, target):
    return torch.nn.CrossEntropyLoss()(input, target)


def _accuracy(y_pred, y_true):
    classes = torch.argmax(y_pred, dim=1)
    labels = y_true
    accuracy = torch.mean((classes == labels).float())
    return accuracy


model = Model(X.shape[1])
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1000
accuracy = 0.0
minibatch = True

# training loop
train_loss = []
for epoch in range(epochs):
    if minibatch:
        batch_size = 128  # or whatever
        permutation = torch.randperm(X_tr.size()[0])
        for i in range(0, X_tr.size()[0], batch_size):
            optimizer.zero_grad()
            indices = permutation[i:i + batch_size]
            batch_x, batch_y = X_tr[indices], y_tr[indices]
            # in case you wanted a semi-full example
            outputs = model.forward(batch_x)
            loss = binary_cross_entropy_one_hot(outputs, batch_y)
            loss.backward()
            optimizer.step()
        if epoch % 100 == 0:
            print(f'epoch: {epoch:2}  loss: {loss:10.8f}')
        # train_ds = TensorDataset(X_tr, y_tr)
        # train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
        # batch_loss = 0.0
        # batch_accuracy = 0.0
        # for nb, (x_batch, y_batch) in enumerate(train_dl):  # manually set number of batches?
        #     optimizer.zero_grad()
        #     y_pred_train = model(x_batch)
        #     loss = binary_cross_entropy_one_hot(y_pred_train, y_batch)
        #     loss.backward()
        #     optimizer.step()
        #     batch_loss += loss.item()
        #     batch_accuracy += _accuracy(y_pred_train, y_batch)
        # train_loss.append(batch_loss / (nb + 1))
        # accuracy = batch_accuracy / (nb + 1)
        # if epoch % 100 == 0:
        #     print(f'epoch: {epoch:2}  loss: {train_loss[epoch]:10.8f}')
    else:
        optimizer.zero_grad()
        y_pred = model(X_tr)
        # computing the loss function
        loss = binary_cross_entropy_one_hot(y_pred, y_tr)
        if epoch % 100 == 0:
            print(f'epoch: {epoch:2}  loss: {loss.item():10.8f}')
        loss.backward()
        optimizer.step()
        accuracy = _accuracy(y_pred, y_tr)
# evaluation on test data
with torch.no_grad():
    model.eval()
    y_pred = model(X_te)
    test_loss = binary_cross_entropy_one_hot(y_pred, y_te)
    test_acc = _accuracy(y_pred, y_te)
print("Loss on test data: {:.4}".format(test_loss))
print("Accuracy on test data: {:.4}".format(test_acc))

The time depends on your input_dim, the size of the dataset, and the number of updates per epoch (dataset size // batch size). From what you have shared I am not sure what the problem is, or whether there really is any bottleneck. However, I would point out the following, which may help you (in no particular order):
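As a rough sense of scale (assuming the usual Adult dataset size): after dropping rows with missing values there are on the order of 45,000 rows, so with batch_size=128 that is roughly 350 optimizer updates per epoch, and with epochs=1000 about 350,000 updates in total on CPU, which by itself accounts for a long runtime.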

  • There is no need to wrap your data in torch.autograd.Variable. It has been deprecated and is no longer needed; Autograd automatically supports torch.tensors with requires_grad set to True.
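
     For example, a minimal sketch using the tensors already defined in the question, with the Variable wrappers simply dropped:

     X_tr = torch.tensor(X_train.values, dtype=torch.float)
     X_te = torch.tensor(X_test.values, dtype=torch.float)
     y_tr = torch.tensor(y_train, dtype=torch.long)
     y_te = torch.tensor(y_test, dtype=torch.long)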

  • You should not apply F.softmax to your model's output if you are using torch.nn.CrossEntropyLoss, because CrossEntropyLoss already combines nn.LogSoftmax() and nn.NLLLoss(). There is also no need to initialize the loss module every time you call it:

     criterion = torch.nn.CrossEntropyLoss()

     def binary_cross_entropy_one_hot(input, target):
         return criterion(input, target)
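
     Building on that point, a minimal sketch of the forward pass returning raw logits, assuming the same two-layer Model from the question (torch.sigmoid stands in for the deprecated F.sigmoid):

     def forward(self, x):
         x = torch.sigmoid(self.layer1(x))
         return self.layer2(x)  # raw logits; CrossEntropyLoss applies log-softmax internally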
  • I see you are redefining your data loader on every epoch. Is that really what you want? If not, you can define it outside the training loop:

     train_ds = TensorDataset(X_tr, y_tr)
     train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)

     for epoch in range(epochs):
         for x, y in train_dl:
             # ...
  • I would call .item() on your accuracy (when calling _accuracy) so that it is not attached to the computation graph and can be released from memory when you are done.
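
     For example, in the commented-out DataLoader loop, the accumulation could become:

     batch_accuracy += _accuracy(y_pred_train, y_batch).item()  # plain Python float, detached from the graph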
