在 pytorch 中將 return_indices=True 添加到 nn.MaxPool2d 時出現 TypeError

Question

我是 PyTorch 的新手，我正在嘗試在 PyTorch 中創建一個自動編碼器，這是我的代碼：

編碼器：

# B = Batch size
# encoder (B, 3, 224, 224) => (B, 8)
class Encoder(nn.Module):
    """Convolutional encoder from the question, kept in its failing form.

    Both ``nn.MaxPool2d(2, return_indices=True)`` layers live inside the
    ``nn.Sequential``.  With ``return_indices=True`` a pooling layer returns an
    ``(output, indices)`` tuple, and ``nn.Sequential`` hands that tuple
    unchanged to the next layer, so the ``nn.Conv2d`` that follows the first
    pooling layer rejects it with ``TypeError``.  This is the error the
    question asks about; the class is deliberately not restructured here.
    """

    def __init__(self):
        super().__init__()
        self.encoder_cnn = nn.Sequential(
            # input shape: (B, 3, 224, 224) =>
            nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=0),
            nn.ReLU(True),
            nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=0),
            nn.ReLU(True),
            nn.BatchNorm2d(16),
            # Returns (output, indices); the tuple is what reaches the next Conv2d.
            nn.MaxPool2d(2, return_indices=True),
            # shape: (B, 16, 55, 55) =>
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True),
            # shape: (B, 32, 28, 28) =>
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, return_indices=True),
            # output shape: (B, 64, 7, 7)
        )
        self.flat = nn.Flatten(start_dim=1)  # shape: (B, 64*7*7)
        self.encoder_fc = nn.Sequential(
            # input shape: (B, 64*7*7)
            nn.Linear(64*7*7, 1024),
            nn.ReLU(True),
            # shape: (B, 1024)
            nn.Linear(1024, 8),
            nn.Sigmoid()
            # output shape: (B, 8)
        )

    def forward(self, x):
        """Run the conv stack, flatten, then the fully connected head.

        Args:
            x: image batch; the shape comments in ``__init__`` assume
                ``(B, 3, 224, 224)``.

        Returns:
            The output of ``encoder_fc``.  As written, the call to
            ``encoder_cnn`` raises ``TypeError`` before anything is returned.
        """
        x = self.encoder_cnn(x)
        x = self.flat(x)
        x = self.encoder_fc(x)
        return x

解碼器

# B = Batch size
# decoder: intended mapping (B, 8) => (B, 3, 224, 224)
class Decoder(nn.Module):
    """Decoder from the question: a fully connected head, then a conv stack.

    The two ``nn.MaxUnpool2d`` layers are placed inside ``nn.Sequential``.
    ``nn.MaxUnpool2d.forward`` needs an ``indices`` argument, but
    ``nn.Sequential`` passes a single input to each layer, so calling this
    module raises ``TypeError`` (missing positional argument ``indices``).
    """

    def __init__(self):
        super().__init__()
        flat_features = 64 * 7 * 7

        # (B, 8) -> (B, 1024) -> (B, 64*7*7)
        self.decoder_fc = nn.Sequential(
            nn.Linear(in_features=8, out_features=1024),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=flat_features),
            nn.ReLU(inplace=True),
        )

        # (B, 64*7*7) -> (B, 64, 7, 7)
        self.unflat = nn.Unflatten(1, (64, 7, 7))

        self.decoder_cnn = nn.Sequential(
            nn.MaxUnpool2d(kernel_size=2),
            nn.BatchNorm2d(num_features=64),
            nn.ConvTranspose2d(
                in_channels=64, out_channels=32, kernel_size=3, stride=2, padding=1
            ),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(
                in_channels=32, out_channels=16, kernel_size=3, stride=2, padding=1
            ),
            nn.MaxUnpool2d(kernel_size=2),
            nn.BatchNorm2d(num_features=16),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(
                in_channels=16, out_channels=8, kernel_size=3, stride=2, padding=0
            ),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(
                in_channels=8, out_channels=3, kernel_size=3, stride=1, padding=0
            ),
        )

    def forward(self, x):
        """Apply ``decoder_fc``, ``unflat`` and ``decoder_cnn`` in that order."""
        hidden = self.decoder_fc(x)
        feature_map = self.unflat(hidden)
        return self.decoder_cnn(feature_map)

當我測試編碼器時，出現此錯誤（TypeError: conv2d() received an invalid combination of arguments）：

# Instantiate both modules and move their parameters to `device`.
encoder = Encoder().to(device)
decoder = Decoder().to(device)
# Add a leading batch dimension to the first training sample.
# NOTE(review): test_img itself is not moved to `device` here — confirm
# train_data already lives on that device.
test_img = torch.unsqueeze(train_data[0], dim=0)
# Run the encoder alone; this is the call the question reports as failing.
print(encoder(test_img))

感謝您的幫助：P

ps：我嘗試刪除 nn.MaxPool2d(2, return_indices=True) 中的 return_indices=True，這樣編碼器可以成功運行，但是當我運行解碼器時會出現另一個錯誤：

# Instantiate both modules and move their parameters to `device`.
encoder = Encoder().to(device)
decoder = Decoder().to(device)
# Add a leading batch dimension to the first training sample.
# NOTE(review): test_img itself is not moved to `device` here — confirm
# train_data already lives on that device.
test_img = torch.unsqueeze(train_data[0], dim=0)
# Encode the sample, then feed only the codes to the decoder; the decoder call
# is where the question reports the missing-'indices' TypeError.
codes = encoder(test_img)
print(decoder(codes).shape)

錯誤：TypeError: forward() missing 1 required positional argument: 'indices'

Answer 1

問題

當 return_indices=True 時，nn.MaxPool2d.forward 返回一個元組 (out, indices)。稍後 nn.MaxUnpool2d 會需要這些 indices。但是，您把 Encoder 中的第一個 nn.MaxPool2d 放在 nn.Sequential 裡，並且位於一個 nn.Conv2d 之前。當 return_indices=True 時這就有問題了：返回的元組會被當作輸入傳給 nn.Conv2d，而 nn.Conv2d.forward 期望第一個參數是張量。這就是您得到 TypeError: conv2d() received an invalid combination of arguments 的原因。因此，您需要既保留 indices，又確保只有 out 被傳給下一層。

解決方案

修復方法是以 nn.MaxPool2d 作為分隔點，把順序模塊（nn.Sequential）拆分開。您還需要對 Decoder 做同樣的處理（以 nn.MaxUnpool2d 作為分隔點）。結果如下所示：

# B = Batch size
# encoder: (B, 3, 224, 224) => (B, 8) code, plus the two pooling index tensors
class Encoder(nn.Module):
    """Encode an image batch into an 8-value code and keep the pooling indices.

    The convolutional stack is split into two ``nn.Sequential`` stages, with
    each ``nn.MaxPool2d(..., return_indices=True)`` held as its own attribute.
    ``forward`` can therefore unpack every ``(output, indices)`` tuple itself
    and pass only the tensor on to the next stage.
    """

    def __init__(self):
        super().__init__()
        flat_features = 64 * 7 * 7

        # Stage 1 takes the (B, 3, 224, 224) input.
        self.encoder_cnn1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=0),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(num_features=16),
        )
        # Pooled output: (B, 16, 55, 55).
        self.max_pool1 = nn.MaxPool2d(kernel_size=2, return_indices=True)

        # Stage 2: (B, 16, 55, 55) -> (B, 32, 28, 28) -> 64 channels.
        self.encoder_cnn2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(num_features=64),
        )
        # Pooled output: (B, 64, 7, 7).
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, return_indices=True)

        # (B, 64, 7, 7) -> (B, 64*7*7)
        self.flat = nn.Flatten(start_dim=1)

        # (B, 64*7*7) -> (B, 1024) -> (B, 8), squashed by a sigmoid.
        self.encoder_fc = nn.Sequential(
            nn.Linear(in_features=flat_features, out_features=1024),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=8),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Encode ``x`` and return ``(code, indices1, indices2)``.

        ``indices1`` and ``indices2`` are the index tensors produced by
        ``max_pool1`` and ``max_pool2`` respectively; they are returned so a
        decoder can pass them to its unpooling layers.
        """
        stage1 = self.encoder_cnn1(x)
        pooled1, indices1 = self.max_pool1(stage1)
        stage2 = self.encoder_cnn2(pooled1)
        pooled2, indices2 = self.max_pool2(stage2)
        code = self.encoder_fc(self.flat(pooled2))
        return code, indices1, indices2

# B = Batch size
# decoder (B, 8) => (B, 3, 224, 224)
class Decoder(nn.Module):
    """Decode an 8-value code back to an image, reusing the encoder's indices.

    The ``nn.MaxUnpool2d`` layers are kept as separate attributes (not inside
    ``nn.Sequential``) so ``forward`` can pass each one its ``indices``.

    Two stride-2 transposed convolutions carry ``output_padding=1``.  Without
    it they produce 27 and 221 where the matching encoder layers saw 28 and
    222, so the feature map reaching ``max_unpool2`` would be 53x53 while
    ``indices1`` is 55x55, and unpooling would fail on the shape mismatch.
    The shape comments below assume the indices come from a 224x224 input to
    the companion encoder.
    """

    def __init__(self):
        super().__init__()
        self.decoder_fc = nn.Sequential(
            nn.Linear(8, 1024),
            nn.ReLU(True),
            nn.Linear(1024, 64*7*7),
            nn.ReLU(True)
        )
        self.unflat = nn.Unflatten(dim=1, unflattened_size=(64, 7, 7))
        # (B, 64, 7, 7) -> (B, 64, 14, 14)
        self.max_unpool1 = nn.MaxUnpool2d(2)
        self.decoder_cnn1 = nn.Sequential(
            nn.BatchNorm2d(64),
            # 14 -> 28; output_padding=1 recovers the row/column that the
            # encoder's stride-2 conv dropped (28 -> 14).
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1,
                               output_padding=1),
            nn.ReLU(True),
            # 28 -> 55, which matches the spatial size of indices1.
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1),
        )
        # (B, 16, 55, 55) -> (B, 16, 110, 110)
        self.max_unpool2 = nn.MaxUnpool2d(2)
        self.decoder_cnn2 = nn.Sequential(
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            # 110 -> 222; output_padding=1 mirrors the encoder's 222 -> 110 conv.
            nn.ConvTranspose2d(16, 8, kernel_size=3, stride=2, padding=0,
                               output_padding=1),
            nn.ReLU(True),
            # 222 -> 224
            nn.ConvTranspose2d(8, 3, kernel_size=3, stride=1, padding=0)
        )

    def forward(self, x, indices1, indices2):  # accept the indices
        """Reconstruct an image batch from ``x``.

        Args:
            x: code tensor of shape ``(B, 8)``.
            indices1: indices returned by the encoder's first max-pool layer;
                consumed by the second (outer) unpooling step.
            indices2: indices returned by the encoder's second max-pool layer;
                consumed by the first (inner) unpooling step.

        Returns:
            The output of ``decoder_cnn2``.
        """
        x = self.decoder_fc(x)
        x = self.unflat(x)
        # Pooling is undone in reverse order: the last pool is unpooled first.
        x = self.max_unpool1(x, indices2)
        x = self.decoder_cnn1(x)
        x = self.max_unpool2(x, indices1)
        x = self.decoder_cnn2(x)
        return x

然後你可以像這樣運行編碼器和解碼器：

# Instantiate the two halves of the autoencoder (encoder first, then decoder).
encoder, decoder = Encoder(), Decoder()
# One random 3x224x224 image with a leading batch axis added.
test_img = torch.rand(3, 224, 224).unsqueeze(0)
# The encoder returns the code together with both pooling index tensors;
# all three are handed to the decoder, and the shape of its output is printed.
codes, indices1, indices2 = encoder(test_img)
print(decoder(codes, indices1, indices2).shape)

警告

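上面的代碼運行時不會再遇到問題中與參數錯誤相關的 TypeError。但是，如果解碼器的 nn.ConvTranspose2d 層沒有設置 output_padding，它會引發另一個關於 indices1 形狀的錯誤：此時解碼器中轉置卷積的輸出空間尺寸依次為 14 → 27 → 53，而 indices1 的空間尺寸是 55×55，兩者不匹配。我懷疑它與解碼器中最大反池化層附近的內核大小、步幅或填充有關，但坦率地說，我對計算機視覺還不夠熟悉，無法調試此錯誤。也就是說，上面的代碼確實解決了問題中的錯誤，所以我認為這篇文章仍然算作答案。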

在 pytorch 中將 return_indices=True 添加到 nn.MaxPool2d 時出現 TypeError

問題描述

1 個解決方案

解決方案1
0 2022-12-20 08:46:12

問題

解決方案

警告

在 pytorch 中將 return_indices=True 添加到 nn.MaxPool2d 時出現 TypeError

問題描述

1 個解決方案

解決方案1 0 2022-12-20 08:46:12

問題

解決方案

警告

解決方案1
0 2022-12-20 08:46:12