
Torch: How to inspect weights after training?

I am wondering what I am doing wrong when trying to see how the weights changed during training.

My loss goes down considerably, but it appears that the initialized weights are the same as the trained weights. Am I looking in the wrong place? I would appreciate any insight you might have!

import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

# setup GPU/CPU processing
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# initialize model
class mlp1(torch.nn.Module):
    def __init__(self, num_features, num_hidden, num_classes):
        super(mlp1, self).__init__()
        self.num_classes = num_classes
        self.input_layer = torch.nn.Linear(num_features, num_hidden)
        self.out_layer = torch.nn.Linear(num_hidden, num_classes)

    def forward(self, x):
        x = self.input_layer(x)
        x = torch.sigmoid(x)
        logits = self.out_layer(x)
        probas = torch.softmax(logits, dim=1)
        return logits, probas

# instantiate model
model = mlp1(num_features=28*28, num_hidden=100, num_classes=10).to(device)

# check initial weights
weight_check_pre = model.state_dict()['input_layer.weight'][0][0:25]

# optim
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)


# download data
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)
# data loader
train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=100,
                              shuffle=True)

# train
NUM_EPOCHS = 1
for epoch in range(NUM_EPOCHS):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_dataloader):
        # send data to device
        features = features.view(-1, 28*28).to(device)
        targets = targets.to(device)
        # forward
        logits, probas = model(features)
        # loss
        loss = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        loss.backward()
        # now update weights
        optimizer.step()
        ### LOGGING
        if not batch_idx % 50:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Loss: %.4f'
                   %(epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_dataloader), loss))

# check post training
weight_check_post = model.state_dict()['input_layer.weight'][0][0:25]

# compare
weight_check_pre == weight_check_post  # all equal

That is because both variables reference the same underlying tensor in memory: state_dict() does not copy the parameter tensors, and slicing a tensor returns a view that shares its storage. So weight_check_pre keeps tracking the live weights as they are updated, and the two slices will always be equal to each other.
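You can check that the slice really does track the live parameter. Here is a minimal sketch (assuming the same model instance as above) that compares storage pointers:

# the state_dict entry and the live parameter point at the same storage,
# so any slice of it keeps reflecting the weights as they are updated
same_storage = (model.state_dict()['input_layer.weight'].data_ptr()
                == model.input_layer.weight.data_ptr())
print(same_storage)  # True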

You can do this to get actual copies of the weights:

import copy

# check initial weights
weight_check_pre = copy.deepcopy(model.state_dict()['input_layer.weight'][0][0:25])
...
# check post training
weight_check_post = copy.deepcopy(model.state_dict()['input_layer.weight'][0][0:25])
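As an alternative sketch (not part of the original fix), calling clone() on the slice also gives an independent snapshot, and torch.equal collapses the comparison into a single boolean:

# clone() copies the sliced values into a new, independent tensor
weight_check_pre = model.state_dict()['input_layer.weight'][0][0:25].clone()
# ... training loop as above ...
weight_check_post = model.state_dict()['input_layer.weight'][0][0:25].clone()

# element-wise comparison of the two snapshots
print(weight_check_pre == weight_check_post)
# single boolean for the whole slice
print(torch.equal(weight_check_pre, weight_check_post))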
