I'm a beginner at PyTorch and I've been trying to set up a PyTorch model in Azure ML Studio. The code runs fine in Google Colab, but in an Azure ML notebook I get this error:
File /anaconda/envs/azureml_py38_PT_TF/lib/python3.8/site-packages/pytorch_lightning/trainer/training_io.py:268, in TrainerIOMixin.save_checkpoint(self, filepath, weights_only)
267 def save_checkpoint(self, filepath, weights_only: bool = False):
--> 268 checkpoint = self.dump_checkpoint(weights_only)
270 if self.is_global_zero:
271 # do the actual save
272 try:
File /anaconda/envs/azureml_py38_PT_TF/lib/python3.8/site-packages/pytorch_lightning/trainer/training_io.py:362, in TrainerIOMixin.dump_checkpoint(self, weights_only)
360 # save native amp scaling
361 if self.use_amp and NATIVE_AMP_AVALAIBLE and not self.use_tpu:
--> 362 checkpoint['native_amp_scaling_state'] = self.scaler.state_dict()
364 # add the module_arguments and state_dict from the model
365 model = self.get_model()
AttributeError: 'NoneType' object has no attribute 'state_dict'
Model code:
class EvaluationModel(pl.LightningModule):
    """Stack of fully connected layers trained with an L1 loss.

    The network consists of ``layer_count - 1`` pairs of ``Linear(808, 808)``
    and ``ReLU`` followed by a final ``Linear(808, 1)`` layer.
    """

    def __init__(self, learning_rate=1e-3, batch_size=1024, layer_count=10):
        """Store the hyperparameters and build the layer stack.

        Args:
            learning_rate: Learning rate handed to the Adam optimizer.
            batch_size: Batch size used by ``train_dataloader``.
            layer_count: Total number of linear layers, output layer included.
        """
        super().__init__()
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        # Insert the named layers straight into the ordered mapping that
        # nn.Sequential consumes; insertion order is the execution order.
        named_layers = OrderedDict()
        for i in range(layer_count - 1):
            named_layers[f"linear-{i}"] = nn.Linear(808, 808)
            named_layers[f"relu-{i}"] = nn.ReLU()
        named_layers[f"linear-{layer_count-1}"] = nn.Linear(808, 1)
        self.seq = nn.Sequential(named_layers)

    def forward(self, x):
        """Run ``x`` through the sequential layer stack and return the result."""
        return self.seq(x)

    def training_step(self, batch, batch_idx):
        """Compute, log and return the L1 loss for one batch.

        ``batch`` is indexed with the keys ``'binary'`` (model input) and
        ``'eval'`` (target); ``batch_idx`` is not used.
        """
        inputs = batch['binary']
        targets = batch['eval']
        predictions = self(inputs)
        loss = F.l1_loss(predictions, targets)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of this module."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    def train_dataloader(self):
        """Return a DataLoader over a new ``EvaluationDataset``."""
        return DataLoader(
            EvaluationDataset(count=LABEL_COUNT),
            batch_size=self.batch_size,
            num_workers=2,
            pin_memory=True,
        )
# Hyperparameter sets; each entry supplies the model's layer count and batch size.
configs = [
    {"layer_count": 4, "batch_size": 512},
    # {"layer_count": 6, "batch_size": 1024},
]

for config in configs:
    # Run name built from the current Unix time and the hyperparameters;
    # it is passed to the logger as its version.
    version_name = f'{int(time.time())}-batch_size-{config["batch_size"]}-layer_count-{config["layer_count"]}'
    logger = pl.loggers.TensorBoardLogger(
        "lightning_logs",
        name="chessml",
        version=version_name,
    )
    trainer = pl.Trainer(
        gpus=1,
        precision=16,
        max_epochs=1,
        auto_lr_find=True,
        logger=logger,
    )
    model = EvaluationModel(
        layer_count=config["layer_count"],
        batch_size=config["batch_size"],
        learning_rate=1e-3,
    )
    # Manual learning-rate search, currently disabled:
    # trainer.tune(model)
    # lr_finder = trainer.tuner.lr_find(model, min_lr=1e-6, max_lr=1e-3, num_training=25)
    # fig = lr_finder.plot(suggest=True)
    # fig.show()
    trainer.fit(model)
    # Only the first configuration is trained; the loop exits here.
    break
The model code is from https://towardsdatascience.com/train-your-own-chess-ai-66b9ca8d71e4
From the error, it seems like there is some issue with your checkpoint. Could you try loading your model using the torch.load API and see if that works?
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.