[英]the derivative for 'target' is not implemented
I added two VAEs to the original model, so I need to add optimizer and loss.我在原始模型中添加了两个 VAE,因此需要添加优化器和损失。 However, the following errors are reported.
但是,会报告以下错误。 How can I modify them?
我该如何修改它们?
Traceback (most recent call last):
File "train.py", line 320, in <module>
main()
File "train.py", line 315, in main
ImgCla.TrainingData()
File "train.py", line 201, in TrainingData
lossv1 = self.loss_function(recon_audio, audio1, mean1, logstd1)
File "train.py", line 135, in loss_function
BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
File "/home/user1/.conda/envs/tyz/lib/python3.6/site-packages/torch/nn/functional.py", line 2762, in binary_cross_entropy
return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)
RuntimeError: the derivative for 'target' is not implemented
The train.py is as follows: train.py 如下:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import dataloader
import pandas
import os
import imp
import model
import math
import time
import matplotlib.pyplot as plt
import seaborn as sn
from tqdm import tqdm
from sklearn.metrics import classification_report,accuracy_score
import training_plot
from sklearn.metrics import confusion_matrix
import torch.nn.functional as F
from model import VAE1,VAE2
config = imp.load_source("config","config/Resnet50.py").config
device_ids = config["device_ids"]
data_train_opt = config['data_train_opt']
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
print("======================================")
print("Device: {}".format(device_ids))
def fix_bn(m):
classname = m.__class__.__name__
if classname.find('BatchNorm') != -1:
m.eval()
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, name, fmt=':f'):
self.name = name
self.fmt = fmt
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def __str__(self):
fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
return fmtstr.format(**self.__dict__)
class ProgressMeter(object):
def __init__(self, num_batches, meters, prefix=""):
self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
self.meters = meters
self.prefix = prefix
def display(self, batch):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
print('\t'.join(entries))
def _get_batch_fmtstr(self, num_batches):
num_digits = len(str(num_batches // 1))
fmt = '{:' + str(num_digits) + 'd}'
return '[' + fmt + '/' + fmt.format(num_batches) + ']'
def adjust_learning_rate(optimizer, epoch, args):
"""Decay the learning rate based on schedule"""
lr = args.lr
if args.cos: # cosine lr schedule
lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs))
else: # stepwise lr schedule
for milestone in args.schedule:
lr *= 0.1 if epoch >= milestone else 1.
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k]
correct_k = torch.sum(correct_k).float()
res.append(correct_k.mul_(100.0 / batch_size))
return res
class ImageClassify(object):
def __init__(self):
self.name_list = []
self.model = model.Mixed_model(data_train_opt["dim"])
self.model = torch.nn.DataParallel(self.model, device_ids=device_ids)
self.model = self.model.cuda(device=device_ids[0])
self.save = data_train_opt["final_model_file"]
self.training_save = data_train_opt["feat_training_file"]
self.training_log = data_train_opt["training_log"]
self.loss = 9999
self.best = 0
self.train_dataset = dataloader.Load_Data(config["data_dir"],"train")
self.trainloader = DataLoader(self.train_dataset, batch_size=data_train_opt['batch_size']*len(device_ids),num_workers=8,shuffle=True,drop_last=False)
self.valid_dataset = dataloader.Load_Data(config["data_dir"],"val")
self.validloader = DataLoader(self.valid_dataset,batch_size=data_train_opt['batch_size']*len(device_ids),num_workers=8,shuffle=True)
self.LossFun()
print("Trainloader: {}".format(len(self.trainloader)))
print("Validloader: {}".format(len(self.validloader)))
self.vae1 = VAE1().cuda()
self.vae2 = VAE2().cuda()
def loss_function(self,recon_x, x, mean, std):
BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
var = torch.pow(torch.exp(std), 2)
KLD = -0.5 * torch.sum(1 + torch.log(var) - torch.pow(mean, 2) - var)
return BCE+KLD
def loss_function2(self,recon_x, x, mean, std):
BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
var = torch.pow(torch.exp(std), 2)
KLD = -0.5 * torch.sum(1 + torch.log(var) - torch.pow(mean, 2) - var)
return BCE + KLD
def LossFun(self):
print("lossing...")
self.criterion = nn.CrossEntropyLoss()
self.optimizer = optim.Adam(self.model.parameters(), lr=data_train_opt['lr'])
VAE needs to introduce reconstruction error, which is added to my previous model, so I first updated the parameters of the model and trained the previous model. VAE需要引入reconstruction error,这是我之前的模型加进去的,所以我先更新了模型的参数,训练了之前的模型。 Update the parameters of VAE and train VAE.
更新VAE的参数并训练VAE。 When training VAE, I want to fix the parameters of other parts, so I add this part:
训练VAE的时候,想固定其他部分的参数,所以加了这部分:
for name,param in model.Mixed_model().named_parameters():
if 'video' in name:
param.requires_grad=False
if 'audio_net' in name:
param.requires_grad=False
if 'classifier' in name:
param.requires_grad=False
self.optimizer2 = optim.Adam(filter(lambda param:param.requires_grad,model.Mixed_model().parameters()), lr=data_train_opt['lr'])
def TrainingData(self):
self.model.train()
log = []
for epoch in range(data_train_opt['epoch']):
if (epoch+1) % data_train_opt["decay_epoch"] == 0 :
for param_group in self.optimizer.param_groups:
param_group['lr'] = param_group['lr']*data_train_opt["decay_rate"]
batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
progress = ProgressMeter(
len(self.trainloader),
[batch_time, data_time, losses,top1],
prefix="Epoch: [{}]".format(epoch+1))
# switch to train mode
self.model.train()
end = time.time()
for i, (img,audio, class_id) in enumerate(self.trainloader):
# measure data loading time
data_time.update(time.time() - end)
img,audio,class_id = img.cuda(device=device_ids[0]),audio.cuda(device=device_ids[0]),class_id.cuda(device=device_ids[0])
predict,audio1,img1= self.model(img,audio)
loss = self.criterion(predict, class_id)
# acc1/acc5 are (K+1)-way contrast classifier accuracy
# measure accuracy and record loss
acc1= accuracy(predict, class_id, topk=(1,))
losses.update(loss.item(), img.size(0))
top1.update(acc1[0], img.size(0))
self.optimizer.zero_grad()
loss.backward(retain_graph=True)
self.optimizer.step()
z1, logstd1, mean1, eps1,recon_audio = self.vae1(audio1)
z2, logstd2, mean2, eps2,recon_img = self.vae2(img1)
lossv1 = self.loss_function(recon_audio, audio1, mean1, logstd1)
lossv2 = self.loss_function2(recon_img, img1, mean2, logstd2)
lossv = lossv2 + lossv1
lossv.backward()
self.optimizer2.zero_grad()
lossv.backward()
self.optimizer2.step()
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if (i+1) % data_train_opt["log_step"] == 0:
loss_avg = losses.avg
acc_avg = top1.avg
log.append([epoch, i + 1, loss.item(), acc1[0], loss_avg, acc_avg])
progress.display(i+1)
if (epoch+1) % data_train_opt["save_epoch"] == 0:
acc, a = self.ValidingData(epoch+1)
if losses.avg <self.loss:
self.loss = losses.avg
a = 1
np.save(data_train_opt["training_log"], log)
if a == 1:
self.save_checkpoint({
'epoch': epoch + 1,
'state_dict': self.model.state_dict(),
'optimizer' : self.optimizer.state_dict(),
'acc':acc
}, filename=os.path.join(data_train_opt["feat_training_file"],'Epoch_{}_acc_{}_loss_{}.pth'.format(epoch+1,acc,losses.avg)))
# }, filename=os.path.join(data_train_opt["feat_training_file"],'checkpoint_{:04d}.pth'.format(epoch+1)))
# }, filename=os.path.join(data_train_opt["feat_training_file"],'best.pth'))
def save_checkpoint(self,state,filename='checkpoint.pth.tar'):
torch.save(state, filename)
def ValidingData(self,epoch):
self.model.eval()
a = 0
with torch.no_grad():
y_pre = []
y_true = []
with tqdm(total=len(self.validloader), desc='Example', leave=True, ncols=100, unit='batch', unit_scale=True) as pbar:
for i, (img,audio,class_id) in enumerate(self.validloader):
img,audio, class_id = img.cuda(device=device_ids[0]),audio.cuda(device=device_ids[0]), class_id.cuda(device=device_ids[0])
predict = self.model(img, audio)
_, pre = torch.max(predict,dim=1)
y_pre.append(pre.cpu())
y_true.append(class_id.cpu())
pbar.update(1)
y_pre = torch.cat(y_pre).cpu().detach().numpy()
y_true = torch.cat(y_true).cpu().detach().numpy()
cm = confusion_matrix(y_true, y_pre)
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
cm.diagonal()
keys = ['airport',
'bus',
'metro',
'metro_station',
'park',
'public_square',
'shopping_mall',
'street_pedestrian',
'street_traffic',
'tram']
values = [np.round(i, decimals=3) for i in list(cm.diagonal())]
df_cm = pandas.DataFrame(cm, index=[i for i in keys], columns=[i for i in keys])
plt.figure(figsize=(15, 12))
sn.heatmap(df_cm, annot=True)
plt.savefig('confusion.png')
report = classification_report(y_true, y_pre, target_names=
['airport',
'bus',
'metro',
'metro_station',
'park',
'public_square',
'shopping_mall',
'street_pedestrian',
'street_traffic',
'tram'], digits=4)
acc = accuracy_score(y_true, y_pre)
if acc>self.best:
a = 1
self.best=acc
print(report)
print("==================")
with open(data_train_opt["txt"],"a") as f:
f.write("========= {} =======\n".format(epoch))
f.write("classification_report".format(epoch))
f.write(report)
f.write("\n")
self.model.train()
if a ==1:
with open(data_train_opt["best"], "a") as f:
f.write("========= {} =======\n".format(epoch))
f.write("classification_report".format(epoch))
f.write(report)
f.write("================\n")
return acc,a
def main():
ImgCla = ImageClassify()
ImgCla.TrainingData()
training_plot.draw(data_train_opt["training_log"])
acc, a = ImgCla.ValidingData(epoch=0)
if __name__ == '__main__':
main()
The error message refers to the fact you are requiring gradient computation on the target tensor which is not supported by nn.functional.binary_cross_entropy
.错误消息是指您需要对
nn.functional.binary_cross_entropy
不支持的目标张量进行梯度计算的事实。 In other words you need to detach the target before computing the loss term:换句话说,您需要在计算损失项之前分离目标:
BCE = F.binary_cross_entropy(recon_x, x.detach(), reduction='sum')
In both loss_function
and loss_function2
.在
loss_function
和loss_function2
中。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.