The derivative for 'target' is not implemented

I added two VAEs to the original model, so I need to add an optimizer and a loss. However, the following error is reported. How can I fix it?

Traceback (most recent call last):
  File "train.py", line 320, in <module>
    main()
  File "train.py", line 315, in main
    ImgCla.TrainingData()
  File "train.py", line 201, in TrainingData
    lossv1 = self.loss_function(recon_audio, audio1, mean1, logstd1)
  File "train.py", line 135, in loss_function
    BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
  File "/home/user1/.conda/envs/tyz/lib/python3.6/site-packages/torch/nn/functional.py", line 2762, in binary_cross_entropy
    return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)
RuntimeError: the derivative for 'target' is not implemented

The train.py is as follows:

from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import dataloader
import pandas
import os
import imp
import model
import math
import time
import matplotlib.pyplot as plt
import seaborn as sn
from tqdm import tqdm
from sklearn.metrics import classification_report,accuracy_score
import training_plot
from sklearn.metrics import confusion_matrix
import torch.nn.functional as F
from model import VAE1,VAE2

config = imp.load_source("config","config/Resnet50.py").config
device_ids = config["device_ids"]
data_train_opt = config['data_train_opt']
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
print("======================================")
print("Device: {}".format(device_ids))

def fix_bn(m):
    """Put BatchNorm layers into eval mode so their running stats are frozen."""
    classname = m.__class__.__name__
    if classname.find('BatchNorm') != -1:
        m.eval()

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'


def adjust_learning_rate(optimizer, epoch, args):
    """Decay the learning rate based on schedule"""
    lr = args.lr
    if args.cos:  # cosine lr schedule
        lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs))
    else:  # stepwise lr schedule
        for milestone in args.schedule:
            lr *= 0.1 if epoch >= milestone else 1.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k]
            correct_k = torch.sum(correct_k).float()
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


class ImageClassify(object):
    def __init__(self):
        self.name_list = []
        self.model = model.Mixed_model(data_train_opt["dim"])
        self.model = torch.nn.DataParallel(self.model, device_ids=device_ids)
        self.model = self.model.cuda(device=device_ids[0])
        self.save = data_train_opt["final_model_file"]
        self.training_save = data_train_opt["feat_training_file"]
        self.training_log = data_train_opt["training_log"]
        self.loss = 9999
        self.best = 0
        self.train_dataset = dataloader.Load_Data(config["data_dir"],"train")
        self.trainloader = DataLoader(self.train_dataset, batch_size=data_train_opt['batch_size']*len(device_ids),num_workers=8,shuffle=True,drop_last=False)


        self.valid_dataset = dataloader.Load_Data(config["data_dir"],"val")
        self.validloader = DataLoader(self.valid_dataset,batch_size=data_train_opt['batch_size']*len(device_ids),num_workers=8,shuffle=True)
        self.LossFun()
        print("Trainloader: {}".format(len(self.trainloader)))
        print("Validloader: {}".format(len(self.validloader)))
        self.vae1 = VAE1().cuda()
        self.vae2 = VAE2().cuda()

    def loss_function(self,recon_x, x, mean, std):
        BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
        var = torch.pow(torch.exp(std), 2)
        KLD = -0.5 * torch.sum(1 + torch.log(var) - torch.pow(mean, 2) - var)
        return BCE+KLD

    def loss_function2(self,recon_x, x, mean, std):
        BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
        var = torch.pow(torch.exp(std), 2)
        KLD = -0.5 * torch.sum(1 + torch.log(var) - torch.pow(mean, 2) - var)
        return BCE + KLD

    def LossFun(self):
        print("lossing...")
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=data_train_opt['lr'])

The VAE introduces a reconstruction error and is added on top of my previous model, so I first update the parameters of the original model and train it, then update the VAE's parameters and train the VAE. When training the VAE, I want to freeze the parameters of the other parts, so I added this part (see the note after the snippet):

        for name,param in model.Mixed_model().named_parameters():
            if 'video' in name:
                param.requires_grad=False
            if 'audio_net' in name:
                param.requires_grad=False
            if 'classifier' in name:
                param.requires_grad=False
        self.optimizer2 = optim.Adam(filter(lambda param:param.requires_grad,model.Mixed_model().parameters()), lr=data_train_opt['lr'])
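
Note that model.Mixed_model() constructs a fresh network on each call, so the requires_grad flags set above never touch self.model, and optimizer2 ends up holding parameters of a model that is never run. A minimal sketch of the intended pattern, assuming the submodule names video, audio_net, and classifier from the snippet above, and assuming self.vae1 / self.vae2 are constructed before this point:

import itertools

# Freeze the non-VAE parts of the model instance that is actually trained.
for name, param in self.model.named_parameters():
    if any(key in name for key in ('video', 'audio_net', 'classifier')):
        param.requires_grad = False

# Optimize the VAEs plus whatever remains trainable in the main model.
self.optimizer2 = optim.Adam(
    itertools.chain(
        (p for p in self.model.parameters() if p.requires_grad),
        self.vae1.parameters(),
        self.vae2.parameters(),
    ),
    lr=data_train_opt['lr'],
)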



    def TrainingData(self):
        self.model.train()
        log = []
        for epoch in range(data_train_opt['epoch']):
            if (epoch+1) % data_train_opt["decay_epoch"] == 0 :
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = param_group['lr']*data_train_opt["decay_rate"]

            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            top1 = AverageMeter('Acc@1', ':6.2f')
            progress = ProgressMeter(
                len(self.trainloader),
                [batch_time, data_time, losses,top1],
                prefix="Epoch: [{}]".format(epoch+1))

            # switch to train mode
            self.model.train()
            end = time.time()
            for i, (img,audio, class_id) in enumerate(self.trainloader):
                # measure data loading time
                data_time.update(time.time() - end)
                img,audio,class_id = img.cuda(device=device_ids[0]),audio.cuda(device=device_ids[0]),class_id.cuda(device=device_ids[0])
                predict,audio1,img1= self.model(img,audio)
                loss = self.criterion(predict, class_id)

                # acc1/acc5 are (K+1)-way contrast classifier accuracy
                # measure accuracy and record loss
                acc1= accuracy(predict, class_id, topk=(1,))
                losses.update(loss.item(), img.size(0))
                top1.update(acc1[0], img.size(0))
                self.optimizer.zero_grad()
                loss.backward(retain_graph=True)
                self.optimizer.step()

                z1, logstd1, mean1, eps1,recon_audio = self.vae1(audio1)
                z2, logstd2, mean2, eps2,recon_img = self.vae2(img1)

                lossv1 = self.loss_function(recon_audio, audio1, mean1, logstd1)
                lossv2 = self.loss_function2(recon_img, img1, mean2, logstd2)
                lossv = lossv2 + lossv1
                self.optimizer2.zero_grad()
                lossv.backward()
                self.optimizer2.step()



                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                if (i+1) % data_train_opt["log_step"] == 0:
                    loss_avg = losses.avg
                    acc_avg = top1.avg
                    log.append([epoch, i + 1, loss.item(), acc1[0], loss_avg, acc_avg])
                    progress.display(i+1)



            if (epoch+1) % data_train_opt["save_epoch"] == 0:

                acc, a = self.ValidingData(epoch+1)
                if losses.avg <self.loss:
                    self.loss = losses.avg
                    a = 1
                np.save(data_train_opt["training_log"], log)
                if a == 1:
                    self.save_checkpoint({
                        'epoch': epoch + 1,
                        'state_dict': self.model.state_dict(),
                        'optimizer' : self.optimizer.state_dict(),
                        'acc':acc
                    }, filename=os.path.join(data_train_opt["feat_training_file"],'Epoch_{}_acc_{}_loss_{}.pth'.format(epoch+1,acc,losses.avg)))
                    # }, filename=os.path.join(data_train_opt["feat_training_file"],'checkpoint_{:04d}.pth'.format(epoch+1)))
                    # }, filename=os.path.join(data_train_opt["feat_training_file"],'best.pth'))

    def save_checkpoint(self,state,filename='checkpoint.pth.tar'):
        torch.save(state, filename)
    def ValidingData(self,epoch):

        self.model.eval()
        a = 0
        with torch.no_grad():
            y_pre = []
            y_true = []
            with tqdm(total=len(self.validloader), desc='Example', leave=True, ncols=100, unit='batch', unit_scale=True) as pbar:
                for i, (img,audio,class_id) in enumerate(self.validloader):
                    img,audio, class_id = img.cuda(device=device_ids[0]),audio.cuda(device=device_ids[0]), class_id.cuda(device=device_ids[0])
                    predict = self.model(img, audio)
                    _, pre = torch.max(predict,dim=1)
                    y_pre.append(pre.cpu())
                    y_true.append(class_id.cpu())
                    pbar.update(1)

            y_pre = torch.cat(y_pre).cpu().detach().numpy()
            y_true = torch.cat(y_true).cpu().detach().numpy()
            cm = confusion_matrix(y_true, y_pre)
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            cm.diagonal()
            keys = ['airport',
                    'bus',
                    'metro',
                    'metro_station',
                    'park',
                    'public_square',
                    'shopping_mall',
                    'street_pedestrian',
                    'street_traffic',
                    'tram']
            values = [np.round(i, decimals=3) for i in list(cm.diagonal())]
            df_cm = pandas.DataFrame(cm, index=[i for i in keys], columns=[i for i in keys])
            plt.figure(figsize=(15, 12))
            sn.heatmap(df_cm, annot=True)
            plt.savefig('confusion.png')
            report = classification_report(y_true, y_pre, target_names=keys, digits=4)
            acc = accuracy_score(y_true, y_pre)
            if acc>self.best:
                a = 1
                self.best=acc
            print(report)

            print("==================")
            with open(data_train_opt["txt"],"a") as f:
                f.write("========= {} =======\n".format(epoch))
                f.write("classification_report".format(epoch))
                f.write(report)
                f.write("\n")
        self.model.train()

        if a ==1:
            with open(data_train_opt["best"], "a") as f:
                f.write("========= {} =======\n".format(epoch))
                f.write("classification_report".format(epoch))
                f.write(report)
                f.write("================\n")

        return acc,a


def main():

    ImgCla = ImageClassify()
    ImgCla.TrainingData()
    training_plot.draw(data_train_opt["training_log"])
    acc, a = ImgCla.ValidingData(epoch=0)

if __name__ == '__main__':
    main()

The error message refers to the fact that you are requiring gradient computation on the target tensor, which is not supported by nn.functional.binary_cross_entropy. In other words, you need to detach the target before computing the loss term:

BCE = F.binary_cross_entropy(recon_x, x.detach(), reduction='sum')

This applies to both loss_function and loss_function2.
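
Concretely, the corrected function would look like this (a sketch of the question's own loss_function with the target detached; as in the original, std holds the log standard deviation):

def loss_function(self, recon_x, x, mean, std):
    # Detach the target: binary_cross_entropy has no derivative w.r.t. `target`,
    # and here x (audio1 / img1) comes out of self.model with requires_grad=True.
    BCE = F.binary_cross_entropy(recon_x, x.detach(), reduction='sum')
    var = torch.pow(torch.exp(std), 2)  # sigma^2 recovered from log-sigma
    KLD = -0.5 * torch.sum(1 + torch.log(var) - torch.pow(mean, 2) - var)
    return BCE + KLD

Detaching also means no gradient flows back into the main model through the reconstruction target, which is consistent with freezing the non-VAE parts during this training phase.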
