
OpenAIGPTModel PyTorch Error - ValueError: too many values to unpack (expected 2)

I am having trouble getting the Persona-Dialogue-Generation model (from GitHub) to run. It seems the model call returns only one value, but the code unpacks two. How should I modify the code?

The problematic code is:

self.transformer_module = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt', num_special_tokens=special_token_len)

and

lm_logits, hidden_states = self.transformer_module(input_seq, None, dis_seq)

which raises:

ValueError: too many values to unpack (expected 2)

Full code:

import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_pretrained_bert import OpenAIGPTLMHeadModel


class Gpt2SeqModel(nn.Module):
    def __init__(self, opt, vocab_size, pad_idx, start_idx, end_idx, special_token_len, dict, longest_label=1,
                 length_penalty=1.0, diversity_groups=1, diversity_coef=0.2, annealing_topk=None, annealing=0, sample=False,
                 temperature=0.7):
        super().__init__()
        # original vocab size plus special vocab
        self.vocab_size = vocab_size + 40478
        self.token_type_dict = {}
        # max is 30
        for i in range(29):
            self.token_type_dict['dis'+str(i)] = self.vocab_size + i
        # pred for prediction turn embedding
        self.token_type_dict['pred'] = self.vocab_size + 29
        # the remaining 30 is the distance size
        special_token_len += 30
        self.vocab_size += 29
        # regard input and output as one sentence, given the input as context, generate the next sentence.
        self.transformer_module = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt',
                                                                       num_special_tokens=special_token_len)
        self.pad_idx = pad_idx
        self.start_idx = start_idx
        self.end_idx = end_idx
        self.register_buffer('start_tensor', torch.LongTensor([start_idx]))
        self.register_buffer('pred_turn_tensor', torch.LongTensor([self.token_type_dict['pred']]))
        # default beam equal to 1
        self.beam_size = opt.get('beam_size', 1)
        self.rank = opt.get('rank_candidates', False)

        self.use_turn = opt.get('encoder_turn_use', False)
        self.use_dis = opt.get('encoder_dis_use', False)
        # longest label
        self.longest_label = min(longest_label, opt.get('decode_max_seq_len', 100))
        self.length_penalty_coef = length_penalty
        self.diversity_groups = diversity_groups
        self.diversity_coef = diversity_coef
        self.annealing_topk = annealing_topk
        self.annealing = annealing
        self.temperature = temperature
        self.topk = opt.get('top_k', 0)
        self.dict = dict
        self.no_repeat_ngram_size = 2
        self.dropout = nn.Dropout(p=0.2)
        self.linear = nn.Linear(768, 2, bias=False)
        nn.init.normal_(self.linear.weight, std=0.02)

    def forward(self, src_seq, src_seq_turn=None, src_seq_dis=None, tgt_seq=None, tgt_seq_turn=None, cands=None, valid_cands=None, prev_enc=None,
                rank_during_training=False, sampling=False, sampling_cands=None):
        # concat src_seq and tgt_seq as one sentence, use start token to separate them.
        if tgt_seq is not None:
            # keep track of longest label we've ever seen
            # we'll never produce longer ones than that during prediction
            self.longest_label = max(self.longest_label, tgt_seq.size(1))

        batch_size, src_seq_len = src_seq.size()

        if src_seq_dis is not None:
            src_seq_dis = np.array(src_seq_dis)

        # evaluation return none scores
        scores = None

        negative_score = None
        start_tensor = self.start_tensor.detach().expand(batch_size, 1)
        # whether training or evaluation
        if tgt_seq is not None:
            input_seq = torch.cat([src_seq, start_tensor, tgt_seq], dim=1)
            # TODO: manually construct the position ids for input & output
            if self.use_dis and src_seq_dis is not None:
                # create numpy
                token_type_ids = np.zeros(input_seq.size())
                # map str to id
                for row_ind, row in enumerate(token_type_ids):
                    for ind, _ in enumerate(row):
                        if ind < src_seq_len:
                            str_ind = 'dis' + str(src_seq_dis[row_ind, ind])
                            row[ind] = self.token_type_dict[str_ind]
                        else:
                            row[ind] = self.token_type_dict['pred']
                dis_seq = torch.tensor(token_type_ids, device=input_seq.device, dtype=torch.long)
            else:
                dis_seq = None
            lm_logits, hidden_states = self.transformer_module(input_seq, None, dis_seq)
            # lm labels should mask the source sentence language model
            shift_logits = lm_logits[..., src_seq_len:-1, :].contiguous()
            # lm_labels = tgt_seq.clone()[..., 1:].contiguous()
            # predict answers
            scores = shift_logits
            pos_seq_len = src_seq_len + tgt_seq.ne(self.pad_idx).sum(dim=1, keepdim=True)
            pos_seq_len_expand = pos_seq_len.unsqueeze(dim=2).repeat(1, 1, 768)
            last_state = hidden_states.gather(dim=1, index=pos_seq_len_expand).squeeze(dim=1)
            positive_score = self.linear(self.dropout(last_state))
            predictions = shift_logits.argmax(dim=-1)
        else:
            prior_context = torch.cat([src_seq, start_tensor], dim=1)
            if self.use_dis and src_seq_dis is not None:
                # create numpy
                token_type_ids = np.zeros(prior_context.size())
                # map str to id
                for row_ind, row in enumerate(token_type_ids):
                    for ind, _ in enumerate(row):
                        if ind < src_seq_len:
                            str_ind = 'dis' + str(src_seq_dis[row_ind, ind])
                            row[ind] = self.token_type_dict[str_ind]
                        else:
                            row[ind] = self.token_type_dict['pred']
                prior_dis = torch.tensor(token_type_ids, device=prior_context.device, dtype=torch.long)
            else:
                prior_dis = None

It looks like OpenAIGPTLMHeadModel returns only lm_logits (or a loss when lm_labels is passed), so remove hidden_states from the unpacking:

lm_logits = self.transformer_module(input_seq, None, dis_seq)

The specific error message comes from Python trying to unpack the single returned tensor: a tensor is iterable over its first dimension, so the two-variable assignment iterates over the batch dimension and finds more than two items.
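A minimal sketch of that failure mode, with made-up shapes (not taken from the repository):

import torch

# the model call returns a single tensor, e.g. (batch, seq_len, vocab)
logits = torch.zeros(4, 12, 40478)
# unpacking iterates over dim 0 -> 4 items here, not 2
lm_logits, hidden_states = logits
# ValueError: too many values to unpack (expected 2)
# (with a batch size of exactly 2 this would even "succeed" silently,
#  leaving each variable holding one row of the batch)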

    def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None):
        hidden_states = self.transformer(input_ids, position_ids, token_type_ids)
        lm_logits = self.lm_head(hidden_states)
        if lm_labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = lm_logits[..., :-1, :].contiguous()
            shift_labels = lm_labels[..., 1:].contiguous()
            # Flatten the tokens
            loss_fct = CrossEntropyLoss(ignore_index=-1)
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                            shift_labels.view(-1))
            return loss
        return lm_logits

src: https://github.com/LuoweiZhou/pytorch-pretrained-BERT/blob/1cd358b1af021b6494f71fe646d1aeb0ffe514a9/pytorch_pretrained_bert/modeling_openai.py#L717
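Note that the question's forward later uses hidden_states to compute last_state, so dropping it from the unpacking is not enough on its own. One way to keep that part working, assuming the installed OpenAIGPTLMHeadModel exposes the .transformer and .lm_head submodules shown in the linked source (a sketch, not the repository's own fix):

# run the transformer body and the LM head separately to get both tensors
hidden_states = self.transformer_module.transformer(input_seq, None, dis_seq)
lm_logits = self.transformer_module.lm_head(hidden_states)

This yields the same lm_logits as the plain call while also keeping the hidden states that the last_state / positive_score computation needs.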
