one of the variables needed for gradient computation has been modified by an inplace operation:

I am trying to compute a loss for the policy and target networks in the Deep Deterministic Policy Gradient (DDPG) algorithm with PyTorch 1.5, and I get the following error:

File "F:\agents\ddpg.py", line 128, in train_model
    policy_loss.backward()
  File "E:\conda\envs\pytorch\lib\site-packages\torch\tensor.py", line 198, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "E:\conda\envs\pytorch\lib\site-packages\torch\autograd\__init__.py", line 100, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [128, 1]], which is output 0 of TBackward, is at version 2; expected version 1 instead
. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

Here are my networks and the training procedure. In the actor network, the length of the output vector is 20, which represents a continuous action. The input of the critic network consists of the state vector and the action vector.

"""
ddpg actor
"""
class MLP(nn.Module):
    def __init__(self,
                 input_size,
                 output_size,
                 output_limit=1.0,
                 hidden_sizes=(64, 64),
                 activation=torch.relu,
                 output_activation=identity,
                 use_output_layer=True,
                 use_actor=False,
                 ):
        super(MLP, self).__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.output_limit = output_limit
        self.hidden_sizes = hidden_sizes
        self.activation = activation
        self.output_activation = output_activation
        self.use_output_layer = use_output_layer
        self.use_actor = use_actor

        # Set hidden layers
        self.hidden_layers = nn.ModuleList()
        in_size = self.input_size
        for next_size in self.hidden_sizes:
            fc = nn.Linear(in_size, next_size)
            in_size = next_size
            self.hidden_layers.append(fc)

        # Set output layers
        if self.use_output_layer:
            self.output_layer1 = nn.Linear(in_size, self.output_size // 2)
            self.output_layer2 = nn.Linear(in_size, self.output_size // 2)
        else:
            self.output_layer = identity

    def forward(self, x):

        for hidden_layer in self.hidden_layers:
            x = self.activation(hidden_layer(x))
        x1 = torch.sigmoid(self.output_layer1(x))
        x2 = F.softmax(self.output_layer2(x), dim=0)
        out = torch.cat((x1, x2), dim=-1)

        # If the network is used as an actor network, make sure the output is in the correct range
        out = out * self.output_limit if self.use_actor else out
        return out



"""
DDPG critic, TD3 critic, SAC qf, TAC qf
"""

class critic(nn.Module):
    def __init__(self,
                 input_size,
                 output_size,
                 output_limit=1.0,
                 hidden_sizes=(64, 64),
                 activation=torch.relu,
                 output_activation=identity,
                 use_output_layer=True,
                 use_actor=False,
                 ):
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.output_limit = output_limit
        self.hidden_sizes = hidden_sizes
        self.activation = activation
        self.output_activation = output_activation
        self.use_output_layer = use_output_layer
        self.use_actor = use_actor

        # Set hidden layers
        self.hidden_layers = nn.ModuleList()
        in_size = self.input_size
        for next_size in self.hidden_sizes:
            fc = nn.Linear(in_size, next_size)
            in_size = next_size
            self.hidden_layers.append(fc)

        # Set output layers
        if self.use_output_layer:
            self.output_layer = nn.Linear(in_size, self.output_size)
        else:
            self.output_layer = identity

    def forward(self, x, a):
        q = torch.cat([x, a], dim=1)

        for hidden_layer in self.hidden_layers:
            q = self.activation(hidden_layer(q))
        q = torch.tanh(self.output_layer(q))

        return q
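
"""
ddpg agent: train_model (in ddpg.py)
"""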
    def train_model(self):
        batch = self.replay_buffer.sample(self.batch_size)
        obs1 = batch['obs1']
        obs2 = batch['obs2']
        acts = batch['acts']
        rews = batch['rews']
        done = batch['done']
        # Check shape of experiences

        # Prediction Q(s,𝜇(s)), Q(s,a), Q‾(s',𝜇‾(s'))
        with torch.autograd.set_detect_anomaly(True):
            print("obs1",obs1.shape) #(64,22)
            print("a1",self.policy(obs1).shape) #(64,20)
            q_pi = self.qf(obs1, self.policy(obs1))
            q = self.qf(obs1, acts).squeeze(1)
            q_pi_target = self.qf_target(obs2, self.policy_target(obs2)).squeeze(1)

            # Target for Q regression
            q_backup = rews + self.gamma * (1 - done) * q_pi_target
            q_backup.to(self.device)

            # DDPG losses
            policy_loss = -q_pi.mean()
            qf_loss = F.mse_loss(q, q_backup.detach())

            # Update Q-function network parameter

            self.qf_optimizer.zero_grad()
            qf_loss.backward()
            nn.utils.clip_grad_norm_(self.qf.parameters(), self.gradient_clip_qf)
            self.qf_optimizer.step()

            # Update policy network parameter
            self.policy_optimizer.zero_grad()
            # here is the error
            policy_loss.backward()
            nn.utils.clip_grad_norm_(self.policy.parameters(), self.gradient_clip_policy)
            self.policy_optimizer.step()

            # Polyak averaging for target parameter
            soft_target_update(self.policy, self.policy_target)
            soft_target_update(self.qf, self.qf_target)

            # Save losses
            self.policy_losses.append(policy_loss.item())
            self.qf_losses.append(qf_loss.item())

I also took the advice given in the hint and wrapped the training code in with torch.autograd.set_detect_anomaly(True). The result is:

  File "main.py", line 31, in <module>
    agent.run(100)
  File "F:\agents\ddpg.py", line 184, in run
    self.train_model()
  File "F:\agents\ddpg.py", line 109, in train_model
    q_pi = self.qf(obs1, self.policy(obs1))
  File "E:\conda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "F:\agents\common\networks.py", line 115, in forward
    q = torch.tanh(self.output_layer(q))
  File "E:\conda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "E:\conda\envs\pytorch\lib\site-packages\torch\nn\modules\linear.py", line 87, in forward
    return F.linear(input, self.weight, self.bias)
  File "E:\conda\envs\pytorch\lib\site-packages\torch\nn\functional.py", line 1610, in linear
    ret = torch.addmm(bias, input, weight.t())
 (print_stack at ..\torch\csrc\autograd\python_anomaly_mode.cpp:60)
Traceback (most recent call last):
  File "main.py", line 31, in <module>
    agent.run(100)
  File "F:/agents\ddpg.py", line 184, in run
    self.train_model()
  File "F:/agents\ddpg.py", line 130, in train_model
    policy_loss.backward()
  File "E:\conda\envs\pytorch\lib\site-packages\torch\tensor.py", line 198, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "E:\conda\envs\pytorch\lib\site-packages\torch\autograd\__init__.py", line 100, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [128, 1]], which is output 0 of TBackward, is at version 2; expected version 1 instead
. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

I can't find what causes the failure to compute the gradient in my code.

Just try to avoid that particular in-place operation and rewrite it as a non-in-place one.
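
For example, the kind of rewrite meant here, as a generic, self-contained illustration (not the asker's code):

import torch

x = torch.ones(3, requires_grad=True)
y = torch.exp(x)      # exp() saves its output for the backward pass

# In-place: y += 1 would bump y's version counter and corrupt that saved output,
# raising exactly this "modified by an inplace operation" error on backward().
# Out-of-place: build a new tensor instead and leave the saved output untouched.
y = y + 1

y.sum().backward()    # works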

I have seen confirmed cases where PyTorch's reverse-mode automatic differentiation struggles to build the computational graph around specific in-place operations.

This is a current limitation.
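
In the snippet above there is no explicit in-place tensor operation in the loss computation itself, so a likely culprit is self.qf_optimizer.step(): it updates the critic's weights in place, while the graph built earlier for q_pi (and therefore for policy_loss) still references their previous versions. Assuming that diagnosis, one way to apply the advice is to rebuild the policy loss after the critic update, so its backward pass only sees tensors at their current versions. A sketch using the question's variable names (not a verified drop-in fix; gradient clipping and the soft target updates stay as in the original):

# Critic update first.
q = self.qf(obs1, acts).squeeze(1)
q_pi_target = self.qf_target(obs2, self.policy_target(obs2)).squeeze(1)
q_backup = rews + self.gamma * (1 - done) * q_pi_target
qf_loss = F.mse_loss(q, q_backup.detach())

self.qf_optimizer.zero_grad()
qf_loss.backward()
self.qf_optimizer.step()      # in-place parameter update happens here

# Actor update: re-run the critic forward pass *after* the step above,
# so the graph for policy_loss no longer depends on stale weight versions.
q_pi = self.qf(obs1, self.policy(obs1))
policy_loss = -q_pi.mean()

self.policy_optimizer.zero_grad()
policy_loss.backward()
self.policy_optimizer.step()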
