I am trying to compute the policy network loss in the Deep Deterministic Policy Gradient (DDPG) algorithm with PyTorch 1.5, and I get the following error:
File "F:\agents\ddpg.py", line 128, in train_model
policy_loss.backward()
File "E:\conda\envs\pytorch\lib\site-packages\torch\tensor.py", line 198, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "E:\conda\envs\pytorch\lib\site-packages\torch\autograd\__init__.py", line 100, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [128, 1]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
Here are my networks and the training procedure. In the actor network, the output vector has length 20, representing a continuous action. The input of the critic network is the concatenation of the state vector and the action vector.
"""
ddpg actor
"""
class MLP(nn.Module):
def __init__(self,
input_size,
output_size,
output_limit=1.0,
hidden_sizes=(64, 64),
activation=torch.relu,
output_activation=identity,
use_output_layer=True,
use_actor=False,
):
super(MLP, self).__init__()
self.input_size = input_size
self.output_size = output_size
self.output_limit = output_limit
self.hidden_sizes = hidden_sizes
self.activation = activation
self.output_activation = output_activation
self.use_output_layer = use_output_layer
self.use_actor = use_actor
# Set hidden layers
self.hidden_layers = nn.ModuleList()
in_size = self.input_size
for next_size in self.hidden_sizes:
fc = nn.Linear(in_size, next_size)
in_size = next_size
self.hidden_layers.append(fc)
# Set output layers
if self.use_output_layer:
self.output_layer1 = nn.Linear(in_size, self.output_size // 2)
self.output_layer2 = nn.Linear(in_size, self.output_size // 2)
else:
self.output_layer = identity
def forward(self, x):
for hidden_layer in self.hidden_layers:
x = self.activation(hidden_layer(x))
x1 = torch.sigmoid(self.output_layer1(x))
x2 = F.softmax(self.output_layer2(x), dim=0)
out = torch.cat((x1, x2), dim=-1)
# If the network is used as actor network, make sure output is in correct range
out = out * self.output_limit if self.use_actor else out
return out
"""
DDPG critic, TD3 critic, SAC qf, TAC qf
"""
class critic(nn.Module):
def __init__(self,
input_size,
output_size,
output_limit=1.0,
hidden_sizes=(64, 64),
activation=torch.relu,
output_activation=identity,
use_output_layer=True,
use_actor=False,
):
super().__init__()
self.input_size = input_size
self.output_size = output_size
self.output_limit = output_limit
self.hidden_sizes = hidden_sizes
self.activation = activation
self.output_activation = output_activation
self.use_output_layer = use_output_layer
self.use_actor = use_actor
# Set hidden layers
self.hidden_layers = nn.ModuleList()
in_size = self.input_size
for next_size in self.hidden_sizes:
fc = nn.Linear(in_size, next_size)
in_size = next_size
self.hidden_layers.append(fc)
# Set output layers
if self.use_output_layer:
self.output_layer = nn.Linear(in_size, self.output_size)
else:
self.output_layer = identity
def forward(self, x, a):
q= torch.cat([x, a], dim=1)
for hidden_layer in self.hidden_layers:
q = self.activation(hidden_layer(q))
q = torch.tanh(self.output_layer(q))
return q
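For reference, a quick shape check of both networks under the sizes mentioned above (22-dim state, 20-dim action, scalar Q-value); this is a hypothetical smoke test, not part of the original agent, and the batch size 64 matches the prints in train_model below:

# Hypothetical smoke test: the actor should emit a (batch, 20) action and
# the critic a (batch, 1) Q-value for the concatenated state-action input.
actor = MLP(input_size=22, output_size=20, use_actor=True)
qf = critic(input_size=22 + 20, output_size=1)

state = torch.randn(64, 22)
action = actor(state)
print(action.shape)             # torch.Size([64, 20])
print(qf(state, action).shape)  # torch.Size([64, 1])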
def train_model(self):
    batch = self.replay_buffer.sample(self.batch_size)
    obs1 = batch['obs1']
    obs2 = batch['obs2']
    acts = batch['acts']
    rews = batch['rews']
    done = batch['done']

    # Check shape of experiences
    # Prediction Q(s,𝜇(s)), Q(s,a), Q‾(s',𝜇‾(s'))
    with torch.autograd.set_detect_anomaly(True):
        print("obs1", obs1.shape)             # (64, 22)
        print("a1", self.policy(obs1).shape)  # (64, 20)
        q_pi = self.qf(obs1, self.policy(obs1))
        q = self.qf(obs1, acts).squeeze(1)
        q_pi_target = self.qf_target(obs2, self.policy_target(obs2)).squeeze(1)

        # Target for Q regression
        q_backup = rews + self.gamma * (1 - done) * q_pi_target
        q_backup.to(self.device)

        # DDPG losses
        policy_loss = -q_pi.mean()
        qf_loss = F.mse_loss(q, q_backup.detach())

        # Update Q-function network parameters
        self.qf_optimizer.zero_grad()
        qf_loss.backward()
        nn.utils.clip_grad_norm_(self.qf.parameters(), self.gradient_clip_qf)
        self.qf_optimizer.step()

        # Update policy network parameters
        self.policy_optimizer.zero_grad()
        policy_loss.backward()  # here is the error
        nn.utils.clip_grad_norm_(self.policy.parameters(), self.gradient_clip_policy)
        self.policy_optimizer.step()

        # Polyak averaging for target parameters
        soft_target_update(self.policy, self.policy_target)
        soft_target_update(self.qf, self.qf_target)

        # Save losses
        self.policy_losses.append(policy_loss.item())
        self.qf_losses.append(qf_loss.item())
I also followed the advice in the hint and wrapped the code in torch.autograd.set_detect_anomaly(True). The result is:
File "main.py", line 31, in <module>
agent.run(100)
File "F:\agents\ddpg.py", line 184, in run
self.train_model()
File "F:\agents\ddpg.py", line 109, in train_model
q_pi = self.qf(obs1, self.policy(obs1))
File "E:\conda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "F:\agents\common\networks.py", line 115, in forward
q = torch.tanh(self.output_layer(q))
File "E:\conda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "E:\conda\envs\pytorch\lib\site-packages\torch\nn\modules\linear.py", line 87, in forward
return F.linear(input, self.weight, self.bias)
File "E:\conda\envs\pytorch\lib\site-packages\torch\nn\functional.py", line 1610, in linear
ret = torch.addmm(bias, input, weight.t())
(print_stack at ..\torch\csrc\autograd\python_anomaly_mode.cpp:60)
Traceback (most recent call last):
File "main.py", line 31, in <module>
agent.run(100)
File "F:/agents\ddpg.py", line 184, in run
self.train_model()
File "F:/agents\ddpg.py", line 130, in train_model
policy_loss.backward()
File "E:\conda\envs\pytorch\lib\site-packages\torch\tensor.py", line 198, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "E:\conda\envs\pytorch\lib\site-packages\torch\autograd\__init__.py", line 100, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [128, 1]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
I can't find what in my code causes the gradient computation to fail.
Try to avoid that particular in-place operation and rewrite it as a non-in-place one, as in the sketches below.
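For instance, here is a minimal illustration of the difference. torch.exp saves its output for the backward pass, so mutating that output in place reproduces the same class of error:

import torch

a = torch.randn(3, requires_grad=True)
b = torch.exp(a)      # autograd saves exp's output to compute its gradient

# In-place: would mutate the saved output and bump its version counter, so
# backward() raises "modified by an inplace operation" (uncomment to see):
# b += 1

# Out-of-place: allocates a new tensor; the saved output stays untouched.
c = b + 1
c.sum().backward()    # works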
I have seen confirmed cases where PyTorch's reverse-mode automatic differentiation struggles to build the computational graph for specific in-place operations. This is a current limitation.
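In your train_model, one in-place modification that matches the trace is qf_optimizer.step(): the [128, 1] tensor in the error appears to be the transposed weight of the critic's output layer (output 0 of TBackward is the weight.t() used inside F.linear), which step() updates in place after q_pi was computed from it, while policy_loss.backward() still needs the earlier version. A minimal sketch of one way around this, assuming that diagnosis, is to back-propagate the policy loss before the critic's optimizer step (alternatively, recompute q_pi after the critic update):

# Sketch only (not tested against the full agent): update the policy first,
# while the critic weights that q_pi depends on are still at their old version.
q_pi = self.qf(obs1, self.policy(obs1))
policy_loss = -q_pi.mean()
qf_loss = F.mse_loss(q, q_backup.detach())

# Policy update before any in-place change to the critic's weights
self.policy_optimizer.zero_grad()
policy_loss.backward()
nn.utils.clip_grad_norm_(self.policy.parameters(), self.gradient_clip_policy)
self.policy_optimizer.step()

# Critic update afterwards; zero_grad() also clears the gradients that
# policy_loss.backward() accumulated into the critic's parameters
self.qf_optimizer.zero_grad()
qf_loss.backward()
nn.utils.clip_grad_norm_(self.qf.parameters(), self.gradient_clip_qf)
self.qf_optimizer.step()

This ordering is safe because qf_loss depends only on the stored actions and the detached target, not on the policy parameters that the policy step just modified.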