I'm trying to implement my own version of the Graph-Attention-network
The customized GAT layer is as following
class GATLayer(nn.Module):
def __init__(self, input_dim: int, output_dim: int, adj: torch.tensor):
super().__init__()
self.W = nn.Parameter(torch.zeros(size=(output_dim, input_dim)))
self.a = nn.Parameter(torch.zeros(size=(2 * output_dim,)))
self.adj = adj
self.n_points = adj.shape[0]
#print(f"input dim:{input_dim}")
def forward(self, h: torch.Tensor):
B, T, N, F = h.size()
hh = functional.linear(h, self.W)
output = torch.zeros_like(hh)
for i in range(self.n_points):
# print(i)
hhj = hh[:, :, self.adj[i], :]
hhi = torch.cat([hh[:, :, i:i + 1, :]] * hhj.size(2), 2)
hhij = torch.cat([hhi, hhj], 3)
e = torch.mm(hhij.reshape(B * T * hhj.size(2), -1), self.a.reshape(self.a.size(0), 1)).reshape(B, T, -1)
alpha = functional.softmax(e, dim=2)
output[:, :, i, :] = torch.sum(hhj * torch.cat([torch.unsqueeze(alpha, 3)] * hhj.size(3), 3), dim=2)
return output
And the whole network is defined as:
class AQIP(nn.Module):
def __init__(self, adj: torch.tensor, seq_len: int, with_aqi: bool = True):
super().__init__()
self.hid_size = 128
self.seq_len = seq_len
self.gat_layers = [
GATLayer(input_dim=16 + int(with_aqi), output_dim=128, adj=adj),
GATLayer(input_dim=128, output_dim=128, adj=adj),
]
self.rnns = [
nn.LSTM(input_size=128, hidden_size=128, num_layers=4, bias=True, batch_first=True),
]
self.linear = nn.Linear(in_features=128 * 4, out_features=1, bias=True)
def forward(self, x: torch.Tensor, site_idx: int):
h = torch.zeros(size=(4, x.size(0), 128))
c = torch.zeros(size=(4, x.size(0), 128))
for gat in self.gat_layers:
x = gat(x)
for rnn in self.rnns:
x[:, :, site_idx, :], (h, c) = rnn(x[:, :, site_idx, :], (h, c))
h = h.permute(1, 0, 2)
h = h.reshape(h.size(0), -1)
return self.linear(h).squeeze()
When I independently test the customized GAT layer with the following code, it turned out that the GAT layer worked fine even on GPU
model = GATLayer(3, 1024, torch.tensor(np.array([[1, 0, 1], [0, 0, 1], [1, 0, 1]], dtype='bool')))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(model(torch.randn(5, 5, 3, 3)).shape)
Which outputs torch.Size([5, 5, 3, 1024])
When I test the whole network with CPU and the following code, it worked fine as well
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
adj = torch.tensor(np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1]], dtype="bool"))
exp = torch.randn(3, 8, 3, 17)
gpus = [0]
model = AQIP(adj, seq_len=8)
#model = model.to(device, non_blocking=True)
print(model(exp, 1))
Which outputs tensor([-0.0320, -0.0320, -0.0320], grad_fn=<SqueezeBackward0>)
But as long as I try to move the model to GPU and de-comment the device and to lines, I get the following error with trace back to some Formatter class that is irrelevant to my code:
RuntimeError: copy_if failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered
When using CUDA_LAUNCH_BLOCKING=1
to run the code, I get:
RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`
Which did not help me to locate error at all
I had also run official examples on the same machine and tried to move them to GPU, it turned out that they all worked fine. So I guess it's not about the CUDA and the Cudnn or the GPU driver version incompatible. But I can not locate the problem in my code as well. PLEASE HELP! I'll be much appreciated if you could save me out of this.
After numerous endeavors, I finally found the problem. It turned out that if you put layers into list like
self.gat_layers = [
GATLayer(input_dim=16 + int(with_aqi), output_dim=128, adj=adj).cuda(),
GATLayer(input_dim=128, output_dim=128, adj=adj).cuda(),
]
Then the PyTorch won't automatically identify those layers so that when .to(device)
is called, their parameters won't be transferred to the GPU. So one of the solutions is to declare the layers one by one.
While the better solution is to use the nn.ModuleList
to contain all the layers you want, so the code could be changed to
self.gat_layers = nn.ModuleList([
GATLayer(input_dim=16 + int(with_aqi), output_dim=128, adj=adj).cuda(),
GATLayer(input_dim=128, output_dim=128, adj=adj).cuda(),
])
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.