[英]Porting PyTorch code from CPU to GPU
Following the tutorial from https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb 遵循 https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb 的教程
There is a USE_CUDA flag that switches the variable and tensor types between CPU types (when False) and GPU types (when True). 有一个 USE_CUDA 标志,用于在 CPU 类型(为 False 时)和 GPU 类型(为 True 时)之间切换变量和张量的类型。
Using the data from en-fr.tsv and converting the sentences to variables:使用en-fr.tsv 中的数据并将句子转换为变量:
import unicodedata
import string
import re
import random
import time
import math
from gensim.corpora.dictionary import Dictionary
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import LongTensor, FloatTensor
from torch import optim
import torch.nn.functional as F
import numpy as np
# Default value for the `max_length` argument of the attention module.
MAX_LENGTH = 10
# Global switch: when True, the code below moves tensors and modules to the GPU with .cuda().
USE_CUDA = False
# Turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicode_to_ascii(s):
    """Return *s* with combining marks removed.

    The string is NFD-normalized so that accented characters split into a
    base character plus combining marks; every character whose Unicode
    category is 'Mn' is then dropped.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
# Lowercase, trim, and remove non-letter characters
def normalize_string(s):
    """Normalize a raw sentence before tokenization.

    The input is lowercased, stripped and passed through unicode_to_ascii.
    A space is then inserted before each of ``.``, ``!`` and ``?``, and every
    run of characters other than ASCII letters and those three punctuation
    marks is replaced by a single space.
    """
    text = unicode_to_ascii(s.lower().strip())
    # Order matters: punctuation is spaced out first, then everything else is collapsed.
    substitutions = (
        (r"([.!?])", r" \1"),
        (r"[^a-zA-Z.!?]+", r" "),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
# Special vocabulary tokens.
# NOTE(review): the *_IDX constants are not passed to Dictionary below, so the
# ids actually used come from each dictionary's token2id -- confirm they agree.
SOS_IDX, SOS_TOKEN = 0, '<s>'
EOS_IDX, EOS_TOKEN = 1, '</s>'
UNK_IDX, UNK_TOKEN = 2, '<unk>'
PAD_IDX, PAD_TOKEN = 3, '<blank>'

# Read the tab-separated parallel corpus. The context manager closes the file
# handle, and the explicit encoding keeps accented text independent of the
# platform's default locale.
with open('en-fr.tsv', encoding='utf-8') as corpus_file:
    lines = corpus_file.read().strip().split('\n')

# One entry per line: a list of token lists, one per tab-separated column.
# The unpacking below expects exactly two columns (source, target) per line.
pairs = [[normalize_string(s).split() for s in line.split('\t')] for line in lines]
src_sents, trg_sents = zip(*pairs)

# Seed each vocabulary with the special tokens, then add the corpus tokens.
src_dict = Dictionary([[SOS_TOKEN, EOS_TOKEN, UNK_TOKEN, PAD_TOKEN]])
src_dict.add_documents(src_sents)
trg_dict = Dictionary([[SOS_TOKEN, EOS_TOKEN, UNK_TOKEN, PAD_TOKEN]])
trg_dict.add_documents(trg_sents)
def variablize_sentences(sentence, dictionary):
    """Convert a tokenized sentence into a column of vocabulary ids.

    Args:
        sentence: iterable of token strings.
        dictionary: object exposing a ``token2id`` mapping that contains
            ``EOS_TOKEN``.

    Returns:
        A ``Variable`` wrapping a ``LongTensor`` of shape
        ``(len(sentence) + 1, 1)``; the extra last row is the end-of-sentence
        id. The result is moved to the GPU when ``USE_CUDA`` is set.

    Raises:
        KeyError: if a token is unknown and the dictionary has no
            ``UNK_TOKEN`` entry to fall back on.
    """
    token2id = dictionary.token2id
    # Out-of-vocabulary tokens map to <unk> when the dictionary defines it,
    # instead of aborting the whole conversion with a KeyError.
    unk_id = token2id.get(UNK_TOKEN)
    indices = []
    for tok in sentence:
        idx = token2id.get(tok, unk_id)
        if idx is None:
            raise KeyError(tok)
        indices.append(idx)
    indices.append(token2id[EOS_TOKEN])
    var = Variable(LongTensor(indices).view(-1, 1))
    return var.cuda() if USE_CUDA else var


input_variables = [variablize_sentences(sent, src_dict) for sent in src_sents]
output_variables = [variablize_sentences(sent, trg_dict) for sent in trg_sents]
And using an Encoder-Attn-Decoder network:并使用 Encoder-Attn-Decoder 网络:
class EncoderRNN(nn.Module):
    """Embedding + GRU encoder that processes one sentence (batch size 1) at a time.

    When the global ``USE_CUDA`` flag is set at construction time the whole
    module is moved to the GPU; inputs passed to ``forward`` must then live
    on the GPU as well.
    """

    def __init__(self, input_size, hidden_size, n_layers=1):
        """
        Args:
            input_size: number of rows in the embedding table.
            hidden_size: embedding width and GRU hidden width.
            n_layers: number of stacked GRU layers.
        """
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        # One call moves every registered sub-module (embedding and GRU).
        if USE_CUDA:
            self.cuda()

    def forward(self, word_inputs, hidden):
        """Encode a sequence of word ids.

        Args:
            word_inputs: word ids; its length is taken as the sequence length.
            hidden: initial GRU hidden state, e.g. from ``init_hidden()``.

        Returns:
            ``(output, hidden)`` exactly as returned by the GRU.
        """
        seq_len = len(word_inputs)
        # Reshape to seq_len x batch(=1) x hidden_size for the GRU.
        embedded = self.embedding(word_inputs).view(seq_len, 1, -1)
        # Results of a CUDA module are already CUDA tensors, so no extra
        # .cuda() transfer is needed on the outputs.
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape ``n_layers x 1 x hidden_size``."""
        hidden = Variable(torch.zeros(self.n_layers, 1, self.hidden_size))
        return hidden.cuda() if USE_CUDA else hidden
class Attn(nn.Module):
    """Attention scorer producing normalized weights over encoder outputs.

    Supported ``method`` values, as handled in ``score``:
      * ``'dot'``     -- dot(hidden, encoder_output)
      * ``'general'`` -- dot(hidden, Linear(encoder_output))
      * ``'concat'``  -- dot(other, Linear(cat(hidden, encoder_output)))
    """

    def __init__(self, method, hidden_size, max_length=MAX_LENGTH):
        """
        Args:
            method: one of ``'dot'``, ``'general'`` or ``'concat'``.
            hidden_size: width of the hidden vectors being compared.
            max_length: accepted for interface compatibility; nothing in this
                class reads it.
        """
        super(Attn, self).__init__()
        self.method = method
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
            # A bare FloatTensor(1, n) is uninitialized memory; give the
            # parameter small random values so training starts from finite numbers.
            self.other = nn.Parameter(FloatTensor(1, hidden_size).uniform_(-0.1, 0.1))

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape ``1 x 1 x seq_len``.

        Args:
            hidden: current decoder state, compared against every encoder output.
            encoder_outputs: encoder outputs, indexed along the first dimension.
        """
        seq_len = len(encoder_outputs)
        # One energy per encoder position (1-D tensor of length seq_len).
        attn_energies = Variable(torch.zeros(seq_len))
        attn_energies = attn_energies.cuda() if USE_CUDA else attn_energies
        # Calculate energies for each encoder output
        for i in range(seq_len):
            attn_energies[i] = self.score(hidden, encoder_outputs[i])
        # Normalize energies to weights in range 0 to 1, resize to 1 x 1 x seq_len
        return F.softmax(attn_energies).unsqueeze(0).unsqueeze(0)

    def score(self, hidden, encoder_output):
        """Return the scalar attention energy between *hidden* and one encoder output.

        Raises:
            ValueError: if ``self.method`` is not a supported scoring method.
        """
        if self.method == 'dot':
            energy = torch.dot(hidden.view(-1), encoder_output.view(-1))
        elif self.method == 'general':
            energy = self.attn(encoder_output)
            energy = torch.dot(hidden.view(-1), energy.view(-1))
        elif self.method == 'concat':
            energy = self.attn(torch.cat((hidden, encoder_output), 1))
            # The projection vector is registered as `self.other` in __init__.
            energy = torch.dot(self.other.view(-1), energy.view(-1))
        else:
            raise ValueError(
                "Unknown attention method: {!r}".format(self.method))
        return energy
class AttnDecoderRNN(nn.Module):
    """GRU decoder with attention over the encoder outputs, run one step at a time.

    When the global ``USE_CUDA`` flag is set at construction time the whole
    module is moved to the GPU. Every tensor passed to ``forward`` (including
    ``last_context``) must then be a CUDA tensor too, otherwise the
    concatenation with the embedded input fails with a type error.
    """

    def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout_p=0.1):
        """
        Args:
            attn_model: attention scoring method passed to ``Attn``; the
                string ``'none'`` skips creating the attention module.
            hidden_size: embedding width and GRU hidden width.
            output_size: size of the output vocabulary.
            n_layers: number of stacked GRU layers.
            dropout_p: dropout passed to the GRU.
        """
        super(AttnDecoderRNN, self).__init__()
        # Keep parameters for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        # Define layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size * 2, hidden_size, n_layers, dropout=dropout_p)
        self.out = nn.Linear(hidden_size * 2, output_size)
        # Choose attention model
        if attn_model != 'none':
            self.attn = Attn(attn_model, hidden_size)
        # One call moves every registered sub-module, attention included.
        if USE_CUDA:
            self.cuda()

    def forward(self, word_input, last_context, last_hidden, encoder_outputs):
        """Run a single decoding step.

        Args:
            word_input: id of the previous output word.
            last_context: context vector from the previous step (B x N).
            last_hidden: GRU hidden state from the previous step.
            encoder_outputs: all encoder outputs for the source sentence.

        Returns:
            ``(output, context, hidden, attn_weights)`` where ``output`` holds
            log-probabilities over the output vocabulary.

        Raises:
            AttributeError: if the decoder was built with ``attn_model='none'``.
        """
        # Note: we run this one step at a time
        attn = getattr(self, 'attn', None)
        if attn is None:
            # Without this check the failure is an opaque missing-attribute error.
            raise AttributeError(
                "AttnDecoderRNN.forward needs an attention module, but the "
                "decoder was constructed with attn_model='none'")
        # Get the embedding of the current input word (last output word)
        word_embedded = self.embedding(word_input).view(1, 1, -1)  # S=1 x B x N
        # Combine embedded input word and last context, run through RNN
        rnn_input = torch.cat((word_embedded, last_context.unsqueeze(0)), 2)
        rnn_output, hidden = self.gru(rnn_input, last_hidden)
        # Calculate attention from current RNN state and all encoder outputs; apply to encoder outputs
        attn_weights = attn(rnn_output.squeeze(0), encoder_outputs)
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))  # B x 1 x N
        # Final output layer (next word prediction) using the RNN hidden state and context vector
        rnn_output = rnn_output.squeeze(0)  # S=1 x B x N -> B x N
        context = context.squeeze(1)  # B x S=1 x N -> B x N
        output = F.log_softmax(self.out(torch.cat((rnn_output, context), 1)))
        # Results of a CUDA module are already CUDA tensors; no extra transfer needed.
        return output, context, hidden, attn_weights
And testing the network:并测试网络:
# Tiny smoke test of the encoder/decoder pair.
encoder_test = EncoderRNN(10, 10, 2)  # input size, hidden size, layers
decoder_test = AttnDecoderRNN('general', 10, 10, 2)  # attention, hidden size, output size, layers

encoder_hidden = encoder_test.init_hidden()
# Build the three word ids, moving the raw tensor to the GPU before wrapping it.
word_ids = torch.LongTensor([1, 2, 3])
word_inputs = Variable(word_ids.cuda()) if USE_CUDA else Variable(word_ids)
encoder_outputs, encoder_hidden = encoder_test(word_inputs, encoder_hidden)

decoder_attns = torch.zeros(1, 3, 3)
decoder_hidden = encoder_hidden
# NOTE(review): unlike word_inputs, this tensor is not moved with .cuda() when
# USE_CUDA is True, so it stays a CPU tensor.
decoder_context = Variable(torch.zeros(1, decoder_test.hidden_size))

# Single decoding step on the first input word.
decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(
    word_inputs[0], decoder_context, decoder_hidden, encoder_outputs)
print(decoder_output)
print(decoder_hidden)
print(decoder_attn)
The code works fine on CPU,该代码在 CPU 上运行良好,
[out]: [出]:
EncoderRNN (
(embedding): Embedding(10, 10)
(gru): GRU(10, 10, num_layers=2)
)
AttnDecoderRNN (
(embedding): Embedding(10, 10)
(gru): GRU(20, 10, num_layers=2, dropout=0.1)
(out): Linear (20 -> 10)
(attn): Attn (
(attn): Linear (10 -> 10)
)
)
Variable containing:
-2.4378 -2.3556 -2.3391 -2.5070 -2.3439 -2.3415 -2.3976 -2.1832 -1.9976 -2.2213
[torch.FloatTensor of size 1x10]
Variable containing:
(0 ,.,.) =
Columns 0 to 8
-0.2325 0.0775 0.5415 0.4876 -0.5771 -0.0687 0.1832 -0.5285 0.2508
Columns 9 to 9
-0.1837
(1 ,.,.) =
Columns 0 to 8
-0.1389 -0.2605 -0.0518 0.3405 0.0774 0.1815 0.0297 -0.1304 -0.1015
Columns 9 to 9
0.2602
[torch.FloatTensor of size 2x1x10]
Variable containing:
(0 ,.,.) =
0.3334 0.3291 0.3374
[torch.FloatTensor of size 1x1x3]
but when changing the flag to USE_CUDA=True, a TypeError is thrown when calling the decoder_test object: 但是当将标志更改为 USE_CUDA=True 时,调用 decoder_test 对象会抛出 TypeError:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-76-b3c660013934> in <module>()
12 decoder_context = Variable(torch.zeros(1, decoder_test.hidden_size))
13
---> 14 decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(word_inputs[0], decoder_context, decoder_hidden, encoder_outputs)
15 print(decoder_output)
16 print(decoder_hidden)
~/.local/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
222 for hook in self._forward_pre_hooks.values():
223 hook(self, input)
--> 224 result = self.forward(*input, **kwargs)
225 for hook in self._forward_hooks.values():
226 hook_result = hook(self, input, result)
<ipython-input-75-34ecfe9b3112> in forward(self, word_input, last_context, last_hidden, encoder_outputs)
32
33 # Combine embedded input word and last context, run through RNN
---> 34 rnn_input = torch.cat((word_embedded, last_context.unsqueeze(0)), 2)
35 rnn_output, hidden = self.gru(rnn_input, last_hidden)
36
~/.local/lib/python3.5/site-packages/torch/autograd/variable.py in cat(iterable, dim)
895 @staticmethod
896 def cat(iterable, dim=0):
--> 897 return Concat.apply(dim, *iterable)
898
899 @staticmethod
~/.local/lib/python3.5/site-packages/torch/autograd/_functions/tensor.py in forward(ctx, dim, *inputs)
315 ctx.dim = dim
316 ctx.input_sizes = [i.size(dim) for i in inputs]
--> 317 return torch.cat(inputs, dim)
318
319 @staticmethod
TypeError: cat received an invalid combination of arguments - got (tuple, int), but expected one of:
* (sequence[torch.cuda.FloatTensor] seq)
* (sequence[torch.cuda.FloatTensor] seq, int dim)
didn't match because some of the arguments have invalid types: (tuple, int)
The question is: why do the types not match on CUDA when the same code works on CPU, and how can this be resolved?问题是为什么这些类型在 CUDA 中不匹配,但它适用于 CPU 以及如何解决这个问题?
Does PyTorch have a global flag to just change all types to CUDA types and not mess around with CPU/GPU types? PyTorch 是否有一个全局标志来将所有类型更改为 CUDA 类型,而不是混淆 CPU/GPU 类型?
You can also try:你也可以试试:
# Use the first GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# YouNetworkClass is a placeholder for your own nn.Module subclass.
net = YouNetworkClass()
net.to(device)
After that, you have to send the word_inputs
, encoder_hidden
and decoder_context
to the GPU too:之后,您还必须将word_inputs
、 encoder_hidden
和decoder_context
发送到GPU:
# Inputs must live on the same device as the network they are fed to.
word_inputs = word_inputs.to(device)
encoder_hidden = encoder_hidden.to(device)
decoder_context = decoder_context.to(device)
Look here: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#training-on-gpu 看这里: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#training-on-gpu
Does PyTorch have a global flag to just change all types to CUDA types and not mess around with CPU/GPU types? PyTorch 是否有一个全局标志来将所有类型更改为 CUDA 类型,而不是混淆 CPU/GPU 类型?
Nope.不。
(Source: https://discuss.pytorch.org/t/porting-seq2seq-tutorial-from-spro-practical-pytorh-from-cpu-to-gpu/8604 ) (来源: https://discuss.pytorch.org/t/porting-seq2seq-tutorial-from-spro-practical-pytorh-from-cpu-to-gpu/8604 )
Specific to the example:具体到示例:
The input variables passed to the decoder_test object need to be CUDA tensors, i.e. moved to the GPU with .cuda(). 传给 decoder_test 对象的输入变量需要是 CUDA 类型,即用 .cuda() 移到 GPU 上。 More specifically:更具体地说:
encoder_hidden = encoder_test.init_hidden()
---> encoder_hidden = encoder_test.init_hidden().cuda()
decoder_context = Variable(torch.zeros(1, decoder_test.hidden_size))
---> decoder_context = Variable(torch.zeros(1, decoder_test.hidden_size)).cuda()
So the code to test the network should be:所以测试网络的代码应该是:
# Smoke test of the encoder/decoder pair that works for both settings of
# USE_CUDA: every input is moved to the GPU only when the flag is set, so the
# script no longer fails on the CPU path.
encoder_test = EncoderRNN(10, 10, 2)  # I, H, L
decoder_test = AttnDecoderRNN('general', 10, 10, 2)  # A, H, O, L

encoder_hidden = encoder_test.init_hidden()
# init_hidden() already honours USE_CUDA; the guarded call is kept so the
# placement of every decoder input is explicit in this script.
encoder_hidden = encoder_hidden.cuda() if USE_CUDA else encoder_hidden

word_inputs = Variable(torch.LongTensor([1, 2, 3]))
word_inputs = word_inputs.cuda() if USE_CUDA else word_inputs
encoder_outputs, encoder_hidden = encoder_test(word_inputs, encoder_hidden)

decoder_attns = torch.zeros(1, 3, 3)
decoder_hidden = encoder_hidden
# The initial context must be on the same device as the decoder; leaving it
# on the CPU while the model is on the GPU makes torch.cat fail.
decoder_context = Variable(torch.zeros(1, decoder_test.hidden_size))
decoder_context = decoder_context.cuda() if USE_CUDA else decoder_context

decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(
    word_inputs[0], decoder_context, decoder_hidden, encoder_outputs)
print(decoder_output)
print(decoder_hidden)
print(decoder_attn)
[out]: [出]:
Variable containing:
-2.1412 -2.4589 -2.4042 -2.1591 -2.5080 -2.0839 -2.5058 -2.3831 -2.4468 -2.0804
[torch.cuda.FloatTensor of size 1x10 (GPU 0)]
Variable containing:
(0 ,.,.) =
Columns 0 to 8
-0.0264 -0.0689 0.1049 0.0760 0.1017 -0.4585 -0.1273 0.0449 -0.3271
Columns 9 to 9
-0.0104
(1 ,.,.) =
Columns 0 to 8
-0.0308 -0.0690 -0.0258 -0.2759 0.1403 -0.0468 -0.0205 0.0126 -0.1729
Columns 9 to 9
0.0599
[torch.cuda.FloatTensor of size 2x1x10 (GPU 0)]
Variable containing:
(0 ,.,.) =
0.3328 0.3328 0.3344
[torch.cuda.FloatTensor of size 1x1x3 (GPU 0)]
Does PyTorch have a global flag to just change all types to CUDA types and not mess around with CPU/GPU types? PyTorch 是否有一个全局标志来将所有类型更改为 CUDA 类型,而不是混淆 CPU/GPU 类型?
Yes.是的。 You can set the default tensor type to cuda with:您可以使用以下命令将默认张量类型设置为 cuda:
# Make CUDA float tensors the default tensor type.
# NOTE(review): presumably this needs a CUDA-capable install and affects only
# newly created tensors, not existing ones -- confirm against the installed
# PyTorch version.
torch.set_default_tensor_type('torch.cuda.FloatTensor')
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.