[英]How to instruct CuPy to run multiple number of the same job concurrently in a GPU?
[英]How to run python on GPU with CuPy?
我正在嘗試使用CuPy
庫在 GPU 上執行 Python 代碼。 但是,當我運行nvidia-smi
,找不到 GPU 進程。
這是代碼:
import itertools

import numpy as np
import cupy as cp
from scipy.stats import rankdata
def get_top_one_probability(vector):
    """Softmax of *vector*: exp(v_i) / sum_j exp(v_j).

    This is the ListNet "top-one" probability distribution over the scores.
    """
    exponentials = cp.exp(vector)
    return exponentials / cp.sum(exponentials)
def get_listnet_gradient(training_dataset, real_labels, predicted_labels):
    """ListNet top-one gradient w.r.t. the model scores.

    Computes  -X^T P(y) + (1 / sum(exp(z))) * X^T exp(z)  with
    X = training_dataset, y = real_labels, z = predicted_labels.

    Parameters
    ----------
    training_dataset : cupy.ndarray, shape (n_samples, n_features)
    real_labels : cupy.ndarray, shape (n_samples,) or (n_samples, 1)
    predicted_labels : cupy.ndarray, shape (n_samples, 1)
        Scores produced by the network for the same samples.

    Returns
    -------
    cupy.ndarray
        Gradient of shape (n_features, 1).
    """
    ly_topp = get_top_one_probability(real_labels)
    # Read the shape directly from the device array; the original round-tripped
    # through cp.asnumpy() (a needless GPU->CPU transfer) just to get .shape[0].
    s1 = -cp.matmul(cp.transpose(training_dataset),
                    cp.reshape(ly_topp, (ly_topp.shape[0], 1)))
    # exp(z) is needed for both the normalizer and the second term; compute once.
    exp_lz = cp.exp(predicted_labels)
    s2 = 1 / cp.sum(exp_lz)  # scalar normalizer 1 / sum(exp(z))
    s3 = cp.matmul(cp.transpose(training_dataset), exp_lz)
    # BUG FIX: the original called s1.reshape(...) and discarded the result --
    # ndarray.reshape returns a new array/view, it does not modify in place.
    s1 = s1.reshape(s1.shape[0], 1)
    # All ops were issued on the same (null) stream, which executes in order,
    # so the per-statement cp.cuda.Stream.null.synchronize() calls were
    # unnecessary and only slowed the pipeline down.
    return cp.add(s1, s2 * s3)
def relu(matrix):
    """Element-wise rectified linear unit: negative entries clamped to 0."""
    return cp.maximum(matrix, 0)
def get_groups_id_count(groups_id):
    """Run-length encode *groups_id* into (group_id, count) pairs.

    Fixes two defects in the original implementation:
    * the final group was never appended (the loop exited before recording
      it), so the training loop silently skipped the last batch;
    * group ids were assumed to start at 1 and increase by exactly 1, so the
      recorded id could disagree with the actual element value.

    Parameters
    ----------
    groups_id : iterable
        Sequence of group ids where equal ids are stored contiguously.

    Returns
    -------
    list of tuple
        One (group_id, run_length) pair per consecutive run, in order.
        Empty input yields an empty list.
    """
    return [(gid, sum(1 for _ in run))
            for gid, run in itertools.groupby(groups_id)]
def mul_matrix(matrix1, matrix2):
    """Matrix product of *matrix1* and *matrix2* (thin wrapper over cp.matmul)."""
    product = cp.matmul(matrix1, matrix2)
    return product
if mode == 'train':  # Train the MLP with the ListNet loss.
    feature_count = np.shape(training_set_data)[1]
    # One input neuron per training-set feature.
    input_neurons = feature_count
    # Hidden layer size: feature count + 10 (the author's heuristic).
    hidden_neurons = feature_count + 10
    # Random weight initialisation, scaled by init_var, then moved to the GPU.
    input_hidden_weights = cp.array(
        np.random.rand(feature_count, hidden_neurons) * init_var)
    # A single output neuron produces the ranking score.
    hidden_output_weights = cp.array(
        np.float32(np.random.rand(hidden_neurons, 1) * init_var))
    listwise_gradients = np.array([])
    for epoch in range(0, 70):
        print('Epoch {0} started...'.format(epoch))
        start_range = 0
        # A batch is one group of rows sharing the same group id.
        for _group_id, group_size in groups_id_count:
            end_range = start_range + group_size
            batch_dataset = cp.array(training_set_data[start_range:end_range, :])
            cp.cuda.Stream.null.synchronize()
            batch_labels = cp.array(dataset_labels[start_range:end_range])
            cp.cuda.Stream.null.synchronize()
            # Forward pass: input -> hidden (ReLU) -> output (ReLU).
            hidden_pre_activation = mul_matrix(batch_dataset, input_hidden_weights)
            cp.cuda.Stream.null.synchronize()
            hidden_neurons_output = relu(hidden_pre_activation)
            cp.cuda.Stream.null.synchronize()
            mlp_output = relu(mul_matrix(hidden_neurons_output, hidden_output_weights))
            cp.cuda.Stream.null.synchronize()
            # ListNet gradient for the batch, collapsed to a single scalar.
            batch_gradient = get_listnet_gradient(batch_dataset, batch_labels, mlp_output)
            batch_gradient = cp.mean(cp.transpose(batch_gradient), axis=1)
            aggregated_listwise_gradient = cp.sum(batch_gradient, axis=0)
            cp.cuda.Stream.null.synchronize()
            # NOTE(review): both weight matrices are updated with the same
            # scalar aggregated gradient -- presumably a simplification of
            # true backpropagation; confirm against the intended ListNet
            # update rule.
            hidden_output_weights = hidden_output_weights - (
                learning_rate * aggregated_listwise_gradient)
            cp.cuda.Stream.null.synchronize()
            input_hidden_weights = input_hidden_weights - (
                learning_rate * aggregated_listwise_gradient)
            cp.cuda.Stream.null.synchronize()
            start_range = end_range
            listwise_gradients = np.append(
                listwise_gradients, cp.asnumpy(aggregated_listwise_gradient))
    print('Gradients: ', listwise_gradients)
我使用cp.cuda.Stream.null.synchronize()
是因為我讀到此語句可確保代碼在轉到下一行之前在 GPU 上完成執行。
誰能幫我在 GPU 上運行代碼? 提前致謝
cupy
可以在不同的設備上運行您的代碼。您需要選擇與 GPU 關聯的正確設備 ID,以便您的代碼在其上執行。請注意:CuPy 的設備 ID 只枚舉 CUDA GPU,並不包含 CPU;ID 0
通常是系統中的第一塊 GPU。您可以使用以下方法檢查您當前的設備 ID:
x = cp.array([1, 2, 3])
print(x.device)
要獲取計算機上已識別設備的數量:
print(cp.cuda.runtime.getDeviceCount())
例如,要將您當前的設備更改為 ID 1
:
cp.cuda.Device(1).use()
設備 ID 是零索引的,因此如果您有 3 個設備,您會得到一個 ID 集 {0, 1, 2}。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.