使用 CUDA 的並行編程添加不成功

Question

我嘗試使用下面的代碼添加數組，但最終沒有添加數組並且沒有錯誤，這是一個基於 gpu (CUDA) 的並行程序。

#include <cuda_runtime.h>
#include <cuda.h>
#include <iostream>
#include <stdlib.h>

using namespace std;

__global__ void AddInts(int *a, int *b, int count)
{
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id < count)
    {
        a[id] += b[id];
    }
}


int main() 
{
    srand(time(NULL));
    int count = 100;
    int *h_a = new int[count];
    int *h_b = new int[count];

    for (int i = 0; i < count; i++)
    {
        h_a[i] = rand() % 1000;
        h_b[i] = rand() % 1000;
    }

    cout << "Prior to addition:" << endl;
    for (int i = 0; i < 5; i++)
        cout << h_a[i] << " " << h_b[i] << endl;

    int *d_a, *d_b;

    if (cudaMalloc(&d_a, sizeof(int) * count) != cudaSuccess)
    {
        cout << "Nope! No";
        return 0;
    }

    if (cudaMalloc(&d_b, sizeof(int) * count) != cudaSuccess)
    {
        cout << "Nope!";
        cudaFree(d_a);
        return 0;
    }

    if (cudaMemcpy(d_a, h_a, sizeof(int) * count, cudaMemcpyHostToDevice) != cudaSuccess)
    {
        cout << "Could not copy!" << endl;
        cudaFree(d_a);
        cudaFree(d_b);
        return 0;
    }

    if (cudaMemcpy(d_b, h_b, sizeof(int) * count, cudaMemcpyHostToDevice) != cudaSuccess)
    {
        cout << "Could not copy!" << endl;
        cudaFree(d_a);
        cudaFree(d_b);
        return 0;
    }

    AddInts <<<count / 256 + 1, 256 >>> (h_a, h_b, count);

    if (cudaMemcpy(h_a, h_b, sizeof(int) * count, cudaMemcpyDeviceToHost) == cudaSuccess)
    {
        delete[] h_a;
        delete[] h_b;
        cudaFree(d_a);
        cudaFree(d_b);
        cout << "Nope!" << endl;
        return 0;
    }

    for (int i = 0; i < 5; i++)
        cout << "It's " << h_a[i] << endl;

    cudaFree(d_a);
    cudaFree(d_b);


    delete[] h_a;
    delete[] h_b;

    return 0;
}

我的結果是：

Prior to addition: 188 336 489 593 706 673 330 792 329 588 It's 188
It's 489 It's 706 It's 330 It's 329

D:\Learn\CUDA\Visual_stidio\matrxAdd\x64\Release\matrxAdd.exe (process
8468) exited with code 0. To automatically close the console when
debugging stops, enable Tools->Options->Debugging->Automatically close
the console when debugging stops. Press any key to close this window .
. .

Answer 1

首先，設備上的計算應該使用設備內存。 因此，該行：

AddInts <<<count / 256 + 1, 256 >>> (h_a, h_b, count);

應該：

AddInts <<<count / 256 + 1, 256 >>> (d_a, d_b, count);

然后，您應該將結果從設備內存復制到主機內存。 因此，該行：

if (cudaMemcpy(h_a, h_b, sizeof(int) * count, cudaMemcpyDeviceToHost) == cudaSuccess)

應該：

if (cudaMemcpy(h_a, d_a, sizeof(int) * count, cudaMemcpyDeviceToHost) == cudaSuccess)

使用 CUDA 的並行編程添加不成功

問題描述

1 個解決方案

解決方案1
0 2022-07-19 10:48:09

使用 CUDA 的並行編程添加不成功

問題描述

1 個解決方案

解決方案1 0 2022-07-19 10:48:09

解決方案1
0 2022-07-19 10:48:09