cudaMemcpy中的無效參數

Question

我在追查cudaMemcpy調用出現「無效參數」（invalid argument）錯誤的根源時遇到了麻煩，這是相關代碼：

在gpu_memory.cu中，我聲明並分配了設備指針的內存：

// Wraps a CUDA runtime call and reports any failure together with the call site.
// The do { ... } while (0) wrapper makes the macro expand to exactly one
// statement, so `if (x) cudaErrorCheck(call); else ...` parses as intended
// (a bare `{ ... }` block followed by `;` would detach the `else`).
#define cudaErrorCheck(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Prints the CUDA error string plus file and line to stderr when `code` is not
// cudaSuccess. Unless `abort` is false, the process then exits with `code` as
// its exit status. Does nothing when `code` is cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
...
// Device-side global pointer variable; it is assigned inside pointer_set_kernel.
__device__ double* conc;
...
// Kernel that stores its conc_in argument into the __device__ symbol `conc`.
// Other parameters and statements are elided in this excerpt.
__global__ void pointer_set_kernel(..., double* conc_in...) {
...
   conc = conc_in;
...
}

// File-scope host variable that receives the device address returned by cudaMalloc.
double* d_conc;
...
//memory initialization
// Allocates NUM * 53 doubles on the device into d_conc, then launches
// pointer_set_kernel with a single thread, passing d_conc to it.
// NOTE(review): block_size and grid_size are not used in the visible lines;
// the elided code may use them.
void initialize_gpu_memory(int NUM, int block_size, int grid_size) {
    ...
    // NOTE(review): 53 is hard-coded here, while the host buffer in mechanism.cu
    // is sized with NSP; presumably NSP == 53, confirm.
    cudaErrorCheck(cudaMalloc((void**)&d_conc, NUM * 53 * sizeof(double)));
    ...
    pointer_set_kernel<<<1, 1>>>(...d_conc...);
    cudaErrorCheck( cudaPeekAtLastError() ); // Checks for launch error
    // NOTE(review): cudaThreadSynchronize is deprecated; cudaDeviceSynchronize is
    // the documented replacement.
    cudaErrorCheck( cudaThreadSynchronize() ); // Checks for execution error
}

接下來，在另一個文件（mechanism.cu）中，我將該設備指針聲明為extern，以便向它所指向的內存復制數據：

// Refers to the __device__ pointer variable defined in gpu_memory.cu.
// NOTE(review): sharing a __device__ symbol across translation units presumably
// requires relocatable device code (separate compilation); confirm build flags.
extern __device__ double* conc;
// Initializes GPU memory, builds a NUM * NSP host array of concentrations and
// copies it to the device.
void write_jacobian_and_rates_output(int NUM, int block_size, int grid_size) {
    ...
    initialize_gpu_memory(NUM, block_size, grid_size);
    ...
    //get address of conc
    // This local d_conc is a new variable, distinct from the file-scope d_conc
    // in gpu_memory.cu.
    double* d_conc;
    // NOTE(review): cudaGetSymbolAddress yields the device address of the symbol
    // `conc` itself (the pointer variable), not the buffer address stored in it.
    // Reading the stored pointer would need something like
    // cudaMemcpyFromSymbol(&d_conc, conc, sizeof(d_conc)); confirm.
    cudaErrorCheck(cudaGetSymbolAddress((void **)&d_conc, conc));
    //populate the concentrations on the host
    double conc_host[NSP];
    double* conc_host_full = (double*)malloc(NUM * NSP * sizeof(double));
    //populate the concentrations
    get_concentrations(1.01325e6, y_host, conc_host);
    // Replicate each of the NSP values NUM times; entry (i, j) lives at
    // i + j * NUM, so values for one j are contiguous across i.
    for (int i = 0; i < NUM; ++i) {
        for (int j = 0; j < NSP; ++j) {
           conc_host_full[i + j * NUM] = conc_host[j];
        }
    }
    //check for errors, and copy over
    cudaErrorCheck( cudaPeekAtLastError() ); // Checks for launch error
    // NOTE(review): cudaThreadSynchronize is deprecated; cudaDeviceSynchronize is
    // the documented replacement.
    cudaErrorCheck( cudaThreadSynchronize() ); // Checks for execution error
    // NOTE(review): the copy size uses a literal 53 while the host buffer above
    // was sized with NSP; these only agree if NSP == 53, confirm.
    // NOTE(review): no free(conc_host_full) is visible; it may be in elided code.
    cudaErrorCheck(cudaMemcpy(d_conc, conc_host_full, NUM * 53 * sizeof(double), cudaMemcpyHostToDevice));
    ...
}

我在最后一行（Memcpy）上得到了錯誤。 似乎initialize_gpu_memory函數正常工作，這是在malloc和pointer_set_kernel之后進行的cuda-gdb檢查：

p d_conc 
$1 = (double *) 0x1b03236000
p conc
$2 = (@generic double * @global) 0x1b03236000

並在write_jacobian_and_rates_output函數中：

p d_conc
$3 = (double *) 0x1b02e20600
p conc
$4 = (@generic double * @global) 0x1b03236000

我不知道為什么在cudaGetSymbolAddress調用之后寫函數中的d_conc指向不同的內存位置，或者為什么我在memcpy上收到無效的參數。 我確定自己犯了某個愚蠢的錯誤，但無論如何也看不出來。 如果能幫我找出問題的根源，將不勝感激，謝謝！

Answer 1

您的代碼段中沒有任何內容表明d_conc具有extern作用域，因此，在兩個不同文件中的d_conc的兩個實例是完全不同的對象。 所以，
在這種情況下 ：（ mechanism.cu ）

double* d_conc;  // a new local variable is created in this context, separate from the d_conc in gpu_memory.cu
cudaErrorCheck(cudaGetSymbolAddress((void **)&d_conc, conc));
//populate the concentrations on the host
double conc_host[NSP];
double* conc_host_full = (double*)malloc(NUM * NSP * sizeof(double));

尚未將內存分配給d_conc

我看到您已經在gpu_memory.cu的上下文中為同名變量分配了內存，但在發生錯誤的此處並沒有分配。

這似乎也回答了您的疑問：「我不知道為什么在cudaGetSymbolAddress調用之后，寫函數中的d_conc指向不同的內存位置」

cudaMemcpy中的無效參數

問題描述

1 個解決方案

解決方案1
1 已采納 2015-02-12 17:44:45

cudaMemcpy中的無效參數

問題描述

1 個解決方案

解決方案1 1 已采納 2015-02-12 17:44:45

解決方案1
1 已采納 2015-02-12 17:44:45