内核CUDA中的2D数组

Question

I wanted to used 2D array on GPU as we do on CPU. 我想在GPU上使用2D数组，就像在CPU上一样。 Thus the below code. 因此下面的代码。 It executes without errors but returns some garbage values. 它执行没有错误但返回一些垃圾值。

Could anyone please suggest me what might have went wrong...! 任何人都可以建议我可能出了什么问题......！

Thank you. 谢谢。

#include<stdio.h>

__global__ void add2(int** da)
{
 int idx=threadIdx.x;
 int idy=threadIdx.y;

da[idx][idy]+=2;
// printf("It came here");
printf("%d \n",da[idx][idy]);
}

int main()
{
 int ha[2][2],**da, hb[2][2];
 size_t pitch;
 for(int i=0;i<2;i++)
    {
    for(int j=0;j<2;j++)
       ha[i][j]=0;
    }

 cudaMallocPitch((void**)&da, &pitch, 2*sizeof(int),2);
 cudaMemcpy2D(&da, 2*sizeof(int), ha, pitch, 2*sizeof(int), 2, cudaMemcpyHostToDevice);

 printf("Before kernel\n");
 for(int i=0;i<2;i++)
    {
     for(int j=0;j<2;j++)
     printf("%d ",ha[i][j]);
     printf("\n");
    }
 printf("\n");

 add2<<<2,2>>>(da);
 // gpuErrchk(cudaPeekAtLastError());
 // gpuErrchk(cudaDeviceSynchronize());

 cudaMemcpy2D(&hb, 2*sizeof(int), da, pitch, 2*sizeof(int), 2, cudaMemcpyDeviceToHost);

 printf("After kernel\n");
 for(int i=0;i<2;i++)
    {
     for(int j=0;j<2;j++)
    printf("%d ",hb[i][j]);
    printf("\n");
    }
  return 0;
}

Answer 1

One of the other approaches to 2D arrays is, if you think its nothing but the arrangement of the elements in memory. 2D阵列的其他方法之一是，如果您认为它只是内存中元素的排列。 The following code explains you such an approach with row-major order and more generalised way to write with proper error checking . 下面的代码解释了这种具有行主顺序的方法，以及通过适当的错误检查进行写入的更通用的方法。

 #include<stdio.h>

 #define NUM_ROWS 2
 #define NUM_COLS 2

 __global__ void add2(int* da, int iNumCol)
{
    int idx=threadIdx.x;
    int idy=threadIdx.y;

    da[(idx * iNumCol) + idy]+=2;
    // printf("It came here");
    //printf("%d \n",da[idx][idy]);
}

int main()
{
    int             ha[NUM_ROWS][NUM_COLS] ;
    int             *da ;
    int             hb[NUM_ROWS][NUM_COLS] ;
    int             iSize = NUM_ROWS * NUM_COLS * sizeof(int) ;
    cudaError_t     cuError = cudaSuccess ;
    dim3            dimGrid (1,1,1) ;
    dim3            dimBlock (NUM_ROWS, NUM_COLS, 1) ;

    for(int i=0;i<NUM_ROWS;i++)
    {
            for(int j=0;j<NUM_COLS;j++)
            {
                    ha[i][j]=0;
            }
    }

    cuError = cudaMalloc((void**)&da, iSize) ;
    if (cudaSuccess != cuError)
    {
            printf ("Failed to allocate memory\n") ;
            return 1 ;
    }
    cuError = cudaMemcpy(da, ha, iSize, cudaMemcpyHostToDevice);
    if (cudaSuccess != cuError)
    {
            cudaFree (da) ;
            printf ("Failed in Memcpy 1\n") ;
            return 1 ;
    }

    printf("Before kernel\n");
    for(int i=0;i<NUM_ROWS;i++)
    {
            for(int j=0;j<NUM_COLS;j++)
            {
                    printf("%d ",ha[i][j]);
            }
            printf("\n");
    }
    printf("\n");

    add2<<<dimGrid, dimBlock>>>(da, NUM_COLS);
    cuError = cudaGetLastError () ;
    if (cudaSuccess != cuError)
    {
            printf ("Failed in kernel launch and reason is %s\n", cudaGetErrorString(cuError)) ;
            return 1 ;
    }

    cuError = cudaMemcpy(hb, da, iSize, cudaMemcpyDeviceToHost);
    if (cudaSuccess != cuError)
    {
            cudaFree (da) ;
            printf ("Failed in Memcpy 2\n") ;
            return 1 ;
    }

    printf("After kernel\n");
    for(int i=0;i<NUM_ROWS;i++)
    {
            for(int j=0;j<NUM_COLS;j++)
            {
                    printf("%d ",hb[i][j]);
            }
            printf("\n");
    }
    cudaFree (da) ;

    return 0;
}

内核CUDA中的2D数组

问题描述

1 个解决方案

解决方案1
3 已采纳 2013-10-23 03:46:06

内核CUDA中的2D数组

问题描述

1 个解决方案

解决方案1 3 已采纳 2013-10-23 03:46:06

解决方案1
3 已采纳 2013-10-23 03:46:06