繁体   English   中英

cuda中线程索引的行主要访问还是列主要访问?

[英]row-major or column-major access of thread index in cuda?

我很困惑图像是按行优先还是列优先顺序存储在设备的全局内存中。 在以两个顺序访问图像时,我得到了图像的两个不同输出。
按行优先顺序访问时-

int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y;

int m = numCols * y + x;

if (x >= numCols || y >= numRows)
    return;

//marking column boundaries
if (x <= 2){                    
    d_Image[m].x = 255;
    d_Image[m].y = 0;
    d_Image[m].z = 0;
}
else if (x >= numCols-2){
    d_Image[m].x = 0;
    d_Image[m].y = 0;
    d_Image[m].z = 255;
}
else{
    d_Image[m].x = d_sample[m].x;
    d_Image[m].y = d_sample[m].y;
    d_Image[m].z = d_sample[m].z;
}
d_Image[m].w = d_sample[m].w;

使用行主输出
按列优先顺序访问时-

int m = x * numRows + y;

使用 col-major输出
方面-

const dim3 blockSize(16,16);
const dim3 gridSize(numCols/16+1, numRows/16+1, 1);
blur << < gridSize, blockSize >> >(d_Image, d_sample, numRows, numCols);

我正在使用 opencv 加载和保存图像。
在第一个输出中,红色和蓝色点散布在整个图像中。 在第二个输出(col-major)中,我尝试标记列时标记了边界行。 我太糊涂了。 编辑

void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols);

cv::Mat sample;
cv::Mat Image;

size_t numRows() { return sample.rows; }
size_t numCols() { return sample.cols; }

__global__ void blur(const uchar4 *d_sample, uchar4* d_Image, size_t numRows, size_t numCols){

  int x = threadIdx.x + blockDim.x * blockIdx.x;
  int y = threadIdx.y + blockDim.y * blockIdx.y;

  int m = y*numCols + x;                    

  if (x >= numCols || y >= numRows)
        return;

  if (x <= 2){
      d_Image[m].x = 255;
      d_Image[m].y = 0;
      d_Image[m].z = 0;
  }
  else if (x >= (numCols-2)){
      d_Image[m].x = 0;
      d_Image[m].y = 0;
      d_Image[m].z = 255;
  }
  else{
      d_Image[m].x = d_sample[m].x;
      d_Image[m].y = d_sample[m].y;
      d_Image[m].z = d_sample[m].z;
  }
  d_Image[m].w = d_sample[m].w;
  }

int main(){

  uchar4  *h_sample, *d_sample, *d_Image, *h_Image;
  int filter[9];
  sample = cv::imread("sample.jpg", CV_LOAD_IMAGE_COLOR);
  if (sample.empty()){
        std::cout << "error in loading image.";
        system("pause");
  }

  cv::cvtColor(sample,sample,CV_BGR2RGBA);
  Image.create(numRows(), numCols(), CV_8UC4);

  if (!sample.isContinuous() || !Image.isContinuous()) {
      std::cerr << "Images aren't continuous!! Exiting." << std::endl;
      system("pause");
      exit(1);
  }
  cv::cvtColor(Image,Image,CV_BGR2RGBA);

  h_sample = (uchar4*)sample.data;
  h_Image = (uchar4*)Image.data;

  size_t numPixels = numRows() * numCols();

    //allocate mmeory on device
  checkCudaErrors(cudaMalloc((void**)&d_sample, sizeof(uchar4) * numPixels));
  checkCudaErrors(cudaMalloc((void**)&d_Image, sizeof(uchar4) * numPixels));

  checkCudaErrors(cudaMemset(d_sample, 0, sizeof(uchar4) * numPixels));
  checkCudaErrors(cudaMemset(d_Image, 0, sizeof(uchar4) * numPixels));

//copy to device
  checkCudaErrors(cudaMemcpy(d_sample, h_sample, sizeof(uchar4) * numPixels, cudaMemcpyHostToDevice));

  helper(d_sample, d_Image, numCols(), numRows());

//copy back to  host
  checkCudaErrors(cudaMemcpy(h_Image, d_Image, sizeof(uchar4) * numPixels, cudaMemcpyDeviceToHost));

  cv::cvtColor(Image,Image,CV_RGBA2BGR);

  cv::namedWindow("Image", CV_WINDOW_AUTOSIZE);
  cv::imshow("Image", Image);
  cv::waitKey(0);
  cv::imwrite("sample.jpg", Image);

  return 0;
}

void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols){

  const dim3 blockSize(16,16);
  const dim3 gridSize(numCols/16+1, numRows/16+1, 1);
  blur << < gridSize, blockSize >> >(d_sample, d_Image, numRows, numCols);
  cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
}
void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols){

然后你打电话

helper(d_sample, d_Image, numCols(), numRows());

我认为您在调用 helper 时可能已经切换了 cols 和 rows...

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM