從設備到主機的Cuda Memcpy崩潰

Question

我正在嘗試在每個像素周圍15 x 15大小的鄰域（patch）內找到RGB的最小值。

在source.cpp文件中

SAFE_CALL(cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost));

這一行處程序崩潰了。以下是我的代碼段：

DarkPrior.h

#ifndef DARKPRIOR_H_INCLUDED
#define DARKPRIOR_H_INCLUDED

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "cuda.h"
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
 #include <iostream>
 #include "opencv2/opencv.hpp"

 // Evaluates a CUDA runtime call exactly once. If the returned status is not
 // cudaSuccess, prints the file, line and error string to stderr, resets the
 // device and terminates the process with EXIT_FAILURE.
 // Wrapped in do { ... } while (0) so it behaves as a single statement.
 #define SAFE_CALL(call)                                                              \
     do {                                                                             \
         const cudaError_t safe_call_status_ = (call);                                \
         if (safe_call_status_ != cudaSuccess) {                                      \
             fprintf(stderr, "CUDA Error:\nFile = %s\nLine = %d\nReason = %s\n",      \
                     __FILE__, __LINE__, cudaGetErrorString(safe_call_status_));      \
             cudaDeviceReset();                                                       \
             exit(EXIT_FAILURE);                                                      \
         }                                                                            \
     } while (0)


    // Host-side entry point; its definition in minRGB.cu launches the CUDA kernels.
    //   image_d  : device buffer, reinterpreted by the definition as one float3
    //              per pixel (height * width pixels).
    //   rgbmin_d : device buffer of height * width floats that receives the
    //              result (the definition names this parameter Dark_d).
    //   height, width : image dimensions in pixels.
    void dark_channel(float *image_d, float *rgbmin_d, int height, int width);



   #endif

Source.cpp

#include "DarkPrior.h"
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

int main()
{
    //load the image
    Mat src = imread("foggy_river.jpg");

    //check whether image loaded is empty or not.
    if (src.empty())
    {
         cerr << "no image"; return -1;
    }

    //Mat rgbMin(src.size(), CV_MAKETYPE(src.depth(), 1));
   //   int step = src.step;
    float *image_h = NULL;
    float *image_d = NULL;
     float *Dark_d = NULL;
    float *Dark_h = NULL;
   //Mat rgbmin(src.size(), CV_MAKETYPE(src.depth(), 1));

   size_t size1 = src.step * src.rows * sizeof(float);
   size_t size2 = src.cols * src.rows * sizeof(float);

   image_h = (float *)malloc(size1);
   Dark_h = (float *)malloc(size1);

   SAFE_CALL(cudaMalloc((void**)&image_d, size1));
   SAFE_CALL(cudaMalloc((void**)&Dark_d, size2));

   //convert image from CV::MAT to float*.
   Mat dst;
   src.convertTo(dst, CV_32F);
   image_h = dst.ptr<float>();

   SAFE_CALL(cudaMemcpy(image_d, image_h, size1, cudaMemcpyHostToDevice));

   cout << "Calculating Minimum of RGB ..." << endl;
   dark_channel(image_d, Dark_d, src.rows, src.cols);

   SAFE_CALL(cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost));

   Mat Dark_out(src.rows, src.cols, CV_32FC1, Dark_h);
   imwrite("MinRGB.jpg", Dark_out);

   cudaFree(image_d);
   cudaFree(Dark_d);

   //free(image_h);
   //free(rgbmin_h);

   return 0;
}

minRGB.cu

#include "DarkPrior.h"

//#define min(x,y) ((x<y)?x:y)

// Reads one value from a row-major width x height float image. Coordinates
// outside the image are clamped to the nearest border pixel before the read.
__device__ float safe_get(float *rgbMin, int width, int height, int x, int y)
{
    // Clamp the column: lower bound first, then upper bound.
    int col = (x < 0) ? 0 : x;
    if (col > width - 1)
    {
        col = width - 1;
    }

    // Clamp the row the same way.
    int row = (y < 0) ? 0 : y;
    if (row > height - 1)
    {
        row = height - 1;
    }

    // Row-major flattening of (row, col).
    return rgbMin[row * width + col];
}

 // Returns the smaller of Minval and the minimum of rgbMin over the
 // (2*radius+1) x (2*radius+1) window centred on (x, y). Window positions
 // outside the image are clamped to the border by safe_get.
 //
 // Fix: the original had no return statement (undefined behaviour for a
 // non-void function) and only modified its by-value copy of Minval.
 __device__ float estimate_minimum_patch(float *rgbMin, int width, int height, int radius, int x, int y, float Minval)
 {
     for (int i = -radius; i <= radius; i++)
     {
         for (int j = -radius; j <= radius; j++)
         {
             const float val = safe_get(rgbMin, width, height, x + i, y + j);
             Minval = min(val, Minval);
         }
     }

     return Minval;
 }

 // For each pixel, writes to darkCh the minimum of rgbMin over the 15 x 15
 // window (radius 7) around it, starting from an initial value of 255.
 //
 // Launch layout: a 2D grid covering at least width x height threads; x is the
 // column and y the row. The index uses blockIdx, blockDim and threadIdx, so
 // one single-thread block per pixel and multi-thread blocks both work.
 // rgbMin and darkCh must each hold height * width floats, row-major.
 __global__ void kernel_darkChannel(float *rgbMin, float *darkCh, int height, int width)
 {
     const int radius = 7;

     const int x = blockIdx.x * blockDim.x + threadIdx.x; // Current column
     const int y = blockIdx.y * blockDim.y + threadIdx.y; // Current row

     // Threads beyond the image edge must not touch memory.
     if (x >= width || y >= height)
     {
         return;
     }

     const int tid = y * width + x;

     // Fix: use the returned minimum; the original discarded it and always
     // stored the initial 255.
     darkCh[tid] = estimate_minimum_patch(rgbMin, width, height, radius, x, y, 255.0f);
 }

// For each pixel, writes min(x, y, z) of the float3 in image to tmp_min.
//
// Launch layout: a 2D grid covering at least width x height threads; x is the
// column and y the row. The index uses blockIdx, blockDim and threadIdx, so
// one single-thread block per pixel and multi-thread blocks both work.
// image must hold height * width float3 values and tmp_min height * width
// floats, both row-major.
__global__ void kernel_findMinRGB (float3 *image, float *tmp_min, int height, int width)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x; // Current column
    const int y = blockIdx.y * blockDim.y + threadIdx.y; // Current row

    // Fix: the original test (x > height && y > width) compared each
    // coordinate against the wrong dimension, was off by one, and required
    // both to be out of range, so it never guarded the accesses below.
    if (x >= width || y >= height)
    {
        return;
    }

    const int i = y * width + x;

    // Read the pixel once, then reduce its three channels.
    const float3 px = image[i];
    tmp_min[i] = min(px.x, min(px.y, px.z));
}

 void dark_channel(float *image_d, float *Dark_d, int height, int width)
 {
dim3 grid(width, height);

float *tmp_min;
cudaMalloc((void **)(&tmp_min), sizeof(float)*height*width);

kernel_findMinRGB <<<grid, 1>>> ((float3 *)image_d, tmp_min, height, width);
printf("RGB min is found\n");

kernel_darkChannel <<<grid, 1>>> (tmp_min, Dark_d, height, width);
printf("patch of minimum is also found\n");


return;
}

我的代碼因source.cpp的第45行出現未知錯誤而崩潰

我完全不知道原因是什么，也許您將能夠提供幫助。

Answer 1

指針Dark_h指向大小為size1字節的主機內存段。指針Dark_d指向大小為size2字節的設備內存段。如果size1 < size2，則調用：

cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost)

這會很麻煩，因為您將寫入非法內存（該內存不是Dark_h指向的數組段的一部分，並且可能會得到SEGFAULT）。 我沒有嘗試過，但是我敢打賭，這就是崩潰的原因。

從設備到主機的Cuda Memcpy崩潰

問題描述

1 個解決方案

解決方案1
1 2015-11-09 13:29:18

從設備到主機的Cuda Memcpy崩潰

問題描述

1 個解決方案

解決方案1 1 2015-11-09 13:29:18

解決方案1
1 2015-11-09 13:29:18