[英]Cuda Memcpy from Device to Host crashes
我正在嘗試在每個像素周圍15 x 15大小的補丁(鄰域)內找到RGB的最小值
在Source.cpp文件中,執行到下面這一行時:
SAFE_CALL(cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost));
程序崩潰。這是我的代碼段:
DarkPrior.h
#ifndef DARKPRIOR_H_INCLUDED
#define DARKPRIOR_H_INCLUDED
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include "opencv2/opencv.hpp"
/*
 * SAFE_CALL(call): evaluate a CUDA runtime call exactly once and abort on failure.
 *
 * If the call does not return cudaSuccess, the file, line and
 * cudaGetErrorString() text are printed to stderr, the device is reset and
 * the process exits with EXIT_FAILURE.
 *
 * The result is stored in a deliberately unusual local name so that an
 * argument expression mentioning a caller variable called `err`
 * (e.g. SAFE_CALL(err)) is not captured by the macro's own local.
 * The do { ... } while (0) wrapper makes the macro behave as one statement.
 */
#define SAFE_CALL(call) \
    do \
    { \
        cudaError_t safe_call_err_ = (call); \
        if (cudaSuccess != safe_call_err_) \
        { \
            fprintf(stderr,"CUDA Error:\nFile = %s\nLine = %d\nReason = %s\n", __FILE__, __LINE__, cudaGetErrorString(safe_call_err_)); \
            cudaDeviceReset(); \
            exit(EXIT_FAILURE); \
        } \
    } \
    while (0)
/*
 * Host entry point implemented in minRGB.cu.
 *
 * image_d : device pointer; the implementation reinterprets it as
 *           height * width float3 elements (three floats per pixel).
 * Dark_d  : device pointer to height * width floats that receives the output.
 * height, width : image dimensions in pixels (rows, columns).
 */
void dark_channel(float *image_d, float *Dark_d, int height, int width);
#endif
Source.cpp
#include "DarkPrior.h"
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main()
{
//load the image
Mat src = imread("foggy_river.jpg");
//check whether image loaded is empty or not.
if (src.empty())
{
cerr << "no image"; return -1;
}
//Mat rgbMin(src.size(), CV_MAKETYPE(src.depth(), 1));
// int step = src.step;
float *image_h = NULL;
float *image_d = NULL;
float *Dark_d = NULL;
float *Dark_h = NULL;
//Mat rgbmin(src.size(), CV_MAKETYPE(src.depth(), 1));
size_t size1 = src.step * src.rows * sizeof(float);
size_t size2 = src.cols * src.rows * sizeof(float);
image_h = (float *)malloc(size1);
Dark_h = (float *)malloc(size1);
SAFE_CALL(cudaMalloc((void**)&image_d, size1));
SAFE_CALL(cudaMalloc((void**)&Dark_d, size2));
//convert image from CV::MAT to float*.
Mat dst;
src.convertTo(dst, CV_32F);
image_h = dst.ptr<float>();
SAFE_CALL(cudaMemcpy(image_d, image_h, size1, cudaMemcpyHostToDevice));
cout << "Calculating Minimum of RGB ..." << endl;
dark_channel(image_d, Dark_d, src.rows, src.cols);
SAFE_CALL(cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost));
Mat Dark_out(src.rows, src.cols, CV_32FC1, Dark_h);
imwrite("MinRGB.jpg", Dark_out);
cudaFree(image_d);
cudaFree(Dark_d);
//free(image_h);
//free(rgbmin_h);
return 0;
}
minRGB.cu
#include "DarkPrior.h"
//#define min(x,y) ((x<y)?x:y)
/*
 * Reads one element of a row-major buffer with row length `width`,
 * clamping the requested coordinates into the image first:
 * x is limited to [0, width - 1] and y to [0, height - 1].
 */
__device__ float safe_get(float *rgbMin, int width, int height, int x, int y)
{
    // Clamp each coordinate: raise to 0 first, then cap at the last valid index.
    const int col = min(max(x, 0), width - 1);
    const int row = min(max(y, 0), height - 1);

    // Flatten (row, col) into the linear offset and fetch.
    return rgbMin[row * width + col];
}
/*
 * Returns the smaller of `Minval` and the minimum of rgbMin over the
 * (2 * radius + 1) x (2 * radius + 1) window centred on (x, y).
 * Coordinates outside the image are clamped to the border by safe_get().
 *
 * `Minval` is taken by value and serves only as the starting bound; the
 * result is delivered through the return value. (Previously the function had
 * no return statement, so the computed minimum never reached the caller.)
 */
__device__ float estimate_minimum_patch(float *rgbMin, int width, int height, int radius, int x, int y, float Minval)
{
    for (int dx = -radius; dx <= radius; dx++)
    {
        for (int dy = -radius; dy <= radius; dy++)
        {
            const float val = safe_get(rgbMin, width, height, x + dx, y + dy);
            Minval = fminf(val, Minval);
        }
    }
    return Minval;
}

/*
 * For each pixel, writes to darkCh the minimum of rgbMin over the 15 x 15
 * window (radius 7) around that pixel, starting from an upper bound of 255.
 *
 * Launch layout: a 2D grid where x indexes columns and y indexes rows.
 * The pixel coordinate is computed from block and thread indices together,
 * so a one-thread-per-block launch and a multi-thread block launch address
 * the same pixels; threads that land outside the image exit early.
 * Both buffers hold height * width floats in row-major order.
 */
__global__ void kernel_darkChannel (float *rgbMin, float *darkCh, int height, int width)
{
    const int radius = 7;
    const int x = blockIdx.x * blockDim.x + threadIdx.x; // Current column
    const int y = blockIdx.y * blockDim.y + threadIdx.y; // Current row

    // Guard before touching memory: the grid may overhang the image.
    if (x >= width || y >= height)
    {
        return;
    }

    const int tid = y * width + x;
    // Float literal keeps the arithmetic in single precision.
    darkCh[tid] = estimate_minimum_patch(rgbMin, width, height, radius, x, y, 255.0f);
}
/*
 * For each pixel, stores the smallest of the three float3 components
 * (x, y, z) of `image` into `tmp_min`.
 *
 * Launch layout: a 2D grid where x indexes columns and y indexes rows.
 * The pixel coordinate is computed from block and thread indices together,
 * so it is valid for a one-thread-per-block launch as well as larger blocks.
 * `image` holds height * width float3 elements and `tmp_min` holds
 * height * width floats, both row-major.
 */
__global__ void kernel_findMinRGB (float3 *image, float *tmp_min, int height, int width)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x; // Current column
    const int y = blockIdx.y * blockDim.y + threadIdx.y; // Current row

    // Reject any thread outside the image. The column is bounded by width and
    // the row by height; either one being out of range is enough to stop.
    if (x >= width || y >= height)
    {
        return;
    }

    const int i = y * width + x;
    const float3 px = image[i];   // read the pixel once
    tmp_min[i] = fminf(px.x, fminf(px.y, px.z));
}
/*
 * Runs the two device passes on an image already resident on the GPU:
 *   1. kernel_findMinRGB  : per-pixel minimum of the three channels
 *   2. kernel_darkChannel : patch pass over that intermediate result
 *
 * image_d : device pointer, reinterpreted as height * width float3 elements.
 * Dark_d  : device pointer to height * width floats receiving the output.
 * height, width : image dimensions in pixels.
 *
 * The function synchronises with the device before returning, so the output
 * is complete when it returns. Any CUDA failure terminates the process via
 * SAFE_CALL. An image with a non-positive dimension is a no-op.
 */
void dark_channel(float *image_d, float *Dark_d, int height, int width)
{
    // A zero-sized grid is an invalid launch configuration; nothing to do.
    if (height <= 0 || width <= 0)
    {
        return;
    }

    // One block per pixel with a single thread each: the kernels in this file
    // are launched on a (width, height) grid. Note that grid.y is limited to
    // 65535 blocks, so taller images would fail the launch check below.
    dim3 grid(width, height);

    // Scratch buffer for the per-pixel channel minimum.
    float *tmp_min = NULL;
    SAFE_CALL(cudaMalloc((void **)(&tmp_min), sizeof(float) * height * width));

    kernel_findMinRGB <<<grid, 1>>> ((float3 *)image_d, tmp_min, height, width);
    SAFE_CALL(cudaGetLastError());        // launch-configuration errors
    printf("RGB min is found\n");

    kernel_darkChannel <<<grid, 1>>> (tmp_min, Dark_d, height, width);
    SAFE_CALL(cudaGetLastError());        // launch-configuration errors
    SAFE_CALL(cudaDeviceSynchronize());   // errors raised while the kernels ran
    printf("patch of minimum is also found\n");

    // Release the scratch buffer (it was previously leaked on every call).
    SAFE_CALL(cudaFree(tmp_min));
    return;
}
我的代碼因source.cpp的第45行出現未知錯誤而崩潰
我完全不知道原因是什么,也許您將能夠提供幫助。
指針Dark_h指向大小為size1字節的主機內存段。指針Dark_d指向大小為size2字節的設備內存段。如果size1 < size2,則調用:
cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost)
會出問題,因為您將寫入非法內存(該內存不屬於Dark_h所指向的數組段),並且可能會得到SEGFAULT。我沒有嘗試過,但是我敢打賭,這就是崩潰的原因。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.