[英]CUDA: Access violation reading location
我是CUDA的新手,并且在遇到读取访问位置运行时异常的访问冲突时,尝试制作一个用于模糊.tga文件的简单程序。 因为我对CUDA不太熟悉,所以我不知道如何解决它,而google并不是很有帮助。 这是代码:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <math.h>
#include <time.h>
#define println(...) std::cout << __VA_ARGS__ << "\n";
#define WIDTH 1920
#define HEIGHT 1080
#define BLUR 5
unsigned char in[HEIGHT][WIDTH][3];
unsigned char out[HEIGHT][WIDTH][3];
unsigned char header[18];
void read(char input[256]) {
FILE* f;
f = fopen(input, "rb");
if (!f) {
printf("File Reading Failed\n");
}
fread(&header, 1, 18, f);
fread(&in, 1, HEIGHT*WIDTH * 3, f);
fclose(f);
}
void write(char output[256]) {
FILE* fw;
fw = fopen(output, "wb+");
if (!fw) {
printf("File Writing Failed\n");
}
header[16] = 24;
header[13] = WIDTH / 256;
header[12] = WIDTH % 256;
header[15] = HEIGHT / 256;
header[14] = HEIGHT % 256;
fwrite(&header, 1, 18, fw);
fwrite(&out, 1, HEIGHT*WIDTH * 3, fw);
fclose(fw);
}
__device__
int toIndex(int x, int y) {
return x + y / WIDTH;
}
__device__
void doPixel(int x, int y, char *red, char *green, char *blue) {
int avgRed = 0;
int avgGreen = 0;
int avgBlue = 0;
for (int i = -BLUR; i <= BLUR; i++) {
for (int j = -BLUR; j <= BLUR; j++) {
avgRed += red[toIndex(i, j)];
avgBlue += blue[toIndex(i, j)];
avgGreen += green[toIndex(i, j)];
}
}
red[toIndex(x, y)] = avgRed / (BLUR*BLUR);
green[toIndex(x, y)] = avgGreen / (BLUR*BLUR);
blue[toIndex(x, y)] = avgBlue / (BLUR*BLUR);
}
__global__
void setValue(char *red, char *green, char *blue) {
int x;
int y;
for (int i = threadIdx.x; i < WIDTH * HEIGHT; i += 1024) {
x = i % WIDTH;
y = i / WIDTH;
doPixel(x, y, red, green, blue);
}
}
int main(void) {
char *red, *green, *blue;
double time;
read("test.tga");
cudaMallocManaged(&red, WIDTH * HEIGHT);
cudaMallocManaged(&green, WIDTH * HEIGHT);
cudaMallocManaged(&blue, WIDTH * HEIGHT);
for (int x = 0; x < WIDTH; x++) {
for (int y = 0; y < HEIGHT; y++) {
red[x + y*WIDTH] = in[y][x][2];
green[x + y*WIDTH] = in[y][x][1];
blue[x + y*WIDTH] = in[y][x][0];
}
}
time = clock();
setValue<<<1, 1024>>>(red, green, blue);
cudaDeviceSynchronize();
println((clock() - time) / CLOCKS_PER_SEC);
int x;
int y;
for (int i = 0; i < WIDTH * HEIGHT; i++) {
x = i % WIDTH;
y = i / WIDTH;
out[y][x][0] = blue[i]; //Program gives error here
out[y][x][1] = green[i];
out[y][x][2] = red[i];
}
write("test.tga");
cudaFree(red);
cudaFree(green);
cudaFree(blue);
getchar();
}
我读到cudaDeviceSynchronize()是解决此问题的方法,但这似乎不起作用。 cudaThreadSynchronize()也不能解决问题。
查找非法内存访问错误的最简单方法是使用cuda-gdb
运行二进制文件。 确保在编译时提供-g -G -O0
标志
您可能会在此代码段中发现一些错误
for (int i = -BLUR; i <= BLUR; i++) {
for (int j = -BLUR; j <= BLUR; j++) {
avgRed += red[toIndex(i, j)];
avgBlue += blue[toIndex(i, j)];
avgGreen += green[toIndex(i, j)];
}
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.