[英]CUDA: Access violation reading location
我是CUDA的新手,並且在遇到讀取訪問位置運行時異常的訪問沖突時,嘗試制作一個用於模糊.tga文件的簡單程序。 因為我對CUDA不太熟悉,所以我不知道如何解決它,而google並不是很有幫助。 這是代碼:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <math.h>
#include <time.h>
#define println(...) std::cout << __VA_ARGS__ << "\n";
#define WIDTH 1920
#define HEIGHT 1080
#define BLUR 5
unsigned char in[HEIGHT][WIDTH][3];
unsigned char out[HEIGHT][WIDTH][3];
unsigned char header[18];
void read(char input[256]) {
FILE* f;
f = fopen(input, "rb");
if (!f) {
printf("File Reading Failed\n");
}
fread(&header, 1, 18, f);
fread(&in, 1, HEIGHT*WIDTH * 3, f);
fclose(f);
}
void write(char output[256]) {
FILE* fw;
fw = fopen(output, "wb+");
if (!fw) {
printf("File Writing Failed\n");
}
header[16] = 24;
header[13] = WIDTH / 256;
header[12] = WIDTH % 256;
header[15] = HEIGHT / 256;
header[14] = HEIGHT % 256;
fwrite(&header, 1, 18, fw);
fwrite(&out, 1, HEIGHT*WIDTH * 3, fw);
fclose(fw);
}
__device__
int toIndex(int x, int y) {
return x + y / WIDTH;
}
__device__
void doPixel(int x, int y, char *red, char *green, char *blue) {
int avgRed = 0;
int avgGreen = 0;
int avgBlue = 0;
for (int i = -BLUR; i <= BLUR; i++) {
for (int j = -BLUR; j <= BLUR; j++) {
avgRed += red[toIndex(i, j)];
avgBlue += blue[toIndex(i, j)];
avgGreen += green[toIndex(i, j)];
}
}
red[toIndex(x, y)] = avgRed / (BLUR*BLUR);
green[toIndex(x, y)] = avgGreen / (BLUR*BLUR);
blue[toIndex(x, y)] = avgBlue / (BLUR*BLUR);
}
__global__
void setValue(char *red, char *green, char *blue) {
int x;
int y;
for (int i = threadIdx.x; i < WIDTH * HEIGHT; i += 1024) {
x = i % WIDTH;
y = i / WIDTH;
doPixel(x, y, red, green, blue);
}
}
int main(void) {
char *red, *green, *blue;
double time;
read("test.tga");
cudaMallocManaged(&red, WIDTH * HEIGHT);
cudaMallocManaged(&green, WIDTH * HEIGHT);
cudaMallocManaged(&blue, WIDTH * HEIGHT);
for (int x = 0; x < WIDTH; x++) {
for (int y = 0; y < HEIGHT; y++) {
red[x + y*WIDTH] = in[y][x][2];
green[x + y*WIDTH] = in[y][x][1];
blue[x + y*WIDTH] = in[y][x][0];
}
}
time = clock();
setValue<<<1, 1024>>>(red, green, blue);
cudaDeviceSynchronize();
println((clock() - time) / CLOCKS_PER_SEC);
int x;
int y;
for (int i = 0; i < WIDTH * HEIGHT; i++) {
x = i % WIDTH;
y = i / WIDTH;
out[y][x][0] = blue[i]; //Program gives error here
out[y][x][1] = green[i];
out[y][x][2] = red[i];
}
write("test.tga");
cudaFree(red);
cudaFree(green);
cudaFree(blue);
getchar();
}
我讀到cudaDeviceSynchronize()是解決此問題的方法,但這似乎不起作用。 cudaThreadSynchronize()也不能解決問題。
查找非法內存訪問錯誤的最簡單方法是使用cuda-gdb
運行二進制文件。 確保在編譯時提供-g -G -O0
標志
您可能會在此代碼段中發現一些錯誤
for (int i = -BLUR; i <= BLUR; i++) {
for (int j = -BLUR; j <= BLUR; j++) {
avgRed += red[toIndex(i, j)];
avgBlue += blue[toIndex(i, j)];
avgGreen += green[toIndex(i, j)];
}
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.