简体   繁体   中英

CUDA: Access violation reading location

I'm new to CUDA and was trying to make a simple program for blurring .tga files, when I encountered an access violation reading location runtime exception. Because I'm so new to CUDA I couldn't figure out how to fix it and google wasn't very helpful. Here's the code:

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <math.h>
#include <time.h>

#define println(...) std::cout << __VA_ARGS__ << "\n";
#define WIDTH 1920
#define HEIGHT 1080
#define BLUR 5

unsigned char in[HEIGHT][WIDTH][3];
unsigned char out[HEIGHT][WIDTH][3];
unsigned char header[18];

void read(char input[256]) {
    FILE* f;
    f = fopen(input, "rb");
    if (!f) {
        printf("File Reading Failed\n");
    fread(&header, 1, 18, f);
    fread(&in, 1, HEIGHT*WIDTH * 3, f);

void write(char output[256]) {
    FILE* fw;
    fw = fopen(output, "wb+");
    if (!fw) {
        printf("File Writing Failed\n");
    header[16] = 24;
    header[13] = WIDTH / 256;
    header[12] = WIDTH % 256;
    header[15] = HEIGHT / 256;
    header[14] = HEIGHT % 256;
    fwrite(&header, 1, 18, fw);
    fwrite(&out, 1, HEIGHT*WIDTH * 3, fw);

int toIndex(int x, int y) {
    return x + y / WIDTH;

void doPixel(int x, int y, char *red, char *green, char *blue) {
    int avgRed = 0;
    int avgGreen = 0;
    int avgBlue = 0;
    for (int i = -BLUR; i <= BLUR; i++) {
        for (int j = -BLUR; j <= BLUR; j++) {
            avgRed += red[toIndex(i, j)];
            avgBlue += blue[toIndex(i, j)];
            avgGreen += green[toIndex(i, j)];
    red[toIndex(x, y)] = avgRed / (BLUR*BLUR);
    green[toIndex(x, y)] = avgGreen / (BLUR*BLUR);
    blue[toIndex(x, y)] = avgBlue / (BLUR*BLUR);

void setValue(char *red, char *green, char *blue) {
    int x;
    int y;
    for (int i = threadIdx.x; i < WIDTH * HEIGHT; i += 1024) {
        x = i % WIDTH;
        y = i / WIDTH;
        doPixel(x, y, red, green, blue);

int main(void) {
    char *red, *green, *blue;
    double time;
    cudaMallocManaged(&red, WIDTH * HEIGHT);
    cudaMallocManaged(&green, WIDTH * HEIGHT);
    cudaMallocManaged(&blue, WIDTH * HEIGHT);
    for (int x = 0; x < WIDTH; x++) {
        for (int y = 0; y < HEIGHT; y++) {
            red[x + y*WIDTH] = in[y][x][2];
            green[x + y*WIDTH] = in[y][x][1];
            blue[x + y*WIDTH] = in[y][x][0];
    time = clock();
    setValue<<<1, 1024>>>(red, green, blue);
    println((clock() - time) / CLOCKS_PER_SEC);
    int x;
    int y;
    for (int i = 0; i < WIDTH * HEIGHT; i++) {
        x = i % WIDTH;
        y = i / WIDTH;
        out[y][x][0] = blue[i];      //Program gives error here
        out[y][x][1] = green[i];
        out[y][x][2] = red[i];

I read that cudaDeviceSynchronize() was the way to fix this issue, but that doesn't seem to be working. cudaThreadSynchronize() also doesn't fix the issue.

Easiest way to find an illegal memory access error is to run the binary with cuda-gdb . Make sure you give -g -G -O0 flags when compiling

In your case you might find some errors in this code segment

for (int i = -BLUR; i <= BLUR; i++) {
    for (int j = -BLUR; j <= BLUR; j++) {
        avgRed   += red[toIndex(i, j)];
        avgBlue  += blue[toIndex(i, j)];
        avgGreen += green[toIndex(i, j)];

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

粤ICP备18138465号  © 2020-2024 STACKOOM.COM