CUDA：訪問沖突讀取位置

Question

我是CUDA的新手，並且在遇到讀取訪問位置運行時異常的訪問沖突時，嘗試制作一個用於模糊.tga文件的簡單程序。 因為我對CUDA不太熟悉，所以我不知道如何解決它，而google並不是很有幫助。 這是代碼：

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <math.h>
#include <time.h>

#define println(...) std::cout << __VA_ARGS__ << "\n";
#define WIDTH 1920
#define HEIGHT 1080
#define BLUR 5

unsigned char in[HEIGHT][WIDTH][3];
unsigned char out[HEIGHT][WIDTH][3];
unsigned char header[18];

void read(char input[256]) {
    FILE* f;
    f = fopen(input, "rb");
    if (!f) {
        printf("File Reading Failed\n");
    }
    fread(&header, 1, 18, f);
    fread(&in, 1, HEIGHT*WIDTH * 3, f);
    fclose(f);
}

void write(char output[256]) {
    FILE* fw;
    fw = fopen(output, "wb+");
    if (!fw) {
        printf("File Writing Failed\n");
    }
    header[16] = 24;
    header[13] = WIDTH / 256;
    header[12] = WIDTH % 256;
    header[15] = HEIGHT / 256;
    header[14] = HEIGHT % 256;
    fwrite(&header, 1, 18, fw);
    fwrite(&out, 1, HEIGHT*WIDTH * 3, fw);
    fclose(fw);
}

__device__
int toIndex(int x, int y) {
    return x + y / WIDTH;
}

__device__
void doPixel(int x, int y, char *red, char *green, char *blue) {
    int avgRed = 0;
    int avgGreen = 0;
    int avgBlue = 0;
    for (int i = -BLUR; i <= BLUR; i++) {
        for (int j = -BLUR; j <= BLUR; j++) {
            avgRed += red[toIndex(i, j)];
            avgBlue += blue[toIndex(i, j)];
            avgGreen += green[toIndex(i, j)];
        }
    }
    red[toIndex(x, y)] = avgRed / (BLUR*BLUR);
    green[toIndex(x, y)] = avgGreen / (BLUR*BLUR);
    blue[toIndex(x, y)] = avgBlue / (BLUR*BLUR);
}

__global__
void setValue(char *red, char *green, char *blue) {
    int x;
    int y;
    for (int i = threadIdx.x; i < WIDTH * HEIGHT; i += 1024) {
        x = i % WIDTH;
        y = i / WIDTH;
        doPixel(x, y, red, green, blue);
    }
}

int main(void) {
    char *red, *green, *blue;
    double time;
    read("test.tga");
    cudaMallocManaged(&red, WIDTH * HEIGHT);
    cudaMallocManaged(&green, WIDTH * HEIGHT);
    cudaMallocManaged(&blue, WIDTH * HEIGHT);
    for (int x = 0; x < WIDTH; x++) {
        for (int y = 0; y < HEIGHT; y++) {
            red[x + y*WIDTH] = in[y][x][2];
            green[x + y*WIDTH] = in[y][x][1];
            blue[x + y*WIDTH] = in[y][x][0];
        }
    }
    time = clock();
    setValue<<<1, 1024>>>(red, green, blue);
    cudaDeviceSynchronize();
    println((clock() - time) / CLOCKS_PER_SEC);
    int x;
    int y;
    for (int i = 0; i < WIDTH * HEIGHT; i++) {
        x = i % WIDTH;
        y = i / WIDTH;
        out[y][x][0] = blue[i];      //Program gives error here
        out[y][x][1] = green[i];
        out[y][x][2] = red[i];
    }
    write("test.tga");
    cudaFree(red);
    cudaFree(green);
    cudaFree(blue);
    getchar();
}

我讀到cudaDeviceSynchronize（）是解決此問題的方法，但這似乎不起作用。 cudaThreadSynchronize（）也不能解決問題。

Answer 1

查找非法內存訪問錯誤的最簡單方法是使用cuda-gdb運行二進制文件。 確保在編譯時提供-g -G -O0標志

您可能會在此代碼段中發現一些錯誤

for (int i = -BLUR; i <= BLUR; i++) {
    for (int j = -BLUR; j <= BLUR; j++) {
        avgRed   += red[toIndex(i, j)];
        avgBlue  += blue[toIndex(i, j)];
        avgGreen += green[toIndex(i, j)];
    }
}

CUDA：訪問沖突讀取位置

問題描述

1 個解決方案

解決方案1
1 已采納 2017-11-02 04:40:05

CUDA：訪問沖突讀取位置

問題描述

1 個解決方案

解決方案1 1 已采納 2017-11-02 04:40:05

解決方案1
1 已采納 2017-11-02 04:40:05