简体   繁体   English

尝试使用 cuda 进行图像模糊

[英]Trying image blurring with cuda

I'm trying to blur an image with cuda using stbi_image as library for loading and saving the image.我正在尝试使用 stbi_image 作为加载和保存图像的库,使用 cuda 模糊图像。 I get no error when I compile my code, but when I try to see the result it's just a blank image.编译代码时没有错误,但是当我尝试查看结果时,它只是一个空白图像。 That is the code.那就是代码。

#include "lodepng.h"
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image.h"
#include "stb_image_write.h"
#include <iostream>
#define BLUR_SIZE 7
#define R 0
#define G 1
#define B 2

__global__ void blurKernel(unsigned char* in, unsigned char* out, int width, int height, int num_channel, int channel) {

  int col = blockIdx.x * blockDim.x + threadIdx.x;
  int row = blockIdx.y * blockDim.y + threadIdx.y;

  if(col < width && row < height) {
    int pixVal = 0;
    int pixels = 0;

    for(int blurRow = -BLUR_SIZE; blurRow < BLUR_SIZE + 1; ++blurRow) {
      for(int blurCol = -BLUR_SIZE; blurCol < BLUR_SIZE + 1; ++blurCol) {
        int curRow = row + blurRow;
        int curCol = col + blurCol;
        if(curRow > -1 && curRow < height && curCol > -1 && curCol < width) {
          pixVal += in[curRow * width * num_channel + curCol * num_channel + channel];
          pixels++;
        }
      }
    }
    out[row * width * num_channel + col * num_channel + channel] = (unsigned char)(pixVal/pixels);
  }
}

int main() {

  int width, height,n;
  unsigned char *image = stbi_load("image4.png",&width,&height,&n,0);
  unsigned char *output = (unsigned char*)malloc(width * height * n *sizeof(unsigned char));
  
  unsigned char* Dev_Input_Image = NULL;
  unsigned char* Dev_Output_Image = NULL;
  cudaMalloc((void**)&Dev_Input_Image, sizeof(unsigned char)* height * width * n);
  cudaMalloc((void**)&Dev_Output_Image, sizeof(unsigned char)* height * width * n);

  cudaMemcpy(Dev_Input_Image, image, sizeof(unsigned char) * height * width * n, cudaMemcpyHostToDevice);

  //kernel call
  dim3 blockSize(16, 16, 1);
  dim3 gridSize(width/blockSize.x, height/blockSize.y,1);
  blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,R);
  blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,G);
  blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,B);
  
  cudaDeviceSynchronize();

    cudaMemcpy(image, Dev_Output_Image, sizeof(unsigned char) * height * width * n, cudaMemcpyDeviceToHost);

  stbi_write_png("output_stbimage.png", width, height, n, image, width * n);

  cudaFree(Dev_Input_Image);
  cudaFree(Dev_Output_Image);

  return 0;
}

I tried every possible ways, but I can't get where I'm stuck at.我尝试了所有可能的方法,但我无法到达我被困在的地方。 I tried doing it in a serial-way and it perfectly works with the same logic (I mean processing the blur on every channel).我尝试以串行方式执行此操作,它完美地使用相同的逻辑(我的意思是处理每个通道上的模糊)。 Hope somebody can help me希望有人可以帮助我

A problem with your code is that you are not copying (or setting) the alpha channel from input to output image in your kernel code (or anywhere else).您的代码的一个问题是您没有将 alpha 通道从输入复制(或设置)到 kernel 代码(或其他任何地方)中的 output 图像。 The alpha channel is effectively uninitialized. Alpha 通道实际上是未初始化的。 If it happens to end up at zero, you won't see anything interesting in the output picture, regardless of the other channels.如果它碰巧最终为零,那么无论其他通道如何,您都不会在 output 图片中看到任何有趣的东西。

When I fix your code like this:当我像这样修复您的代码时:

#include "lodepng.h"
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image.h"
#include "stb_image_write.h"
#include <iostream>
#define BLUR_SIZE 7
#define R 0
#define G 1
#define B 2
#define A 3

__global__ void blurKernel(unsigned char* in, unsigned char* out, int width, int height, int num_channel, int channel, int copy_A) {

  int col = blockIdx.x * blockDim.x + threadIdx.x;
  int row = blockIdx.y * blockDim.y + threadIdx.y;

  if(col < width && row < height) {
    int pixVal = 0;
    int pixels = 0;
    if (copy_A)
      out[row*width*num_channel+col*num_channel+A] = in[row*width*num_channel+col*num_channel+A];
    for(int blurRow = -BLUR_SIZE; blurRow < BLUR_SIZE + 1; ++blurRow) {
      for(int blurCol = -BLUR_SIZE; blurCol < BLUR_SIZE + 1; ++blurCol) {
        int curRow = row + blurRow;
        int curCol = col + blurCol;
        if(curRow > -1 && curRow < height && curCol > -1 && curCol < width) {
          pixVal += in[curRow * width * num_channel + curCol * num_channel + channel];
          pixels++;
        }
      }
    }
    out[row * width * num_channel + col * num_channel + channel] = (unsigned char)(pixVal/pixels);
  }
}

int main() {

  int width, height,n;
  unsigned char *image = stbi_load("image4.png",&width,&height,&n,0);
  unsigned char *output = (unsigned char*)malloc(width * height * n *sizeof(unsigned char));
  unsigned char* Dev_Input_Image = NULL;
  unsigned char* Dev_Output_Image = NULL;
  cudaMalloc((void**)&Dev_Input_Image, sizeof(unsigned char)* height * width * n);
  cudaMalloc((void**)&Dev_Output_Image, sizeof(unsigned char)* height * width * n);

  cudaMemcpy(Dev_Input_Image, image, sizeof(unsigned char) * height * width * n, cudaMemcpyHostToDevice);

  //kernel call
  dim3 blockSize(16, 16, 1);
  dim3 gridSize(width/blockSize.x, height/blockSize.y,1);
  blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,R,0);
  blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,G,0);
  blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,B,1);
  
  cudaDeviceSynchronize();

    cudaMemcpy(image, Dev_Output_Image, sizeof(unsigned char) * height * width * n, cudaMemcpyDeviceToHost);
  cudaFree(Dev_Input_Image);
  cudaFree(Dev_Output_Image);
  stbi_write_png("output_stbimage.png", width, height, n, image, width * n);


  return 0;
}

And compile and run it using this picture :并使用这张图片编译并运行它:

在此处输入图像描述

I get an output picture that looks like this:我得到一张看起来像这样的 output 图片:

在此处输入图像描述

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM