需要有關非常簡單的CUDA和C++程序的幫助

Question

我是CUDA編程的新手。我正在嘗試創建一個簡單的CUDA和C++圖像處理程序，用來改變圖像的亮度、飽和度、對比度等。我先從一個非常簡單的功能開始：只改變圖像的亮度，方法是將圖像的所有RGB分量乘以一個alpha值。

這是我的CPP程序：

#include <iostream>

#include <cutil_inline.h>
#include <cutil_gl_inline.h>

#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

using namespace std;

// Describes one loaded image: a pointer to its pixel bytes plus its dimensions.
// initCuda() sizes its buffers as width * height * 3 bytes from these fields.
struct ImageData {
    unsigned char *data;    /* Points to large array of R,G,B-order data */
    int  height;            // image height (presumably in pixels; confirm against the loader)
    int width;              // image width (presumably in pixels; confirm against the loader)
};

ImageData imageData;        // the image filled in by loadPPMImageData() in main()
float *imageResult; // to store the image result from cuda after running the kernel (not referenced elsewhere in the visible code)
unsigned char *d_Input;     // device buffer for the source image, allocated with cudaMalloc in initCuda()
unsigned char *d_Output;    // device buffer for the filtered image, allocated with cudaMalloc in initCuda()
unsigned char *h_Output;    // host-side destination of the device-to-host copy in main()

// These are CUDA functions to handle allocation and launching the kernels
// (declared extern "C" here; the listing of kernel.cu defines them with extern "C" as well).
extern "C" void initInput( unsigned char *data, unsigned char **device, unsigned int size); 
extern "C" void filter(unsigned char *d_src, unsigned char *d_dest, int width, int height, int filterMode, 
                float alpha, float contrast, float saturation, bool use_array );

// Allocates the device-side input and output buffers for the loaded image
// (width * height * 3 bytes each) and hands the host pixels to initInput().
void initCuda()
{
    const unsigned int numBytes = imageData.width * imageData.height * 3 * sizeof(unsigned char);

    // Device storage for the source image, then for the filtered result.
    cutilSafeCall(cudaMalloc(reinterpret_cast<void **>(&d_Input), numBytes));
    cutilSafeCall(cudaMalloc(reinterpret_cast<void **>(&d_Output), numBytes));

    // Pass the host pixels together with the address of the device pointer.
    initInput(imageData.data, &d_Input, numBytes);
}

int main () {


    loadPPMImageData( (char *)"boxes.ppm", &imageData); //this function is defined in another file
    cudaGLSetGLDevice( 0 );
    initCuda();
    filter(  d_Input, d_Output, imageData.width, imageData.height, 1, 0.8, 1.0, 1.0, 1 );

    cutilSafeCall(cudaMemcpy( h_Output, d_Output, size, cudaMemcpyDeviceToHost)); // copy output data from device to host
    //print the output
    for (int i = 0; i < imageData.size; i++) {
        cout << d_Output
    }
    // do some memory cleanups

    //done
    return 0
}

這是我的kernel.cu文件：

#include <iostream>
#include <cstdlib>
#include <string>
#include <cmath>

#include <shrUtils.h>
#include <cutil_inline.h>
#include <cutil_math.h>


// Brightness kernel: multiplies every R, G and B byte of an interleaved RGB
// image by `alpha` and writes the result to `output`.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
// overhang the image; threads outside width x height do nothing.
// Shared memory: none.
//
// input  - source bytes, 3 per pixel, indexed as (y * width + x) * 3 + channel
// output - destination bytes, indexed the same way as input
// width  - image width in pixels
// height - image height in pixels
// alpha  - factor applied to each channel value
__global__ void
applyAlpha(unsigned char* input, unsigned char* output, int width, int height, float alpha) 
{
    // Pixel coordinates handled by this thread.
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The launch rounds the grid up to whole blocks, so threads in the edge
    // blocks can fall outside the image and must not touch memory.
    if (x >= width || y >= height) {
        return;
    }

    // size_t keeps the byte offset from overflowing int on very large images.
    const size_t base = (static_cast<size_t>(y) * width + x) * 3;

    for (int c = 0; c < 3; ++c) { // r, g, b
        // Clamp to [0, 255] so values leaving the byte range saturate
        // instead of wrapping around when stored as unsigned char.
        const float scaled = fminf(fmaxf(input[base + c] * alpha, 0.0f), 255.0f);
        output[base + c] = static_cast<unsigned char>(scaled);
    }
}


// Returns a / b, incremented by one whenever a is not an exact multiple of b
// (i.e. the quotient rounded up for non-negative a and positive b).
extern "C"
int iDivUp( int a, int b ){
    int quotient = a / b;
    if (a % b != 0) {
        quotient += 1;
    }
    return quotient;
}


// Copies the host image into the device buffer referenced by *deviceArray.
//
// data        - host pointer to the source bytes
// deviceArray - address of a device pointer (initCuda passes &d_Input); the
//               buffer it points to must already be allocated by the caller
// size        - number of bytes to copy
extern "C" 
void initInput( unsigned char *data, unsigned char **deviceArray, unsigned int size)
{
    /* TODO: Array version DONE
     *  Initialize device memory for array version
     *  and cuda arrays for texture version here
     */
    // Dereference deviceArray: the parameter itself is the host address of the
    // pointer variable, not device memory, so using it as the destination
    // would never fill the device buffer.
    cutilSafeCall(cudaMemcpy( *deviceArray, data, size, cudaMemcpyHostToDevice)); // copy image data from host to device (array version)
    //TODO: Texture version

}


// Host-side entry point that launches the image filter kernels.
//
// Only the array path with filter_mode == 1 (brightness via applyAlpha) does
// any work; every other combination returns without launching anything.
// contrast and saturation are accepted but not used by the visible code.
extern "C" 
void filter( unsigned char *d_src, unsigned char *d_dest, int width, int height, int filter_mode, float alpha, float contrast, float saturation, bool use_array )
{
    /*  TODO
     * run different kernels for array and texture version
     */

    // Texture version: not yet implemented.
    if (!use_array) {
        return;
    }

    // Brightness (alpha) is the only filter mode handled so far.
    if (filter_mode != 1) {
        return;
    }

    // 16x16 threads per block; enough blocks to cover the whole image.
    const dim3 threadsPerBlock(16, 16, 1);
    const dim3 blocksPerGrid(iDivUp(width, threadsPerBlock.x),
                             iDivUp(height, threadsPerBlock.y),
                             1);

    applyAlpha<<< blocksPerGrid, threadsPerBlock >>>(d_src, d_dest, width, height, alpha);

    // check if kernel execution generated an error
    cutilCheckMsg("Kernel execution failed");

    // Wait for the kernel to finish before returning to the caller.
    cutilSafeCall( cutilDeviceSynchronize() );
}

// 編輯：我根據Andrew的回答修改了上述文件。但是現在編譯後出現了以下錯誤：

ld: warning: ignoring file kernel.o, file was built for i386 which is not the architecture being linked (x86_64)
Undefined symbols for architecture x86_64:
  "_initInput", referenced from:
      initCuda()    in CS380_prog4.o
  "_filter", referenced from:
      display()     in CS380_prog4.o
     (maybe you meant: ___GLEW_SGIS_texture_filter4, ___GLEW_EXT_texture_filter_anisotropic , ___GLEW_NV_multisample_filter_hint )
ld: symbol(s) not found for architecture x86_64
collect2: ld returned 1 exit status
make: *** [testprog] Error 1

我在initInput和filter這兩個函數上都使用了extern "C"聲明。函數聲明（在test.cpp中）和定義（在kernel.cu中）的參數也相同，但鏈接器仍然報錯說找不到這些函數。我該如何解決這個鏈接問題？

Answer 1

您將.cu文件直接包含在.cpp文件中，這實際上相當於把它的內容復制到.cpp文件中。nvcc會把.cpp文件交給標准的C++編譯器（在Unix平台上是g++）來編譯，而該編譯器不認識任何CUDA語法。

您必須將每個文件分別編譯為目標文件，然後用C++編譯器將它們鏈接起來，並像在標准C中那樣，為.cu文件中導出的函數創建頭文件。

Answer 2

在cuda-grayscale中有一個很好的例子。

它曾經可以在CUDA 3.1上編譯。那里有一個Makefile，可以看一看。

# Builds Debug/grayscale from main.cpp (host C++) and kernel_gpu.cu (CUDA).
CXX=g++

CUDA_INSTALL_PATH=/usr/local/cuda
CFLAGS= -I. -I$(CUDA_INSTALL_PATH)/include `pkg-config --cflags opencv`
LDFLAGS= -L$(CUDA_INSTALL_PATH)/lib -lcudart `pkg-config --libs opencv`
# Extra nvcc options; empty unless the caller sets them, e.g.
# `make CUDAFLAGS=-m64` so the CUDA object matches a 64-bit host build.
CUDAFLAGS ?=

# all and clean are commands, not files.
.PHONY: all clean

all:
	mkdir -p Debug
	$(CXX) $(CFLAGS) -c main.cpp -o Debug/main.o
	nvcc $(CUDAFLAGS) -c kernel_gpu.cu -o Debug/kernel_gpu.o
	# Libraries come after the objects that need them so the linker can resolve their symbols.
	$(CXX) Debug/main.o Debug/kernel_gpu.o $(LDFLAGS) -o Debug/grayscale

clean:
	rm -f Debug/*.o Debug/grayscale

需要有關非常簡單的Cuda和C ++程序的幫助

問題描述

2 個解決方案

解決方案1
4 已采納 2011-04-22 21:51:12

解決方案2
0 2011-04-22 23:19:08

需要有關非常簡單的Cuda和C ++程序的幫助

問題描述

2 個解決方案

解決方案1 4 已采納 2011-04-22 21:51:12

解決方案2 0 2011-04-22 23:19:08

解決方案1
4 已采納 2011-04-22 21:51:12

解決方案2
0 2011-04-22 23:19:08