简体   繁体   中英

Compilation error in cuda kernel calling/ passing parameters

In the actual code, my intention is to compute an output array by comparing each element of the input array to a scalar; put simply, output = input > scalar.

The simple host-side sample code shown below works as expected.

    // Host-side buffers: W1*H1 input floats and H1*W2 boolean results.
    // NOTE(review): malloc results are not checked, and nothing in this excerpt
    // frees the host or device buffers -- confirm that happens further down.
    float *h_data1 = (float *)malloc(W1*H1 * sizeof(float));
    bool *h_result = (bool *)malloc(H1*W2 * sizeof(bool));

    // Device-side counterparts of the two buffers; every CUDA runtime call is
    // wrapped in gpuErrchk.
    float *d_data1;      gpuErrchk(cudaMalloc(&d_data1, W1*H1 * sizeof(float)));
    bool *d_result;    gpuErrchk(cudaMalloc(&d_result, H1*W2 * sizeof(bool)));

    // Fill the input with 0, 1, 2, ... so the comparison result is predictable.
    for (int i = 0; i < W1*H1; i++) h_data1[i] = (float)i;

    // Upload the input to the device.
    gpuErrchk(cudaMemcpy(d_data1, h_data1, W1*H1 * sizeof(float), cudaMemcpyHostToDevice));

    // Launch outw blocks of outh threads; each thread compares one input element
    // with `scalar` and writes one element of d_result.
    // NOTE(review): the input holds W1*H1 elements but the result holds H1*W2, and
    // the kernel indexes both with the same thread index without a bounds check --
    // confirm outw*outh does not exceed either buffer (presumably W2 == W1).
    float scalar = 2;
    compGraterRetOut<float, bool><< <outw, outh >> > (d_data1, d_result, scalar);

    // Read the pending launch error (without clearing it), then block until the
    // kernel has finished so that execution errors surface here as well.
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

The device side code is

/**
 * Element-wise "greater than scalar" comparison:
 *     dataOut[i] = (dataIn[i] > scalar)
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. The element
 * index is blockIdx.x * blockDim.x + threadIdx.x. No shared memory is used.
 *
 * @tparam TType     element type of the input array and of the scalar
 * @tparam TTypeOut  element type of the output array; receives the boolean
 *                   comparison result converted to TTypeOut
 * @param dataIn   device-accessible input array
 * @param dataOut  device-accessible output array, indexed like dataIn
 * @param scalar   value every input element is compared against
 * @param count    number of valid elements; threads whose index is >= count
 *                 do nothing. Defaults to the largest size_t value, so
 *                 existing three-argument launches behave exactly as before
 *                 (in that case the launch itself must not exceed the
 *                 buffers). Pass the real element count to make a rounded-up
 *                 grid safe.
 */
template<typename TType, typename TTypeOut>
__global__  void compGraterRetOut(TType *dataIn, TTypeOut *dataOut, const TType scalar,
                                  size_t count = static_cast<size_t>(-1))
{
    // Widen before multiplying so that very large grids cannot wrap a 32-bit index.
    const size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    // Guard the tail: a grid rounded up to whole blocks has surplus threads.
    if (i < count)
    {
        dataOut[i] = (dataIn[i] > scalar);
    }
}

Coming to the actual code, I have an image class as shown below (Only some part of the class is shown).

/**
 * Image whose pixel buffer is addressed through a raw TType pointer.
 *
 * @tparam TType       element type stored in the buffer
 * @tparam TImageType  layout tag (an ImageType enumerator)
 *
 * Note that every combination of template arguments is a distinct class:
 * the private members of Image<A, T> are not accessible from member
 * functions of Image<B, T> when A and B differ.
 */
template<typename TType, ImageType TImageType>
class Image
{
public:
    /**
     * Creates an image of the given dimensions.
     *
     * @param width   image width in elements
     * @param height  image height in elements
     * @param depth   number of planes/channels; defaults to 1
     */
    Image(uint32_t width, uint32_t height, uint32_t depth = 1);

private:
    // Every member has a default value so that a constructor path that returns
    // before allocating still leaves the object in a well-defined, empty state.
    TType* m_data = nullptr;      // element buffer; null while nothing is allocated
    uint32_t m_width = 0;         // width in elements
    uint32_t m_height = 0;        // height in elements
    uint32_t m_depth = 0;         // number of planes/channels
    uint32_t m_bufferSize = 0;    // number of elements in m_data; 0 while unallocated
};

/**
 * Constructs an image and allocates width * height * depth elements of
 * device memory for it with cudaMalloc.
 *
 * If width or height is zero nothing is allocated. In that case, and when the
 * allocation fails, m_data is null and m_bufferSize is 0, so the object never
 * carries an indeterminate pointer or size.
 *
 * @param width   image width in elements
 * @param height  image height in elements
 * @param depth   number of planes/channels
 */
template<typename TType, ImageType TImageType>
Image<TType, TImageType>::Image(uint32_t width, uint32_t height, uint32_t depth)
    : m_data(nullptr), m_width(width), m_height(height), m_depth(depth), m_bufferSize(0)
{
    // Empty image: keep the null buffer and zero size set above.
    if (width == 0 || height == 0)
        return;

    //m_data = new TType[m_width * m_height * m_depth];
    // sizeof(TType) comes first, so the whole byte-size product is evaluated in size_t.
    const cudaError_t cudaStatus =
        cudaMalloc(&m_data, sizeof(TType) * m_width * m_height * m_depth);
    gpuErrchk(cudaStatus);

    if (cudaStatus == cudaSuccess)
    {
        m_bufferSize = m_width * m_height * m_depth;
    }
    else
    {
        // Only reached if gpuErrchk does not abort on failure. Make sure no
        // stale pointer survives a failed allocation.
        m_data = nullptr;
        std::cout << "Error malloc function failed [" << cudaStatus << "]" << std::endl;
    }
}

To achieve the objective out = in > scalar, operator> is overloaded as shown below. This produced the following compilation error:

"member "Image::m_data [with TType=float_t, TImageType=ImageType::WHD]""

the code looks as shown below.

// Element-wise comparison of this image against `scalar`: builds a uint32_t
// image with the same dimensions and fills it on the device through the
// compGraterRetOut kernel.
//
// This is the version that does NOT compile. `ret` is an
// Image<uint32_t, TImageType>, while this member function belongs to
// Image<TType, TImageType>. Whenever TType is not uint32_t these are two
// distinct classes, so `ret.m_data` names a private member of a different
// class and the access is rejected.
// NOTE(review): the `template<...>` header line is not part of this excerpt.
inline Image<uint32_t, TImageType> Image<TType, TImageType>::operator>(TType scalar) const
{
        // Result image with the same width/height/depth as this one.
        Image<uint32_t, TImageType> ret(m_width, m_height, m_depth);

        // Launches m_width*4 blocks of (m_height*m_depth/4) threads.
        // NOTE(review): the block size is 0 when m_height*m_depth < 4 and exceeds
        // the 1024-threads-per-block limit when m_height*m_depth > 4096; when the
        // product is not divisible by 4 the trailing elements are never written.
        // `ret.m_data` below is the private access that triggers the compile error.
        compGraterRetOut<TType, uint32_t> << <m_width * 4, (m_height * m_depth/4) >> > (m_data, ret.m_data, scalar);

        // Report launch errors, then wait for the kernel so execution errors surface.
        gpuErrchk(cudaGetLastError());
        gpuErrchk(cudaDeviceSynchronize());
        // NOTE(review): std::move on a returned local disables copy elision;
        // a plain `return ret;` would be sufficient.
        return std::move(ret);
}

To fix the compilation error, I changed operator> as shown below. Here, the CUDA memory is allocated inside the function instead of inside the class's constructor.

/**
 * Element-wise comparison of this image against a scalar.
 *
 * Allocates a uint32_t device buffer with one element per element of this
 * image, fills it with (m_data[i] > scalar) via the compGraterRetOut kernel
 * and returns it wrapped in an Image<uint32_t, TImageType> of the same
 * dimensions. The call blocks until the kernel has finished.
 *
 * Only this object's own m_data and the local `dataout` pointer are touched,
 * so no private member of another Image instantiation is accessed.
 *
 * @param scalar  value every element is compared against
 * @return image holding the comparison result for every element
 */
template<class TType, ImageType TImageType>
inline Image<uint32_t, TImageType> Image<TType, TImageType>::operator>(TType scalar) const
{
        // Fixed block size: a multiple of the warp size and well below the
        // 1024-threads-per-block hardware limit.
        const unsigned int kThreadsPerBlock = 256;

        // Count elements in size_t so that neither the count nor the byte size
        // can wrap around in 32-bit arithmetic.
        const size_t elementCount = static_cast<size_t>(m_width) * m_height * m_depth;

        uint32_t *dataout = nullptr;
        gpuErrchk(cudaMalloc(&dataout, elementCount * sizeof(uint32_t)));

        // NOTE(review): the (pointer, width, height, depth) constructor is not
        // shown here; presumably `ret` adopts `dataout` -- confirm who frees it.
        Image<uint32_t, TImageType> ret(dataout, m_width, m_height, m_depth);

        // The kernel is launched without a bounds argument, so the launches must
        // cover exactly elementCount threads: whole blocks first, then one
        // partial block on pointers advanced past the part already covered.
        const size_t fullBlocks = elementCount / kThreadsPerBlock;
        const unsigned int tailThreads = static_cast<unsigned int>(elementCount % kThreadsPerBlock);

        if (fullBlocks > 0)
        {
                compGraterRetOut<TType, uint32_t> << <static_cast<unsigned int>(fullBlocks), kThreadsPerBlock >> > (m_data, dataout, scalar);
                gpuErrchk(cudaGetLastError());
        }
        if (tailThreads > 0)
        {
                const size_t covered = fullBlocks * kThreadsPerBlock;
                compGraterRetOut<TType, uint32_t> << <1, tailThreads >> > (m_data + covered, dataout + covered, scalar);
                gpuErrchk(cudaGetLastError());
        }

        // Wait for completion so that execution errors surface here.
        gpuErrchk(cudaDeviceSynchronize());

        // Return the local by name: it is moved or elided automatically, whereas
        // wrapping it in std::move would prevent copy elision.
        return ret;
}

Finally, my question is: why does the last version compile without an error, while the previous one does not?

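The problem has nothing to do with CUDA; it is a matter of templates and access control (OOP). Each set of template arguments produces a distinct class, so Image<uint32_t, TImageType> and Image<float_t, TImageType> are unrelated types. A member function may access the private members of its own class without violating the OOP paradigm, but accessing a private member of the same class template instantiated with different template arguments does violate it, and the compiler rejects it. The first operator> reads ret.m_data, which is private to Image<uint32_t, TImageType>, from inside a member of Image<float_t, TImageType>, hence the error; the second version only uses its own m_data and the local pointer dataout, so it compiles. That is the answer.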

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM