简体   繁体   中英

cudaMemcpy returns cudaErrorInvalidValue when copying a vector<cv::Point3f>

cudaMemcpy returns cudaErrorInvalidValue when I copy a vector onto the device. I have tried passing "&input", "&input[0]", and so on, but I always get the same error and don't understand why.

Can you copy a vector using cudaMemcpy, or do I need to copy the contents of that vector into a new array first?

// Host wrapper: uploads the depth-change map, the input points and two scalar
// parameters to the device, launches addKernel on a 2D grid of 8x8 thread
// blocks, then copies the device depth-change map back into depthChangeMap.
//
// Parameters:
//   depthChangeMap  host buffer; `size` bytes are copied to the device before
//                   the launch and `size` bytes are copied back afterwards.
//   size            used both as the byte length of depthChangeMap and as the
//                   element count of the input points in the input upload.
//   input           pointer to the host vector of points.
//   dcf             scalar uploaded to a one-float device buffer.
//   width, height   used to derive the grid dimensions; width is also uploaded
//                   to a one-int device buffer.
//
// NOTE(review): every call overwrites cudaStatus and the value is never
// inspected, so a failure in any step goes unnoticed by this function.
// NOTE(review): no cudaFree call is visible for any of the four device buffers.
void computeDepthChangeMap(unsigned char* depthChangeMap, size_t size, std::vector<cv::Point3f>* input, float dcf, int width, int height)                                           {
    unsigned char* dev_depthChangeMap = 0;
    float* dev_dcf = 0;
    int* dev_wdt = 0;
    // arraySize is assigned here but not used anywhere else in this function.
    int arraySize = size;
    cv::Point3f* dev_input = 0;
    cudaError_t cudaStatus;

    cudaStatus = cudaSetDevice(0);
    cudaStatus = cudaMalloc((void**)&dev_depthChangeMap, size);
    // NOTE(review): this allocates `size` bytes, but the upload into dev_input
    // below requests sizeof(cv::Point3f)*size bytes, which is larger than the
    // allocation.
    cudaStatus = cudaMalloc((void**)&dev_input, size);
    cudaStatus = cudaMalloc((void**)&dev_dcf, sizeof(float));
    cudaStatus = cudaMalloc((void**)&dev_wdt, sizeof(int));

    cudaStatus = cudaMemcpy(dev_depthChangeMap, depthChangeMap, size, cudaMemcpyHostToDevice);
    cudaStatus = cudaMemcpy(dev_wdt, &width, sizeof(int), cudaMemcpyHostToDevice);
    cudaStatus = cudaMemcpy(dev_dcf, &dcf, sizeof(float), cudaMemcpyHostToDevice);
    // NOTE(review): `input` is a pointer to a vector, so `input[0]` is the
    // vector object itself and `&input[0]` is the address of that object, not
    // of its first element. The element storage would be input->data().
    cudaStatus = cudaMemcpy(dev_input, &input[0], sizeof(cv::Point3f)*size, cudaMemcpyHostToDevice);

    //cudaStatus returns cudaErrorInvalidValue >> PROBLEM HERE << 

    dim3 threadsPerBlock(8, 8); //init x, y
    // Integer division: when width or height is not a multiple of 8 the
    // remainder is dropped, so the grid covers fewer than width x height threads.
    dim3 numBlocks(width / threadsPerBlock.x, height / threadsPerBlock.y);

    addKernel <<<numBlocks, threadsPerBlock >>>(dev_depthChangeMap, dev_dcf, dev_input, dev_wdt);


    // The launch status read here is overwritten by the next assignment
    // without being examined.
    cudaStatus = cudaGetLastError();   
    cudaStatus = cudaDeviceSynchronize();
    cudaStatus = cudaMemcpy(depthChangeMap, dev_depthChangeMap, size, cudaMemcpyDeviceToHost);
}

// Kernel: each thread derives a flat index from its 2D grid position and reads
// the z component of three entries of `inp`. The rest of the body is elided in
// this listing.
//
// Parameters:
//   dev_depthChangeMap  device buffer; not touched by the visible part of the body.
//   dcf                 device pointer to a float; not read by the visible part.
//   inp                 device array of points that the visible reads index into.
//   wdt                 device pointer to an int used as the row stride.
//
// NOTE(review): no bounds guard is visible. The reads at idx + 1 and idx + *wdt
// reach past idx, so threads near the end of `inp` presumably read out of
// range -- confirm against the elided code.
__global__ void addKernel(unsigned char* dev_depthChangeMap, float* dcf, cv::Point3f* inp, int* wdt)
{
    // row_idx comes from the x dimension of the launch and col_idx from the y
    // dimension.
    // NOTE(review): the host sizes the x dimension from width and the y
    // dimension from height, yet row_idx is the one multiplied by *wdt below;
    // the two may be swapped -- verify.
    register int row_idx = (blockIdx.x * blockDim.x) + threadIdx.x;
    register int col_idx = (blockIdx.y * blockDim.y) + threadIdx.y;
    register int idx = row_idx * (*wdt) + col_idx;

    // z of the entry at idx, of the next entry, and of the entry *wdt further on
    // (presumably the right and lower neighbours, going by the names).
    register float depth = inp[idx].z;
    register float depthR = inp[idx + 1].z;
    register float depthD = inp[idx + *wdt].z;

    //and so on

}

Yes, you can copy from a std::vector using cudaMemcpy.

You don't have your sizes set up correctly:

void computeDepthChangeMap(unsigned char* depthChangeMap, size_t size, std::vector<cv::Point3f>* input, float dcf, int width, int height)                                           {

...
cudaStatus = cudaMalloc((void**)&dev_input, size);
                                            ^^^^

cudaStatus = cudaMemcpy(dev_input, &input[0], sizeof(cv::Point3f)*size, cudaMemcpyHostToDevice);
                                                     ^^^^^^^^^^^^^^^^^

These size parameters should all be in bytes. You can't copy data of length sizeof(cv::Point3f)*size bytes into an allocation of length size bytes.

Also, it seems that your function parameter is a pointer to a vector:

std::vector<cv::Point3f>* input,

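based on the code you have shown, this is probably not what you want: with a pointer parameter, input[0] is the vector object itself, so &input[0] is the address of that object rather than of its first element. You probably either want to pass the vector by value: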

std::vector<cv::Point3f> input,

or more likely, by reference:

std::vector<cv::Point3f> &input,

Since you haven't shown how you intend to call this function, it's not possible to be entirely sure what is best here.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM