[英]Function member as parameter of CUDA kernel
我正在使用动态并行(dynamic parallelism),想编写一个模板内核:给定一个对象指针和一个成员函数指针,由该内核在对象上调用这个成员函数。下面是一个最小的(但无法正常工作的)示例,使用 -arch=compute_35 -dlink 选项进行编译:
#include <cstdio>
#include <cstdlib>
#include <iostream>
// Minimal payload type: one integer plus a device-side member that resets it.
struct A
{
    int i;

    // Device-only: sets this object's `i` to zero.
    __device__ void clear()
    {
        this->i = 0;
    }
};
// Kernel that calls the member function `f` on the object `o` points to.
// Every thread of the launch performs the call; there is no index guard.
//   o - pointer to the target object (dereferenced on the device)
//   f - pointer-to-member-function of Object, callable with no arguments
template<typename Object, typename memberFunction>
__global__ void generalKernel(Object* o, memberFunction f)
{
    Object& target = *o;
    (target.*f)();
}
// Device-side helper: launches generalKernel with a single block of a single
// thread to run `f` on `*o`, then calls cudaDeviceSynchronize().
// The return codes of the launch and of the synchronize call are not inspected.
// NOTE(review): calling cudaDeviceSynchronize() from device code is, as far as
// I know, deprecated/removed in recent CUDA toolkits - confirm against the
// toolkit version this is built with.
template<typename Object, typename memberFunction>
__device__ void executeFunction(Object* o, memberFunction f)
{
    const dim3 gridShape(1);
    const dim3 blockShape(1);
    generalKernel<Object, memberFunction><<<gridShape, blockShape>>>(o, f);
    cudaDeviceSynchronize();
}
// Parent kernel: takes the address of A::clear in device code and hands it,
// together with `a`, to executeFunction.
__global__ void mainKernel(A* a)
{
    void (A::*clearFn)() = &A::clear;
    executeFunction(a, clearFn);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
        return true;
    std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
    return false;
}

// Allocates one managed A, sets i = 1, runs mainKernel on it and prints the
// resulting value of i. Returns EXIT_FAILURE if any CUDA call reports an error.
int main(int argc, char * argv[])
{
    (void)argc;
    (void)argv;

    A* a = 0;
    if (!cudaCallSucceeded(cudaMallocManaged(&a, sizeof(A)), "cudaMallocManaged"))
        return EXIT_FAILURE;

    a->i = 1;
    mainKernel<<<1,1>>>(a);

    // A kernel launch returns nothing itself: launch-time errors are read via
    // cudaGetLastError(), execution errors surface at the synchronize call.
    bool ok = cudaCallSucceeded(cudaGetLastError(), "mainKernel launch");
    ok = cudaCallSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize") && ok;

    // Only read the managed object back when the device work completed cleanly.
    if (ok)
        std::cout << a->i << std::endl;

    // Release the managed allocation (the original never freed it).
    ok = cudaCallSucceeded(cudaFree(a), "cudaFree") && ok;
    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
这是一个简单的CUDA代码,以显示如何将成员函数指针传递给内核。 一切都在代码中进行了解释。
// Wraps a CUDA runtime call: forwards its cudaError_t result together with the
// call site's file and line to cudaErrorCheck, requesting termination on error.
#define gpuErrchk(val) \
cudaErrorCheck((val), __FILE__, __LINE__, true)

// Prints a diagnostic to stderr when `err` is not cudaSuccess.
//   err   - status returned by a CUDA runtime call
//   file  - source file of the call site (const: it receives the __FILE__ literal)
//   line  - source line of the call site
//   abort - when true, terminate the process with exit(-1) after reporting
void cudaErrorCheck(cudaError_t err, const char* file, int line, bool abort)
{
    if (err == cudaSuccess)
        return;

    fprintf(stderr, "%s %s %d\n", cudaGetErrorString(err), file, line);
    if (abort)
        exit(-1);
}
// Struct with one 'int' data member and one '__device__' member function.
struct ST
{
    int id;

    // Device-only: prints this object's id with device-side printf.
    __device__ void foo()
    {
        printf("value of id: %d\n", this->id);
    }
};
// Alias for a pointer to a member function of ST that takes no arguments and
// returns void. The 'ST::' qualifier marks the pointee as a member of ST
// rather than a free function.
typedef void (ST::*Fptr)();
// Templated kernel: calls the member function `f` on the object `o` points to.
// Every thread of the launch performs the call; there is no index guard.
template<typename Object, typename memberFunction>
__global__ void kernel(Object* o, memberFunction f)
{
    Object& target = *o;
    (target.*f)();
}
// Device-side variable of type Fptr, initialized with the address of 'ST::foo'.
// Being a '__device__' variable, it is also directly accessible from kernels;
// the host reads its value with cudaMemcpyFromSymbol.
__device__ Fptr fp = &ST::foo;
// Copies an ST object to the device, fetches the member-function pointer held
// in the '__device__' variable `fp`, and launches `kernel` so that the device
// calls ST::foo on the copied object. Every CUDA call goes through gpuErrchk.
int main(int argc, char** argv)
{
    // Host-side object whose contents are copied to the device.
    ST hostObj;
    hostObj.id = 10;

    // Device-side storage for one ST.
    ST* devObj;
    gpuErrchk(cudaMalloc(reinterpret_cast<void**>(&devObj), sizeof(ST)));

    // Host -> device copy of the object.
    gpuErrchk(cudaMemcpy(devObj, &hostObj, sizeof(ST), cudaMemcpyHostToDevice));

    // Read the value stored in the device symbol `fp` into a host variable so
    // that it can be passed to the kernel as an ordinary argument.
    Fptr memberFn;
    gpuErrchk(cudaMemcpyFromSymbol(&memberFn, fp, sizeof(Fptr)));

    // One block of one thread.
    const dim3 gridShape(1);
    const dim3 blockShape(1);
    kernel<ST, Fptr><<<gridShape, blockShape>>>(devObj, memberFn);

    // Check for a launch error, then wait for the kernel and check again.
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    // Release the device allocation.
    gpuErrchk(cudaFree(devObj));
    return 0;
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.