简体   繁体   English

如何在cuda中使用虚类?

[英]How to use virtual class in cuda?

I wrote a simple CUDA program to test whether I can copy a vector of pointers to class objects to the GPU. 我写了一个简单的cuda代码来测试我是否可以将指向类对象的指针向量复制到GPU。

This is what I have: 这就是我所拥有的:

test.hpp: test.hpp:

// Abstract base class of the question's hierarchy (test.hpp).
// Declares one pure virtual member, getClass(), plus a virtual destructor.
class Class {
public:
    // Constructor and destructor have no __device__ qualifier.
    Class() {};
    virtual ~Class() {};

    // Non-virtual helper callable from host and device code; always returns 42.
    __host__ __device__ int print() { return 42; };
    // Pure virtual integer tag that each derived class is expected to supply.
    __host__ __device__ virtual int getClass() const = 0;
};                                                                                                                                                                                       

// Derived class whose getClass() tag is 0.
// NOTE(review): className (std::string) and classScores (std::vector<int>) are
// host standard-library containers; STL containers are not usable in device
// code, so a raw byte copy of this object to the GPU does not make them readable there.
class AlphaClass : public Class
{
public:
    // Host-side constructor: sets the name and average, then appends
    // classSize (100) scores computed as (90+i)%100.
    AlphaClass() {
      className = "Alpha";
      classAvgGrade = 90;
      classSize = 100;
      for(int i = 0; i < classSize; i++){
          classScores.push_back((90+i)%100);
      } 
    };
    ~AlphaClass() { };

    std::string className;        // host-only container (see note above)
    int classAvgGrade;
    int classSize; 
    std::vector<int> classScores; // host-only container (see note above)

    // Increments classSize only; classScores is not touched.
    __host__ __device__ void incClassSize(){
        classSize++;
    };

    // Overrides Class::getClass(); returns the tag 0.
    __host__ __device__ virtual int getClass() const{
        return 0;
    }; 
};  


// Derived class whose getClass() tag is 1.
// NOTE(review): className (std::string) and classScores (std::vector<int>) are
// host standard-library containers; STL containers are not usable in device
// code, so a raw byte copy of this object to the GPU does not make them readable there.
class BetaClass : public Class
{                                                                                                                                                                                        
public:
    // Host-side constructor: sets the name and average, then appends
    // classSize (120) scores computed as (80+i)%100.
    BetaClass() {
      className = "Beta";
      classAvgGrade = 80;
      classSize = 120;
      for(int i = 0; i < classSize; i++){
          classScores.push_back((80+i)%100);
      } 
    }
    ~BetaClass() { };
    std::string className;        // host-only container (see note above)
    int classAvgGrade;
    int classSize;
    std::vector<int> classScores; // host-only container (see note above)

    // Increments classSize only; classScores is not touched.
    __host__ __device__ void incClassSize(){
        classSize++;
    }   

    // Overrides Class::getClass(); returns the tag 1.
    __host__ __device__ virtual int getClass() const{
        return 1;
    };

};


// Third derived class of the question's hierarchy.
// NOTE(review): unlike AlphaClass and BetaClass, no getClass() override is
// declared here. Class::getClass() is pure virtual, so as posted this class is
// still abstract and cannot be instantiated.
// NOTE(review): className / classScores are host standard-library containers
// and are not usable from device code.
class CudaClass : public Class
{     
public:
    // Host-side constructor: sets the name and average, then appends
    // classSize (110) scores that are all 70.
    CudaClass() {
      className = "Cuda";
      classAvgGrade = 70;
      classSize = 110;
      for(int i = 0; i < classSize; i++){
          classScores.push_back(70);
      }   
    };
    ~CudaClass() {
        //do nothing
    };

    std::string className;
    int classAvgGrade;
    int classSize;
    std::vector<int> classScores;

    // Increments classSize only; classScores is not touched.
    __host__ __device__ void incClassSize(){
        classSize++;
    };
 };

test.cpp: test.cpp:

// Parameter bundle placed in constant memory for the kernel to read.
struct GlobalConstants {
    // Array of base-class pointers; the host code fills it from a
    // thrust::device_vector<Class*> via raw_pointer_cast.
    Class** classList;
};  

// Constant-memory instance of GlobalConstants; written from the host with
// cudaMemcpyToSymbol and read inside useClass().
__constant__ GlobalConstants cuConstRaytracerParams;


   // Kernel that reads the class list from constant memory and prints the
   // name and average grade of the first three entries.
   // The C-style casts assume entries 0, 1, 2 are an AlphaClass, a BetaClass
   // and a CudaClass respectively; nothing in the kernel verifies that.
   __global__ void useClass()
  {

    Class** deviceClassList = cuConstRaytracerParams.classList;
    AlphaClass* alpha = (AlphaClass*) deviceClassList[0];
    BetaClass* beta = (BetaClass*) deviceClassList[1];
    CudaClass* cuda = (CudaClass*) deviceClassList[2];

    // NOTE(review): className is declared as std::string, but "%s" expects a
    // C string pointer; std::string is also a host-only container.
    // NOTE(review): `cuda->ClassName` (capital C) does not match the member
    // `className` declared in CudaClass.
    printf("%s\n", alpha->className);
    printf("%s\n", beta->className); 
    printf("%s\n", cuda->ClassName);

    printf("alpha avg = %d\n", alpha->classAvgGrade);
    printf("beta avg = %d\n", beta->classAvgGrade);
    printf("cuda avg = %d\n", cuda->classAvgGrade);

  };  


...



    // Host-side fragment (enclosing function not shown): builds three objects,
    // copies each to device memory, publishes the device pointer array through
    // constant memory and launches useClass().
    AlphaClass *alpha;
    alpha = new AlphaClass();
    BetaClass *beta;
    beta = new BetaClass();
    CudaClass *cuda;
    cuda = new CudaClass();
    std::vector<Class*> classes;
    classes.push_back(alpha);
    classes.push_back(beta);
    classes.push_back(cuda);

    AlphaClass* alpha_ptr;
    BetaClass* beta_ptr;
    CudaClass* cuda_ptr;

    // copy classes to cuda
    // Dispatches on the getClass() tag to pick the allocation size.
    // NOTE(review): the copy source is `&classes[i]`, i.e. the address of the
    // Class* slot inside the vector, not the object it points to; the object
    // itself would be `classes[i]`. Each cudaMemcpy therefore reads
    // sizeof(<Derived>) bytes starting at a pointer-sized slot.
    // NOTE(review): return codes of cudaMalloc / cudaMemcpy are not checked.
    thrust::device_vector<Class*> deviceClassList;
    for(int i = 0; i < classes.size(); i++){
        if(classes[i]->getClass() == 0){
            cudaMalloc(&alpha_ptr, sizeof(AlphaClass));
            cudaMemcpy(alpha_ptr, &classes[i],sizeof(AlphaClass), cudaMemcpyHostToDevice);
            deviceClassList.push_back(alpha_ptr);

        }else if(classes[i]->getClass() == 1){
            cudaMalloc(&beta_ptr, sizeof(BetaClass));
            cudaMemcpy(beta_ptr, &classes[i],sizeof(BetaClass), cudaMemcpyHostToDevice);
            deviceClassList.push_back(beta_ptr);


        }else if(classes[i]->getClass() == 2){
            // NOTE(review): no class shown in test.hpp returns 2 from getClass().
            cudaMalloc(&cuda_ptr, sizeof(CudaClass));
            cudaMemcpy(cuda_ptr, &classes[i],sizeof(CudaClass), cudaMemcpyHostToDevice);
            deviceClassList.push_back(cuda_ptr);

        }else{
            //do nothing
        }
    }
    // Raw device address of the pointer array held by the device_vector.
    Class** class_ptr = thrust::raw_pointer_cast(&deviceClassList[0]);

    //ADD IT TO CUDA PARAM STRUCT
    GlobalConstants params;
    params.classList = class_ptr;
    cudaMemcpyToSymbol(cuConstRaytracerParams, &params, sizeof(GlobalConstants));

    // Single-thread launch; the result of the launch and of the synchronize
    // call is not inspected here.
    useClass<<<1,1>>>();
    cudaDeviceSynchronize();

    ...cleanup code

When I run this, I don't get the correct values; instead I get the following results: 当我运行这个时,我没有得到正确的值,而是得到以下结果:

alpha avg = 39696816 alpha avg = 39696816

beta avg = 70 beta avg = 70

cuda avg = 0 cuda avg = 0

And I don't get any output for the strings. 而且字符串没有任何输出。

The OP raises several questions. OP提出了几个问题。 The main question is in the title "How to use virtual class in cuda?". 主要问题在标题“如何在cuda中使用虚类?”中。 An unrelated question is how to use strings in cuda code. 一个不相关的问题是如何在cuda代码中使用字符串。 I will focus mainly on the question in the title. 我将主要关注标题中的问题。

According to the CUDA C Programming Guide you can use virtual functions, but with limitations. 根据CUDA C编程指南,您可以使用虚函数,但有限制。 The limitation you run into is: 你遇到的限制是:

It is not allowed to pass as an argument to a __global__ function an object of a class derived from virtual base classes. 不允许将派生自虚基类的类的对象作为参数传递给__global__函数。

In your example code you try to avoid this by passing the object (array of device pointers) via constant memory. 在您的示例代码中,您尝试通过常量内存传递对象(设备指针数组)来避免这种情况。 However, I think the programming guide is just not precise here. 不过我认为编程指南在这里并不准确。 I think it is not possible to copy an object of a class derived from virtual base classes to the device. 我认为不可能将从虚基类派生的类的对象复制到设备。 The problem is (as far as I understand) that you will copy the object's pointer to the host virtual function table to the device. 问题是(据我所知)您会把对象中指向主机虚函数表的指针复制到设备。

The example code is much too complicated (and has other problems) to demonstrate the behaviour. 示例代码太复杂(并且有其他问题)来演示行为。 The following more simplified code shows what you can do with virtual functions in cuda: 以下更简化的代码显示了您可以使用cuda中的虚拟函数执行的操作:

#include <stdio.h>

// Abstract interface used by the demonstration: anything that can report a
// number. Both members are usable from host and device code.
class Class
{
public:
    // Virtual destructor so that deleting through a Class* destroys the
    // derived object.
    __host__ __device__ virtual ~Class() {}

    // Pure virtual accessor; every concrete subclass must implement it.
    __host__ __device__ virtual int getNumber() = 0;
};

// Concrete Class holding a single integer, initialised to 0.
class ClassA : public Class
{
public:
    // Default constructor, callable on host and device.
    __host__ __device__ ClassA() : aNumber(0) {}

    // Implements Class::getNumber(); reports the stored value.
    __host__ __device__ virtual int getNumber() { return aNumber; }

    int aNumber;
};

// Concrete Class holding two integers (initialised to 1 and 2); only the
// first one is reported through getNumber().
class ClassB : public Class
{
public:
    // Default constructor, callable on host and device.
    __host__ __device__ ClassB() : aNumber(1), anotherNumber(2) {}

    // Implements Class::getNumber(); reports aNumber.
    __host__ __device__ virtual int getNumber() { return aNumber; }

    int aNumber;
    int anotherNumber;
};

// Performs a virtual call through a base-class pointer supplied by the host
// and prints the result. main() passes a device buffer holding a byte copy of
// a host-constructed ClassA.
__global__ void invalidClassKernel( Class* superClass )
{
    const int number = superClass->getNumber();
    printf( "superClass->getNumber(): %d\n", number );
}

// Creates one ClassA and one ClassB with device-side `new`, stores them as
// base-class pointers, prints what each virtual getNumber() returns, and
// deletes both objects again.
__global__ void validClassKernel()
{
    Class* classVector[2] = { new ClassA(), new ClassB() };

    // Virtual dispatch through the base-class pointers.
    const int numberFromA = classVector[0]->getNumber();
    const int numberFromB = classVector[1]->getNumber();

    printf( "classVector[0]->getNumber(): %d\n", numberFromA );
    printf( "classVector[1]->getNumber(): %d\n", numberFromB );

    for( int slot = 0; slot < 2; ++slot )
    {
        delete classVector[slot];
    }
}

// Host driver for the demonstration.
//
// 1. Byte-copies a host-constructed ClassA and ClassB into device memory.
// 2. Runs validClassKernel(), which builds its objects on the device.
// 3. Runs invalidClassKernel() on the copied ClassA; an error reported there
//    is the point of the demonstration and does not change the exit code.
//
// Returns 1 if the device buffers cannot be prepared, 0 otherwise.
int main()
{
    ClassA hostClassA;
    ClassB hostClassB;

    // Each device pointer has the type of the object copied into it
    // (devClassB holds a ClassB, so it is a ClassB*). Starting from NULL
    // keeps the cudaFree() calls on the failure path safe.
    ClassA* devClassA = NULL;
    ClassB* devClassB = NULL;

    // Stop at the first failing setup call instead of launching kernels on
    // buffers that were never allocated or filled.
    cudaError_t error = cudaMalloc( &devClassA, sizeof(ClassA) );
    if(error==cudaSuccess)
    {
        error = cudaMalloc( &devClassB, sizeof(ClassB) );
    }
    if(error==cudaSuccess)
    {
        error = cudaMemcpy( devClassA, &hostClassA, sizeof( ClassA ), cudaMemcpyHostToDevice );
    }
    if(error==cudaSuccess)
    {
        error = cudaMemcpy( devClassB, &hostClassB, sizeof( ClassB ), cudaMemcpyHostToDevice );
    }
    if(error!=cudaSuccess)
    {
        fprintf(stderr,"ERROR: device setup: %s\n", cudaGetErrorString(error) );
        cudaFree( devClassA );
        cudaFree( devClassB );
        return 1;
    }

    validClassKernel<<<1,1>>>();
    cudaDeviceSynchronize();
    error = cudaGetLastError();
    if(error!=cudaSuccess)
    {
        fprintf(stderr,"ERROR: validClassKernel: %s\n", cudaGetErrorString(error) );
    }

    invalidClassKernel<<<1,1>>>( devClassA );
    cudaDeviceSynchronize();
    error = cudaGetLastError();
    if(error!=cudaSuccess)
    {
        fprintf(stderr,"ERROR: invalidClassKernel: %s\n", cudaGetErrorString(error) );
    }

    // Release the device buffers. If the kernel above faulted, these calls
    // may themselves report that earlier error; their result is deliberately
    // ignored because the process is about to exit.
    cudaFree( devClassA );
    cudaFree( devClassB );
    return 0;
}

The validClassKernel() shows how you can store pointers to derived objects in an array of base class pointers and call the virtual function getNumber(). validClassKernel()显示了如何在基类指针数组中存储派生对象的指针并调用虚函数getNumber()。 In this example the objects are created in device code. 在此示例中,对象是在设备代码中创建的。

The invalidClassKernel() shows that you cannot use in device code a copy of an object that was created on the host and whose class derives from a base class with virtual functions. invalidClassKernel()表明您不能在设备代码中使用在主机上创建的、其类派生自带有虚函数的基类的对象的副本。 The code compiles, but the kernel fails with the error "an illegal memory access was encountered". 代码可以编译,但内核运行失败,并报告错误“an illegal memory access was encountered”。 Very likely this is the main problem in the original example code. 这很可能是原始示例代码中的主要问题。


Other problems: 其他问题:

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM