簡體   English   中英

clCloneKernel 導致分段錯誤

[英]clCloneKernel causes a segmentation fault

我正在試驗 clCloneKernel 以查看內核如何被多個主機線程使用。 OpenCL 規范聲明設置內核參數(除其他外)不是線程安全的。 因此,如果多個主機線程需要調用同一個內核,則使用 clCloneKernel 應該可以提供解決方法。

問題在於,一旦在初始化的 cl_kernel 對象上調用 clCloneKernel(無論是在設置參數之前還是之后),都會導致程序發生段錯誤。

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

// The rest of this file uses cout/cerr/endl/string unqualified.
using namespace std;

// Capacity of the device-ID array in main(); also the maximum number of
// entries requested from clGetDeviceIDs.
const int MAXNUMDEV = 10;

// OpenCL C source for the "hello" kernel: each work item prints its global
// ID and its work-group ID. The trailing backslashes splice the following
// lines into one string literal, so the source reaches the OpenCL compiler
// as a single line of text.
string kernSource = "       \
kernel void hello()   \
{                     \
   int ID = get_global_id(0);  \
   int grID = get_group_id(0);  \
   printf(\"Work item %i from group %i says hello!\\n\", ID, grID); \
}";

//============================================
// Releases every non-null OpenCL handle passed in. Objects are released in
// the order kernel, program, command queue, context; null handles are skipped
// so the function can be called from any point of a partially completed setup.
void cleanUp (cl_context c, cl_command_queue q, cl_program p, cl_kernel k)
{
  if (k) { clReleaseKernel (k); }
  if (p) { clReleaseProgram (p); }
  if (q) { clReleaseCommandQueue (q); }
  if (c) { clReleaseContext (c); }
}
//============================================
int main ()
{
  cl_int errNum;
  cl_uint numPlatforms;
  cl_platform_id firstPlatformId;
  cl_device_id devID[MAXNUMDEV];
  cl_uint numDev;
  cl_context cont = 0;          // initialize for cleanup check
  cl_command_queue q = 0;
  cl_program pr = 0;
  cl_kernel kernel = 0;

  // Get a reference to an object representing a platform 
  errNum = clGetPlatformIDs (1, &firstPlatformId, &numPlatforms);
  if (errNum != CL_SUCCESS || numPlatforms <= 0)
    {
      cerr << "Failed to find any OpenCL platforms." << endl;
      return 1;
    }

  // Get the device IDs matching the CL_DEVICE_TYPE parameter, up to the MAXNUMDEV limit
  errNum = clGetDeviceIDs (firstPlatformId, CL_DEVICE_TYPE_ALL, MAXNUMDEV, devID, &numDev);
  if (errNum != CL_SUCCESS || numDev <= 0)
    {
      cerr << "Failed to find any OpenCL devices." << endl;
      return 2;
    }

  char devName[100];
  size_t nameLen;
  for (int i = 0; i < numDev; i++)
    {
      errNum = clGetDeviceInfo (devID[i], CL_DEVICE_NAME, 100, (void *) devName, &nameLen);
      if (errNum == CL_SUCCESS)
        cout << "Device " << i << " is " << devName << endl;
    }


  cl_context_properties prop[] = {
    CL_CONTEXT_PLATFORM,
    (cl_context_properties) firstPlatformId,
    0                           // termination
  };

  cont = clCreateContext (prop, numDev, devID, NULL,    // no callback function
                          NULL, // no data for callback
                          &errNum);
  if (errNum != CL_SUCCESS)
    {
      cerr << "Failed to create a context." << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }

  cl_queue_properties qprop[] = {
    CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
    0
  };
  q = clCreateCommandQueueWithProperties (cont, devID[0], qprop, &errNum);
  if (errNum != CL_SUCCESS)
    {
      cerr << "Failed to create a command queue" << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }

  const char *src = kernSource.c_str ();
  size_t len = kernSource.size ();
  pr = clCreateProgramWithSource (cont, 1, (const char **) (&src), &len, &errNum);
  if (errNum != CL_SUCCESS)
    {
      cerr << "Failed to create program." << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }

  errNum = clBuildProgram (pr, 1, devID, NULL, NULL, NULL);
  if (errNum != CL_SUCCESS)
    {
      cerr << "Failed to build program" << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }


  kernel = clCreateKernel (pr, "hello", &errNum);
  if (errNum != CL_SUCCESS || kernel == NULL)
    {
      cerr << "Failed to create kernel" << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }

  cl_kernel copyKern = clCloneKernel(kernel, &errNum); // <<<<<<<<<<<<<<<

  // work item index space and group size setup
  size_t idxSpace[] = { 12 };
  size_t localWorkSize[] = { 3 };

  cl_event completeEv;
  errNum = clEnqueueNDRangeKernel (q, kernel, 1, NULL, idxSpace, localWorkSize, 0, NULL, &completeEv);

  // wait for enqueued command to finish
  clWaitForEvents (1, &completeEv);

  cleanUp (cont, q, pr, kernel);
  return 0;
}

clCloneKernel() 是在 OpenCL 2.1 中引入的。 您的 OpenCL 平台是否實現了該版本的標准? 我懷疑它可能沒有實現,因此才會崩潰。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM