[英]OpenCL - Kernel method returns unexpected results
我是OpenCL的初學者。 我試圖運行一個非常簡單的內核代碼,將vector的每個值加1。 一切運行正常,不返回任何錯誤代碼(我在每個步驟之后檢查了返回值)。 源代碼:
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj , resobj = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_platform_id platform_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
size_t work_units_per_kernels;
int input[10] = {1,2,3,4,5,6,7,8,9,10};
int output[10];
int length = 10 ;
FILE *fp;
char fileName[] = "/home/tuan/OpenCLPlayaround/hello.cl";
char *source_str;
size_t source_size;
/* Load the source code containing the kernel*/
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(0x100000);
source_size = fread(source_str,1,0x100000, fp);
fclose(fp);
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
std::cout<<ret<<" code"<<std::endl;
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
std::cout<<ret<<" code"<<std::endl;
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
std::cout<<ret<<" code"<<std::endl;
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
//Check Concept of memory
memobj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,length * sizeof(int), input, &ret);
resobj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length * sizeof(int), output, &ret);
std::cout<<ret<<" code"<<std::endl;
program = clCreateProgramWithSource(context,1,(const char**)&source_str, (const size_t*)&source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
kernel = clCreateKernel(program, "hello", &ret);
ret = clSetKernelArg(kernel,0, sizeof(memobj),(void *)&memobj);
ret = clSetKernelArg(kernel,1, sizeof(resobj),(void *)&resobj);
ret = clEnqueueTask(command_queue, kernel, 0, NULL,NULL);
ret = clEnqueueReadBuffer(command_queue, resobj, CL_TRUE, 0, length* sizeof(int),output, 0, NULL, NULL);
for (int i = 0 ; i <10 ; i++) {
std::cout<<output[i]<<" "<<std::endl;
}
return 0;
結果有些奇怪,而應該是{2,3,4,5,6,7,8,9,10,11}:
2
-16777216
65535
1
-1242789408
32767
4201449
0
2
0
而我的內核:
__kernel void hello(__global int* a, __global int* b)
{
int sam = 0;
int gid = get_global_id(0);
b[gid] = sam + a[gid] +1 ;
}
有人可以解釋為什么嗎? 好幾個小時我的頭都爆了!
clEnqueueTask
等效於調用clEnqueueNDRangeKernel
其中work_dim = 1
, global_work_offset = NULL
, global_work_size[0]
設置為1
, local_work_size[0]
設置為1
。
因此請使用clEnqueueNDRangeKernel
。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.