简体   繁体   English

OpenCL/C:结果始终为零

[英]OpenCL/C: results always zero

I'm using Ubuntu 20.04 to run my OpenCL program written in C, but whatever function I execute, the result is always zero or some other illogical value; it is as if the kernel is not running. If anyone has OpenCL installed, please try this program: I want to know whether the problem is in the program or in my OpenCL installation.我正在使用 Ubuntu 20.04 用 C 运行我的 OpenCL 程序,但是无论我执行什么函数,结果总是为零或其他不合逻辑的结果,就像内核没有运行一样。如果有人安装了 OpenCL,请尝试这个程序,我想知道问题是出在程序中还是出在 OpenCL 的安装中。 This is the program I use.这是我使用的程序。

#define CL_USE_DEPRECATED_OPENCL_1_2APIS
#include <stdio.h>
#include <stdlib.h>
 
#include <CL/cl.h>

#define MAX_SOURCE_SIZE (0x100000)
 
int main(void) {
    // Create the two input vectors
    int i;
    const int LIST_SIZE = 10;
    int* A = (int*)malloc(sizeof(int)*LIST_SIZE);
    int* B = (int*)malloc(sizeof(int)*LIST_SIZE);
    int* C = (int*)malloc(sizeof(int)*LIST_SIZE);
    for(i = 0; i < LIST_SIZE; i++) {
        A[i] = i;
        B[i] = LIST_SIZE - i;
        C[i] = 0;
    }
    
    // Load the kernel source code into the array source_str
    /*FILE *fp;
    char *source_str;
   
 
    fp = fopen("vector_add_kernel.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    size_t source_size;
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );*/
    //size_t source_size;

    const char* source_str =
        "__kernel void vector_add(__global int *A, __global int *B, __global int *C) {\n"
        "    int i = get_global_id(0);\n"
        "    if(i>=10) return;\n"
        "    C[i] = A[i]+B[i];\n"
        "}"
    ;
    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;   
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_ALL, 1, 
            &device_id, &ret_num_devices);
 
    // Create an OpenCL context
    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
 
    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device_id, 0, &ret);
 
    // Create memory buffers on the device for each vector 
    cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, 
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
            LIST_SIZE * sizeof(int), NULL, &ret);
 
    // Copy the lists A and B to their respective memory buffers
    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
    ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, 
            LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
 
    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1, 
            (const char **)&source_str, NULL, &ret);
 
    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
 
    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
 
    // Set the arguments of the kernel
 
   ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
 
    // Execute the OpenCL kernel on the list
    size_t local_item_size = 64; // Divide work items into groups of 64
    size_t global_item_size = ((LIST_SIZE+local_item_size-1)/local_item_size)*local_item_size;  // Process the entire lists
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 
            &global_item_size, &local_item_size, 0, NULL, NULL);
     clFinish(command_queue);
    // Read the memory buffer C on the device to the local variable C
    
    ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, 
            LIST_SIZE * sizeof(int), C, 0, NULL, NULL);
 
    // Display the result to the screen
    for(i = 0; i < LIST_SIZE; i++)
        printf("%d + %d = %d\n", A[i], B[i], C[i]);
    
    // Clean up
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(a_mem_obj);
    ret = clReleaseMemObject(b_mem_obj);
    ret = clReleaseMemObject(c_mem_obj);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);
    free(A);
    free(B);
    free(C);
    return 0;
}

The code looks oddly familiar to me... On my PC it works as intended.该代码对我来说看起来很熟悉......在我的电脑上它可以按预期工作。 I suppose the issue is that no OpenCL device is detected on your system.我想问题是在您的系统上没有检测到 OpenCL 设备。 With this code snippet, you can select a platform and device:使用此代码段,您可以选择平台和设备:

// Get platform and device information
    const int select_platform = 0;
    const int select_device = 0;
    cl_platform_id* platform_ids = NULL;
    cl_uint nr_platforms = 0;
    cl_device_id* device_ids = NULL;   
    cl_uint nr_devices = 0;
    cl_device_id device_id = NULL;
    cl_int ret = clGetPlatformIDs(0, NULL, &nr_platforms);
    if(ret!=CL_SUCCESS || nr_platforms==0) std::cerr << "no OpenCL platforms found" << std::endl;
    platform_ids = (cl_platform_id*)malloc(nr_platforms * sizeof(*platform_ids));
    ret = clGetPlatformIDs(nr_platforms, platform_ids, &nr_platforms);
    ret = clGetDeviceIDs(platform_ids[select_platform], CL_DEVICE_TYPE_ALL, 0, device_ids, &nr_devices);
    if(ret!=CL_SUCCESS || nr_devices==0) std::cerr << "no OpenCL devices found on that platform" << std::endl;
    device_ids = (cl_device_id*)malloc(nr_devices * sizeof(*device_ids));
    ret = clGetDeviceIDs(platform_ids[select_platform], CL_DEVICE_TYPE_ALL, nr_devices, device_ids, &nr_devices);
    device_id = device_ids[select_device];
    char name[1024];
    clGetDeviceInfo(device_id, CL_DEVICE_NAME, 1024, name, NULL);
    std::cout << "selected device: " << name << std::endl;

Check different values of select_platform and select_device.检查 select_platform 和 select_device 的不同值。 If there is no device found on any platform, make sure to have compatible hardware and the latest graphics drivers or CPU runtime installed.如果在任何平台上都找不到设备,请确保安装了兼容的硬件和最新的图形驱动程序或 CPU 运行时。


To make OpenCL development much less painful, consider this OpenCL-Wrapper.为了减少 OpenCL 开发的痛苦,请考虑这个 OpenCL-Wrapper。 This automatically selects the fastest available OpenCL device for you in 1 line of code:这会在 1 行代码中自动为您选择最快的可用 OpenCL 设备:

// Construct `device` from the result of select_device_with_most_flops()
// (described in the surrounding text as picking the fastest available OpenCL
// device; both names come from the external OpenCL-Wrapper, not this page).
Device device(select_device_with_most_flops());

Alternatively, you can automatically select the device with the most memory:或者,您可以自动选择内存最多的设备:

// Construct `device` from the result of select_device_with_most_memory()
// (described in the surrounding text as picking the device with the most
// memory; both names come from the external OpenCL-Wrapper, not this page).
Device device(select_device_with_most_memory());

or a device with specified ID:或具有指定 ID 的设备:

// Construct `device` from the result of select_device_with_id(1), i.e. the
// device with the specified ID 1 per the surrounding text.
// NOTE(review): how IDs are numbered is defined by the external
// OpenCL-Wrapper — confirm against its documentation.
Device device(select_device_with_id(1));

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM