OpenCL 使用示例(demo)
时间: 2023-07-11 12:53:01 浏览: 164
以下是一个简单的 OpenCL 使用示例,它执行向量加法:
```c
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>
#define LENGTH 1024
// OpenCL C source text for the "vectorAdd" kernel; main() hands this string to
// clCreateProgramWithSource so it is compiled at run time.
// Each work-item takes its global id in dimension 0 as the index i and stores
// a[i] + b[i] into c[i]. The kernel performs no bounds check, so the global
// work size used at launch must not exceed the element count of the buffers.
const char *kernelSource =
"__kernel void vectorAdd(__global float *a, __global float *b, __global float *c) {\n"
" int i = get_global_id(0);\n"
" c[i] = a[i] + b[i];\n"
"}\n";
int main() {
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_program program;
cl_kernel kernel;
cl_mem a, b, c;
cl_int err;
int i;
float *A, *B, *C;
// 初始化 A, B, C 数组
A = (float*) malloc(sizeof(float) * LENGTH);
B = (float*) malloc(sizeof(float) * LENGTH);
C = (float*) malloc(sizeof(float) * LENGTH);
for (i = 0; i < LENGTH; i++) {
A[i] = (float) i;
B[i] = (float) (LENGTH - i);
C[i] = 0.0f;
}
// 获取平台和设备
err = clGetPlatformIDs(1, &platform, NULL);
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
// 创建 OpenCL 上下文和命令队列
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
queue = clCreateCommandQueue(context, device, 0, &err);
// 创建和编译内核程序
program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err);
err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
// 创建内核
kernel = clCreateKernel(program, "vectorAdd", &err);
// 创建和设置缓冲区
a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * LENGTH, NULL, &err);
b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * LENGTH, NULL, &err);
c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH, NULL, &err);
err = clEnqueueWriteBuffer(queue, a, CL_TRUE, 0, sizeof(float) * LENGTH, A, 0, NULL, NULL);
err = clEnqueueWriteBuffer(queue, b, CL_TRUE, 0, sizeof(float) * LENGTH, B, 0, NULL, NULL);
// 设定内核参数
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a);
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c);
// 执行内核
size_t globalSize = LENGTH;
size_t localSize = 64;
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL);
// 读取结果
err = clEnqueueReadBuffer(queue, c, CL_TRUE, 0, sizeof(float) * LENGTH, C, 0, NULL, NULL);
// 打印结果
for (i = 0; i < LENGTH; i++) {
printf("%f + %f = %f\n", A[i], B[i], C[i]);
}
// 释放内存和资源
free(A);
free(B);
free(C);
clReleaseMemObject(a);
clReleaseMemObject(b);
clReleaseMemObject(c);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
```
这个程序的作用是将两个向量相加,并打印结果。程序首先初始化了三个数组 A, B, C,然后获取 OpenCL 平台和设备,创建上下文和命令队列,创建和编译内核程序,创建内核,创建和设置缓冲区,设定内核参数,执行内核,读取结果,打印结果,最后释放内存和资源。
阅读全文