首页用C++ CUDA实现向量所有元素求和。

用C++ CUDA实现向量所有元素求和。

时间: 2023-02-21 20:40:05 浏览: 17

下面是一份用C语言 + CUDA实现向量所有元素求和的代码示例：

```
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>

/*
 * Evaluate a CUDA runtime call; on failure print the location and the
 * runtime's error string to stderr, then terminate the process.
 */
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t err_ = (call);                                        \
        if (err_ != cudaSuccess) {                                        \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(err_));                            \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)

/*
 * Accumulate vec[0..n) into *res.
 *
 * Launch layout: 1D grid of 1D blocks providing at least n threads in total;
 * threads whose index is >= n do nothing. No shared memory is used.
 * Precondition: *res must already hold the initial value (0 for a plain sum).
 *
 * Every in-range thread issues one atomicAdd on the same address, so the
 * order of the float additions is not fixed and the last bits of the result
 * may differ between runs.
 */
__global__ void sum_vector_kernel(const float *vec, float *res, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        atomicAdd(res, vec[i]);
    }
}

/*
 * Sum the n floats in host array vec on the GPU and return the total.
 *
 * vec : host pointer to at least n floats.
 * n   : element count; returns 0.0f without touching the GPU when n <= 0
 *       or vec is NULL (a zero-block launch would be an invalid configuration).
 *
 * Any CUDA runtime failure is reported on stderr and terminates the process.
 */
float sum_vector_on_gpu(const float *vec, int n) {
    if (vec == NULL || n <= 0) {
        return 0.0f;
    }

    /* Both must be pointers: cudaMalloc writes a device address into each. */
    float *dev_vec = NULL;
    float *dev_res = NULL;
    const size_t bytes = (size_t)n * sizeof(float);

    CUDA_CHECK(cudaMalloc((void **)&dev_vec, bytes));
    CUDA_CHECK(cudaMalloc((void **)&dev_res, sizeof(float)));
    CUDA_CHECK(cudaMemcpy(dev_vec, vec, bytes, cudaMemcpyHostToDevice));
    /* All-zero bytes are the float value 0.0f, so a byte-wise memset is valid here. */
    CUDA_CHECK(cudaMemset(dev_res, 0, sizeof(float)));

    const int block_size = 256;
    /* Ceil-division done in size_t so n + block_size - 1 cannot overflow int. */
    const int num_blocks = (int)(((size_t)n + block_size - 1) / block_size);

    sum_vector_kernel<<<num_blocks, block_size>>>(dev_vec, dev_res, n);
    /* Launch-configuration errors are only visible through cudaGetLastError. */
    CUDA_CHECK(cudaGetLastError());

    float res = 0.0f;
    /* Blocking copy on the default stream: completes after the kernel has finished. */
    CUDA_CHECK(cudaMemcpy(&res, dev_res, sizeof(float), cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaFree(dev_vec));
    CUDA_CHECK(cudaFree(dev_res));
    return res;
}

int main() {
    float vec[] = {1, 2, 3, 4, 5, 6, 7, 8};
    int n = sizeof(vec) / sizeof(vec[0]);

    float res = sum_vector_on_gpu(vec, n);
    printf("Sum of the vector: %f\n", res);
    return 0;
}
```

这份代码利用了 CUDA 的并行计算能力，通过在 GPU 上执行 `sum_vector_kernel` 函数来计算向量元素的和。该函数使用了 `blockIdx.x` 和 `threadIdx.x` 来确定每个线程在数组中的索引，并使用 `atomicAdd` 操作对结果进行累加。最后，将结果从 GPU 设备内存拷贝到主机内存，并返回结果。

阅读全文