多线程优化后的矩阵乘法示例代码
时间: 2023-08-20 19:04:06 浏览: 32
以下是使用多线程优化矩阵乘法程序的示例代码:
```
#include <sys/time.h>

#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <mutex>
#include <random>
#include <thread>
#include <vector>
// Compile-time configuration. Typed constants are used instead of object-like
// macros so the names obey normal scoping rules and cannot silently rewrite
// unrelated identifiers (MIN / MAX are especially collision-prone).
constexpr int M = 1024;          // rows of A; res is M x M
constexpr int N = 4096;          // columns of A (length of each dot product)
constexpr int MIN = 1;           // lower bound of the random matrix entries
constexpr int MAX = 50;          // upper bound of the random matrix entries
constexpr int NUM_THREADS = 4;   // number of worker threads started by main

using namespace std;

// Operands and result, stored as flat arrays with static storage duration.
double A[M * N];
double B[N * M];
double res[M * M];

// Synchronisation state shared between main and the workers.
mutex mtx;                 // guards `finished`
condition_variable cv;     // signalled when a worker reports completion
atomic<int> task_id(0);    // next row index of res to be claimed by a worker
bool finished = false;     // set once all workers are done
void matmul(int tid) {
while (true) {
int id = task_id.fetch_add(1);
if (id >= M) {
break;
}
for (int j = 0; j < M; ++j) {
double t = 0.0;
for (int k = 0; k < N; ++k) {
t += A[id * N + k] * B[j * M + k];
}
mtx.lock();
res[id * M + j] = t;
mtx.unlock();
}
}
unique_lock<mutex> ul(mtx);
if (--NUM_THREADS == 0) {
finished = true;
}
cv.notify_one();
}
int main(int argc, const char * argv[]) {
// random_device rd;
default_random_engine eng(30);
uniform_real_distribution<double> distr(MIN, MAX);
// init matrix
for (int i = 0; i < M * N; ++i) {
A[i] = distr(eng);
}
for (int i = 0; i < M * N; ++i) {
B[i] = distr(eng);
}
struct timeval start, end;
gettimeofday(&start, NULL);
// matmul
vector<thread> threads(NUM_THREADS);
for (int i = 0; i < NUM_THREADS; ++i) {
threads[i] = thread(matmul, i);
}
unique_lock<mutex> ul(mtx);
while (!finished) {
cv.wait(ul);
}
gettimeofday(&end, NULL);
printf("Using time : %f ms\n", (end.tv_sec - start.tv_sec) * 1000 + (end.tv_usec - start.tv_usec) / 1000.0);
// output
FILE* out = fopen("result_4096.txt", "w");
for (int i = 0; i < M; ++i) {
for (int j = 0; j < N; ++j) {
fprintf(out, "%.6lf\t", res[i * M + j]);
}
fprintf(out, "\n");
}
// free
fclose(out);
return 0;
}
```
该程序使用4个工作线程并行计算矩阵乘法:主线程负责创建工作线程并等待它们全部完成;各工作线程通过原子计数器(fetch_add)动态领取结果矩阵的行号,计算该行的所有元素并写入结果矩阵。程序使用原子操作保证每一行只会被一个线程领取,使用互斥锁和条件变量在线程之间进行同步。该程序使用了C++11标准库中的<thread>、<mutex>、<condition_variable>和<atomic>,也可以改用其他线程库来实现。