矩阵相乘的另一种算法是 FOX 算法，请写出 FOX 算法的并行程序。
#include "mpi.h"
#include <algorithm>
#include <fstream>
#include <cmath>
// Largest number of MPI processes a run may use; main() rejects any
// process count above this during its startup check.
static const int max_procs_size = 16;
// Process id 0. NOTE(review): presumably the rank that acts as the
// root/coordinator; its uses are outside this excerpt, confirm.
static const int root_id = 0;
int main(int argc,char *argv[])
{
double start_time, end_time, time;
int procs_id, procs_size;
MPI_Status status;
MPI_Request reqSend, reqRecv;
MPI_Init(&argc,&argv);
start_time = MPI_Wtime();
MPI_Comm_size(MPI_COMM_WORLD,&procs_size);
MPI_Comm_rank(MPI_COMM_WORLD,&procs_id);
// 参数检查
int N=0;
{
for (int i=1; i<argc; ++i ) ...{
char * pos =strstr(argv[i], "-N=");
if ( pos!=NULL) ...{
sscanf(pos, "-N=%d", &N);
break;
}
}
}
const int procs_size_sqrt = floor(sqrt(static_cast<double>(procs_size)));
const int n = N / procs_size_sqrt;
const int n_sqr = n*n;
if (procs_size<4 || procs_size> max_procs_size) ...{
printf("The fox algorithm requires at least 4 processors and at most %d processors. ",
max_procs_size);
MPI_Finalize();
return 0;
}