// // mpithreadpi.cpp // Neil Gershenfeld 3/3/20 // calculation of pi by an MPI thread sum // pi = 3.14159265358979323846 // #include <iostream> #include <chrono> #include <thread> #include <vector> #include <cstdint> #include <mpi.h> #define nloop 10 #define npts 1e9 unsigned int nthreads = std::thread::hardware_concurrency(); std::vector<double> results(nthreads); void partialsum(int index, int rank) { uint64_t start = npts*index+npts*nthreads*rank+1; uint64_t end = npts*(index+1)+npts*nthreads*rank+1; double result = 0; for (uint64_t i = start; i < end; ++i) result += 0.5/((i-0.75)*(i-0.25)); results[index] = result; } int main(int argc, char** argv) { double sum,pi,mflops,max; int i,j,rank,nproc; std::thread threads[nthreads]; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&nproc); if (rank == 0) { for (j = 0; j < nloop; ++j) { MPI_Barrier(MPI_COMM_WORLD); auto tstart = std::chrono::high_resolution_clock::now(); sum = 0.0; for (int i = 0; i < nthreads; ++i) threads[i] = std::thread(partialsum,i,rank); for (int i = 0; i < nthreads; ++i) { threads[i].join(); sum += results[i]; } MPI_Reduce(&sum,&pi,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); auto tend = std::chrono::high_resolution_clock::now(); auto dt = std::chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count(); auto mflops = npts*nthreads*nproc*5.0/dt; if (mflops > max) max = mflops; std::cout << "npts: " << npts << " nthreads: " << nthreads << " nproc: " << nproc << " pi: " << pi << '\n'; std::cout << "time: " << 1e-6*dt << " estimated MFlops: " << mflops << " max: " << max << '\n'; } } else { for (j = 0; j < nloop; ++j) { MPI_Barrier(MPI_COMM_WORLD); sum = 0.0; for (int i = 0; i < nthreads; ++i) threads[i] = std::thread(partialsum,i,rank); for (int i = 0; i < nthreads; ++i) { threads[i].join(); sum += results[i]; } MPI_Reduce(&sum,&pi,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); } } MPI_Finalize(); return 0; }