//
// threadpi.cpp
// Neil Gershenfeld 3/1/20
// calculation of pi by a C++ thread sum
// pi = 3.14159265358979323846 
//
#include <iostream>
#include <chrono>
#include <thread>
#include <vector>
unsigned int npts = 2e7;
unsigned int nthreads = std::thread::hardware_concurrency();
std::vector<double> results(nthreads);
void sum(int index) {
   unsigned int start = npts*index+1;
   unsigned int end = npts*(index+1)+1;
   double result = 0;
   for (unsigned int i = start; i < end; ++i)
      result += 0.5/((i-0.75)*(i-0.25));
   results[index] = result;
   }
int main(void) {
   double pi = 0;
   std::thread threads[nthreads];
   auto tstart = std::chrono::high_resolution_clock::now();        
   for (int i = 0; i < nthreads; ++i)
      threads[i] = std::thread(sum,i);
   for (int i = 0; i < nthreads; ++i) {
      threads[i].join();
      pi += results[i];
      }
   auto tend = std::chrono::high_resolution_clock::now();        
	auto dt = std::chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count();
   auto mflops = npts*nthreads*5.0/dt;
   std::cout << "npts: " << npts << " nthreads: " << nthreads << " pi: " << pi << '\n';
   std::cout << "time: " << 1e-6*dt << " estimated MFlops: " << mflops << '\n';
   return 0;
   }