Skip to content
Snippets Groups Projects
Commit 29d3f0dd authored by Neil Gershenfeld's avatar Neil Gershenfeld
Browse files

try peer

parent f932159b
No related branches found
No related tags found
No related merge requests found
Pipeline #5659 passed
//
// cudapip.cu
// Neil Gershenfeld 7/14/20
// calculation of pi by a CUDA multi-GPU peer sum
// pi = 3.14159265358979323846
//
#include <iostream>
#include <chrono>
#include <cstdint>
uint64_t blocks = 1024;
uint64_t threads = 1024;
uint64_t nloop = 1000000;
uint64_t npts = blocks*threads;
using namespace std;
__global__ void init(double *arr,uint64_t nloop,uint64_t npts,int ngpus,int index) {
uint64_t i = blockIdx.x*blockDim.x+threadIdx.x;
uint64_t start = nloop*i+npts*nloop*index+1;
uint64_t end = nloop*(i+1)+npts*nloop*index+1;
arr[i+index*npts] = 0;
for (uint64_t j = start; j < end; ++j)
arr[i+index*npts] += 0.5/((j-0.75)*(j-0.25));
}
void cudaCheck(string msg) {
cudaError err;
err = cudaGetLastError();
if (cudaSuccess != err)
cerr << msg << ": " << cudaGetErrorString(err) << endl;
}
int main(void) {
double *arr,*darr;
int ngpus;
cudaGetDeviceCount(&ngpus);
arr = new double[ngpus*npts];
cudaSetDevice(0);
cudaMalloc(&darr,ngpus*npts*sizeof(double));
for (int i = 1; i < ngpus; ++i) {
cudaSetDevice(i);
cudaDeviceEnablePeerAccess(0,0);
cudaCheck("peer access");
}
auto tstart = chrono::high_resolution_clock::now();
for (int i = 0; i < ngpus; ++i) {
cudaSetDevice(i);
init<<<blocks,threads>>>(darr,nloop,npts,ngpus,i);
}
for (int i = 1; i < ngpus; ++i) {
cudaSetDevice(i);
cudaDeviceSynchronize();
}
cudaSetDevice(0);
cudaMemcpy(arr,darr,ngpus*npts*sizeof(double),cudaMemcpyDeviceToHost);
double pi = 0;
for (int i = 0; i < ngpus*npts; ++i)
pi += arr[i];
auto tend = chrono::high_resolution_clock::now();
auto dt = chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count();
auto gflops = npts*nloop*ngpus*5.0/dt/1e3;
std::cout << "npts: " << npts << " nloop: " << nloop << " ngpus: " << ngpus << " pi: " << pi << '\n';
std::cout << "time: " << 1e-6*dt << " estimated GFlops: " << gflops << '\n';
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment