diff --git a/CUDA/cudapi.cu b/CUDA/cudapi.cu index dd7ac6aa1fe118796836a6ae8302a1e79a694431..73276a9ae9651416ff20c4e0ce677dbc2d925f15 100755 --- a/CUDA/cudapi.cu +++ b/CUDA/cudapi.cu @@ -28,16 +28,25 @@ void reduce(double *arr) { uint64_t len = npts >> 1; while (1) { reduce_sum<<<blocks,threads>>>(arr,len); + cudaCheck("reduce"); /* report any error recorded for the reduce_sum launch just issued, once per loop pass; NOTE(review): cudaCheck is defined below this call site, so a declaration must already be in scope earlier in the file - confirm */ len = len >> 1; if (len == 0) return; } } +void cudaCheck(string msg) { /* cudaCheck: if the CUDA runtime has a recorded error, print msg, a colon and the CUDA error string to cerr; otherwise do nothing. It only reports: it returns no status and does not abort, so callers continue after a failure. */ + cudaError err; + err = cudaGetLastError(); /* reads the last recorded runtime error and resets it */ + if (cudaSuccess != err) + cerr << msg << ": " << cudaGetErrorString(err) << endl; + } int main(void) { double harr[1],*darr; cudaMalloc(&darr,npts*sizeof(double)); + cudaCheck("cudaMalloc"); /* the return value of cudaMalloc is not inspected here; a failed allocation is expected to show up through cudaGetLastError() inside cudaCheck */ auto tstart = std::chrono::high_resolution_clock::now(); init<<<blocks,threads>>>(darr,nloop); + cudaCheck("init"); /* checks the init launch itself; NOTE(review): faults raised while the kernel executes would presumably only surface at a later synchronizing call, and the cudaDeviceSynchronize() result below is not checked */ reduce(darr); cudaDeviceSynchronize(); auto tend = std::chrono::high_resolution_clock::now();