From 73d4808e876abdcaa27691cf6c6bb6fde2b7ee37 Mon Sep 17 00:00:00 2001 From: Neil Gershenfeld <gersh@cba.mit.edu> Date: Sat, 29 Feb 2020 17:12:37 -0500 Subject: [PATCH] wip --- Python/numbapig.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Python/numbapig.py b/Python/numbapig.py index f8a562e..71c8eb8 100644 --- a/Python/numbapig.py +++ b/Python/numbapig.py @@ -36,6 +36,7 @@ def CUDA_reduce(arr,NPTS): len = NPTS >> 1 while (1): CUDA_sum[grid_size,block_size](arr,len) + cuda.synchronize() len = len >> 1 if (len == 0): return @@ -64,6 +65,7 @@ CUDA_result(arr,result) # start_time = time.time() init[grid_size,block_size](arr) +cuda.synchronize() end_time = time.time() mflops = NPTS*4.0/(1.0e6*(end_time-start_time)) print("CUDA kernel array calculation:") @@ -83,6 +85,7 @@ print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) # start_time = time.time() init[grid_size,block_size](arr) +cuda.synchronize() pi = Numba_reduce(arr) end_time = time.time() mflops = NPTS*5.0/(1.0e6*(end_time-start_time)) @@ -104,8 +107,10 @@ print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) # start_time = time.time() init[grid_size,block_size](arr) +cuda.synchronize() CUDA_reduce(arr,NPTS) CUDA_result(arr,result) +cuda.synchronize() end_time = time.time() pi = result.copy_to_host() mflops = NPTS*5.0/(1.0e6*(end_time-start_time)) -- GitLab