From 73d4808e876abdcaa27691cf6c6bb6fde2b7ee37 Mon Sep 17 00:00:00 2001
From: Neil Gershenfeld <gersh@cba.mit.edu>
Date: Sat, 29 Feb 2020 17:12:37 -0500
Subject: [PATCH] wip: add cuda.synchronize() after CUDA kernel launches

---
 Python/numbapig.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Python/numbapig.py b/Python/numbapig.py
index f8a562e..71c8eb8 100644
--- a/Python/numbapig.py
+++ b/Python/numbapig.py
@@ -36,6 +36,7 @@ def CUDA_reduce(arr,NPTS):
    len = NPTS >> 1
    while (1):
       CUDA_sum[grid_size,block_size](arr,len)
+      cuda.synchronize()
       len = len >> 1
       if (len == 0):
          return
@@ -64,6 +65,7 @@ CUDA_result(arr,result)
 #
 start_time = time.time()
 init[grid_size,block_size](arr)
+cuda.synchronize()
 end_time = time.time()
 mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
 print("CUDA kernel array calculation:")
@@ -83,6 +85,7 @@ print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
 #
 start_time = time.time()
 init[grid_size,block_size](arr)
+cuda.synchronize()
 pi = Numba_reduce(arr)
 end_time = time.time()
 mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
@@ -104,8 +107,10 @@ print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
 #
 start_time = time.time()
 init[grid_size,block_size](arr)
+cuda.synchronize()
 CUDA_reduce(arr,NPTS)
 CUDA_result(arr,result)
+cuda.synchronize()
 end_time = time.time()
 pi = result.copy_to_host()
 mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
-- 
GitLab