Skip to content
Snippets Groups Projects
Commit 73d4808e authored by Neil Gershenfeld
Browse files

wip

parent 7ecacb5c
Branches
No related tags found
No related merge requests found
Pipeline #5078 passed
...@@ -36,6 +36,7 @@ def CUDA_reduce(arr,NPTS): ...@@ -36,6 +36,7 @@ def CUDA_reduce(arr,NPTS):
len = NPTS >> 1 len = NPTS >> 1
while (1): while (1):
CUDA_sum[grid_size,block_size](arr,len) CUDA_sum[grid_size,block_size](arr,len)
cuda.synchronize()
len = len >> 1 len = len >> 1
if (len == 0): if (len == 0):
return return
...@@ -64,6 +65,7 @@ CUDA_result(arr,result) ...@@ -64,6 +65,7 @@ CUDA_result(arr,result)
# #
start_time = time.time() start_time = time.time()
init[grid_size,block_size](arr) init[grid_size,block_size](arr)
cuda.synchronize()
end_time = time.time() end_time = time.time()
mflops = NPTS*4.0/(1.0e6*(end_time-start_time)) mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
print("CUDA kernel array calculation:") print("CUDA kernel array calculation:")
...@@ -83,6 +85,7 @@ print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) ...@@ -83,6 +85,7 @@ print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
# #
start_time = time.time() start_time = time.time()
init[grid_size,block_size](arr) init[grid_size,block_size](arr)
cuda.synchronize()
pi = Numba_reduce(arr) pi = Numba_reduce(arr)
end_time = time.time() end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time)) mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
...@@ -104,8 +107,10 @@ print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) ...@@ -104,8 +107,10 @@ print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
# #
start_time = time.time() start_time = time.time()
init[grid_size,block_size](arr) init[grid_size,block_size](arr)
cuda.synchronize()
CUDA_reduce(arr,NPTS) CUDA_reduce(arr,NPTS)
CUDA_result(arr,result) CUDA_result(arr,result)
cuda.synchronize()
end_time = time.time() end_time = time.time()
pi = result.copy_to_host() pi = result.copy_to_host()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time)) mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment