# # numbapig.py # Neil Gershenfeld 2/9/20 # calculation of pi by a Numba CUDA sum # pi = 3.14159265358979323846 # from numba import cuda import numpy as np import time # # problem size # block_size = 2**10 grid_size = 2**20 NPTS = grid_size*block_size # # CUDA kernels # @cuda.jit def init(arr): i = 1+cuda.grid(1) arr[i] = 0.5/((i-0.75)*(i-0.25)) @cuda.reduce def sum_reduce(a,b): return a+b # # compile kernels # arr = cuda.device_array(NPTS,np.float32) init[grid_size,block_size](arr) pi = sum_reduce(arr) # # array calc # start_time = time.time() init[grid_size,block_size](arr) end_time = time.time() mflops = NPTS*4.0/(1.0e6*(end_time-start_time)) print("Numba CUDA array calculation:") print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) # # reduction # start_time = time.time() pi = sum_reduce(arr) end_time = time.time() mflops = NPTS*1.0/(1.0e6*(end_time-start_time)) print("Numba CUDA reduction:") print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) # # both # start_time = time.time() init[grid_size,block_size](arr) pi = sum_reduce(arr) end_time = time.time() mflops = NPTS*5.0/(1.0e6*(end_time-start_time)) print("Numba CUDA both:") print(" NPTS = %d, pi = %f"%(NPTS,pi)) print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))