Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#
# numbapig.py
# Neil Gershenfeld 2/9/20
# calculation of pi by a Numba CUDA sum
# pi = 3.14159265358979323846
#
from numba import cuda
import numpy as np
import time
#
# problem size
#
# Launch geometry: threads per block, blocks per grid, and the total
# number of threads launched (also the length of the device array).
block_size = 1 << 10
grid_size = 1 << 20
NPTS = block_size * grid_size
#
# CUDA kernels
#
@cuda.jit
def init(arr):
    """Fill ``arr`` with the terms of a series that sums to pi.

    The thread with 0-based global index ``idx`` stores the term for the
    1-based series index ``i = idx + 1``:

        0.5 / ((i - 0.75) * (i - 0.25))

    Threads whose index falls outside ``arr`` do nothing, so the launch
    configuration may cover more threads than there are elements.
    """
    idx = cuda.grid(1)
    # Guard against out-of-range threads; writing past the end of a device
    # array is undefined behaviour on the GPU.
    if idx < arr.size:
        # The series is 1-based, the array is 0-based: term i lives in
        # slot i-1 so that slot 0 is filled and slot arr.size is never touched.
        i = idx + 1
        arr[idx] = 0.5 / ((i - 0.75) * (i - 0.25))
@cuda.reduce
def sum_reduce(a, b):
    # Binary combining step handed to cuda.reduce: add two partial values.
    total = a + b
    return total
#
# compile kernels
#
# Allocate the float32 device buffer, then run each kernel once before the
# timed sections so that compilation is not included in the measurements.
arr = cuda.device_array(NPTS, dtype=np.float32)
init[grid_size, block_size](arr)
pi = sum_reduce(arr)
#
# array calc
#
# Kernel launches return to the host before the GPU has finished, so the
# device is synchronized on both sides of the launch: once so that no earlier
# work is still pending when the clock starts, and once so that the clock
# stops only after the kernel has actually completed.
cuda.synchronize()
start_time = time.time()
init[grid_size, block_size](arr)
cuda.synchronize()
end_time = time.time()
# 4.0 is the estimated number of floating-point operations per element.
mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
print("Numba CUDA array calculation:")
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# reduction
#
# Wait for any kernel launched earlier to finish before starting the clock;
# otherwise its remaining run time would be charged to the reduction.
cuda.synchronize()
start_time = time.time()
# The reduction hands its result back as a value used on the host (it is
# printed later), so no extra synchronize is added after it.
pi = sum_reduce(arr)
end_time = time.time()
# 1.0 is the estimated number of floating-point operations per element.
mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
print("Numba CUDA reduction:")
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# both
#
# Time the fill kernel and the reduction back to back, then report the
# computed value of pi together with the combined throughput estimate.
start_time = time.time()
init[grid_size, block_size](arr)
pi = sum_reduce(arr)
end_time = time.time()
elapsed = end_time - start_time
# 5.0 = 4 (array calculation) + 1 (reduction) estimated operations per element.
mflops = NPTS * 5.0 / (1.0e6 * elapsed)
print("Numba CUDA both:")
print(" NPTS = %d, pi = %f" % (NPTS, pi))
print(" time = %f, estimated MFlops = %f" % (elapsed, mflops))