diff --git a/Python/numbapig.py b/Python/numbapig.py
index 7f68a257951173dbe807385795a0fd1bfcc94eb1..f8a562e66edc598d9cd7c6e83f3c8d28eb634d11 100644
--- a/Python/numbapig.py
+++ b/Python/numbapig.py
@@ -112,4 +112,17 @@ mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
 print("both with CUDA kernel reduction:")
 print("   NPTS = %d, pi = %f"%(NPTS,pi[0]))
 print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
+#
+# both with CUDA kernel reduction and transfer (timed region includes result.copy_to_host())
+#
+start_time = time.time()  # wall-clock start of the timed region
+init[grid_size,block_size](arr)  # call init on arr with the [grid_size, block_size] launch configuration
+CUDA_reduce(arr,NPTS)  # presumably reduces arr over NPTS points; defined outside this hunk -- TODO confirm
+CUDA_result(arr,result)  # presumably stores the final value in result -- TODO confirm
+pi = result.copy_to_host()  # copy result back to the host inside the timed region; pi[0] is printed below
+end_time = time.time()  # wall-clock end of the timed region
+mflops = NPTS*5.0/(1.0e6*(end_time-start_time))  # NPTS*5.0 per elapsed second, in millions (presumably 5 flops per point)
+print("both with CUDA kernel reduction and transfer:")
+print("   NPTS = %d, pi = %f"%(NPTS,pi[0]))
+print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))