"""Compare the time to double a large float32 array on the CPU and the GPU.

The CPU path uses a NumPy vectorized multiply; the GPU path uses a PyCUDA
``ElementwiseKernel``. Host<->device transfers are deliberately kept outside
the timed GPU region, so the reported GPU figure is kernel time only.
"""
import numpy as np
import pycuda.autoinit
from pycuda import gpuarray
from time import time
from pycuda.elementwise import ElementwiseKernel

# 50 million random samples, converted to float32 to match the kernel's
# ``float *`` arguments.
host_data = np.float32(np.random.random(50000000))

# Element-wise kernel computing out[i] = 2 * in[i].
gpu_2x_ker = ElementwiseKernel(
    "float *in, float *out",
    "out[i] = 2*in[i];",
    "gpu_2x_ker")

# Warm up: the first call compiles the kernel, which would otherwise be
# charged to the timed run inside speed_comparison().
test_data = gpuarray.to_gpu(host_data)
gpu_2x_ker(test_data, gpuarray.empty_like(test_data))
# Make sure the warm-up launch has fully finished before anything is timed.
pycuda.autoinit.context.synchronize()


def speed_comparison():
    """Time the doubling of ``host_data`` on CPU and GPU and print the results.

    Prints the CPU time, the GPU kernel time, and whether both results agree
    (checked with ``np.allclose``). Returns ``None``.
    """
    # --- CPU reference ---
    t1 = time()
    host_data_2x = host_data * np.float32(2)
    t2 = time()
    print('total time to compute on CPU: %f' % (t2 - t1))

    # --- GPU ---
    device_data = gpuarray.to_gpu(host_data)
    # allocate memory for output
    device_data_2x = gpuarray.empty_like(device_data)

    t1 = time()
    gpu_2x_ker(device_data, device_data_2x)
    # Kernel launches return before the device has finished. Without this
    # barrier the stop timestamp would capture only the launch overhead
    # rather than the actual computation.
    pycuda.autoinit.context.synchronize()
    t2 = time()

    from_device = device_data_2x.get()
    print('total time to compute on GPU: %f' % (t2 - t1))
    print(
        'Is the host computation the same as the GPU computation? : {}'.format(
            np.allclose(from_device, host_data_2x)))


if __name__ == '__main__':
    speed_comparison()