You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

38 lines
1.0 KiB
Python

import numpy as np
import pycuda.autoinit
from pycuda import gpuarray
from time import time
from pycuda.elementwise import ElementwiseKernel
host_data = np.float32(np.random.random(50000000))
gpu_2x_ker = ElementwiseKernel(
"float *in, float *out",
"out[i] = 2*in[i];",
"gpu_2x_ker")
# warm up
test_data = gpuarray.to_gpu(host_data)
gpu_2x_ker(test_data, gpuarray.empty_like(test_data))
def speed_comparison():
t1 = time()
host_data_2x = host_data * np.float32(2)
t2 = time()
print('total time to compute on CPU: %f' % (t2 - t1))
device_data = gpuarray.to_gpu(host_data)
# allocate memory for output
device_data_2x = gpuarray.empty_like(device_data)
t1 = time()
gpu_2x_ker(device_data, device_data_2x)
t2 = time()
from_device = device_data_2x.get()
print('total time to compute on GPU: %f' % (t2 - t1))
print(
'Is the host computation the same as the GPU computation? : {}'.format(np.allclose(from_device, host_data_2x)))
if __name__ == '__main__':
speed_comparison()