You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.0 KiB
Python
38 lines
1.0 KiB
Python
4 years ago
|
import numpy as np
|
||
|
import pycuda.autoinit
|
||
|
from pycuda import gpuarray
|
||
|
from time import time
|
||
|
from pycuda.elementwise import ElementwiseKernel
|
||
|
|
||
|
# Benchmark input: 50 million random samples, converted to float32 so the
# element type matches the `float *` parameters of the kernel below.
host_data = np.random.random(50000000).astype(np.float32)

# Elementwise GPU kernel: each output element is twice the corresponding
# input element.
gpu_2x_ker = ElementwiseKernel(
    arguments="float *in, float *out",
    operation="out[i] = 2*in[i];",
    name="gpu_2x_ker",
)

# warm up: run the kernel once at import time so that one-time setup work
# (presumably kernel compilation on first use) is not counted in a later
# timed call.
test_data = gpuarray.to_gpu(host_data)
gpu_2x_ker(test_data, gpuarray.empty_like(test_data))
|
||
|
|
||
|
|
||
|
def speed_comparison():
    """Time doubling ``host_data`` on the CPU and on the GPU, and compare.

    Prints three lines: the CPU wall-clock time, the GPU wall-clock time of
    the kernel itself (host<->device transfers are excluded, as before),
    and whether both results agree according to ``np.allclose``.

    Relies on the module-level ``host_data`` array and ``gpu_2x_ker``
    kernel. Returns ``None``.
    """
    # Function-scope import keeps the file's import block untouched;
    # perf_counter is the clock intended for measuring short durations.
    from time import perf_counter

    cpu_start = perf_counter()
    host_data_2x = host_data * np.float32(2)
    cpu_stop = perf_counter()
    print('total time to compute on CPU: %f' % (cpu_stop - cpu_start))

    device_data = gpuarray.to_gpu(host_data)
    # allocate memory for output
    device_data_2x = gpuarray.empty_like(device_data)

    # Make sure no earlier GPU work is still pending when the timer starts.
    pycuda.autoinit.context.synchronize()
    gpu_start = perf_counter()
    gpu_2x_ker(device_data, device_data_2x)
    # Kernel launches return to the host before the device has finished.
    # Without this synchronize the timer would only capture launch
    # overhead, not the actual computation.
    pycuda.autoinit.context.synchronize()
    gpu_stop = perf_counter()

    from_device = device_data_2x.get()
    print('total time to compute on GPU: %f' % (gpu_stop - gpu_start))
    print(
        'Is the host computation the same as the GPU computation? : {}'.format(
            np.allclose(from_device, host_data_2x)))
|
||
|
|
||
|
|
||
|
# Script entry point: run the CPU-vs-GPU comparison only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    speed_comparison()
|