You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
841 B
Python
31 lines
841 B
Python
import numpy as np
|
|
from pycuda import gpuarray
|
|
from time import time
|
|
|
|
# -- initialize the device
|
|
import pycuda.autoinit
|
|
|
|
print((gpuarray.to_gpu(np.array([1], dtype=np.float32)) * 1).get()) # this call speeds up the following gpu
|
|
# calculation, because at this point the gpu code gets compiled and cached for the next calls
|
|
|
|
|
|
# compare cpu and gpu
|
|
host_data = np.float32(np.random.random(50000000))
|
|
|
|
t1 = time()
|
|
host_data_2x = host_data * np.float32(2)
|
|
t2 = time()
|
|
|
|
print('total time to compute on CPU: %f' % (t2 - t1))
|
|
|
|
device_data = gpuarray.to_gpu(host_data)
|
|
|
|
t1 = time()
|
|
device_data_2x = device_data * np.float32(2)
|
|
t2 = time()
|
|
|
|
from_device = device_data_2x.get()
|
|
|
|
print('total time to compute on GPU: %f' % (t2 - t1))
|
|
print('Is the host computation the same as the GPU computation? : {}'.format(np.allclose(from_device, host_data_2x)))
|