"""PyCUDA example: element-wise scalar multiplication of a float32 vector on the GPU.

Compiles a small CUDA kernel at runtime with SourceModule, multiplies a random
512-element vector by 2 on the device, and checks the result against NumPy.
"""
import pycuda.autoinit  # noqa: F401 -- imported for its side effect: creates the CUDA context
import pycuda.driver as drv  # noqa: F401 -- kept from original source; not used directly below
import numpy as np
from pycuda import gpuarray
from pycuda.compiler import SourceModule

# Vector length, used both for the host array and the kernel launch geometry.
# Must stay <= the device's max threads per block (1024 on modern GPUs),
# because the kernel below indexes with threadIdx.x only and runs in a
# single block.
VEC_LEN = 512

# CUDA kernel: thread i computes one output element, outvec[i] = scalar * vec[i].
# NOTE(review): no bounds check -- the launch must supply exactly one thread
# per element (one block of VEC_LEN threads).
ker = SourceModule("""
__global__ void scalar_multiply_kernel(float *outvec, float scalar, float *vec)
{
    int i = threadIdx.x;
    outvec[i] = scalar*vec[i];
}
""")

scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel")

# Host-side test data; float32 to match the kernel's `float` parameters.
testvec = np.random.randn(VEC_LEN).astype(np.float32)
testvec_gpu = gpuarray.to_gpu(testvec)
outvec_gpu = gpuarray.empty_like(testvec_gpu)

# Launch: a single block of VEC_LEN threads, one thread per vector element.
# The scalar must be passed as np.float32 so its binary layout matches the
# kernel's `float scalar` argument.
scalar_multiply_gpu(outvec_gpu, np.float32(2), testvec_gpu,
                    block=(VEC_LEN, 1, 1), grid=(1, 1, 1))

# Verify the GPU result against the CPU reference computation.
print("Does our kernel work correctly? : {}".format(np.allclose(outvec_gpu.get(), 2 * testvec)))
print(outvec_gpu.get())
print(2 * testvec)