master
Max Ehrlicher-Schmidt 4 years ago
parent b5c7cda269
commit c854f55af0

@@ -111,7 +111,7 @@ if __name__ == "__main__":
     parser.add_option("-e", "--end", dest="end",
                       help="numbers to check without even numbers", default="5000000000", type="int")
     parser.add_option("--numbers-per-step", dest="numbers_per_step",
-                      help="amount of uneven numbers checked in each step (even number are skipped)", default="4194304",
+                      help="amount of uneven numbers checked in each step (even number are skipped)", default="8000000",
                       type="int")
     parser.add_option("--output", dest="output",
                       help="name of the file, where the primes should be stored", default="primes.txt", type="string")
@@ -124,7 +124,7 @@ if __name__ == "__main__":
     block_size = 1024
     start = 1
-    grid_size = int(math.ceil(options.numbers_per_step / block_size))  # half of numbers per
+    grid_size = int(math.ceil(options.numbers_per_step / block_size))
    resulting_numbers_per_step = block_size * grid_size
     last_number_checked = start - 1
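
Note that the grid size above rounds the requested step up to a whole number of blocks, so the step actually processed can be slightly larger than --numbers-per-step. With the old default of 4194304 (exactly 4096 * 1024) the division was exact; the new default of 8000000 is not a multiple of 1024. A small illustration of that arithmetic, with variable names taken from the hunk and the concrete numbers serving only as an example:

import math

block_size = 1024
numbers_per_step = 8000000  # the new default from the first hunk

# One launch covers grid_size * block_size numbers, rounded up to whole blocks.
grid_size = int(math.ceil(numbers_per_step / block_size))
resulting_numbers_per_step = block_size * grid_size

print(grid_size)                   # 7813
print(resulting_numbers_per_step)  # 8000512, slightly more than requested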

@@ -10,15 +10,14 @@ __global__ void scalar_multiply_kernel(float *outvec, float scalar, float *vec)
     int i = threadIdx.x;
     outvec[i] = scalar*vec[i];
 }
-""")
-scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel")
-testvec = np.random.randn(512).astype(np.float32)
-testvec_gpu = gpuarray.to_gpu(testvec)
-outvec_gpu = gpuarray.empty_like(testvec_gpu)
-scalar_multiply_gpu(outvec_gpu, np.float32(2), testvec_gpu, block=(512, 1, 1), grid=(1, 1, 1))
-print("Does our kernel work correctly? : {}".format(np.allclose(outvec_gpu.get(), 2 * testvec)))
-print(outvec_gpu.get())
-print(2 * testvec)
+""")  # compile kernel function
+scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel")  # get kernel function reference
+host_vector = np.random.randn(512).astype(np.float32)  # create array of 512 random numbers
+device_vector = gpuarray.to_gpu(host_vector)  # copy into the GPU's global memory
+out_device_vector = gpuarray.empty_like(device_vector)  # allocate a chunk of empty memory in the GPU's global memory
+scalar_multiply_gpu(out_device_vector, np.float32(2), device_vector, block=(512, 1, 1), grid=(1, 1, 1))  # launch the kernel
+print("Does our kernel work correctly? : {}".format(np.allclose(out_device_vector.get(), 2 * host_vector)))
+print(out_device_vector.get())
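
Put together with the lines that sit outside this hunk, the modified example reads roughly as below. The imports and the ker = SourceModule(...) opening are assumptions based on the usual PyCUDA pattern (pycuda.autoinit, SourceModule, gpuarray); only the lines from the hunk above are taken from the commit.

import numpy as np
import pycuda.autoinit  # creates a CUDA context on import
from pycuda import gpuarray
from pycuda.compiler import SourceModule

ker = SourceModule("""
__global__ void scalar_multiply_kernel(float *outvec, float scalar, float *vec)
{
    int i = threadIdx.x;
    outvec[i] = scalar*vec[i];
}
""")  # compile kernel function

scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel")  # get kernel function reference
host_vector = np.random.randn(512).astype(np.float32)  # create array of 512 random numbers
device_vector = gpuarray.to_gpu(host_vector)  # copy into the GPU's global memory
out_device_vector = gpuarray.empty_like(device_vector)  # allocate empty memory in the GPU's global memory
scalar_multiply_gpu(out_device_vector, np.float32(2), device_vector,
                    block=(512, 1, 1), grid=(1, 1, 1))  # launch: one block of 512 threads
print("Does our kernel work correctly? : {}".format(np.allclose(out_device_vector.get(), 2 * host_vector)))
print(out_device_vector.get())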
