master
Max Ehrlicher-Schmidt 4 years ago
parent b5c7cda269
commit c854f55af0

@@ -111,7 +111,7 @@ if __name__ == "__main__":
     parser.add_option("-e", "--end", dest="end",
                       help="highest number to check (even numbers are skipped)", default="5000000000", type="int")
     parser.add_option("--numbers-per-step", dest="numbers_per_step",
-                      help="number of odd numbers checked in each step (even numbers are skipped)", default="4194304",
+                      help="number of odd numbers checked in each step (even numbers are skipped)", default="8000000",
                       type="int")
     parser.add_option("--output", dest="output",
                       help="name of the file where the primes should be stored", default="primes.txt", type="string")
@@ -124,7 +124,7 @@ if __name__ == "__main__":
     block_size = 1024
     start = 1
-    grid_size = int(math.ceil(options.numbers_per_step / block_size))  # half of numbers per
+    grid_size = int(math.ceil(options.numbers_per_step / block_size))
     resulting_numbers_per_step = block_size * grid_size
     last_number_checked = start - 1

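For context on the grid-size lines above: with the new default of 8000000 odd numbers per step and 1024 threads per block, the step no longer divides evenly (the old default 4194304 is exactly 4096 * 1024), so the rounded-up grid covers slightly more numbers than requested. A quick check of the arithmetic, reusing the names from the diff:

    import math

    block_size = 1024            # threads per block, as in the script
    numbers_per_step = 8000000   # new default introduced by this commit

    grid_size = int(math.ceil(numbers_per_step / block_size))   # 8000000 / 1024 = 7812.5 -> 7813 blocks
    resulting_numbers_per_step = block_size * grid_size         # 7813 * 1024 = 8000512 numbers covered

    print(grid_size, resulting_numbers_per_step)                 # 7813 8000512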
@@ -10,15 +10,14 @@ __global__ void scalar_multiply_kernel(float *outvec, float scalar, float *vec)
     int i = threadIdx.x;
     outvec[i] = scalar*vec[i];
 }
-""")
+""")  # compile kernel function
-scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel")
+scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel")  # get a reference to the kernel function
-testvec = np.random.randn(512).astype(np.float32)
-testvec_gpu = gpuarray.to_gpu(testvec)
-outvec_gpu = gpuarray.empty_like(testvec_gpu)
+host_vector = np.random.randn(512).astype(np.float32)  # create an array of 512 random numbers
+device_vector = gpuarray.to_gpu(host_vector)  # copy it into the GPU's global memory
+out_device_vector = gpuarray.empty_like(device_vector)  # allocate an empty chunk of the GPU's global memory
-scalar_multiply_gpu(outvec_gpu, np.float32(2), testvec_gpu, block=(512, 1, 1), grid=(1, 1, 1))
-print("Does our kernel work correctly? : {}".format(np.allclose(outvec_gpu.get(), 2 * testvec)))
-print(outvec_gpu.get())
-print(2 * testvec)
+scalar_multiply_gpu(out_device_vector, np.float32(2), device_vector, block=(512, 1, 1), grid=(1, 1, 1))  # launch the kernel
+print("Does our kernel work correctly? : {}".format(np.allclose(out_device_vector.get(), 2 * host_vector)))
+print(out_device_vector.get())
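Put together, the renamed example corresponds roughly to the self-contained sketch below. The imports (pycuda.autoinit, SourceModule, gpuarray, numpy) are assumed, since the hunk only shows the tail of the file:

    import numpy as np
    import pycuda.autoinit                  # initialises a CUDA context on import
    from pycuda import gpuarray
    from pycuda.compiler import SourceModule

    ker = SourceModule("""
    __global__ void scalar_multiply_kernel(float *outvec, float scalar, float *vec)
    {
        int i = threadIdx.x;
        outvec[i] = scalar * vec[i];
    }
    """)  # compile the kernel
    scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel")  # get a reference to the compiled kernel

    host_vector = np.random.randn(512).astype(np.float32)    # 512 random floats on the host
    device_vector = gpuarray.to_gpu(host_vector)              # copy them into the GPU's global memory
    out_device_vector = gpuarray.empty_like(device_vector)    # uninitialised output buffer on the GPU

    # one block of 512 threads: thread i computes element i
    scalar_multiply_gpu(out_device_vector, np.float32(2), device_vector,
                        block=(512, 1, 1), grid=(1, 1, 1))
    print("Does our kernel work correctly? : {}".format(
        np.allclose(out_device_vector.get(), 2 * host_vector)))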
