diff --git a/primesGPU.py b/primesGPU.py
index 84af71d..9df20af 100644
--- a/primesGPU.py
+++ b/primesGPU.py
@@ -111,7 +111,7 @@ if __name__ == "__main__":
     parser.add_option("-e", "--end", dest="end",
                       help="numbers to check without even numbers", default="5000000000", type="int")
     parser.add_option("--numbers-per-step", dest="numbers_per_step",
-                      help="amount of uneven numbers checked in each step (even number are skipped)", default="4194304",
+                      help="amount of uneven numbers checked in each step (even number are skipped)", default="8000000",
                       type="int")
     parser.add_option("--output", dest="output",
                       help="name of the file, where the primes should be stored", default="primes.txt", type="string")
@@ -124,7 +124,7 @@ if __name__ == "__main__":
     block_size = 1024
     start = 1

-    grid_size = int(math.ceil(options.numbers_per_step / block_size))  # half of numbers per
+    grid_size = int(math.ceil(options.numbers_per_step / block_size))
     resulting_numbers_per_step = block_size * grid_size

     last_number_checked = start - 1
diff --git a/simpleCustomKernel.py b/simpleCustomKernel.py
index 188f8d1..df6f1b4 100644
--- a/simpleCustomKernel.py
+++ b/simpleCustomKernel.py
@@ -10,15 +10,14 @@ __global__ void scalar_multiply_kernel(float *outvec, float scalar, float *vec)
 {
     int i = threadIdx.x;
     outvec[i] = scalar*vec[i];
 }
-""")
+""") # compile kernel function

-scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel")
+scalar_multiply_gpu = ker.get_function("scalar_multiply_kernel") # get kernel function reference

-testvec = np.random.randn(512).astype(np.float32)
-testvec_gpu = gpuarray.to_gpu(testvec)
-outvec_gpu = gpuarray.empty_like(testvec_gpu)
+host_vector = np.random.randn(512).astype(np.float32) # create array of 512 random numbers
+device_vector = gpuarray.to_gpu(host_vector) # copy into GPUs global memory
+out_device_vector = gpuarray.empty_like(device_vector) # allocate a chunk of empty memory to GPUs global memory

-scalar_multiply_gpu(outvec_gpu, np.float32(2), testvec_gpu, 
-block=(512, 1, 1), grid=(1, 1, 1))
-print("Does our kernel work correctly? : {}".format(np.allclose(outvec_gpu.get(), 2 * testvec)))
-print(outvec_gpu.get())
-print(2 * testvec)
+scalar_multiply_gpu(out_device_vector, np.float32(2), device_vector, block=(512, 1, 1), grid=(1, 1, 1)) # launch the kernel
+print("Does our kernel work correctly? : {}".format(np.allclose(out_device_vector.get(), 2 * host_vector)))
+print(out_device_vector.get())