From 7eb96a69ec2a1c3d641150c7bc844af43e388bd2 Mon Sep 17 00:00:00 2001 From: Max Ehrlicher-Schmidt Date: Wed, 2 Dec 2020 18:40:52 +0100 Subject: [PATCH] Improve time measuring --- .gitignore | 2 ++ primesGPU.py | 36 ++++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 95b0275..b5686f7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ /shelf/ /workspace.xml .idea +primes.txt +timings.csv \ No newline at end of file diff --git a/primesGPU.py b/primesGPU.py index 4dcc443..20dc252 100644 --- a/primesGPU.py +++ b/primesGPU.py @@ -33,10 +33,10 @@ check_prime(unsigned long long *input, bool *output) """) ker2 = SourceModule(""" -__global__ void check_prime2(const unsigned long *IN, bool *OUT) { +__global__ void check_prime2(const unsigned long long *IN, bool *OUT) { int id = threadIdx.x + blockDim.x * blockIdx.x; - unsigned long num = IN[id]; - unsigned long limit = (unsigned long) sqrt((double) num) + 1; + unsigned long long num = IN[id]; + unsigned long long limit = (unsigned long long) sqrt((double) num) + 1; if (num == 2 || num == 3) { OUT[id] = true; @@ -45,7 +45,7 @@ __global__ void check_prime2(const unsigned long *IN, bool *OUT) { return; } if (limit < 9) { - for (unsigned long i = 3; i <= limit; i++) { + for (unsigned long long i = 3; i <= limit; i++) { if (num % i == 0) { return; } @@ -54,7 +54,7 @@ __global__ void check_prime2(const unsigned long *IN, bool *OUT) { if (num > 3 && num % 3 == 0) { return; } - for (unsigned long i = 9; i <= (limit + 6); i += 6) { + for (unsigned long long i = 9; i <= (limit + 6); i += 6) { if (num % (i - 2) == 0 || num % (i - 4) == 0) { return; } @@ -76,26 +76,32 @@ def calc_primes(start: int = 1, grid_size: int = 1000, block_size: int = 1024): if start % 2 == 0: start = start + 1 - testvec = np.arange(start, block_size * grid_size * 2 + start, step=2).astype(np.uint) + startEvent = drv.Event() + endEvent = drv.Event() + + testvec = np.arange(start, block_size * grid_size * 2 + start, step=2).astype(np.ulonglong) testvec_gpu = gpuarray.to_gpu(testvec) outvec_gpu = gpuarray.to_gpu(np.full(block_size * grid_size, False, dtype=bool)) - t1 = time() + startEvent.record() check_prime(testvec_gpu, outvec_gpu, block=(block_size, 1, 1), grid=(grid_size, 1, 1)) + endEvent.record() + endEvent.synchronize() + kernel_execution_time = startEvent.time_till(endEvent) + result = outvec_gpu.get() - t2 = time() for idx, val in enumerate(result): if val: primes.append(testvec[idx]) - print(len(primes)) - print('checked ' + str(block_size * grid_size) + ' numbers') + print('checked ' + str(block_size * grid_size) + ' numbers' + ' (' + str(start) + ' - ' + str( + start + block_size * grid_size) + ')') print('last prime: ' + str(primes[-1])) - print('The GPU needed ' + str(t2 - t1) + ' seconds') + print('The GPU needed ' + str(kernel_execution_time) + ' milliseconds') with open(options.timings_output, 'a') as file: - file.write(str(start) + "," + str((t2 - t1) * 1000) + "\n") + file.write(str(start) + "," + str(kernel_execution_time) + "\n") return primes @@ -103,7 +109,7 @@ def calc_primes(start: int = 1, grid_size: int = 1000, block_size: int = 1024): if __name__ == "__main__": parser = OptionParser() parser.add_option("-e", "--end", dest="end", - help="numbers to check without even numbers", default="1000000000", type="int") + help="numbers to check without even numbers", default="5000000000", type="int") parser.add_option("--numbers-per-step", dest="numbers_per_step", help="amount of uneven numbers checked in each step (even number are skipped)", default="8388608", type="int") @@ -122,9 +128,11 @@ if __name__ == "__main__": with open(options.timings_output, 'w') as file: file.write("offset, duration\n") + with open(options.output, 'w') as file: + file.write("") while last_number_checked < options.end: calculated_primes = calc_primes(last_number_checked + 1, grid_size, block_size) - with open(options.output, 'w') as file: + with open(options.output, 'a') as file: file.write("\n".join([str(p) for p in calculated_primes])) last_number_checked = last_number_checked + resulting_numbers_per_step * 2