You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
90 lines
2.2 KiB
Python
90 lines
2.2 KiB
Python
4 years ago
|
from time import time
|
||
|
|
||
|
import pycuda.autoinit
|
||
|
import pycuda.driver as drv
|
||
|
import numpy as np
|
||
|
from pycuda import gpuarray
|
||
|
from pycuda.compiler import SourceModule
|
||
|
|
||
|
ker = SourceModule("""
|
||
|
__global__ void
|
||
|
check_prime(unsigned long long *input, bool *output)
|
||
|
{
|
||
|
int i = threadIdx.x + blockDim.x * blockIdx.x;
|
||
|
|
||
|
unsigned long long num = input[i];
|
||
|
if (num == 2) {
|
||
|
output[i] = true;
|
||
|
return;
|
||
|
} else if (num < 3 || num % 2 == 0) {
|
||
|
return;
|
||
|
}
|
||
|
unsigned long long limit = (long) sqrt((double) num) + 1;
|
||
|
for (unsigned long long i = 3; i <= limit; i += 2) {
|
||
|
if (num % i == 0) {
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
output[i] = true;
|
||
|
}
|
||
|
""")
|
||
|
|
||
|
ker2 = SourceModule("""
|
||
|
__global__ void check_prime2(const unsigned long *IN, bool *OUT) {
|
||
|
int id = threadIdx.x + blockDim.x * blockIdx.x;
|
||
|
unsigned long num = IN[id];
|
||
|
unsigned long limit = (unsigned long) sqrt((double) num) + 1;
|
||
|
|
||
|
if (num == 2 || num == 3) {
|
||
|
OUT[id] = true;
|
||
|
return;
|
||
|
} else if (num == 1 || num % 2 == 0) {
|
||
|
return;
|
||
|
}
|
||
|
if (limit < 9) {
|
||
|
for (unsigned long i = 3; i <= limit; i++) {
|
||
|
if (num % i == 0) {
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
if (num > 3 && num % 3 == 0) {
|
||
|
return;
|
||
|
}
|
||
|
for (unsigned long i = 9; i <= (limit + 6); i += 6) {
|
||
|
if (num % (i - 2) == 0 || num % (i - 4) == 0) {
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
OUT[id] = true;
|
||
|
}
|
||
|
""")
|
||
|
|
||
|
block_size = 1024
|
||
|
grid_size = 50000
|
||
|
|
||
|
check_prime = ker2.get_function("check_prime2")
|
||
|
|
||
|
testvec = np.arange(1, block_size * grid_size * 2, step=2).astype(np.uint)
|
||
|
|
||
|
testvec_gpu = gpuarray.to_gpu(testvec)
|
||
|
outvec_gpu = gpuarray.to_gpu(np.full(block_size * grid_size, False, dtype=bool))
|
||
|
t1 = time()
|
||
|
check_prime(testvec_gpu, outvec_gpu, block=(block_size, 1, 1), grid=(grid_size, 1, 1))
|
||
|
result = outvec_gpu.get()
|
||
|
t2 = time()
|
||
|
|
||
|
primes = []
|
||
|
|
||
|
for idx, val in enumerate(result):
|
||
|
if val:
|
||
|
primes.append(idx)
|
||
|
|
||
|
#print(primes)
|
||
|
print(len(primes))
|
||
|
print('checked the first ' + str(block_size * grid_size) + ' numbers')
|
||
|
print('last prime: ' + str(primes[-1]))
|
||
|
print('The GPU needed ' + str(t2 - t1) + ' seconds')
|