From 50d202d3a23e24b40139fe98101fce90a6fc49b2 Mon Sep 17 00:00:00 2001 From: trivernis Date: Fri, 27 Nov 2020 20:07:49 +0100 Subject: [PATCH] Add prime validation and fix calculation Signed-off-by: trivernis --- Cargo.lock | 123 +++++++++++++++++++++++++++++--- Cargo.toml | 3 +- src/kernel_controller/kernel.cl | 22 +++++- src/kernel_controller/mod.rs | 90 ++++++++++++++++------- src/main.rs | 38 ++++++++-- 5 files changed, 233 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c47ac7..7889ca8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -106,6 +106,12 @@ dependencies = [ "bitflags", ] +[[package]] +name = "const_fn" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c478836e029dcef17fb47c89023448c64f781a046e0300e257ad8225ae59afab" + [[package]] name = "crossbeam" version = "0.7.3" @@ -113,11 +119,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69323bff1fb41c635347b8ead484a5ca6c3f11914d784170b158d8449ab07f8e" dependencies = [ "cfg-if 0.1.10", - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", + "crossbeam-channel 0.4.4", + "crossbeam-deque 0.7.3", + "crossbeam-epoch 0.8.2", "crossbeam-queue", - "crossbeam-utils", + "crossbeam-utils 0.7.2", ] [[package]] @@ -126,21 +132,42 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.7.2", "maybe-uninit", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils 0.8.1", +] + [[package]] name = "crossbeam-deque" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", + "crossbeam-epoch 0.8.2", + "crossbeam-utils 0.7.2", "maybe-uninit", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch 0.9.1", + "crossbeam-utils 0.8.1", +] + [[package]] name = "crossbeam-epoch" version = "0.8.2" @@ -149,10 +176,24 @@ checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" dependencies = [ "autocfg", "cfg-if 0.1.10", - "crossbeam-utils", + "crossbeam-utils 0.7.2", "lazy_static", "maybe-uninit", - "memoffset", + "memoffset 0.5.6", + "scopeguard", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d" +dependencies = [ + "cfg-if 1.0.0", + "const_fn", + "crossbeam-utils 0.8.1", + "lazy_static", + "memoffset 0.6.1", "scopeguard", ] @@ -163,7 +204,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570" dependencies = [ "cfg-if 0.1.10", - "crossbeam-utils", + "crossbeam-utils 0.7.2", "maybe-uninit", ] @@ -178,6 +219,23 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "lazy_static", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + [[package]] name = "enum_primitive" version = "0.1.1" @@ -290,6 +348,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" version = "0.4.3" @@ -393,6 +460,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "object" version = "0.22.0" @@ -544,6 +621,31 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" +[[package]] +name = "rayon" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +dependencies = [ + "autocfg", + "crossbeam-deque 0.8.0", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +dependencies = [ + "crossbeam-channel 0.5.0", + "crossbeam-deque 0.8.0", + "crossbeam-utils 0.8.1", + "lazy_static", + "num_cpus", +] + [[package]] name = "rdrand" version = "0.4.0" @@ -566,6 +668,7 @@ dependencies = [ "lazy_static", "ocl", "parking_lot", + "rayon", "structopt", ] diff --git a/Cargo.toml b/Cargo.toml index db7e5ae..7ecc5aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,4 +10,5 @@ edition = "2018" ocl = "0.19.3" structopt = "0.3.20" lazy_static = "1.4.0" -parking_lot = "0.11.1" \ No newline at end of file +parking_lot = "0.11.1" +rayon = "1.5.0" \ No newline at end of file diff --git a/src/kernel_controller/kernel.cl b/src/kernel_controller/kernel.cl index 053579d..79c8bd6 100644 --- a/src/kernel_controller/kernel.cl +++ b/src/kernel_controller/kernel.cl @@ -10,7 +10,10 @@ __kernel void check_prime_cached(const uint LOWER_PRIME_COUNT, __global const ul ulong num = IN[id]; ulong limit = (ulong) native_sqrt((double) num) + 1; - if (num < 3 || num % 2 == 0) { + if (num == 2 || num == 3) { + OUT[id] = true; + return; + } else if (num == 1) { return; } else { for (uint i = 0; i < LOWER_PRIME_COUNT; i++) { @@ -31,10 +34,23 @@ __kernel void check_prime(__global const ulong *IN, __global bool *OUT) { ulong num = IN[id]; ulong limit = (ulong) native_sqrt((double) num) + 1; - if (num < 3 || num % 2 == 0) { + if (num == 2 || num == 3) { + OUT[id] = true; + return; + } else if (num == 1 || num % 2 == 0) { return; + } + if (limit < 9) { + for (ulong i = 3; i <= limit; i++) { + if (num % i == 0) { + return; + } + } } else { - for (ulong i = 9; i <= limit; i += 6) { + if (num > 3 && num % 3 == 0) { + return; + } + for (ulong i = 9; i <= (limit + 6); i += 6) { if (num % (i - 2) == 0 || num % (i - 4) == 0) { return; } diff --git a/src/kernel_controller/mod.rs b/src/kernel_controller/mod.rs index fdd9ab6..5e2495c 100644 --- a/src/kernel_controller/mod.rs +++ b/src/kernel_controller/mod.rs @@ -8,7 +8,6 @@ use ocl::core::DeviceInfo; use ocl::enums::DeviceInfoResult; use ocl::ProQue; use parking_lot::Mutex; -use std::cmp::max; use std::mem::size_of; use std::sync::Arc; use std::time::Instant; @@ -45,6 +44,50 @@ impl KernelController { } } + /// Filters all primes from the input without using a precalculated list of primes + /// for divisibility checks + pub fn filter_primes_simple(&self, input: Vec) -> ocl::Result> { + let input_buffer = self.pro_que.buffer_builder().len(input.len()).build()?; + input_buffer.write(&input[..]).enq()?; + + let output_buffer = self + .pro_que + .buffer_builder() + .len(input.len()) + .fill_val(0u8) + .build()?; + + let kernel = self + .pro_que + .kernel_builder("check_prime") + .arg(&input_buffer) + .arg(&output_buffer) + .global_work_size(input.len()) + .build()?; + + let start = Instant::now(); + unsafe { + kernel.enq()?; + } + + let mut output = vec![0u8; output_buffer.len()]; + output_buffer.read(&mut output).enq()?; + println!( + "GPU IO + Calculation took {} ms", + start.elapsed().as_secs_f64() * 1000f64 + ); + let primes = input + .iter() + .enumerate() + .filter(|(index, _)| output[*index] == 1) + .map(|(_, v)| *v) + .collect::>(); + + Ok(primes) + } + + /// Filters the primes from a list of numbers by using a precalculated list of primes to check + /// for divisibility pub fn filter_primes(&self, input: Vec) -> ocl::Result> { lazy_static::lazy_static! {static ref PRIME_CACHE: Arc>> = Arc::new(Mutex::new(Vec::new()));} if PRIME_CACHE.lock().len() == 0 { @@ -128,36 +171,28 @@ impl KernelController { fn get_primes(max_number: u64) -> Vec { let start = Instant::now(); let mut primes = Vec::with_capacity((max_number as f64).sqrt() as usize); - let mut num = 3; + let mut num = 1; while num < max_number { let mut is_prime = true; - if num == 2 { + if num == 2 || num == 3 { is_prime = true; - } else if num < 3 || num % 2 == 0 { + } else if num == 1 || num % 2 == 0 { is_prime = false; } else { let check_stop = (num as f64).sqrt().ceil() as u64; - let mut free_check_start = 9; - - for prime in primes.iter().take_while(|num| **num < check_stop) { - let prime = *prime; - free_check_start = prime; - if num % prime == 0 { - is_prime = false; - break; - } - } - if free_check_start < check_stop && is_prime { - free_check_start -= free_check_start % 3; - if free_check_start % 2 == 0 { - free_check_start -= 3; + + if check_stop <= 9 { + for i in (3..check_stop).step_by(2) { + if num % i == 0 { + is_prime = false; + } } - for i in (max(free_check_start, 9)..check_stop).step_by(6) { + } else { + for i in (9..(check_stop + 6)).step_by(6) { if num % (i - 2) == 0 || num % (i - 4) == 0 { is_prime = false; - break; } } } @@ -170,21 +205,26 @@ fn get_primes(max_number: u64) -> Vec { println!( "Generated {} primes on the cpu in {} ms", primes.len(), - start.elapsed().as_secs_f64() * 1000f64 + start.elapsed().as_secs_f64() * 1000f64, ); primes } #[allow(dead_code)] -fn is_prime(number: u64) -> bool { - if number < 3 || number % 2 == 0 { +pub fn is_prime(number: u64) -> bool { + if number == 2 || number == 3 { + return true; + } + if number == 1 || number % 2 == 0 { return false; } - for i in (9..(number as f64).sqrt().ceil() as u64).step_by(6) { - if number % (i - 2) == 0 || number % (i - 4) == 0 { + let limit = (number as f64).sqrt().ceil() as u64; + for i in (3..limit).step_by(2) { + if number % i == 0 { return false; } } + return true; } diff --git a/src/main.rs b/src/main.rs index 09585f3..5f216e1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,7 +4,8 @@ * See LICENSE for more information */ -use crate::kernel_controller::KernelController; +use crate::kernel_controller::{is_prime, KernelController}; +use rayon::prelude::*; use std::fs::{File, OpenOptions}; use std::io::{BufWriter, Write}; use std::mem; @@ -41,6 +42,12 @@ struct CalculatePrimes { #[structopt(long = "numbers-per-step", default_value = "33554432")] numbers_per_step: usize, + + #[structopt(long = "no-cache")] + no_cache: bool, + + #[structopt(long = "cpu-validate")] + cpu_validate: bool, } fn main() -> ocl::Result<()> { @@ -78,7 +85,9 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - if offset % 2 == 0 { offset += 1; } - sender.send(vec![2]).unwrap(); + if offset < 2 { + sender.send(vec![2]).unwrap(); + } loop { let start = Instant::now(); let numbers = (offset..(prime_opts.numbers_per_step as u64 * 2 + offset)) @@ -89,7 +98,11 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - numbers.len(), offset ); - let primes = controller.filter_primes(numbers)?; + let primes = if prime_opts.no_cache { + controller.filter_primes_simple(numbers)? + } else { + controller.filter_primes(numbers)? + }; let elapsed_ms = start.elapsed().as_secs_f64() * 1000f64; println!( @@ -98,11 +111,28 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - elapsed_ms, prime_opts.numbers_per_step as f64 / start.elapsed().as_secs_f64() ); - println!(); timings .write_all(format!("{},{},{}\n", offset, primes.len(), elapsed_ms).as_bytes()) .unwrap(); timings.flush().unwrap(); + + if prime_opts.cpu_validate { + println!("Validating..."); + let failures = primes + .par_iter() + .filter(|n| !is_prime(**n)) + .collect::>(); + if failures.len() > 0 { + println!( + "{} failures in prime calculation: {:?}", + failures.len(), + failures + ); + } else { + println!("No failures found."); + } + } + println!(); sender.send(primes).unwrap(); if (prime_opts.numbers_per_step as u128 * 2 + offset as u128)