From c093c2594663617615ac53d90895884cc2699058 Mon Sep 17 00:00:00 2001 From: trivernis Date: Thu, 26 Nov 2020 11:46:05 +0100 Subject: [PATCH] Add prime caching to speed up division tests Signed-off-by: trivernis --- .gitignore | 3 +- Cargo.lock | 66 +++++++++++++++++++++++++++++++++ Cargo.toml | 3 +- src/kernel_controller/kernel.cl | 9 +++-- src/kernel_controller/mod.rs | 40 ++++++++++++++++---- src/main.rs | 22 ++++++++++- 6 files changed, 128 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 69d2e67..1f283e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target .idea -primes.txt \ No newline at end of file +primes.txt +timings.csv \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 818ac24..3c47ac7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,6 +97,15 @@ dependencies = [ "vec_map", ] +[[package]] +name = "cloudabi" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4344512281c643ae7638bbabc3af17a11307803ec8f0fcad9fae512a8bf36467" +dependencies = [ + "bitflags", +] + [[package]] name = "crossbeam" version = "0.7.3" @@ -236,6 +245,15 @@ dependencies = [ "libc", ] +[[package]] +name = "instant" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -248,6 +266,15 @@ version = "0.2.80" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614" +[[package]] +name = "lock_api" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" +dependencies = [ + "scopeguard", +] + [[package]] name = "maybe-uninit" version = "2.0.0" @@ -411,6 +438,32 @@ dependencies = [ "num", ] +[[package]] +name = "parking_lot" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c361aa727dd08437f2f1447be8b59a33b0edd15e0fcee698f935613d9efbca9b" +dependencies = [ + "cfg-if 0.1.10", + "cloudabi", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -500,12 +553,19 @@ dependencies = [ "rand_core 0.3.1", ] +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + [[package]] name = "rust-opencl-demo" version = "0.1.0" dependencies = [ "lazy_static", "ocl", + "parking_lot", "structopt", ] @@ -542,6 +602,12 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" +[[package]] +name = "smallvec" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7acad6f34eb9e8a259d3283d1e8c1d34d7415943d4895f65cc73813c7396fc85" + [[package]] name = "strsim" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index 45f10e6..db7e5ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,4 +9,5 @@ edition = "2018" [dependencies] ocl = "0.19.3" structopt = "0.3.20" -lazy_static = "1.4.0" \ No newline at end of file +lazy_static = "1.4.0" +parking_lot = "0.11.1" \ No newline at end of file diff --git a/src/kernel_controller/kernel.cl b/src/kernel_controller/kernel.cl index f9e17df..55b3419 100644 --- a/src/kernel_controller/kernel.cl +++ b/src/kernel_controller/kernel.cl @@ -5,16 +5,17 @@ */ -__kernel void check_prime(const int LOWER_PRIME_COUNT, __global const int *LOWER_PRIMES, __global const long *IN, __global bool *OUT) { +__kernel void check_prime(const int LOWER_PRIME_COUNT, __global const long *LOWER_PRIMES, __global const long *IN, __global bool *OUT) { int id = get_global_id(0); long num = IN[id]; bool prime = true; + long limit = (long) sqrt((double) num) + 1; if (num < 3 || num % 2 == 0) { prime = false; } else { for (int i = 0; i < LOWER_PRIME_COUNT; i++) { - if (LOWER_PRIMES[i] >= num) { + if (LOWER_PRIMES[i] >= limit) { break; } if (num % LOWER_PRIMES[i] == 0) { @@ -29,8 +30,8 @@ __kernel void check_prime(const int LOWER_PRIME_COUNT, __global const int *LOWER start -= 3; } - if (prime && start < num) { - for (long i = start; i <= sqrt((double) num); i += 6) { + if (prime && start < limit) { + for (long i = start; i <= limit; i += 6) { if (num % (i - 2) == 0 || num % (i - 4) == 0) { prime = false; break; diff --git a/src/kernel_controller/mod.rs b/src/kernel_controller/mod.rs index f078ebf..f38f6bb 100644 --- a/src/kernel_controller/mod.rs +++ b/src/kernel_controller/mod.rs @@ -5,6 +5,9 @@ */ use ocl::ProQue; +use parking_lot::Mutex; +use std::sync::Arc; +use std::time::Instant; pub struct KernelController { pro_que: ProQue, @@ -20,10 +23,15 @@ impl KernelController { } pub fn filter_primes(&self, input: Vec) -> ocl::Result> { - lazy_static::lazy_static! {static ref PRIMES: Vec = get_lower_primes();} + lazy_static::lazy_static! {static ref PRIME_CACHE: Arc>> = Arc::new(Mutex::new(get_lower_primes(2048)));} - let prime_buffer = self.pro_que.buffer_builder().len(PRIMES.len()).build()?; - prime_buffer.write(&PRIMES[..]).enq()?; + let prime_buffer = self + .pro_que + .buffer_builder() + .len(PRIME_CACHE.lock().len()) + .build()?; + + prime_buffer.write(&PRIME_CACHE.lock()[..]).enq()?; let input_buffer = self.pro_que.buffer_builder().len(input.len()).build()?; input_buffer.write(&input[..]).enq()?; @@ -55,27 +63,43 @@ impl KernelController { let mut input_o = vec![0i64; input_buffer.len()]; input_buffer.read(&mut input_o).enq()?; - Ok(input + let primes = input .iter() .enumerate() .filter(|(index, _)| output[*index] == 1) .map(|(_, v)| *v) - .collect()) + .collect::>(); + + let start = Instant::now(); + let mut prime_cache = PRIME_CACHE.lock(); + + if prime_cache.len() < 1024 * 1024 * 1024 { + prime_cache.append(&mut primes.clone()); + prime_cache.sort(); + prime_cache.dedup(); + } + println!( + "Prime caching took: {} ms, size: {}", + start.elapsed().as_secs_f64() * 1000f64, + prime_cache.len(), + ); + + Ok(primes) } } /// Returns a list of prime numbers that can be used to speed up the divisibility check -fn get_lower_primes() -> Vec { +fn get_lower_primes(count: usize) -> Vec { let mut primes = Vec::new(); let mut num = 3; - while primes.len() < 1024 { + while primes.len() < count { let mut is_prime = true; if num < 3 || num % 2 == 0 { is_prime = false; } else { - for i in (3..((num as f32).sqrt().ceil() as i32)).step_by(2) { + for i in (3..((num as f64).sqrt().ceil() as i64)).step_by(2) { if num % i == 0 { is_prime = false; break; diff --git a/src/main.rs b/src/main.rs index 9c0eeb7..fbc68d7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -36,6 +36,9 @@ struct CalculatePrimes { #[structopt(short = "o", long = "output", default_value = "primes.txt")] output_file: PathBuf, + + #[structopt(long = "timings-output", default_value = "timings.csv")] + timings_file: PathBuf, } fn main() -> ocl::Result<()> { @@ -56,6 +59,17 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - .open(prime_opts.output_file) .unwrap(), ); + let mut timings = BufWriter::new( + OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(prime_opts.timings_file) + .unwrap(), + ); + timings + .write_all("offset,count,duration\n".as_bytes()) + .unwrap(); let (sender, handle) = create_write_thread(output); let mut offset = prime_opts.start_offset; @@ -69,12 +83,18 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - .collect::>(); println!("Filtering primes from {} numbers", numbers.len()); let primes = controller.filter_primes(numbers)?; + let elapsed_ms = start.elapsed().as_secs_f64() * 1000f64; + println!( "Calculated {} primes in {:.4} ms: {:.4} checks/s", primes.len(), - start.elapsed().as_secs_f64() * 1000f64, + elapsed_ms, COUNT as f64 / start.elapsed().as_secs_f64() ); + timings + .write_all(format!("{},{},{}\n", offset, primes.len(), elapsed_ms).as_bytes()) + .unwrap(); + timings.flush().unwrap(); sender.send(primes).unwrap(); if (COUNT as i128 * 2 + offset as i128) > prime_opts.max_number as i128 {