diff --git a/src/kernel_controller/mod.rs b/src/kernel_controller/mod.rs index 5e2495c..60fb75d 100644 --- a/src/kernel_controller/mod.rs +++ b/src/kernel_controller/mod.rs @@ -10,7 +10,7 @@ use ocl::ProQue; use parking_lot::Mutex; use std::mem::size_of; use std::sync::Arc; -use std::time::Instant; +use std::time::{Duration, Instant}; pub struct KernelController { pro_que: ProQue, @@ -46,7 +46,7 @@ impl KernelController { /// Filters all primes from the input without using a precalculated list of primes /// for divisibility checks - pub fn filter_primes_simple(&self, input: Vec) -> ocl::Result> { + pub fn filter_primes_simple(&self, input: Vec) -> ocl::Result { let input_buffer = self.pro_que.buffer_builder().len(input.len()).build()?; input_buffer.write(&input[..]).enq()?; @@ -72,10 +72,13 @@ impl KernelController { let mut output = vec![0u8; output_buffer.len()]; output_buffer.read(&mut output).enq()?; + let gpu_calc_duration = start.elapsed(); println!( "GPU IO + Calculation took {} ms", - start.elapsed().as_secs_f64() * 1000f64 + gpu_calc_duration.as_secs_f64() * 1000f64 ); + + let filter_start = Instant::now(); let primes = input .iter() .enumerate() @@ -83,12 +86,16 @@ impl KernelController { .map(|(_, v)| *v) .collect::>(); - Ok(primes) + Ok(PrimeCalculationResult { + primes, + filter_duration: filter_start.elapsed(), + gpu_duration: gpu_calc_duration, + }) } /// Filters the primes from a list of numbers by using a precalculated list of primes to check /// for divisibility - pub fn filter_primes(&self, input: Vec) -> ocl::Result> { + pub fn filter_primes(&self, input: Vec) -> ocl::Result { lazy_static::lazy_static! {static ref PRIME_CACHE: Arc>> = Arc::new(Mutex::new(Vec::new()));} if PRIME_CACHE.lock().len() == 0 { PRIME_CACHE.lock().append(&mut get_primes( @@ -132,22 +139,23 @@ impl KernelController { let mut output = vec![0u8; output_buffer.len()]; output_buffer.read(&mut output).enq()?; - let mut input_o = vec![0u64; input_buffer.len()]; - input_buffer.read(&mut input_o).enq()?; + let gpu_calc_duration = start.elapsed(); println!( "GPU IO + Calculation took {} ms", - start.elapsed().as_secs_f64() * 1000f64 + gpu_calc_duration.as_secs_f64() * 1000f64 ); + let prime_filter_start = Instant::now(); let primes = input .iter() .enumerate() .filter(|(index, _)| output[*index] == 1) .map(|(_, v)| *v) .collect::>(); + let filter_duration = prime_filter_start.elapsed(); - let start = Instant::now(); + let prime_calc_start = Instant::now(); let mut prime_cache = PRIME_CACHE.lock(); if (prime_cache.len() + primes.len()) * size_of::() @@ -157,13 +165,18 @@ impl KernelController { prime_cache.sort(); prime_cache.dedup(); } + let cache_duration = prime_calc_start.elapsed(); println!( "Prime caching took: {} ms, size: {}", - start.elapsed().as_secs_f64() * 1000f64, + cache_duration.as_secs_f64() * 1000f64, prime_cache.len(), ); - Ok(primes) + Ok(PrimeCalculationResult { + primes, + gpu_duration: gpu_calc_duration, + filter_duration, + }) } } @@ -228,3 +241,9 @@ pub fn is_prime(number: u64) -> bool { return true; } + +pub struct PrimeCalculationResult { + pub primes: Vec, + pub gpu_duration: Duration, + pub filter_duration: Duration, +} diff --git a/src/main.rs b/src/main.rs index 5f216e1..e31d19c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ mod kernel_controller; #[derive(StructOpt, Clone, Debug)] #[structopt()] enum Opts { + /// Calculates primes on the GPU #[structopt(name = "calculate-primes")] CalculatePrimes(CalculatePrimes), } @@ -34,18 +35,25 @@ struct CalculatePrimes { #[structopt(long = "end", default_value = "9223372036854775807")] max_number: u64, + /// The output file for the calculated prime numbers #[structopt(short = "o", long = "output", default_value = "primes.txt")] output_file: PathBuf, + /// The output file for timings #[structopt(long = "timings-output", default_value = "timings.csv")] timings_file: PathBuf, + /// The amount of numbers that are checked per step. Even numbers are ignored so the + /// Range actually goes to numbers_per_step * 2. #[structopt(long = "numbers-per-step", default_value = "33554432")] numbers_per_step: usize, + /// If the prime numbers should be used for the divisibility check instead of using + /// an optimized auto-increment loop. #[structopt(long = "no-cache")] no_cache: bool, + /// If the calculated prime numbers should be validated on the cpu by a simple prime algorithm #[structopt(long = "cpu-validate")] cpu_validate: bool, } @@ -77,7 +85,7 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - .unwrap(), ); timings - .write_all("offset,count,duration\n".as_bytes()) + .write_all("offset,count,gpu_duration,filter_duration,duration\n".as_bytes()) .unwrap(); let (sender, handle) = create_write_thread(output); @@ -98,11 +106,12 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - numbers.len(), offset ); - let primes = if prime_opts.no_cache { + let prime_result = if prime_opts.no_cache { controller.filter_primes_simple(numbers)? } else { controller.filter_primes(numbers)? }; + let primes = prime_result.primes; let elapsed_ms = start.elapsed().as_secs_f64() * 1000f64; println!( @@ -112,25 +121,22 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - prime_opts.numbers_per_step as f64 / start.elapsed().as_secs_f64() ); timings - .write_all(format!("{},{},{}\n", offset, primes.len(), elapsed_ms).as_bytes()) + .write_all( + format!( + "{},{},{},{},{}\n", + offset, + primes.len(), + prime_result.gpu_duration.as_secs_f64() * 1000f64, + prime_result.filter_duration.as_secs_f64() * 1000f64, + elapsed_ms + ) + .as_bytes(), + ) .unwrap(); timings.flush().unwrap(); if prime_opts.cpu_validate { - println!("Validating..."); - let failures = primes - .par_iter() - .filter(|n| !is_prime(**n)) - .collect::>(); - if failures.len() > 0 { - println!( - "{} failures in prime calculation: {:?}", - failures.len(), - failures - ); - } else { - println!("No failures found."); - } + validate_primes_on_cpu(&primes) } println!(); sender.send(primes).unwrap(); @@ -149,6 +155,23 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) - Ok(()) } +fn validate_primes_on_cpu(primes: &Vec) { + println!("Validating..."); + let failures = primes + .par_iter() + .filter(|n| !is_prime(**n)) + .collect::>(); + if failures.len() > 0 { + println!( + "{} failures in prime calculation: {:?}", + failures.len(), + failures + ); + } else { + println!("No failures found."); + } +} + fn create_write_thread(mut writer: BufWriter) -> (Sender>, JoinHandle<()>) { let (tx, rx) = channel(); let handle = thread::spawn(move || {