diff --git a/Cargo.lock b/Cargo.lock index 55b1ec7..e94c4c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -783,6 +783,7 @@ dependencies = [ "indicatif", "lazy_static", "log", + "num_cpus", "ocl", "ocl-stream", "parking_lot", diff --git a/Cargo.toml b/Cargo.toml index d7ac1b7..bfd8729 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,4 +20,5 @@ colored = "2.0.0" chrono = "0.4.19" indicatif = "0.15.0" clap = "2.33.3" -std-semaphore = "0.1.0" \ No newline at end of file +std-semaphore = "0.1.0" +num_cpus = "1.13.0" \ No newline at end of file diff --git a/src/benching/result.rs b/src/benching/result.rs index 30007a4..beb5199 100644 --- a/src/benching/result.rs +++ b/src/benching/result.rs @@ -12,7 +12,7 @@ pub struct ProfiledResult where T: Send + Sync + Clone, { - gpu_duration: Duration, + duration: Duration, value: T, } @@ -20,17 +20,14 @@ impl ProfiledResult where T: Send + Sync + Clone, { - /// Creates a new profiled result with the given duraiton and value - pub fn new(gpu_duration: Duration, value: T) -> Self { - Self { - gpu_duration, - value, - } + /// Creates a new profiled result with the given duration and value + pub fn new(duration: Duration, value: T) -> Self { + Self { duration, value } } - /// Returns the execution duration on the gpu - pub fn gpu_duration(&self) -> &Duration { - &self.gpu_duration + /// Returns the execution duration + pub fn duration(&self) -> &Duration { + &self.duration } /// Returns the value of the result diff --git a/src/kernel_controller/mod.rs b/src/kernel_controller/mod.rs index 983d8e0..72e2bc7 100644 --- a/src/kernel_controller/mod.rs +++ b/src/kernel_controller/mod.rs @@ -46,6 +46,8 @@ impl KernelController { DeviceInfo::DriverVersion, DeviceInfo::ExecutionCapabilities, DeviceInfo::MaxComputeUnits, + DeviceInfo::MaxWorkItemSizes, + DeviceInfo::MaxWorkItemDimensions, DeviceInfo::MaxWorkGroupSize, DeviceInfo::MaxClockFrequency, DeviceInfo::GlobalMemSize, diff --git a/src/kernel_controller/primes.rs b/src/kernel_controller/primes.rs index 67dc4d5..eef0df0 100644 --- a/src/kernel_controller/primes.rs +++ b/src/kernel_controller/primes.rs @@ -21,6 +21,49 @@ use std_semaphore::Semaphore; const MEMORY_LIMIT: u64 = 4 * 1024 * 1024 * 1024; impl KernelController { + /// Calculates prime number on the cpu + pub fn calculate_primes_cpu( + &mut self, + mut start: u64, + stop: u64, + step: usize, + ) -> OCLStream>> { + if start % 2 == 0 { + start += 1; + } + log::debug!( + "Calculating primes between {} and {} with {} number per step on the cpu", + start, + stop, + step, + ); + let offset = Arc::new(AtomicU64::new(start)); + let pb = get_progress_bar((stop - start) / (step * 2) as u64); + + self.executor.execute_bounded(step * 10, move |ctx| { + loop { + if offset.load(Ordering::SeqCst) >= stop { + log::trace!("Stop reached."); + break; + } + let offset = offset.fetch_add(step as u64 * 2, Ordering::SeqCst); + log::trace!("Calculating {} primes beginning from {}", step, offset); + let start = Instant::now(); + + let primes = (offset..(step as u64 * 2 + offset)) + .step_by(2) + .filter(|n| is_prime(*n)) + .collect::>(); + + ctx.sender() + .send(ProfiledResult::new(start.elapsed(), primes))?; + pb.tick(); + } + + Ok(()) + }) + } + /// Calculates prime numbers on the gpu pub fn calculate_primes( &self, @@ -199,29 +242,8 @@ fn get_primes(max_number: u64) -> Vec { let mut num = 1; while num < max_number { - let mut is_prime = true; + let is_prime = is_prime(num); - if num == 2 || num == 3 { - is_prime = true; - } else if num == 1 || num % 2 == 0 { - is_prime = false; - } else { - let check_stop = (num as f64).sqrt().ceil() as u64; - - if check_stop <= 9 { - for i in (3..check_stop).step_by(2) { - if num % i == 0 { - is_prime = false; - } - } - } else { - for i in (9..(check_stop + 6)).step_by(6) { - if num % (i - 2) == 0 || num % (i - 4) == 0 { - is_prime = false; - } - } - } - } if is_prime { primes.push(num) } @@ -236,22 +258,32 @@ fn get_primes(max_number: u64) -> Vec { primes } -/// Checks if a number is a prime number -pub fn is_prime(number: u64) -> bool { - if number == 2 || number == 3 { - return true; - } - if number == 1 || number % 2 == 0 { - return false; - } - let limit = (number as f64).sqrt().ceil() as u64; - for i in (3..limit).step_by(2) { - if number % i == 0 { - return false; +/// Checks if a given number is a prime number +pub(crate) fn is_prime(num: u64) -> bool { + let mut is_prime = true; + + if num == 2 || num == 3 { + is_prime = true; + } else if num == 1 || num % 2 == 0 { + is_prime = false; + } else { + let check_stop = (num as f64).sqrt().ceil() as u64; + + if check_stop <= 9 { + for i in (3..check_stop).step_by(2) { + if num % i == 0 { + is_prime = false; + } + } + } else { + for i in (9..(check_stop + 6)).step_by(6) { + if num % (i - 2) == 0 || num % (i - 4) == 0 { + is_prime = false; + } + } } } - - return true; + is_prime } #[inline] diff --git a/src/main.rs b/src/main.rs index 65103c1..27499a8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -62,7 +62,7 @@ fn calculate_primes( let csv_file = open_write_buffered(&prime_opts.timings_file); let mut csv_writer = - ThreadedCSVWriter::new(csv_file, &["timestamp", "first", "count", "gpu_duration"]); + ThreadedCSVWriter::new(csv_file, &["timestamp", "first", "count", "duration"]); let output_writer = if use_stdout { ThreadedWriter::new(io::stdout(), |v: Vec| { @@ -82,13 +82,21 @@ fn calculate_primes( }) }; - let mut stream = controller.calculate_primes( - prime_opts.start_offset, - prime_opts.max_number, - prime_opts.numbers_per_step, - prime_opts.local_size.unwrap_or(128), - !prime_opts.no_cache, - ); + let mut stream = if prime_opts.use_cpu { + controller.calculate_primes_cpu( + prime_opts.start_offset, + prime_opts.max_number, + prime_opts.numbers_per_step, + ) + } else { + controller.calculate_primes( + prime_opts.start_offset, + prime_opts.max_number, + prime_opts.numbers_per_step, + prime_opts.local_size.unwrap_or(128), + !prime_opts.no_cache, + ) + }; while let Ok(r) = stream.next() { let primes = r.value(); if prime_opts.cpu_validate { @@ -98,14 +106,14 @@ fn calculate_primes( log::debug!( "Calculated {} primes in {:?}, offset: {}", primes.len(), - r.gpu_duration(), + r.duration(), first ); csv_writer.add_row(vec![ Local::now().format("%Y-%m-%dT%H:%M:%S.%f").to_string(), first.to_string(), primes.len().to_string(), - duration_to_ms_string(r.gpu_duration()), + duration_to_ms_string(r.duration()), ]); output_writer.write(primes.clone()); } diff --git a/src/utils/args.rs b/src/utils/args.rs index b43f833..ba2cc5f 100644 --- a/src/utils/args.rs +++ b/src/utils/args.rs @@ -67,6 +67,10 @@ pub struct CalculatePrimes { /// If the calculated prime numbers should be validated on the cpu by a simple prime algorithm #[structopt(long = "cpu-validate")] pub cpu_validate: bool, + + /// Calculates primes on the cpu instead using the same algorithm + #[structopt(long = "use-cpu")] + pub use_cpu: bool, } #[derive(StructOpt, Clone, Debug)]