Add flag to benchmark prime calculation speed on the cpu

Signed-off-by: Trivernis <trivernis@protonmail.com>
main
Trivernis 3 years ago
parent 0f2fa4039d
commit 385e7ab492
No known key found for this signature in database
GPG Key ID: EB543D89E02BC83F

1
Cargo.lock generated

@ -783,6 +783,7 @@ dependencies = [
"indicatif",
"lazy_static",
"log",
"num_cpus",
"ocl",
"ocl-stream",
"parking_lot",

@ -20,4 +20,5 @@ colored = "2.0.0"
chrono = "0.4.19"
indicatif = "0.15.0"
clap = "2.33.3"
std-semaphore = "0.1.0"
std-semaphore = "0.1.0"
num_cpus = "1.13.0"

@ -12,7 +12,7 @@ pub struct ProfiledResult<T>
where
T: Send + Sync + Clone,
{
gpu_duration: Duration,
duration: Duration,
value: T,
}
@ -20,17 +20,14 @@ impl<T> ProfiledResult<T>
where
T: Send + Sync + Clone,
{
/// Creates a new profiled result with the given duraiton and value
pub fn new(gpu_duration: Duration, value: T) -> Self {
Self {
gpu_duration,
value,
}
/// Creates a new profiled result with the given duration and value
pub fn new(duration: Duration, value: T) -> Self {
Self { duration, value }
}
/// Returns the execution duration on the gpu
pub fn gpu_duration(&self) -> &Duration {
&self.gpu_duration
/// Returns the execution duration
pub fn duration(&self) -> &Duration {
&self.duration
}
/// Returns the value of the result

@ -46,6 +46,8 @@ impl KernelController {
DeviceInfo::DriverVersion,
DeviceInfo::ExecutionCapabilities,
DeviceInfo::MaxComputeUnits,
DeviceInfo::MaxWorkItemSizes,
DeviceInfo::MaxWorkItemDimensions,
DeviceInfo::MaxWorkGroupSize,
DeviceInfo::MaxClockFrequency,
DeviceInfo::GlobalMemSize,

@ -21,6 +21,49 @@ use std_semaphore::Semaphore;
const MEMORY_LIMIT: u64 = 4 * 1024 * 1024 * 1024;
impl KernelController {
/// Calculates prime numbers on the cpu.
///
/// Only odd numbers are tested. Beginning at `start` (bumped to the next odd
/// number when it is even), the candidates are handed out in chunks of `step`
/// odd numbers each. Every chunk is filtered with `is_prime` and sent through
/// the returned stream as a `ProfiledResult` that carries the wall-clock time
/// spent on that chunk together with the primes found in it.
///
/// * `start` - first number to consider
/// * `stop`  - chunks whose first candidate is `>= stop` are not calculated;
///   the last calculated chunk is always a full chunk and may therefore
///   contain candidates beyond `stop`
/// * `step`  - number of odd candidates per chunk; `0` is treated as `1`
pub fn calculate_primes_cpu(
    &mut self,
    mut start: u64,
    stop: u64,
    step: usize,
) -> OCLStream<ProfiledResult<Vec<u64>>> {
    if start % 2 == 0 {
        start += 1;
    }
    // A step of zero would divide by zero below and never advance the offset.
    let step = step.max(1);
    // `step` odd candidates cover twice as many consecutive numbers.
    let chunk_span = (step as u64).saturating_mul(2);
    log::debug!(
        "Calculating primes between {} and {} with {} number per step on the cpu",
        start,
        stop,
        step,
    );
    let offset = Arc::new(AtomicU64::new(start));
    // One progress unit per chunk; an empty range (start >= stop) yields zero
    // units instead of underflowing.
    let pb = get_progress_bar(stop.saturating_sub(start) / chunk_span);

    self.executor.execute_bounded(step * 10, move |ctx| {
        loop {
            // Claim the next chunk and test it against `stop` with a single
            // atomic operation, so that concurrent invocations sharing
            // `offset` can never pass the check together and then claim
            // chunks that lie past `stop`.
            let chunk_start = offset.fetch_add(chunk_span, Ordering::SeqCst);
            if chunk_start >= stop {
                log::trace!("Stop reached.");
                break;
            }
            log::trace!("Calculating {} primes beginning from {}", step, chunk_start);
            let timer = Instant::now();
            let primes = (chunk_start..chunk_start.saturating_add(chunk_span))
                .step_by(2)
                .filter(|n| is_prime(*n))
                .collect::<Vec<u64>>();
            ctx.sender()
                .send(ProfiledResult::new(timer.elapsed(), primes))?;
            // NOTE(review): if `pb` is an indicatif `ProgressBar`, `tick()` only
            // redraws and does not advance the position; `inc(1)` may be what
            // is intended here - confirm against `get_progress_bar`.
            pb.tick();
        }

        Ok(())
    })
}
/// Calculates prime numbers on the gpu
pub fn calculate_primes(
&self,
@ -199,29 +242,8 @@ fn get_primes(max_number: u64) -> Vec<u64> {
let mut num = 1;
while num < max_number {
let mut is_prime = true;
let is_prime = is_prime(num);
if num == 2 || num == 3 {
is_prime = true;
} else if num == 1 || num % 2 == 0 {
is_prime = false;
} else {
let check_stop = (num as f64).sqrt().ceil() as u64;
if check_stop <= 9 {
for i in (3..check_stop).step_by(2) {
if num % i == 0 {
is_prime = false;
}
}
} else {
for i in (9..(check_stop + 6)).step_by(6) {
if num % (i - 2) == 0 || num % (i - 4) == 0 {
is_prime = false;
}
}
}
}
if is_prime {
primes.push(num)
}
@ -236,22 +258,32 @@ fn get_primes(max_number: u64) -> Vec<u64> {
primes
}
/// Checks if a number is a prime number
pub fn is_prime(number: u64) -> bool {
if number == 2 || number == 3 {
return true;
}
if number == 1 || number % 2 == 0 {
return false;
}
let limit = (number as f64).sqrt().ceil() as u64;
for i in (3..limit).step_by(2) {
if number % i == 0 {
return false;
/// Checks if a given number is a prime number
pub(crate) fn is_prime(num: u64) -> bool {
let mut is_prime = true;
if num == 2 || num == 3 {
is_prime = true;
} else if num == 1 || num % 2 == 0 {
is_prime = false;
} else {
let check_stop = (num as f64).sqrt().ceil() as u64;
if check_stop <= 9 {
for i in (3..check_stop).step_by(2) {
if num % i == 0 {
is_prime = false;
}
}
} else {
for i in (9..(check_stop + 6)).step_by(6) {
if num % (i - 2) == 0 || num % (i - 4) == 0 {
is_prime = false;
}
}
}
}
return true;
is_prime
}
#[inline]

@ -62,7 +62,7 @@ fn calculate_primes(
let csv_file = open_write_buffered(&prime_opts.timings_file);
let mut csv_writer =
ThreadedCSVWriter::new(csv_file, &["timestamp", "first", "count", "gpu_duration"]);
ThreadedCSVWriter::new(csv_file, &["timestamp", "first", "count", "duration"]);
let output_writer = if use_stdout {
ThreadedWriter::new(io::stdout(), |v: Vec<u64>| {
@ -82,13 +82,21 @@ fn calculate_primes(
})
};
let mut stream = controller.calculate_primes(
prime_opts.start_offset,
prime_opts.max_number,
prime_opts.numbers_per_step,
prime_opts.local_size.unwrap_or(128),
!prime_opts.no_cache,
);
let mut stream = if prime_opts.use_cpu {
controller.calculate_primes_cpu(
prime_opts.start_offset,
prime_opts.max_number,
prime_opts.numbers_per_step,
)
} else {
controller.calculate_primes(
prime_opts.start_offset,
prime_opts.max_number,
prime_opts.numbers_per_step,
prime_opts.local_size.unwrap_or(128),
!prime_opts.no_cache,
)
};
while let Ok(r) = stream.next() {
let primes = r.value();
if prime_opts.cpu_validate {
@ -98,14 +106,14 @@ fn calculate_primes(
log::debug!(
"Calculated {} primes in {:?}, offset: {}",
primes.len(),
r.gpu_duration(),
r.duration(),
first
);
csv_writer.add_row(vec![
Local::now().format("%Y-%m-%dT%H:%M:%S.%f").to_string(),
first.to_string(),
primes.len().to_string(),
duration_to_ms_string(r.gpu_duration()),
duration_to_ms_string(r.duration()),
]);
output_writer.write(primes.clone());
}

@ -67,6 +67,10 @@ pub struct CalculatePrimes {
/// If the calculated prime numbers should be validated on the cpu by a simple prime algorithm
#[structopt(long = "cpu-validate")]
pub cpu_validate: bool,
/// Calculates primes on the cpu instead using the same algorithm
#[structopt(long = "use-cpu")]
pub use_cpu: bool,
}
#[derive(StructOpt, Clone, Debug)]

Loading…
Cancel
Save