Add comments and more stats to timings.csv

Signed-off-by: trivernis <trivernis@protonmail.com>
pull/1/head
trivernis 4 years ago
parent 50d202d3a2
commit 5a5d25a08b
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

@ -10,7 +10,7 @@ use ocl::ProQue;
use parking_lot::Mutex;
use std::mem::size_of;
use std::sync::Arc;
use std::time::Instant;
use std::time::{Duration, Instant};
pub struct KernelController {
pro_que: ProQue,
@ -46,7 +46,7 @@ impl KernelController {
/// Filters all primes from the input without using a precalculated list of primes
/// for divisibility checks
pub fn filter_primes_simple(&self, input: Vec<u64>) -> ocl::Result<Vec<u64>> {
pub fn filter_primes_simple(&self, input: Vec<u64>) -> ocl::Result<PrimeCalculationResult> {
let input_buffer = self.pro_que.buffer_builder().len(input.len()).build()?;
input_buffer.write(&input[..]).enq()?;
@ -72,10 +72,13 @@ impl KernelController {
let mut output = vec![0u8; output_buffer.len()];
output_buffer.read(&mut output).enq()?;
let gpu_calc_duration = start.elapsed();
println!(
"GPU IO + Calculation took {} ms",
start.elapsed().as_secs_f64() * 1000f64
gpu_calc_duration.as_secs_f64() * 1000f64
);
let filter_start = Instant::now();
let primes = input
.iter()
.enumerate()
@ -83,12 +86,16 @@ impl KernelController {
.map(|(_, v)| *v)
.collect::<Vec<u64>>();
Ok(primes)
Ok(PrimeCalculationResult {
primes,
filter_duration: filter_start.elapsed(),
gpu_duration: gpu_calc_duration,
})
}
/// Filters the primes from a list of numbers by using a precalculated list of primes to check
/// for divisibility
pub fn filter_primes(&self, input: Vec<u64>) -> ocl::Result<Vec<u64>> {
pub fn filter_primes(&self, input: Vec<u64>) -> ocl::Result<PrimeCalculationResult> {
lazy_static::lazy_static! {static ref PRIME_CACHE: Arc<Mutex<Vec<u64>>> = Arc::new(Mutex::new(Vec::new()));}
if PRIME_CACHE.lock().len() == 0 {
PRIME_CACHE.lock().append(&mut get_primes(
@ -132,22 +139,23 @@ impl KernelController {
let mut output = vec![0u8; output_buffer.len()];
output_buffer.read(&mut output).enq()?;
let mut input_o = vec![0u64; input_buffer.len()];
input_buffer.read(&mut input_o).enq()?;
let gpu_calc_duration = start.elapsed();
println!(
"GPU IO + Calculation took {} ms",
start.elapsed().as_secs_f64() * 1000f64
gpu_calc_duration.as_secs_f64() * 1000f64
);
let prime_filter_start = Instant::now();
let primes = input
.iter()
.enumerate()
.filter(|(index, _)| output[*index] == 1)
.map(|(_, v)| *v)
.collect::<Vec<u64>>();
let filter_duration = prime_filter_start.elapsed();
let start = Instant::now();
let prime_calc_start = Instant::now();
let mut prime_cache = PRIME_CACHE.lock();
if (prime_cache.len() + primes.len()) * size_of::<i64>()
@ -157,13 +165,18 @@ impl KernelController {
prime_cache.sort();
prime_cache.dedup();
}
let cache_duration = prime_calc_start.elapsed();
println!(
"Prime caching took: {} ms, size: {}",
start.elapsed().as_secs_f64() * 1000f64,
cache_duration.as_secs_f64() * 1000f64,
prime_cache.len(),
);
Ok(primes)
Ok(PrimeCalculationResult {
primes,
gpu_duration: gpu_calc_duration,
filter_duration,
})
}
}
@ -228,3 +241,9 @@ pub fn is_prime(number: u64) -> bool {
return true;
}
/// Outcome of one prime-filtering pass together with the timings measured while producing it.
///
/// Returned by the `KernelController` filter methods so callers can both consume the primes
/// and log how long the individual phases took.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrimeCalculationResult {
    /// The input numbers that the kernel flagged as prime.
    pub primes: Vec<u64>,
    /// Time from the start of the GPU pass until the result buffers were read back
    /// (reported by the controller as "GPU IO + Calculation").
    pub gpu_duration: Duration,
    /// Time spent on the host selecting the flagged numbers out of the input.
    pub filter_duration: Duration,
}

@ -20,6 +20,7 @@ mod kernel_controller;
// Top-level command-line interface: each variant is one subcommand, parsed through the
// `StructOpt` derive. Plain `//` comments are used for maintainer notes here on purpose:
// NOTE(review): `///` comments on StructOpt items are presumably picked up as help text,
// so editing them changes what the CLI prints — confirm before rewording them.
#[derive(StructOpt, Clone, Debug)]
#[structopt()]
enum Opts {
/// Calculates primes on the GPU
// Exposed on the command line under the name `calculate-primes`; its arguments are the
// fields of the `CalculatePrimes` struct.
#[structopt(name = "calculate-primes")]
CalculatePrimes(CalculatePrimes),
}
@ -34,18 +35,25 @@ struct CalculatePrimes {
#[structopt(long = "end", default_value = "9223372036854775807")]
max_number: u64,
/// The output file for the calculated prime numbers
#[structopt(short = "o", long = "output", default_value = "primes.txt")]
output_file: PathBuf,
/// The output file for timings
#[structopt(long = "timings-output", default_value = "timings.csv")]
timings_file: PathBuf,
/// The amount of numbers that are checked per step. Even numbers are ignored so the
/// Range actually goes to numbers_per_step * 2.
#[structopt(long = "numbers-per-step", default_value = "33554432")]
numbers_per_step: usize,
/// If the prime numbers should be used for the divisibility check instead of using
/// an optimized auto-increment loop.
#[structopt(long = "no-cache")]
no_cache: bool,
/// If the calculated prime numbers should be validated on the cpu by a simple prime algorithm
#[structopt(long = "cpu-validate")]
cpu_validate: bool,
}
@ -77,7 +85,7 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) -
.unwrap(),
);
timings
.write_all("offset,count,duration\n".as_bytes())
.write_all("offset,count,gpu_duration,filter_duration,duration\n".as_bytes())
.unwrap();
let (sender, handle) = create_write_thread(output);
@ -98,11 +106,12 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) -
numbers.len(),
offset
);
let primes = if prime_opts.no_cache {
let prime_result = if prime_opts.no_cache {
controller.filter_primes_simple(numbers)?
} else {
controller.filter_primes(numbers)?
};
let primes = prime_result.primes;
let elapsed_ms = start.elapsed().as_secs_f64() * 1000f64;
println!(
@ -112,25 +121,22 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) -
prime_opts.numbers_per_step as f64 / start.elapsed().as_secs_f64()
);
timings
.write_all(format!("{},{},{}\n", offset, primes.len(), elapsed_ms).as_bytes())
.write_all(
format!(
"{},{},{},{},{}\n",
offset,
primes.len(),
prime_result.gpu_duration.as_secs_f64() * 1000f64,
prime_result.filter_duration.as_secs_f64() * 1000f64,
elapsed_ms
)
.as_bytes(),
)
.unwrap();
timings.flush().unwrap();
if prime_opts.cpu_validate {
println!("Validating...");
let failures = primes
.par_iter()
.filter(|n| !is_prime(**n))
.collect::<Vec<&u64>>();
if failures.len() > 0 {
println!(
"{} failures in prime calculation: {:?}",
failures.len(),
failures
);
} else {
println!("No failures found.");
}
validate_primes_on_cpu(&primes)
}
println!();
sender.send(primes).unwrap();
@ -149,6 +155,23 @@ fn calculate_primes(prime_opts: CalculatePrimes, controller: KernelController) -
Ok(())
}
fn validate_primes_on_cpu(primes: &Vec<u64>) {
println!("Validating...");
let failures = primes
.par_iter()
.filter(|n| !is_prime(**n))
.collect::<Vec<&u64>>();
if failures.len() > 0 {
println!(
"{} failures in prime calculation: {:?}",
failures.len(),
failures
);
} else {
println!("No failures found.");
}
}
fn create_write_thread(mut writer: BufWriter<File>) -> (Sender<Vec<u64>>, JoinHandle<()>) {
let (tx, rx) = channel();
let handle = thread::spawn(move || {

Loading…
Cancel
Save