Add bdf read and write implementation

master
trivernis 5 years ago
parent 4ef3c00693
commit 4494234a59

@@ -1,4 +1,4 @@
-use crate::lib::hash::{sha_checksum, PassKey};
+use crate::lib::hash::sha_checksum;
 use cfb_mode::stream_cipher::{NewStreamCipher, StreamCipher};
 use cfb_mode::Cfb;
 use des::Des;
@@ -33,7 +33,7 @@ pub fn decrypt_data(data: &[u8], key: &[u8]) -> Vec<u8> {
 /// Decrypts data using a dictionary
 pub fn decrypt_with_dictionary(
     data: &[u8],
-    dict: Vec<PassKey>,
+    dict: Vec<(&String, Vec<u8>)>,
     checksum: &[u8],
 ) -> Option<Vec<u8>> {
     let decrypted = Mutex::<Option<Vec<u8>>>::new(None);

@@ -6,6 +6,7 @@ use std::convert::{TryFrom, TryInto};
 use std::fs::File;
 use std::io::{BufReader, BufWriter, Read, Write};
 use std::io::{Error, ErrorKind};
+use xz2::read::{XzDecoder, XzEncoder};
 
 pub const LZMA: &str = "lzma";
@@ -18,8 +19,9 @@ pub const ENTRIES_PER_CHUNK: u32 = 100_000;
 pub struct BDFReader {
     reader: BufReader<File>,
-    metadata: Option<MetaChunk>,
-    lookup_table: Option<HashLookupTable>,
+    pub metadata: Option<MetaChunk>,
+    pub lookup_table: Option<HashLookupTable>,
+    compressed: bool,
 }
 
 pub struct BDFWriter {
@@ -28,6 +30,7 @@ pub struct BDFWriter {
     lookup_table: HashLookupTable,
     data_entries: Vec<DataEntry>,
     head_written: bool,
+    compressed: bool,
 }
 
 #[derive(Debug, Clone)]
@@ -40,10 +43,10 @@ pub struct GenericChunk {
 #[derive(Debug, Clone)]
 pub struct MetaChunk {
-    chunk_count: u32,
+    pub chunk_count: u32,
     entries_per_chunk: u32,
-    entry_count: u64,
-    compression_method: Option<String>,
+    pub entry_count: u64,
+    pub compression_method: Option<String>,
 }
 
 #[derive(Debug, Clone)]
@@ -60,18 +63,19 @@ pub struct HashEntry {
 #[derive(Debug, Clone)]
 pub struct DataEntry {
-    plain: String,
+    pub plain: String,
     hashes: HashMap<String, Vec<u8>>,
 }
 
 impl BDFWriter {
     pub fn new(writer: BufWriter<File>, entry_count: u64, compress: bool) -> Self {
         Self {
-            metadata: MetaChunk::new(entry_count, 0, compress),
+            metadata: MetaChunk::new(entry_count, ENTRIES_PER_CHUNK, compress),
             lookup_table: HashLookupTable::new(HashMap::new()),
             data_entries: Vec::new(),
             writer,
             head_written: false,
+            compressed: compress,
         }
     }
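
For orientation (not part of the commit): a minimal write-path sketch against the BDFWriter API as it stands after this change. The file name, entry count, hash name, and hash bytes are placeholders, and the `?` error handling assumes the methods return io::Result, which matches how they are used in main.rs further down.

use std::fs::File;
use std::io::BufWriter;

fn write_dictionary() -> std::io::Result<()> {
    let f = File::create("dictionary.bdf")?;
    // `true` enables lzma compression of data chunks when they are flushed.
    let mut bdf_file = BDFWriter::new(BufWriter::new(f), 1, true);
    // Register the hash function in the lookup table before entries are added.
    bdf_file.add_lookup_entry(HashEntry::new("sha256".to_string(), 32))?;

    let mut entry = DataEntry::new("password".to_string());
    entry.add_hash_value("sha256".to_string(), vec![0u8; 32]);
    bdf_file.add_data_entry(entry)?;

    // flush() writes the buffered entries as a (possibly compressed) data
    // chunk; flush_writer() flushes the underlying BufWriter.
    bdf_file.flush()?;
    bdf_file.flush_writer()?;
    Ok(())
}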
@@ -115,6 +119,9 @@ impl BDFWriter {
         }
         let mut data_chunk =
            GenericChunk::from_data_entries(&self.data_entries, &self.lookup_table);
+        if self.compressed {
+            data_chunk.compress()?;
+        }
         let data = data_chunk.serialize();
         self.writer.write(data.as_slice())?;
         self.data_entries = Vec::new();
@@ -133,6 +140,7 @@ impl BDFReader {
             metadata: None,
             lookup_table: None,
             reader,
+            compressed: false,
         }
     }
@@ -142,6 +150,16 @@ impl BDFReader {
             return Err(Error::new(ErrorKind::InvalidData, "invalid BDF Header"));
         }
         let meta_chunk: MetaChunk = self.next_chunk()?.try_into()?;
+        if let Some(method) = &meta_chunk.compression_method {
+            if *method == LZMA.to_string() {
+                self.compressed = true;
+            } else {
+                return Err(Error::new(
+                    ErrorKind::Other,
+                    "unsupported compression method",
+                ));
+            }
+        }
         self.metadata = Some(meta_chunk);
 
         if let Some(chunk) = &self.metadata {
@@ -195,13 +213,17 @@ impl BDFReader {
         let mut crc_raw = [0u8; 4];
         let _ = self.reader.read_exact(&mut crc_raw)?;
         let crc = BigEndian::read_u32(&mut crc_raw);
-        Ok(GenericChunk {
+        let mut gen_chunk = GenericChunk {
             length,
             name,
             data,
             crc,
-        })
+        };
+        if gen_chunk.name == DTBL_CHUNK_NAME.to_string() && self.compressed {
+            gen_chunk.decompress()?;
+        }
+        Ok(gen_chunk)
     }
 }
@@ -227,9 +249,12 @@ impl GenericChunk {
         &mut self,
         lookup_table: &HashLookupTable,
     ) -> Result<Vec<DataEntry>, Error> {
+        if self.name == HTBL_CHUNK_NAME.to_string() {
+            return Err(Error::new(ErrorKind::Other, "this is not a data chunk"));
+        }
         let mut entries: Vec<DataEntry> = Vec::new();
         let mut position = 0;
-        while self.data.len() > position {
+        while self.data.len() > (position + 8) {
             let entry_length_raw = &self.data[position..position + 4];
             position += 4;
             let entry_length = BigEndian::read_u32(entry_length_raw);
@@ -240,7 +265,15 @@ impl GenericChunk {
             let pw_plain_raw = &self.data[position..position + pw_length as usize];
             position += pw_length as usize;
             let pw_plain = String::from_utf8(pw_plain_raw.to_vec())
-                .expect("failed to parse plain password string");
+                .map_err(|err| {
+                    format!(
+                        "failed to parse plain password string ({}-{}): {:?}",
+                        position,
+                        position + pw_length as usize,
+                        err
+                    )
+                })
+                .unwrap();
             let mut hash_values: HashMap<String, Vec<u8>> = HashMap::new();
             while position < entry_end {
                 let entry_id_raw = &self.data[position..position + 4];
@@ -283,6 +316,35 @@ impl GenericChunk {
             crc: crc_sum,
         }
     }
+
+    pub fn compress(&mut self) -> Result<(), Error> {
+        let data = self.data.as_slice();
+        let mut compressor = XzEncoder::new(data, 6);
+        let mut compressed: Vec<u8> = Vec::new();
+        compressor.read_to_end(&mut compressed)?;
+        self.length = compressed.len() as u32;
+        self.data = compressed;
+
+        Ok(())
+    }
+
+    pub fn decompress(&mut self) -> Result<(), Error> {
+        let data = self.data.as_slice();
+        let mut decompressor = XzDecoder::new(data);
+        let mut decompressed: Vec<u8> = Vec::new();
+        decompressor.read_to_end(&mut decompressed)?;
+        let crc = crc32::checksum_ieee(decompressed.as_slice());
+        if crc != self.crc {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                "the crc doesn't match the decrypted data",
+            ));
+        }
+        self.length = decompressed.len() as u32;
+        self.data = decompressed;
+
+        Ok(())
+    }
 }
 
 impl From<&MetaChunk> for GenericChunk {
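
As a side note, a self-contained sketch of the xz2 read-adapter pattern used by compress() and decompress() above; preset 6 and the IEEE CRC match the values in the diff, the rest (function name, assertions) is illustrative.

use std::io::Read;
use xz2::read::{XzDecoder, XzEncoder};

fn xz_roundtrip(data: &[u8]) -> std::io::Result<()> {
    // XzEncoder wraps a reader and yields the compressed bytes on read.
    let mut compressed = Vec::new();
    XzEncoder::new(data, 6).read_to_end(&mut compressed)?;

    // XzDecoder wraps the compressed bytes and yields the original payload.
    let mut decompressed = Vec::new();
    XzDecoder::new(compressed.as_slice()).read_to_end(&mut decompressed)?;

    // The chunk CRC is computed over the uncompressed payload, which is why
    // decompress() can verify it right after decompression.
    assert_eq!(crc32::checksum_ieee(&decompressed), crc32::checksum_ieee(data));
    assert_eq!(decompressed, data);
    Ok(())
}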
@@ -321,7 +383,7 @@ impl MetaChunk {
         } else {
             None
         };
-        let chunk_count = (entry_count as f32 / entries_per_chunk as f32).ceil() as u32;
+        let chunk_count = (entry_count as f64 / entries_per_chunk as f64).ceil() as u32;
 
         Self {
             chunk_count,
@@ -494,6 +556,11 @@ impl DataEntry {
         self.hashes.insert(name, value);
     }
 
+    /// Returns the hash value for a given name of a hash function
+    pub fn get_hash_value(&self, name: String) -> Option<&Vec<u8>> {
+        self.hashes.get(&name)
+    }
+
     /// Serializes the entry to a vector of bytes
     pub fn serialize(&self, lookup_table: HashLookupTable) -> Vec<u8> {
         let mut pw_plain_raw = self.plain.clone().into_bytes();

@@ -3,8 +3,8 @@ pub mod lib;
 use crate::lib::crypt::{
     decrypt_brute_brute_force, decrypt_data, decrypt_with_dictionary, encrypt_data,
 };
-use crate::lib::hash::{create_key, sha_checksum, PassKey};
-use crate::lib::rainbowutils::{BDFWriter, DataEntry, HashEntry};
+use crate::lib::hash::{create_key, sha256, sha_checksum};
+use crate::lib::rainbowutils::{BDFReader, BDFWriter, DataEntry, HashEntry, HashLookupTable};
 use pbr::ProgressBar;
 use rayon::prelude::*;
 use rayon::str;
@@ -12,9 +12,10 @@ use regex::Regex;
 use rpassword;
 use rpassword::read_password_from_tty;
 use spinners::{Spinner, Spinners};
+use std::collections::HashMap;
 use std::fs;
 use std::fs::File;
-use std::io::{BufRead, BufReader, BufWriter, Write};
+use std::io::{BufReader, BufWriter};
 use std::sync::mpsc::sync_channel;
 use std::thread;
 use std::time::Duration;
@@ -85,8 +86,12 @@ struct CreateDictionary {
     input: String,
 
     /// The output dictionary file
-    #[structopt(short = "o", long = "output", default_value = "dictionary.tsv")]
+    #[structopt(short = "o", long = "output", default_value = "dictionary.bdf")]
     output: String,
+
+    /// If the dictionary file should be compressed
+    #[structopt(short = "c", long = "compress")]
+    compress: bool,
 }
 
 fn main() {
@@ -160,7 +165,7 @@ fn create_dictionary(_opts: &Opts, args: &CreateDictionary) {
     let input: String = (*args.input).parse().unwrap();
     // TODO: Some form of removing duplicates (without itertools)
     let fout = File::create(args.output.clone()).unwrap();
-    let mut writer = BufWriter::new(fout);
+    let writer = BufWriter::new(fout);
     let handle;
     {
         let content = fs::read_to_string(input).expect("Failed to read content");
@@ -169,24 +174,28 @@ fn create_dictionary(_opts: &Opts, args: &CreateDictionary) {
         let mut pb = ProgressBar::new(entry_count);
         pb.set_max_refresh_rate(Some(Duration::from_millis(200)));
         let (rx, tx) = sync_channel::<DataEntry>(100_000_000);
-        let mut bdf_file = BDFWriter::new(writer, entry_count, false);
-        bdf_file.add_lookup_entry(HashEntry::new(SHA256.to_string(), 32));
+        let mut bdf_file = BDFWriter::new(writer, entry_count, args.compress);
+        bdf_file
+            .add_lookup_entry(HashEntry::new(SHA256.to_string(), 32))
+            .expect("Failed to add lookup entry");
 
         handle = thread::spawn(move || {
             for entry in tx {
-                bdf_file.add_data_entry(entry);
+                if let Err(e) = bdf_file.add_data_entry(entry) {
+                    println!("{:?}", e);
+                }
                 pb.inc();
             }
             pb.finish();
-            bdf_file.flush();
+            bdf_file.flush().expect("failed to flush the file data");
             bdf_file
                 .flush_writer()
-                .expect("Failed to flush the file writer.");
+                .expect("failed to flush the file writer");
         });
         let re = Regex::new("[\\x00\\x08\\x0B\\x0C\\x0E-\\x1F\\t\\r\\a\\n]").unwrap();
         lines
             .map(|line| -> String { re.replace_all(line, "").to_string() })
             .map(|pw| -> DataEntry {
-                let key = create_key(pw.replace("\t", "").as_ref());
+                let key = sha256(&pw);
                 let mut data_entry = DataEntry::new(pw);
                 data_entry.add_hash_value(SHA256.to_string(), key);
@@ -208,8 +217,6 @@ fn spinner(text: &str) -> Spinner {
     Spinner::new(Spinners::Dots2, text.into())
 }
 
-const LINES_PER_CHUNK: usize = 10000000;
-
 fn decrypt_with_dictionary_file(
     filename: String,
     data: &Vec<u8>,
@@ -218,36 +225,37 @@ fn decrypt_with_dictionary_file(
     let sp = spinner("Reading dictionary...");
     let f = File::open(&filename).expect("Failed to open dictionary file.");
     let reader = BufReader::new(f);
-    let mut pb =
-        ProgressBar::new((get_line_count(&filename) as f64 / LINES_PER_CHUNK as f64).ceil() as u64);
-    let (rx, tx) = sync_channel::<Vec<String>>(10);
+    let mut bdf_file = BDFReader::new(reader);
+    bdf_file
+        .read_metadata()
+        .expect("failed to read the metadata of the file");
+    let mut chunk_count = 0;
+    if let Some(meta) = &bdf_file.metadata {
+        chunk_count = meta.chunk_count;
+    }
+    let mut pb = ProgressBar::new(chunk_count as u64);
+    let (rx, tx) = sync_channel::<Vec<DataEntry>>(10);
     let _handle = thread::spawn(move || {
-        let mut line_vec: Vec<String> = vec![];
-        reader.lines().for_each(|line_result| {
-            if line_vec.len() > LINES_PER_CHUNK {
-                if let Err(_) = rx.send(line_vec.clone()) {}
-                line_vec.clear();
-            }
-            match line_result {
-                Ok(line) => line_vec.push(line),
-                Err(err) => eprintln!("Failed with err {}", err),
-            }
-        });
-        if let Err(_) = rx.send(line_vec.clone()) {}
-        line_vec.clear();
+        let mut lookup_table = HashLookupTable::new(HashMap::new());
+        if let Ok(table) = bdf_file.read_lookup_table() {
+            lookup_table = table.clone();
+        }
+        while let Ok(next_chunk) = &mut bdf_file.next_chunk() {
+            if let Ok(entries) = next_chunk.data_entries(&lookup_table) {
+                if let Err(_) = rx.send(entries) {}
+            }
+        }
     });
     sp.stop();
     let mut result_data: Option<Vec<u8>> = None;
-    for lines in tx {
-        let pw_table: Vec<PassKey> = lines
+    for entries in tx {
+        let pw_table: Vec<(&String, Vec<u8>)> = entries
             .par_iter()
-            .map(|line| {
-                let parts: Vec<&str> = line.split("\t").collect::<Vec<&str>>();
-                let pw = parts[0].parse().unwrap();
-                let key_str: String = parts[1].parse().unwrap();
-                let key = base64::decode(&key_str).unwrap();
-                (pw, key)
+            .map(|entry: &DataEntry| {
+                let pw = &entry.plain;
+                let key: &Vec<u8> = entry.get_hash_value(SHA256.to_string()).unwrap();
+                (pw, key[0..8].to_vec())
             })
             .collect();
         pb.inc();
@@ -259,9 +267,3 @@ fn decrypt_with_dictionary_file(
     pb.finish();
     result_data
 }
-
-fn get_line_count(fname: &str) -> usize {
-    let f = File::open(fname).expect("Failed to open file to get the line count.");
-    let reader = BufReader::new(f);
-    return reader.lines().count();
-}
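
For orientation (not part of the commit): a minimal read-path sketch mirroring decrypt_with_dictionary_file above, without the channel and progress-bar plumbing. The file name and the "sha256" hash name are placeholders for the SHA256 constant used in the diff.

use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;

fn dump_entries() -> std::io::Result<()> {
    let f = File::open("dictionary.bdf")?;
    let mut bdf_file = BDFReader::new(BufReader::new(f));
    // Reads the BDF header and meta chunk; this also sets the reader's
    // internal `compressed` flag when the metadata names lzma as the method.
    bdf_file.read_metadata()?;
    let lookup_table = match bdf_file.read_lookup_table() {
        Ok(table) => table.clone(),
        Err(_) => HashLookupTable::new(HashMap::new()),
    };
    // Chunks are streamed; data chunks are decompressed transparently
    // by next_chunk() before the entries are parsed.
    while let Ok(mut chunk) = bdf_file.next_chunk() {
        if let Ok(entries) = chunk.data_entries(&lookup_table) {
            for entry in entries {
                let hash = entry.get_hash_value("sha256".to_string());
                println!("{}: {:?}", entry.plain, hash);
            }
        }
    }
    Ok(())
}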
