diff --git a/Cargo.lock b/Cargo.lock index 17e411a..7999774 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,6 +116,11 @@ dependencies = [ "ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "cc" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "cfb-mode" version = "0.3.2" @@ -225,6 +230,7 @@ dependencies = [ "sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "spinners 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -309,6 +315,16 @@ name = "libc" version = "0.2.66" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "lzma-sys" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "memchr" version = "2.3.3" @@ -352,6 +368,11 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "pkg-config" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "ppv-lite86" version = "0.2.6" @@ -774,6 +795,14 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "xz2" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lzma-sys 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", +] + [metadata] "checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" @@ -792,6 +821,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" +"checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" "checksum cfb-mode 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "190e7b55d3a27cf8879becf61035a141cbc783f3258a41d16d1706719f991345" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" @@ -813,12 +843,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum hermit-abi 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "e2c55f143919fbc0bc77e427fe2d74cf23786d7c1875666f2fde3ac3c659bb67" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)" = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" +"checksum lzma-sys 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "53e48818fd597d46155132bbbb9505d6d1b3d360b4ee25cfa91c406f8a90fe91" "checksum memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" "checksum memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" "checksum num_cpus 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "46203554f085ff89c235cd12f7075f3233af9b11ed7c9e16dfe2560d03313ce6" "checksum numtoa 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" "checksum opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" "checksum pbr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4403eb718d70c03ee279e51737782902c68cca01e870a33b6a2f9dfb50b9cd83" +"checksum pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)" = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677" "checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" "checksum proc-macro-error 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "052b3c9af39c7e5e94245f820530487d19eb285faedcb40e0c3275132293f242" "checksum proc-macro-error-attr 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d175bef481c7902e63e3165627123fff3502f06ac043d3ef42d08c1246da9253" @@ -871,3 +903,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +"checksum xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c179869f34fc7c01830d3ce7ea2086bc3a07e0d35289b667d0a8bf910258926c" diff --git a/Cargo.toml b/Cargo.toml index 2b9c057..507c5e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,4 +19,5 @@ pbr = "1.0.2" spinners = "1.2.0" regex = "1.3.4" byteorder = "1.3.4" -crc = "1.8.1" \ No newline at end of file +crc = "1.8.1" +xz2 = "0.1.6" \ No newline at end of file diff --git a/src/lib/hash.rs b/src/lib/hash.rs index c194220..d32f6d6 100644 --- a/src/lib/hash.rs +++ b/src/lib/hash.rs @@ -12,6 +12,15 @@ pub fn create_key(pw: &str) -> Vec { key.to_vec().clone() } +/// Hashes a text to a 32 bytes long key. +pub fn sha256(pw: &str) -> Vec { + let mut hasher = Sha256::default(); + hasher.input(pw); + let result = hasher.result(); + + result.to_vec().clone() +} + /// Creates a sha256 hashsum from the input data pub fn sha_checksum(data: &Vec) -> Vec { let mut hasher = Sha256::default(); diff --git a/src/lib/rainbowutils.rs b/src/lib/rainbowutils.rs index 19fd6ad..72d01af 100644 --- a/src/lib/rainbowutils.rs +++ b/src/lib/rainbowutils.rs @@ -4,7 +4,7 @@ use rayon::prelude::*; use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; use std::fs::File; -use std::io::{BufReader, BufWriter, Read}; +use std::io::{BufReader, BufWriter, Read, Write}; use std::io::{Error, ErrorKind}; pub const LZMA: &str = "lzma"; @@ -14,6 +14,7 @@ pub const NULL_BYTES: &[u8; 4] = &[0u8; 4]; pub const META_CHUNK_NAME: &str = "META"; pub const HTBL_CHUNK_NAME: &str = "HTBL"; pub const DTBL_CHUNK_NAME: &str = "DTBL"; +pub const ENTRIES_PER_CHUNK: u32 = 100_000; pub struct BDFReader { reader: BufReader, @@ -25,6 +26,8 @@ pub struct BDFWriter { writer: BufWriter, metadata: MetaChunk, lookup_table: HashLookupTable, + data_entries: Vec, + head_written: bool, } #[derive(Debug, Clone)] @@ -39,7 +42,7 @@ pub struct GenericChunk { pub struct MetaChunk { chunk_count: u32, entries_per_chunk: u32, - entry_count: u32, + entry_count: u64, compression_method: Option, } @@ -62,13 +65,66 @@ pub struct DataEntry { } impl BDFWriter { - pub fn new(writer: BufWriter, compress: bool) -> Self { + pub fn new(writer: BufWriter, entry_count: u64, compress: bool) -> Self { Self { - metadata: MetaChunk::new(0, 0, compress), + metadata: MetaChunk::new(entry_count, 0, compress), lookup_table: HashLookupTable::new(HashMap::new()), + data_entries: Vec::new(), writer, + head_written: false, } } + + /// Adds an entry to the hash lookup table + /// If the lookup table has already been written to the file, an error ris returned + pub fn add_lookup_entry(&mut self, mut entry: HashEntry) -> Result { + if self.head_written { + return Err(Error::new( + ErrorKind::Other, + "the head has already been written", + )); + } + let id = self.lookup_table.entries.len() as u32; + entry.id = id; + self.lookup_table.entries.insert(id, entry); + + Ok(id) + } + + /// Adds a data entry to the file. + /// If the number of entries per chunk is reached, + /// the data will be written to the file + pub fn add_data_entry(&mut self, data_entry: DataEntry) -> Result<(), Error> { + self.data_entries.push(data_entry); + if self.data_entries.len() >= ENTRIES_PER_CHUNK as usize { + self.flush()?; + } + + Ok(()) + } + + /// Writes the data to the file + pub fn flush(&mut self) -> Result<(), Error> { + if !self.head_written { + self.writer.write(BDF_HDR)?; + let mut generic_meta = GenericChunk::from(&self.metadata); + self.writer.write(generic_meta.serialize().as_slice())?; + let mut generic_lookup = GenericChunk::from(&self.lookup_table); + self.writer.write(generic_lookup.serialize().as_slice())?; + self.head_written = true; + } + let mut data_chunk = + GenericChunk::from_data_entries(&self.data_entries, &self.lookup_table); + let data = data_chunk.serialize(); + self.writer.write(data.as_slice())?; + self.data_entries = Vec::new(); + + Ok(()) + } + + pub fn flush_writer(&mut self) -> Result<(), Error> { + self.writer.flush() + } } impl BDFReader { @@ -207,8 +263,8 @@ impl GenericChunk { /// Constructs the chunk from a Vec of Data entries and a hash lookup table pub fn from_data_entries( - entries: Vec, - lookup_table: HashLookupTable, + entries: &Vec, + lookup_table: &HashLookupTable, ) -> GenericChunk { let mut serialized_data: Vec = Vec::new(); let serialized_entries: Vec> = entries @@ -229,8 +285,8 @@ impl GenericChunk { } } -impl From for GenericChunk { - fn from(chunk: MetaChunk) -> GenericChunk { +impl From<&MetaChunk> for GenericChunk { + fn from(chunk: &MetaChunk) -> GenericChunk { let serialized_data = chunk.serialize(); let crc_sum = crc32::checksum_ieee(serialized_data.as_slice()); @@ -243,8 +299,8 @@ impl From for GenericChunk { } } -impl From for GenericChunk { - fn from(chunk: HashLookupTable) -> GenericChunk { +impl From<&HashLookupTable> for GenericChunk { + fn from(chunk: &HashLookupTable) -> GenericChunk { let serialized_data = chunk.serialize(); let crc_sum = crc32::checksum_ieee(serialized_data.as_slice()); @@ -259,7 +315,7 @@ impl From for GenericChunk { impl MetaChunk { /// Creates a new meta chunk - pub fn new(entry_count: u32, entries_per_chunk: u32, compress: bool) -> Self { + pub fn new(entry_count: u64, entries_per_chunk: u32, compress: bool) -> Self { let compression_method = if compress { Some(LZMA.to_string()) } else { @@ -284,8 +340,8 @@ impl MetaChunk { let mut entries_pc_raw = [0u8; 4]; BigEndian::write_u32(&mut entries_pc_raw, self.entries_per_chunk); serialized_data.append(&mut entries_pc_raw.to_vec()); - let mut total_entries_raw = [0u8; 4]; - BigEndian::write_u32(&mut total_entries_raw, self.entry_count); + let mut total_entries_raw = [0u8; 8]; + BigEndian::write_u64(&mut total_entries_raw, self.entry_count); serialized_data.append(&mut total_entries_raw.to_vec()); let mut compression_method = self.compression_method.clone(); if let Some(method) = &mut compression_method { @@ -308,16 +364,16 @@ impl TryFrom for MetaChunk { "chunk name doesn't match", )); } - if chunk.data.len() < 16 { + if chunk.data.len() < 20 { return Err(Error::new(ErrorKind::InvalidData, "invalid chunk data")); } let chunk_count_raw = &chunk.data[0..4]; let entries_per_chunk = &chunk.data[4..8]; - let total_number_of_entries = &chunk.data[8..12]; - let compression_method_raw = chunk.data[12..16].to_vec(); + let total_number_of_entries = &chunk.data[8..16]; + let compression_method_raw = chunk.data[16..20].to_vec(); let chunk_count = BigEndian::read_u32(chunk_count_raw); let entries_per_chunk = BigEndian::read_u32(entries_per_chunk); - let entry_count = BigEndian::read_u32(total_number_of_entries); + let entry_count = BigEndian::read_u64(total_number_of_entries); let compression_method = if &compression_method_raw != NULL_BYTES { Some( String::from_utf8(compression_method_raw) @@ -398,6 +454,15 @@ impl TryFrom for HashLookupTable { } impl HashEntry { + pub fn new(name: String, output_length: u32) -> Self { + Self { + id: 0, + name, + output_length, + } + } + + /// Serializes the entry to a vector of bytes pub fn serialize(&self) -> Vec { let mut serialized: Vec = Vec::new(); let mut id_raw = [0u8; 4]; @@ -417,6 +482,19 @@ impl HashEntry { } impl DataEntry { + pub fn new(plain: String) -> Self { + Self { + hashes: HashMap::new(), + plain, + } + } + + /// Adds a hash to the hash values + pub fn add_hash_value(&mut self, name: String, value: Vec) { + self.hashes.insert(name, value); + } + + /// Serializes the entry to a vector of bytes pub fn serialize(&self, lookup_table: HashLookupTable) -> Vec { let mut pw_plain_raw = self.plain.clone().into_bytes(); let mut pw_length_raw = [0u8; 4]; diff --git a/src/main.rs b/src/main.rs index f5024dd..fa8031d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ use crate::lib::crypt::{ decrypt_brute_brute_force, decrypt_data, decrypt_with_dictionary, encrypt_data, }; use crate::lib::hash::{create_key, sha_checksum, PassKey}; +use crate::lib::rainbowutils::{BDFWriter, DataEntry, HashEntry}; use pbr::ProgressBar; use rayon::prelude::*; use rayon::str; @@ -153,6 +154,7 @@ fn decrypt(_opts: &Opts, args: &Decrypt) { } } +const SHA256: &str = "sha256"; /// Creates a dictionary from an input file and writes it to the output file fn create_dictionary(_opts: &Opts, args: &CreateDictionary) { let input: String = (*args.input).parse().unwrap(); @@ -163,30 +165,36 @@ fn create_dictionary(_opts: &Opts, args: &CreateDictionary) { { let content = fs::read_to_string(input).expect("Failed to read content"); let lines = content.par_lines(); - let mut pb; - { - pb = ProgressBar::new(lines.clone().count() as u64); - } + let entry_count = lines.clone().count() as u64; + let mut pb = ProgressBar::new(entry_count); pb.set_max_refresh_rate(Some(Duration::from_millis(200))); - let (rx, tx) = sync_channel::(100_000_000); + let (rx, tx) = sync_channel::(100_000_000); + let mut bdf_file = BDFWriter::new(writer, entry_count, false); + bdf_file.add_lookup_entry(HashEntry::new(SHA256.to_string(), 32)); handle = thread::spawn(move || { - for line in tx { - writer.write(&line.as_bytes()).unwrap(); + for entry in tx { + bdf_file.add_data_entry(entry); pb.inc(); } pb.finish(); - writer.flush().expect("Failed to flush the file writer."); + bdf_file.flush(); + bdf_file + .flush_writer() + .expect("Failed to flush the file writer."); }); let re = Regex::new("[\\x00\\x08\\x0B\\x0C\\x0E-\\x1F\\t\\r\\a\\n]").unwrap(); lines .map(|line| -> String { re.replace_all(line, "").to_string() }) - .map(|pw| -> String { + .map(|pw| -> DataEntry { let key = create_key(pw.replace("\t", "").as_ref()); - let key_base64 = base64::encode(key.as_slice()); - format!("{}\t{}\n", pw, key_base64) + let mut data_entry = DataEntry::new(pw); + data_entry.add_hash_value(SHA256.to_string(), key); + + data_entry }) - .for_each_with(rx, |rx, line| { - rx.send(line).expect("Failed to send value to channel."); + .for_each_with(rx, |rx, data_entry| { + rx.send(data_entry) + .expect("Failed to send value to channel."); }); } if let Err(_err) = handle.join() {