commit e6a8dc74e6990f2f96e0254fea8859214f0784c4
Author: trivernis
Date:   Fri Mar 13 13:59:24 2020 +0100

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0480932
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/target
+**/*.rs.bk
+Cargo.lock
+*.bdf
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..5c98b42
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,2 @@
+# Default ignored files
+/workspace.xml
\ No newline at end of file
diff --git a/.idea/bdflib.iml b/.idea/bdflib.iml
new file mode 100644
index 0000000..b7b4242
--- /dev/null
+++ b/.idea/bdflib.iml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..28a804d
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..9f77491
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..fc0ede4
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "bdflib"
+version = "0.1.0"
+authors = ["trivernis "]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+
+crc = "1.8.1"
+xz2 = "0.1.6"
+byteorder = "1.3.4"
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a12e117
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+bdflib
diff --git a/src/chunks.rs b/src/chunks.rs
new file mode 100644
index 0000000..c96f747
--- /dev/null
+++ b/src/chunks.rs
@@ -0,0 +1,409 @@
+use byteorder::{BigEndian, ByteOrder};
+use crc::crc32;
+use std::collections::HashMap;
+use std::convert::{TryFrom};
+use std::io::{Read};
+use std::io::{Error, ErrorKind};
+use xz2::read::{XzDecoder, XzEncoder};
+
+pub const LZMA: &str = "lzma";
+
+pub const BDF_HDR: &[u8; 11] = b"BDF\x01RAINBOW";
+pub const NULL_BYTES: &[u8; 4] = &[0u8; 4];
+pub const META_CHUNK_NAME: &str = "META";
+pub const HTBL_CHUNK_NAME: &str = "HTBL";
+pub const DTBL_CHUNK_NAME: &str = "DTBL";
+
+#[derive(Debug, Clone)]
+pub struct GenericChunk {
+    pub length: u32,
+    pub(crate) name: String,
+    pub data: Vec<u8>,
+    pub crc: u32,
+}
+
+#[derive(Debug, Clone)]
+pub struct MetaChunk {
+    pub chunk_count: u32,
+    entries_per_chunk: u32,
+    pub entry_count: u64,
+    pub compression_method: Option<String>,
+}
+
+#[derive(Debug, Clone)]
+pub struct HashLookupTable {
+    pub entries: HashMap<u32, HashEntry>,
+}
+
+#[derive(Debug, Clone)]
+pub struct HashEntry {
+    pub(crate) id: u32,
+    output_length: u32,
+    name: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct DataEntry {
+    pub plain: String,
+    hashes: HashMap<String, Vec<u8>>,
+}
+
+impl GenericChunk {
+    /// Serializes the chunk to a vector of bytes
+    pub fn serialize(&mut self) -> Vec<u8> {
+        let mut serialized: Vec<u8> = Vec::new();
+        let mut length_raw = [0u8; 4];
+        BigEndian::write_u32(&mut length_raw, self.length);
+        serialized.append(&mut length_raw.to_vec());
+        let name_raw = self.name.as_bytes();
+        serialized.append(&mut name_raw.to_vec());
+        serialized.append(&mut self.data);
+        let mut crc_raw = [0u8; 4];
+        BigEndian::write_u32(&mut crc_raw, self.crc);
+        serialized.append(&mut crc_raw.to_vec());
+
+        serialized
+    }
+
+    /// Returns the data entries of the chunk
+    pub fn data_entries(
+        &mut self,
+        lookup_table: &HashLookupTable,
+    ) -> Result<Vec<DataEntry>, Error> {
+        if self.name == HTBL_CHUNK_NAME.to_string() {
+            return Err(Error::new(ErrorKind::Other, "this is not a data chunk"));
+        }
+        let mut entries: Vec<DataEntry> = Vec::new();
+        let mut position = 0;
+        while self.data.len() > (position + 8) {
+            let entry_length_raw = &self.data[position..position + 4];
+            position += 4;
+            let entry_length = BigEndian::read_u32(entry_length_raw);
+            let entry_end = position + entry_length as usize;
+            let pw_length_raw = &self.data[position..position + 4];
+            position += 4;
+            let pw_length = BigEndian::read_u32(pw_length_raw);
+            let pw_plain_raw = &self.data[position..position + pw_length as usize];
+            position += pw_length as usize;
+            let pw_plain = String::from_utf8(pw_plain_raw.to_vec())
+                .map_err(|err| {
+                    format!(
+                        "failed to parse plain password string ({}-{}): {:?}",
+                        position,
+                        position + pw_length as usize,
+                        err
+                    )
+                })
+                .unwrap();
+            let mut hash_values: HashMap<String, Vec<u8>> = HashMap::new();
+            while position < entry_end {
+                let entry_id_raw = &self.data[position..position + 4];
+                position += 4;
+                let entry_id = BigEndian::read_u32(entry_id_raw);
+                if let Some(hash_entry) = lookup_table.entries.get(&entry_id) {
+                    let hash = &self.data[position..position + hash_entry.output_length as usize];
+                    position += hash_entry.output_length as usize;
+                    hash_values.insert(hash_entry.name.clone(), hash.to_vec());
+                }
+            }
+            entries.push(DataEntry {
+                plain: pw_plain,
+                hashes: hash_values,
+            })
+        }
+
+        Ok(entries)
+    }
+
+    /// Constructs the chunk from a Vec of data entries and a hash lookup table
+    pub fn from_data_entries(
+        entries: &Vec<DataEntry>,
+        lookup_table: &HashLookupTable,
+    ) -> GenericChunk {
+        let mut serialized_data: Vec<u8> = Vec::new();
+        entries.iter().for_each(|entry| {
+            serialized_data.append(&mut entry.serialize(&lookup_table));
+        });
+        let crc_sum = crc32::checksum_ieee(serialized_data.as_slice());
+
+        GenericChunk {
+            length: serialized_data.len() as u32,
+            name: DTBL_CHUNK_NAME.to_string(),
+            data: serialized_data,
+            crc: crc_sum,
+        }
+    }
+
+    pub fn compress(&mut self) -> Result<(), Error> {
+        let data = self.data.as_slice();
+        let mut compressor = XzEncoder::new(data, 6);
+        let mut compressed: Vec<u8> = Vec::new();
+        compressor.read_to_end(&mut compressed)?;
+        self.length = compressed.len() as u32;
+        self.data = compressed;
+
+        Ok(())
+    }
+
+    pub fn decompress(&mut self) -> Result<(), Error> {
+        let data = self.data.as_slice();
+        let mut decompressor = XzDecoder::new(data);
+        let mut decompressed: Vec<u8> = Vec::new();
+        decompressor.read_to_end(&mut decompressed)?;
+        let crc = crc32::checksum_ieee(decompressed.as_slice());
+        if crc != self.crc {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                "the crc doesn't match the decompressed data",
+            ));
+        }
+        self.length = decompressed.len() as u32;
+        self.data = decompressed;
+
+        Ok(())
+    }
+}
+
+impl From<&MetaChunk> for GenericChunk {
+    fn from(chunk: &MetaChunk) -> GenericChunk {
+        let serialized_data = chunk.serialize();
+        let crc_sum = crc32::checksum_ieee(serialized_data.as_slice());
+
+        GenericChunk {
+            length: serialized_data.len() as u32,
+            name: META_CHUNK_NAME.to_string(),
+            data: serialized_data,
+            crc: crc_sum,
+        }
+    }
+}
+
+impl From<&HashLookupTable> for GenericChunk {
+    fn from(chunk: &HashLookupTable) -> GenericChunk {
+        let serialized_data = chunk.serialize();
+        let crc_sum = crc32::checksum_ieee(serialized_data.as_slice());
+
+        GenericChunk {
+            length: serialized_data.len() as u32,
+            name: HTBL_CHUNK_NAME.to_string(),
+            data: serialized_data,
+            crc: crc_sum,
+        }
+    }
+}
+
+impl MetaChunk {
+    /// Creates a new meta chunk
+    pub fn new(entry_count: u64, entries_per_chunk: u32, compress: bool) -> Self {
+        let compression_method = if compress {
+            Some(LZMA.to_string())
+        } else {
+            None
+        };
+        let chunk_count = (entry_count as f64 / entries_per_chunk as f64).ceil() as u32;
+
+        Self {
+            chunk_count,
+            entry_count,
+            entries_per_chunk,
+            compression_method,
+        }
+    }
+
+    /// Serializes the chunk into bytes
+    pub fn serialize(&self) -> Vec<u8> {
+        let mut serialized_data: Vec<u8> = Vec::new();
+        let mut chunk_count_raw = [0u8; 4];
+        BigEndian::write_u32(&mut chunk_count_raw, self.chunk_count);
+        serialized_data.append(&mut chunk_count_raw.to_vec());
+        let mut entries_pc_raw = [0u8; 4];
+        BigEndian::write_u32(&mut entries_pc_raw, self.entries_per_chunk);
+        serialized_data.append(&mut entries_pc_raw.to_vec());
+        let mut total_entries_raw = [0u8; 8];
+        BigEndian::write_u64(&mut total_entries_raw, self.entry_count);
+        serialized_data.append(&mut total_entries_raw.to_vec());
+        let mut compression_method = self.compression_method.clone();
+        if let Some(method) = &mut compression_method {
+            serialized_data.append(&mut method.clone().into_bytes());
+        } else {
+            serialized_data.append(&mut vec![0, 0, 0, 0]);
+        }
+
+        serialized_data
+    }
+}
+
+impl TryFrom<GenericChunk> for MetaChunk {
+    type Error = Error;
+
+    fn try_from(chunk: GenericChunk) -> Result<Self, Self::Error> {
+        if &chunk.name != META_CHUNK_NAME {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                "chunk name doesn't match",
+            ));
+        }
+        if chunk.data.len() < 20 {
+            return Err(Error::new(ErrorKind::InvalidData, "invalid chunk data"));
+        }
+        let chunk_count_raw = &chunk.data[0..4];
+        let entries_per_chunk = &chunk.data[4..8];
+        let total_number_of_entries = &chunk.data[8..16];
+        let compression_method_raw = chunk.data[16..20].to_vec();
+        let chunk_count = BigEndian::read_u32(chunk_count_raw);
+        let entries_per_chunk = BigEndian::read_u32(entries_per_chunk);
+        let entry_count = BigEndian::read_u64(total_number_of_entries);
+        let compression_method = if &compression_method_raw != NULL_BYTES {
+            Some(
+                String::from_utf8(compression_method_raw)
+                    .expect("Failed to parse compression method name!"),
+            )
+        } else {
+            None
+        };
+
+        Ok(MetaChunk {
+            chunk_count,
+            entries_per_chunk,
+            entry_count,
+            compression_method,
+        })
+    }
+}
+
+impl HashLookupTable {
+    pub fn new(entries: HashMap<u32, HashEntry>) -> Self {
+        Self { entries }
+    }
+
+    /// Returns an entry by the name of the hash function
+    pub fn get_entry(&self, name: &String) -> Option<(&u32, &HashEntry)> {
+        self.entries.iter().find(|(_, entry)| entry.name == *name)
+    }
+
+    /// Serializes the lookup table into a vector of bytes
+    pub fn serialize(&self) -> Vec<u8> {
+        let mut serialized_full: Vec<u8> = Vec::new();
+        for (_, entry) in &self.entries {
+            serialized_full.append(entry.serialize().as_mut())
+        }
+
+        serialized_full
+    }
+}
+
+impl TryFrom<GenericChunk> for HashLookupTable {
+    type Error = Error;
+
+    fn try_from(chunk: GenericChunk) -> Result<Self, Self::Error> {
+        if &chunk.name != HTBL_CHUNK_NAME {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                "chunk name doesn't match",
+            ));
+        }
+        let mut hash_entries: HashMap<u32, HashEntry> = HashMap::new();
+        let mut position = 0;
+        while chunk.data.len() > (position + 12) {
+            let id_raw = &chunk.data[position..position + 4];
+            position += 4;
+            let output_length_raw = &chunk.data[position..position + 4];
+            position += 4;
+            let name_length_raw = &chunk.data[position..position + 4];
+            position += 4;
+            let id = BigEndian::read_u32(id_raw);
+            let output_length = BigEndian::read_u32(output_length_raw);
+            let name_length = BigEndian::read_u32(name_length_raw);
+            let name_raw = &chunk.data[position..position + name_length as usize];
+            position += name_length as usize;
+            let name =
+                String::from_utf8(name_raw.to_vec()).expect("Failed to parse hash function name!");
+            hash_entries.insert(
+                id,
+                HashEntry {
+                    id,
+                    output_length,
+                    name,
+                },
+            );
+        }
+        Ok(HashLookupTable {
+            entries: hash_entries,
+        })
+    }
+}
+
+impl HashEntry {
+    pub fn new(name: String, output_length: u32) -> Self {
+        Self {
+            id: 0,
+            name,
+            output_length,
+        }
+    }
+
+    /// Serializes the entry to a vector of bytes
+    pub fn serialize(&self) -> Vec<u8> {
+        let mut serialized: Vec<u8> = Vec::new();
+        let mut id_raw = [0u8; 4];
+        BigEndian::write_u32(&mut id_raw, self.id);
+        serialized.append(&mut id_raw.to_vec());
+        let mut output_length_raw = [0u8; 4];
+        BigEndian::write_u32(&mut output_length_raw, self.output_length);
+        serialized.append(&mut output_length_raw.to_vec());
+        let mut name_raw = self.name.clone().into_bytes();
+        let mut name_length_raw = [0u8; 4];
+        BigEndian::write_u32(&mut name_length_raw, name_raw.len() as u32);
+        serialized.append(&mut name_length_raw.to_vec());
+        serialized.append(&mut name_raw);
+
+        serialized
+    }
+}
+
+impl DataEntry {
+    pub fn new(plain: String) -> Self {
+        Self {
+            hashes: HashMap::new(),
+            plain,
+        }
+    }
+
+    /// Adds a hash to the hash values
+    pub fn add_hash_value(&mut self, name: String, value: Vec<u8>) {
+        self.hashes.insert(name, value);
+    }
+
+    /// Returns the hash value for a given name of a hash function
+    pub fn get_hash_value(&self, name: String) -> Option<&Vec<u8>> {
+        self.hashes.get(&name)
+    }
+
+    /// Serializes the entry to a vector of bytes
+    pub fn serialize(&self, lookup_table: &HashLookupTable) -> Vec<u8> {
+        let mut pw_plain_raw = self.plain.clone().into_bytes();
+        let mut pw_length_raw = [0u8; 4];
+        BigEndian::write_u32(&mut pw_length_raw, pw_plain_raw.len() as u32);
+        let mut hash_data: Vec<u8> = Vec::new();
+        for (name, value) in &self.hashes {
+            if let Some((id, _)) = lookup_table.get_entry(&name) {
+                let mut id_raw = [0u8; 4];
+                BigEndian::write_u32(&mut id_raw, *id);
+                hash_data.append(&mut id_raw.to_vec());
+                hash_data.append(&mut value.clone())
+            }
+        }
+
+        let mut length_total_raw = [0u8; 4];
+        BigEndian::write_u32(
+            &mut length_total_raw,
+            4 + pw_plain_raw.len() as u32 + hash_data.len() as u32,
+        );
+        let mut serialized_data: Vec<u8> = Vec::new();
+        serialized_data.append(&mut length_total_raw.to_vec());
+        serialized_data.append(&mut pw_length_raw.to_vec());
+        serialized_data.append(&mut pw_plain_raw);
+        serialized_data.append(&mut hash_data);
+
+        serialized_data
+    }
+}
diff --git a/src/io.rs b/src/io.rs
new file mode 100644
index 0000000..aa29318
--- /dev/null
+++ b/src/io.rs
@@ -0,0 +1,185 @@
+use super::chunks::*;
+use std::io::{Write, BufWriter, ErrorKind, BufReader, Read};
+use std::fs::File;
+use std::collections::HashMap;
+use std::io::Error;
+use byteorder::{BigEndian, ByteOrder};
+use std::convert::TryInto;
+
+const ENTRIES_PER_CHUNK: u32 = 100_000;
+
+pub struct BDFReader {
+    reader: BufReader<File>,
+    pub metadata: Option<MetaChunk>,
+    pub lookup_table: Option<HashLookupTable>,
+    compressed: bool,
+}
+
+pub struct BDFWriter {
+    writer: BufWriter<File>,
+    metadata: MetaChunk,
+    lookup_table: HashLookupTable,
+    data_entries: Vec<DataEntry>,
+    head_written: bool,
+    compressed: bool,
+}
+
+impl BDFWriter {
+    pub fn new(writer: BufWriter<File>, entry_count: u64, compress: bool) -> Self {
+        Self {
+            metadata: MetaChunk::new(entry_count, ENTRIES_PER_CHUNK, compress),
+            lookup_table: HashLookupTable::new(HashMap::new()),
+            data_entries: Vec::new(),
+            writer,
+            head_written: false,
+            compressed: compress,
+        }
+    }
+
+    /// Adds an entry to the hash lookup table.
+    /// If the lookup table has already been written to the file, an error is returned
+    pub fn add_lookup_entry(&mut self, mut entry: HashEntry) -> Result<u32, Error> {
+        if self.head_written {
+            return Err(Error::new(
+                ErrorKind::Other,
+                "the head has already been written",
+            ));
+        }
+        let id = self.lookup_table.entries.len() as u32;
+        entry.id = id;
+        self.lookup_table.entries.insert(id, entry);
+
+        Ok(id)
+    }
+
+    /// Adds a data entry to the file.
+    /// If the number of entries per chunk is reached,
+    /// the data will be written to the file
+    pub fn add_data_entry(&mut self, data_entry: DataEntry) -> Result<(), Error> {
+        self.data_entries.push(data_entry);
+        if self.data_entries.len() >= ENTRIES_PER_CHUNK as usize {
+            self.flush()?;
+        }
+
+        Ok(())
+    }
+
+    /// Writes the data to the file
+    pub fn flush(&mut self) -> Result<(), Error> {
+        if !self.head_written {
+            self.writer.write(BDF_HDR)?;
+            let mut generic_meta = GenericChunk::from(&self.metadata);
+            self.writer.write(generic_meta.serialize().as_slice())?;
+            let mut generic_lookup = GenericChunk::from(&self.lookup_table);
+            self.writer.write(generic_lookup.serialize().as_slice())?;
+            self.head_written = true;
+        }
+        let mut data_chunk =
+            GenericChunk::from_data_entries(&self.data_entries, &self.lookup_table);
+        if self.compressed {
+            data_chunk.compress()?;
+        }
+        let data = data_chunk.serialize();
+        self.writer.write(data.as_slice())?;
+        self.data_entries = Vec::new();
+
+        Ok(())
+    }
+
+    pub fn flush_writer(&mut self) -> Result<(), Error> {
+        self.writer.flush()
+    }
+}
+
+impl BDFReader {
+    pub fn new(reader: BufReader<File>) -> Self {
+        Self {
+            metadata: None,
+            lookup_table: None,
+            reader,
+            compressed: false,
+        }
+    }
+
+    /// Verifies the header of the file and reads and stores the metadata
+    pub fn read_metadata(&mut self) -> Result<&MetaChunk, Error> {
+        if !self.validate_header() {
+            return Err(Error::new(ErrorKind::InvalidData, "invalid BDF header"));
+        }
+        let meta_chunk: MetaChunk = self.next_chunk()?.try_into()?;
+        if let Some(method) = &meta_chunk.compression_method {
+            if *method == LZMA.to_string() {
+                self.compressed = true;
+            } else {
+                return Err(Error::new(
+                    ErrorKind::Other,
+                    "unsupported compression method",
+                ));
+            }
+        }
+        self.metadata = Some(meta_chunk);
+
+        if let Some(chunk) = &self.metadata {
+            Ok(&chunk)
+        } else {
+            Err(Error::new(
+                ErrorKind::Other,
+                "failed to read self assigned metadata",
+            ))
+        }
+    }
+
+    /// Reads the lookup table of the file.
+    /// This function should be called after the read_metadata function was called
+    pub fn read_lookup_table(&mut self) -> Result<&HashLookupTable, Error> {
+        match &self.metadata {
+            None => self.read_metadata()?,
+            Some(t) => t,
+        };
+        let lookup_table: HashLookupTable = self.next_chunk()?.try_into()?;
+        self.lookup_table = Some(lookup_table);
+
+        if let Some(chunk) = &self.lookup_table {
+            Ok(&chunk)
+        } else {
+            Err(Error::new(
+                ErrorKind::Other,
+                "failed to read self assigned chunk",
+            ))
+        }
+    }
+
+    /// Validates the header of the file
+    fn validate_header(&mut self) -> bool {
+        let mut header = [0u8; 11];
+        let _ = self.reader.read(&mut header);
+
+        header == BDF_HDR.as_ref()
+    }
+
+    /// Returns the next chunk if one is available.
+    pub fn next_chunk(&mut self) -> Result<GenericChunk, Error> {
+        let mut length_raw = [0u8; 4];
+        let _ = self.reader.read_exact(&mut length_raw)?;
+        let length = BigEndian::read_u32(&mut length_raw);
+        let mut name_raw = [0u8; 4];
+        let _ = self.reader.read_exact(&mut name_raw)?;
+        let name = String::from_utf8(name_raw.to_vec()).expect("Failed to parse name string.");
+        let mut data = vec![0u8; length as usize];
+        let _ = self.reader.read_exact(&mut data)?;
+        let mut crc_raw = [0u8; 4];
+        let _ = self.reader.read_exact(&mut crc_raw)?;
+        let crc = BigEndian::read_u32(&mut crc_raw);
+        let mut gen_chunk = GenericChunk {
+            length,
+            name,
+            data,
+            crc,
+        };
+        if gen_chunk.name == DTBL_CHUNK_NAME.to_string() && self.compressed {
+            gen_chunk.decompress()?;
+        }
+
+        Ok(gen_chunk)
+    }
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..30fe66e
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,55 @@
+#[cfg(test)]
+mod tests {
+    use super::io::BDFWriter;
+    use std::io::{BufWriter, Error};
+    use std::fs::File;
+    use crate::chunks::{HashEntry, DataEntry};
+
+    const FOO: &str = "foo";
+    const BAR: &str = "bar";
+
+    #[test]
+    fn it_writes_uncompressed() -> Result<(), Error> {
+        let file = File::create("tmp.bdf")?;
+        let f = BufWriter::new(file);
+        let mut writer = BDFWriter::new(f, 2, false);
+        writer.add_lookup_entry(HashEntry::new(BAR.to_string(), 5))?;
+        writer.add_lookup_entry(HashEntry::new(FOO.to_string(), 4))?;
+        let mut entry_1 = DataEntry::new("lol".to_string());
+        entry_1.add_hash_value(FOO.to_string(), vec![0, 1, 0, 2]);
+        entry_1.add_hash_value(BAR.to_string(), vec![0, 2, 3, 4, 5]);
+        writer.add_data_entry(entry_1)?;
+        let mut entry_2 = DataEntry::new("lel".to_string());
+        entry_2.add_hash_value(BAR.to_string(), vec![0, 3, 2, 1, 5]);
+        entry_2.add_hash_value(FOO.to_string(), vec![4, 5, 2, 3]);
+        writer.add_data_entry(entry_2)?;
+        writer.flush()?;
+        writer.flush_writer()?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn it_writes_compressed() -> Result<(), Error> {
+        let file = File::create("tmp-compressed.bdf")?;
+        let f = BufWriter::new(file);
+        let mut writer = BDFWriter::new(f, 2, true);
+        writer.add_lookup_entry(HashEntry::new(FOO.to_string(), 4))?;
+        writer.add_lookup_entry(HashEntry::new(BAR.to_string(), 5))?;
+        let mut entry_1 = DataEntry::new("lol".to_string());
+        entry_1.add_hash_value(FOO.to_string(), vec![2, 4, 0, 2]);
+        entry_1.add_hash_value(BAR.to_string(), vec![5, 2, 1, 4, 5]);
+        writer.add_data_entry(entry_1)?;
+        let mut entry_2 = DataEntry::new("lel".to_string());
+        entry_2.add_hash_value(BAR.to_string(), vec![0, 3, 2, 1, 5]);
+        entry_2.add_hash_value(FOO.to_string(), vec![4, 5, 2, 3]);
+        writer.add_data_entry(entry_2)?;
+        writer.flush()?;
+        writer.flush_writer()?;
+
+        Ok(())
+    }
+}
+
+pub mod chunks;
+pub mod io;
\ No newline at end of file
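
The tests above only exercise the write path. For reference, the counterpart read path could look roughly like the sketch below. It is not part of this commit; it is a minimal example that only uses the public API introduced here (BDFReader::new, read_metadata, read_lookup_table, next_chunk and GenericChunk::data_entries), and it assumes a file such as the tmp.bdf produced by the it_writes_uncompressed test.

use bdflib::io::BDFReader;
use std::fs::File;
use std::io::{BufReader, Error};

fn main() -> Result<(), Error> {
    // Open a dictionary previously produced by BDFWriter (e.g. tmp.bdf from the test above).
    let file = File::open("tmp.bdf")?;
    let mut reader = BDFReader::new(BufReader::new(file));

    // The header, the META chunk and the HTBL chunk have to be consumed first, in this order.
    let chunk_count = reader.read_metadata()?.chunk_count;
    let lookup_table = reader.read_lookup_table()?.clone();

    // Every remaining chunk is a DTBL chunk; next_chunk decompresses it transparently
    // when the META chunk announced lzma compression.
    for _ in 0..chunk_count {
        let mut chunk = reader.next_chunk()?;
        for entry in chunk.data_entries(&lookup_table)? {
            println!("{}", entry.plain);
        }
    }

    Ok(())
}

Cloning the lookup table sidesteps holding a borrow of the reader across the subsequent next_chunk calls; alternatively the table could be re-borrowed from reader.lookup_table inside the loop.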