Initial commit
commit e6a8dc74e6
.gitignore
@@ -0,0 +1,4 @@
/target
**/*.rs.bk
Cargo.lock
*.bdf
.idea/.gitignore
@@ -0,0 +1,2 @@
# Default ignored files
/workspace.xml
.idea/bdflib.iml
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="CPP_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/examples" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
      <sourceFolder url="file://$MODULE_DIR$/benches" isTestSource="true" />
      <excludeFolder url="file://$MODULE_DIR$/target" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
.idea/misc.xml
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="JavaScriptSettings">
    <option name="languageLevel" value="ES6" />
  </component>
</project>
.idea/modules.xml
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/bdflib.iml" filepath="$PROJECT_DIR$/.idea/bdflib.iml" />
    </modules>
  </component>
</project>
.idea/vcs.xml
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
Cargo.toml
@@ -0,0 +1,13 @@
[package]
name = "bdflib"
version = "0.1.0"
authors = ["trivernis <trivernis@gmail.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]

crc = "1.8.1"
xz2 = "0.1.6"
byteorder = "1.3.4"
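The three dependencies line up with the format's needs: crc for chunk checksums, xz2 for the optional LZMA compression, and byteorder for the big-endian integer fields. A minimal sketch of the byteorder pattern the rest of the commit leans on (buffer and value here are made up):

use byteorder::{BigEndian, ByteOrder};

fn main() {
    // Every integer field in the format (lengths, ids, counts) is big-endian.
    let mut buf = [0u8; 4];
    BigEndian::write_u32(&mut buf, 409);
    assert_eq!(buf, [0, 0, 1, 153]);
    assert_eq!(BigEndian::read_u32(&buf), 409);
}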
src/chunks.rs
@@ -0,0 +1,409 @@
use byteorder::{BigEndian, ByteOrder};
use crc::crc32;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::io::{Error, ErrorKind, Read};
use xz2::read::{XzDecoder, XzEncoder};

pub const LZMA: &str = "lzma";

pub const BDF_HDR: &[u8; 11] = b"BDF\x01RAINBOW";
pub const NULL_BYTES: &[u8; 4] = &[0u8; 4];
pub const META_CHUNK_NAME: &str = "META";
pub const HTBL_CHUNK_NAME: &str = "HTBL";
pub const DTBL_CHUNK_NAME: &str = "DTBL";

#[derive(Debug, Clone)]
pub struct GenericChunk {
    pub length: u32,
    pub(crate) name: String,
    pub data: Vec<u8>,
    pub crc: u32,
}

#[derive(Debug, Clone)]
pub struct MetaChunk {
    pub chunk_count: u32,
    entries_per_chunk: u32,
    pub entry_count: u64,
    pub compression_method: Option<String>,
}

#[derive(Debug, Clone)]
pub struct HashLookupTable {
    pub entries: HashMap<u32, HashEntry>,
}

#[derive(Debug, Clone)]
pub struct HashEntry {
    pub(crate) id: u32,
    output_length: u32,
    name: String,
}

#[derive(Debug, Clone)]
pub struct DataEntry {
    pub plain: String,
    hashes: HashMap<String, Vec<u8>>,
}

impl GenericChunk {
    /// Serializes the chunk to a vector of bytes
    pub fn serialize(&mut self) -> Vec<u8> {
        let mut serialized: Vec<u8> = Vec::new();
        let mut length_raw = [0u8; 4];
        BigEndian::write_u32(&mut length_raw, self.length);
        serialized.append(&mut length_raw.to_vec());
        let name_raw = self.name.as_bytes();
        serialized.append(&mut name_raw.to_vec());
        serialized.append(&mut self.data);
        let mut crc_raw = [0u8; 4];
        BigEndian::write_u32(&mut crc_raw, self.crc);
        serialized.append(&mut crc_raw.to_vec());

        serialized
    }

    /// Returns the data entries of the chunk
    pub fn data_entries(
        &mut self,
        lookup_table: &HashLookupTable,
    ) -> Result<Vec<DataEntry>, Error> {
        if self.name == HTBL_CHUNK_NAME {
            return Err(Error::new(ErrorKind::Other, "this is not a data chunk"));
        }
        let mut entries: Vec<DataEntry> = Vec::new();
        let mut position = 0;
        while self.data.len() > (position + 8) {
            let entry_length_raw = &self.data[position..position + 4];
            position += 4;
            let entry_length = BigEndian::read_u32(entry_length_raw);
            let entry_end = position + entry_length as usize;
            let pw_length_raw = &self.data[position..position + 4];
            position += 4;
            let pw_length = BigEndian::read_u32(pw_length_raw);
            let pw_plain_raw = &self.data[position..position + pw_length as usize];
            position += pw_length as usize;
            let pw_plain = String::from_utf8(pw_plain_raw.to_vec())
                .map_err(|err| {
                    format!(
                        "failed to parse plain password string ({}-{}): {:?}",
                        position,
                        position + pw_length as usize,
                        err
                    )
                })
                .unwrap();
            let mut hash_values: HashMap<String, Vec<u8>> = HashMap::new();
            while position < entry_end {
                let entry_id_raw = &self.data[position..position + 4];
                position += 4;
                let entry_id = BigEndian::read_u32(entry_id_raw);
                if let Some(hash_entry) = lookup_table.entries.get(&entry_id) {
                    let hash = &self.data[position..position + hash_entry.output_length as usize];
                    position += hash_entry.output_length as usize;
                    hash_values.insert(hash_entry.name.clone(), hash.to_vec());
                }
            }
            entries.push(DataEntry {
                plain: pw_plain,
                hashes: hash_values,
            })
        }

        Ok(entries)
    }

    /// Constructs the chunk from a Vec of Data entries and a hash lookup table
    pub fn from_data_entries(
        entries: &Vec<DataEntry>,
        lookup_table: &HashLookupTable,
    ) -> GenericChunk {
        let mut serialized_data: Vec<u8> = Vec::new();
        entries.iter().for_each(|entry| {
            serialized_data.append(&mut entry.serialize(lookup_table));
        });
        let crc_sum = crc32::checksum_ieee(serialized_data.as_slice());

        GenericChunk {
            length: serialized_data.len() as u32,
            name: DTBL_CHUNK_NAME.to_string(),
            data: serialized_data,
            crc: crc_sum,
        }
    }

    pub fn compress(&mut self) -> Result<(), Error> {
        let data = self.data.as_slice();
        let mut compressor = XzEncoder::new(data, 6);
        let mut compressed: Vec<u8> = Vec::new();
        compressor.read_to_end(&mut compressed)?;
        self.length = compressed.len() as u32;
        self.data = compressed;

        Ok(())
    }

    pub fn decompress(&mut self) -> Result<(), Error> {
        let data = self.data.as_slice();
        let mut decompressor = XzDecoder::new(data);
        let mut decompressed: Vec<u8> = Vec::new();
        decompressor.read_to_end(&mut decompressed)?;
        let crc = crc32::checksum_ieee(decompressed.as_slice());
        if crc != self.crc {
            return Err(Error::new(
                ErrorKind::InvalidData,
                "the crc doesn't match the decompressed data",
            ));
        }
        self.length = decompressed.len() as u32;
        self.data = decompressed;

        Ok(())
    }
}

impl From<&MetaChunk> for GenericChunk {
    fn from(chunk: &MetaChunk) -> GenericChunk {
        let serialized_data = chunk.serialize();
        let crc_sum = crc32::checksum_ieee(serialized_data.as_slice());

        GenericChunk {
            length: serialized_data.len() as u32,
            name: META_CHUNK_NAME.to_string(),
            data: serialized_data,
            crc: crc_sum,
        }
    }
}

impl From<&HashLookupTable> for GenericChunk {
    fn from(chunk: &HashLookupTable) -> GenericChunk {
        let serialized_data = chunk.serialize();
        let crc_sum = crc32::checksum_ieee(serialized_data.as_slice());

        GenericChunk {
            length: serialized_data.len() as u32,
            name: HTBL_CHUNK_NAME.to_string(),
            data: serialized_data,
            crc: crc_sum,
        }
    }
}

impl MetaChunk {
    /// Creates a new meta chunk
    pub fn new(entry_count: u64, entries_per_chunk: u32, compress: bool) -> Self {
        let compression_method = if compress {
            Some(LZMA.to_string())
        } else {
            None
        };
        let chunk_count = (entry_count as f64 / entries_per_chunk as f64).ceil() as u32;

        Self {
            chunk_count,
            entry_count,
            entries_per_chunk,
            compression_method,
        }
    }

    /// Serializes the chunk into bytes
    pub fn serialize(&self) -> Vec<u8> {
        let mut serialized_data: Vec<u8> = Vec::new();
        let mut chunk_count_raw = [0u8; 4];
        BigEndian::write_u32(&mut chunk_count_raw, self.chunk_count);
        serialized_data.append(&mut chunk_count_raw.to_vec());
        let mut entries_pc_raw = [0u8; 4];
        BigEndian::write_u32(&mut entries_pc_raw, self.entries_per_chunk);
        serialized_data.append(&mut entries_pc_raw.to_vec());
        let mut total_entries_raw = [0u8; 8];
        BigEndian::write_u64(&mut total_entries_raw, self.entry_count);
        serialized_data.append(&mut total_entries_raw.to_vec());
        if let Some(method) = self.compression_method.clone() {
            serialized_data.append(&mut method.into_bytes());
        } else {
            serialized_data.append(&mut vec![0, 0, 0, 0]);
        }

        serialized_data
    }
}

impl TryFrom<GenericChunk> for MetaChunk {
    type Error = Error;

    fn try_from(chunk: GenericChunk) -> Result<MetaChunk, Error> {
        if chunk.name != META_CHUNK_NAME {
            return Err(Error::new(
                ErrorKind::InvalidData,
                "chunk name doesn't match",
            ));
        }
        if chunk.data.len() < 20 {
            return Err(Error::new(ErrorKind::InvalidData, "invalid chunk data"));
        }
        let chunk_count_raw = &chunk.data[0..4];
        let entries_per_chunk_raw = &chunk.data[4..8];
        let total_number_of_entries = &chunk.data[8..16];
        let compression_method_raw = chunk.data[16..20].to_vec();
        let chunk_count = BigEndian::read_u32(chunk_count_raw);
        let entries_per_chunk = BigEndian::read_u32(entries_per_chunk_raw);
        let entry_count = BigEndian::read_u64(total_number_of_entries);
        let compression_method = if &compression_method_raw != NULL_BYTES {
            Some(
                String::from_utf8(compression_method_raw)
                    .expect("Failed to parse compression method name!"),
            )
        } else {
            None
        };

        Ok(MetaChunk {
            chunk_count,
            entries_per_chunk,
            entry_count,
            compression_method,
        })
    }
}

impl HashLookupTable {
    pub fn new(entries: HashMap<u32, HashEntry>) -> Self {
        Self { entries }
    }

    /// Returns an entry by the name of the hash function
    pub fn get_entry(&self, name: &String) -> Option<(&u32, &HashEntry)> {
        self.entries.iter().find(|(_, entry)| entry.name == *name)
    }

    /// Serializes the lookup table into a vector of bytes
    pub fn serialize(&self) -> Vec<u8> {
        let mut serialized_full: Vec<u8> = Vec::new();
        for (_, entry) in &self.entries {
            serialized_full.append(entry.serialize().as_mut())
        }

        serialized_full
    }
}

impl TryFrom<GenericChunk> for HashLookupTable {
    type Error = Error;

    fn try_from(chunk: GenericChunk) -> Result<HashLookupTable, Error> {
        if chunk.name != HTBL_CHUNK_NAME {
            return Err(Error::new(
                ErrorKind::InvalidData,
                "chunk name doesn't match",
            ));
        }
        let mut hash_entries: HashMap<u32, HashEntry> = HashMap::new();
        let mut position = 0;
        while chunk.data.len() > (position + 12) {
            let id_raw = &chunk.data[position..position + 4];
            position += 4;
            let output_length_raw = &chunk.data[position..position + 4];
            position += 4;
            let name_length_raw = &chunk.data[position..position + 4];
            position += 4;
            let id = BigEndian::read_u32(id_raw);
            let output_length = BigEndian::read_u32(output_length_raw);
            let name_length = BigEndian::read_u32(name_length_raw);
            let name_raw = &chunk.data[position..position + name_length as usize];
            // advance past the name bytes so the next entry is read from the right offset
            position += name_length as usize;
            let name =
                String::from_utf8(name_raw.to_vec()).expect("Failed to parse hash function name!");
            hash_entries.insert(
                id,
                HashEntry {
                    id,
                    output_length,
                    name,
                },
            );
        }
        Ok(HashLookupTable {
            entries: hash_entries,
        })
    }
}

impl HashEntry {
    pub fn new(name: String, output_length: u32) -> Self {
        Self {
            id: 0,
            name,
            output_length,
        }
    }

    /// Serializes the entry to a vector of bytes
    pub fn serialize(&self) -> Vec<u8> {
        let mut serialized: Vec<u8> = Vec::new();
        let mut id_raw = [0u8; 4];
        BigEndian::write_u32(&mut id_raw, self.id);
        serialized.append(&mut id_raw.to_vec());
        let mut output_length_raw = [0u8; 4];
        BigEndian::write_u32(&mut output_length_raw, self.output_length);
        serialized.append(&mut output_length_raw.to_vec());
        let mut name_raw = self.name.clone().into_bytes();
        let mut name_length_raw = [0u8; 4];
        BigEndian::write_u32(&mut name_length_raw, name_raw.len() as u32);
        serialized.append(&mut name_length_raw.to_vec());
        serialized.append(&mut name_raw);

        serialized
    }
}

impl DataEntry {
    pub fn new(plain: String) -> Self {
        Self {
            hashes: HashMap::new(),
            plain,
        }
    }

    /// Adds a hash to the hash values
    pub fn add_hash_value(&mut self, name: String, value: Vec<u8>) {
        self.hashes.insert(name, value);
    }

    /// Returns the hash value for a given name of a hash function
    pub fn get_hash_value(&self, name: String) -> Option<&Vec<u8>> {
        self.hashes.get(&name)
    }

    /// Serializes the entry to a vector of bytes
    pub fn serialize(&self, lookup_table: &HashLookupTable) -> Vec<u8> {
        let mut pw_plain_raw = self.plain.clone().into_bytes();
        let mut pw_length_raw = [0u8; 4];
        BigEndian::write_u32(&mut pw_length_raw, pw_plain_raw.len() as u32);
        let mut hash_data: Vec<u8> = Vec::new();
        for (name, value) in &self.hashes {
            if let Some((id, _)) = lookup_table.get_entry(name) {
                let mut id_raw = [0u8; 4];
                BigEndian::write_u32(&mut id_raw, *id);
                hash_data.append(&mut id_raw.to_vec());
                hash_data.append(&mut value.clone())
            }
        }

        let mut length_total_raw = [0u8; 4];
        BigEndian::write_u32(
            &mut length_total_raw,
            4 + pw_plain_raw.len() as u32 + hash_data.len() as u32,
        );
        let mut serialized_data: Vec<u8> = Vec::new();
        serialized_data.append(&mut length_total_raw.to_vec());
        serialized_data.append(&mut pw_length_raw.to_vec());
        serialized_data.append(&mut pw_plain_raw);
        serialized_data.append(&mut hash_data);

        serialized_data
    }
}
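For orientation: a DTBL chunk's payload is a sequence of records laid out as [entry length | password length | password bytes | (hash id | digest bytes)*]. The following sketch round-trips one record in memory, written as a hypothetical consumer of the crate; the "fakehash" function name and the digest bytes are invented for illustration:

use bdflib::chunks::{DataEntry, GenericChunk, HashEntry, HashLookupTable};
use std::collections::HashMap;

fn main() {
    // A one-function lookup table: id 0 -> "fakehash", producing 4-byte digests.
    let mut entries = HashMap::new();
    entries.insert(0, HashEntry::new("fakehash".to_string(), 4));
    let table = HashLookupTable::new(entries);

    // One entry with a single made-up digest.
    let mut entry = DataEntry::new("hunter2".to_string());
    entry.add_hash_value("fakehash".to_string(), vec![0xde, 0xad, 0xbe, 0xef]);

    // Pack it into a DTBL chunk, then parse it back out.
    let mut chunk = GenericChunk::from_data_entries(&vec![entry], &table);
    let parsed = chunk.data_entries(&table).unwrap();
    assert_eq!(parsed[0].plain, "hunter2");
}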
src/io.rs
@@ -0,0 +1,185 @@
use super::chunks::*;
use std::io::{BufReader, BufWriter, Error, ErrorKind, Read, Write};
use std::fs::File;
use std::collections::HashMap;
use byteorder::{BigEndian, ByteOrder};
use std::convert::TryInto;

const ENTRIES_PER_CHUNK: u32 = 100_000;

pub struct BDFReader {
    reader: BufReader<File>,
    pub metadata: Option<MetaChunk>,
    pub lookup_table: Option<HashLookupTable>,
    compressed: bool,
}

pub struct BDFWriter {
    writer: BufWriter<File>,
    metadata: MetaChunk,
    lookup_table: HashLookupTable,
    data_entries: Vec<DataEntry>,
    head_written: bool,
    compressed: bool,
}

impl BDFWriter {
    pub fn new(writer: BufWriter<File>, entry_count: u64, compress: bool) -> Self {
        Self {
            metadata: MetaChunk::new(entry_count, ENTRIES_PER_CHUNK, compress),
            lookup_table: HashLookupTable::new(HashMap::new()),
            data_entries: Vec::new(),
            writer,
            head_written: false,
            compressed: compress,
        }
    }

    /// Adds an entry to the hash lookup table.
    /// If the lookup table has already been written to the file, an error is returned
    pub fn add_lookup_entry(&mut self, mut entry: HashEntry) -> Result<u32, Error> {
        if self.head_written {
            return Err(Error::new(
                ErrorKind::Other,
                "the head has already been written",
            ));
        }
        let id = self.lookup_table.entries.len() as u32;
        entry.id = id;
        self.lookup_table.entries.insert(id, entry);

        Ok(id)
    }

    /// Adds a data entry to the file.
    /// If the number of entries per chunk is reached,
    /// the data will be written to the file
    pub fn add_data_entry(&mut self, data_entry: DataEntry) -> Result<(), Error> {
        self.data_entries.push(data_entry);
        if self.data_entries.len() >= ENTRIES_PER_CHUNK as usize {
            self.flush()?;
        }

        Ok(())
    }

    /// Writes the data to the file
    pub fn flush(&mut self) -> Result<(), Error> {
        if !self.head_written {
            // the file head (header, META chunk, HTBL chunk) is written exactly once
            self.writer.write_all(BDF_HDR)?;
            let mut generic_meta = GenericChunk::from(&self.metadata);
            self.writer.write_all(generic_meta.serialize().as_slice())?;
            let mut generic_lookup = GenericChunk::from(&self.lookup_table);
            self.writer.write_all(generic_lookup.serialize().as_slice())?;
            self.head_written = true;
        }
        let mut data_chunk =
            GenericChunk::from_data_entries(&self.data_entries, &self.lookup_table);
        if self.compressed {
            data_chunk.compress()?;
        }
        let data = data_chunk.serialize();
        self.writer.write_all(data.as_slice())?;
        self.data_entries = Vec::new();

        Ok(())
    }

    pub fn flush_writer(&mut self) -> Result<(), Error> {
        self.writer.flush()
    }
}

impl BDFReader {
    pub fn new(reader: BufReader<File>) -> Self {
        Self {
            metadata: None,
            lookup_table: None,
            reader,
            compressed: false,
        }
    }

    /// Verifies the header of the file and reads and stores the metadata
    pub fn read_metadata(&mut self) -> Result<&MetaChunk, Error> {
        if !self.validate_header() {
            return Err(Error::new(ErrorKind::InvalidData, "invalid BDF Header"));
        }
        let meta_chunk: MetaChunk = self.next_chunk()?.try_into()?;
        if let Some(method) = &meta_chunk.compression_method {
            if method == LZMA {
                self.compressed = true;
            } else {
                return Err(Error::new(
                    ErrorKind::Other,
                    "unsupported compression method",
                ));
            }
        }
        self.metadata = Some(meta_chunk);

        if let Some(chunk) = &self.metadata {
            Ok(chunk)
        } else {
            Err(Error::new(
                ErrorKind::Other,
                "Failed to read self assigned metadata.",
            ))
        }
    }

    /// Reads the lookup table of the file.
    /// This function should be called after the read_metadata function was called
    pub fn read_lookup_table(&mut self) -> Result<&HashLookupTable, Error> {
        if self.metadata.is_none() {
            self.read_metadata()?;
        }
        let lookup_table: HashLookupTable = self.next_chunk()?.try_into()?;
        self.lookup_table = Some(lookup_table);

        if let Some(chunk) = &self.lookup_table {
            Ok(chunk)
        } else {
            Err(Error::new(
                ErrorKind::Other,
                "failed to read self assigned chunk",
            ))
        }
    }

    /// Validates the header of the file
    fn validate_header(&mut self) -> bool {
        let mut header = [0u8; 11];
        if self.reader.read_exact(&mut header).is_err() {
            return false;
        }

        header == BDF_HDR.as_ref()
    }

    /// Returns the next chunk if one is available.
    pub fn next_chunk(&mut self) -> Result<GenericChunk, Error> {
        // chunk layout on disk: length | name | data | crc
        let mut length_raw = [0u8; 4];
        self.reader.read_exact(&mut length_raw)?;
        let length = BigEndian::read_u32(&length_raw);
        let mut name_raw = [0u8; 4];
        self.reader.read_exact(&mut name_raw)?;
        let name = String::from_utf8(name_raw.to_vec()).expect("Failed to parse name string.");
        let mut data = vec![0u8; length as usize];
        self.reader.read_exact(&mut data)?;
        let mut crc_raw = [0u8; 4];
        self.reader.read_exact(&mut crc_raw)?;
        let crc = BigEndian::read_u32(&crc_raw);
        let mut gen_chunk = GenericChunk {
            length,
            name,
            data,
            crc,
        };
        if gen_chunk.name == DTBL_CHUNK_NAME && self.compressed {
            gen_chunk.decompress()?;
        }

        Ok(gen_chunk)
    }
}
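Taken together, the writer and reader give a full round trip over a file: header, META chunk, HTBL chunk, then DTBL chunks. A minimal end-to-end sketch, assuming the crate is pulled in as bdflib; the file name, the "fakehash" function, and the digest bytes are placeholders:

use bdflib::chunks::{DataEntry, HashEntry};
use bdflib::io::{BDFReader, BDFWriter};
use std::fs::File;
use std::io::{BufReader, BufWriter, Error};

fn main() -> Result<(), Error> {
    // Write a tiny uncompressed dictionary with a single made-up hash function.
    let mut writer = BDFWriter::new(BufWriter::new(File::create("example.bdf")?), 1, false);
    writer.add_lookup_entry(HashEntry::new("fakehash".to_string(), 4))?;
    let mut entry = DataEntry::new("password1".to_string());
    entry.add_hash_value("fakehash".to_string(), vec![1, 2, 3, 4]);
    writer.add_data_entry(entry)?;
    writer.flush()?;
    writer.flush_writer()?;

    // Read it back: metadata first, then the lookup table, then data chunks.
    let mut reader = BDFReader::new(BufReader::new(File::open("example.bdf")?));
    let meta = reader.read_metadata()?;
    println!("entries: {}", meta.entry_count);
    let lookup_table = reader.read_lookup_table()?.clone();
    let mut data_chunk = reader.next_chunk()?;
    for entry in data_chunk.data_entries(&lookup_table)? {
        println!("{}", entry.plain);
    }

    Ok(())
}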
src/lib.rs
@@ -0,0 +1,55 @@
#[cfg(test)]
mod tests {
    use super::io::BDFWriter;
    use std::io::{BufWriter, Error};
    use std::fs::File;
    use crate::chunks::{HashEntry, DataEntry};

    const FOO: &str = "foo";
    const BAR: &str = "bar";

    #[test]
    fn it_writes_uncompressed() -> Result<(), Error> {
        let file = File::create("tmp.bdf")?;
        let f = BufWriter::new(file);
        let mut writer = BDFWriter::new(f, 2, false);
        writer.add_lookup_entry(HashEntry::new(BAR.to_string(), 5))?;
        writer.add_lookup_entry(HashEntry::new(FOO.to_string(), 4))?;
        let mut entry_1 = DataEntry::new("lol".to_string());
        entry_1.add_hash_value(FOO.to_string(), vec![0, 1, 0, 2]);
        entry_1.add_hash_value(BAR.to_string(), vec![0, 2, 3, 4, 5]);
        writer.add_data_entry(entry_1)?;
        let mut entry_2 = DataEntry::new("lel".to_string());
        entry_2.add_hash_value(BAR.to_string(), vec![0, 3, 2, 1, 5]);
        entry_2.add_hash_value(FOO.to_string(), vec![4, 5, 2, 3]);
        writer.add_data_entry(entry_2)?;
        writer.flush()?;
        writer.flush_writer()?;

        Ok(())
    }

    #[test]
    fn it_writes_compressed() -> Result<(), Error> {
        let file = File::create("tmp-compressed.bdf")?;
        let f = BufWriter::new(file);
        let mut writer = BDFWriter::new(f, 2, true);
        writer.add_lookup_entry(HashEntry::new(FOO.to_string(), 4))?;
        writer.add_lookup_entry(HashEntry::new(BAR.to_string(), 5))?;
        let mut entry_1 = DataEntry::new("lol".to_string());
        entry_1.add_hash_value(FOO.to_string(), vec![2, 4, 0, 2]);
        entry_1.add_hash_value(BAR.to_string(), vec![5, 2, 1, 4, 5]);
        writer.add_data_entry(entry_1)?;
        let mut entry_2 = DataEntry::new("lel".to_string());
        entry_2.add_hash_value(BAR.to_string(), vec![0, 3, 2, 1, 5]);
        entry_2.add_hash_value(FOO.to_string(), vec![4, 5, 2, 3]);
        writer.add_data_entry(entry_2)?;
        writer.flush()?;
        writer.flush_writer()?;

        Ok(())
    }
}

pub mod chunks;
pub mod io;