From f5ad767d22f3c0ef01eeebe64723162ae6d12a36 Mon Sep 17 00:00:00 2001 From: trivernis Date: Sat, 26 Sep 2020 20:19:34 +0200 Subject: [PATCH] Add scanning of chunk data in nbt format Signed-off-by: trivernis --- Cargo.lock | 81 +++++++++++++++++++- Cargo.toml | 5 +- src/chunk.rs | 120 +++++++++++++++++++++++++++++ src/lib.rs | 2 + src/nbt.rs | 183 +++++++++++++++++++++++++++++++++++++++++++++ src/region_file.rs | 52 ++++++------- src/scan.rs | 23 +++++- 7 files changed, 432 insertions(+), 34 deletions(-) create mode 100644 src/chunk.rs create mode 100644 src/nbt.rs diff --git a/Cargo.lock b/Cargo.lock index 95d3b3a..6602e16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,11 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "adler" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" + [[package]] name = "aho-corasick" version = "0.7.13" @@ -47,6 +53,12 @@ version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +[[package]] +name = "cc" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef611cc68ff783f18535d77ddd080185275713d852c4f5cbb6122c462a7a825c" + [[package]] name = "cfg-if" version = "0.1.10" @@ -68,6 +80,15 @@ dependencies = [ "vec_map", ] +[[package]] +name = "cmake" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e56268c17a6248366d66d4a47a3381369d068cce8409bb1716ed77ea32163bb" +dependencies = [ + "cc", +] + [[package]] name = "colored" version = "2.0.0" @@ -96,6 +117,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "crc32fast" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.4.4" @@ -168,6 +198,19 @@ dependencies = [ "termcolor", ] +[[package]] +name = "flate2" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "766d0e77a2c1502169d4a93ff3b8c15a71fd946cd0126309752104e5f3c46d94" +dependencies = [ + "cfg-if", + "crc32fast", + "libc", + "libz-sys", + "miniz_oxide", +] + [[package]] name = "heck" version = "0.3.1" @@ -219,6 +262,19 @@ version = "0.2.77" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f96b10ec2560088a8e76961b00d47107b3a625fecb76dedb29ee7ccbf98235" +[[package]] +name = "libz-sys" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "602113192b08db8f38796c4e85c39e960c145965140e918018bcde1952429655" +dependencies = [ + "cc", + "cmake", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "log" version = "0.4.11" @@ -251,17 +307,28 @@ dependencies = [ [[package]] name = "minecraft-regions-tool" -version = "0.3.0" +version = "0.4.0" dependencies = [ "byteorder", "colored", "env_logger", + "flate2", "indicatif", "log", "rayon", "structopt", ] +[[package]] +name = "miniz_oxide" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c60c0dfe32c10b43a144bad8fc83538c52f58302c92300ea7ec7bf7b38d5a7b9" +dependencies = [ + "adler", + "autocfg", +] + [[package]] name = "num_cpus" version = "1.13.0" @@ -278,6 +345,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" +[[package]] +name = "pkg-config" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d36492546b6af1463394d46f0c834346f31548646f6ba10849802c9c9a27ac33" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -480,6 +553,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +[[package]] +name = "vcpkg" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6454029bf181f092ad1b853286f23e2c507d8e8194d01d92da4a55c274a5508c" + [[package]] name = "vec_map" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 1ed1451..bbf52a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minecraft-regions-tool" -version = "0.3.0" +version = "0.4.0" authors = ["trivernis "] edition = "2018" license = "GPL-3.0" @@ -17,4 +17,5 @@ rayon = "1.4.0" indicatif = "0.15.0" log = "0.4.11" env_logger ="0.7.1" -colored = "2.0.0" \ No newline at end of file +colored = "2.0.0" +flate2 = {version = "1.0", features = ["zlib-ng-compat"]} \ No newline at end of file diff --git a/src/chunk.rs b/src/chunk.rs new file mode 100644 index 0000000..b1931ea --- /dev/null +++ b/src/chunk.rs @@ -0,0 +1,120 @@ +use crate::nbt::{NBTError, NBTReader, NBTValue}; +use crate::region_file::BLOCK_SIZE; +use byteorder::{BigEndian, ByteOrder, ReadBytesExt}; + +use flate2::bufread::ZlibDecoder; +use std::fmt::{Display, Formatter}; +use std::fs::File; +use std::io::{self, BufReader, Error, Read, Seek, SeekFrom}; + +type IOResult = io::Result; + +const TAG_LEVEL: &str = "Level"; +const TAG_X_POS: &str = "xPos"; +const TAG_Z_POS: &str = "zPos"; + +#[derive(Debug)] +pub struct Chunk { + pub length: u32, + pub compression_type: u8, + nbt_raw: Vec, +} + +impl Chunk { + pub fn from_buf_reader(reader: &mut BufReader, include_nbt: bool) -> IOResult { + let mut length_raw = [0u8; 4]; + reader.read_exact(&mut length_raw)?; + let length = BigEndian::read_u32(&length_raw); + let compression_type = reader.read_u8()?; + + let mut nbt_raw = Vec::new(); + if include_nbt { + for _ in 0..((length - 1) as f32 / BLOCK_SIZE as f32).ceil() as u8 { + let mut buffer = [0u8; BLOCK_SIZE]; + reader.read(&mut buffer)?; + nbt_raw.append(&mut buffer.to_vec()); + } + nbt_raw.truncate((length - 1) as usize); + } + + if length > 0 { + reader.seek(SeekFrom::Current((length - 1) as i64))?; + } else { + reader.seek(SeekFrom::Current((length) as i64))?; + } + + Ok(Self { + compression_type, + length, + nbt_raw, + }) + } + + pub fn validate_nbt_data(&mut self) -> Result<(), ChunkScanError> { + if self.compression_type == 2 { + let mut decoder = ZlibDecoder::new(&self.nbt_raw[..]); + let mut data = Vec::new(); + decoder.read_to_end(&mut data)?; + self.nbt_raw = data; + } + let mut reader = NBTReader::new(&self.nbt_raw[..]); + let data = reader.parse()?; + + if !data.contains_key(TAG_LEVEL) { + Err(ChunkScanError::MissingTag(TAG_LEVEL)) + } else { + let lvl_data = &data[TAG_LEVEL]; + + if let NBTValue::Compound(lvl_data) = lvl_data { + if !lvl_data.contains_key(TAG_X_POS) { + Err(ChunkScanError::MissingTag(TAG_X_POS)) + } else if !lvl_data.contains_key(TAG_Z_POS) { + Err(ChunkScanError::MissingTag(TAG_Z_POS)) + } else { + Ok(()) + } + } else { + Err(ChunkScanError::InvalidFormat(TAG_LEVEL)) + } + } + } +} + +#[derive(Debug)] +pub enum ChunkScanError { + String(String), + IO(io::Error), + NBTError(NBTError), + MissingTag(&'static str), + InvalidFormat(&'static str), +} + +impl Display for ChunkScanError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(s) => write!(f, "{}", s), + Self::IO(io) => write!(f, "IO Error: {}", io), + Self::NBTError(nbt) => write!(f, "NBT Error: {}", nbt), + Self::MissingTag(tag) => write!(f, "Missing Tag in NBT Data: {}", tag), + Self::InvalidFormat(tag) => write!(f, "Unexpected data format for NBT Tag {}", tag), + } + } +} + +impl From for ChunkScanError { + fn from(io_err: Error) -> Self { + Self::IO(io_err) + } +} + +impl From for ChunkScanError { + fn from(nbt: NBTError) -> Self { + Self::NBTError(nbt) + } +} + +impl From for ChunkScanError { + fn from(err: String) -> Self { + Self::String(err) + } +} diff --git a/src/lib.rs b/src/lib.rs index f3b4e59..504fb3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +pub mod chunk; +pub mod nbt; pub mod region_file; pub mod scan; pub mod world_folder; diff --git a/src/nbt.rs b/src/nbt.rs new file mode 100644 index 0000000..724f0af --- /dev/null +++ b/src/nbt.rs @@ -0,0 +1,183 @@ +use byteorder::{BigEndian, ReadBytesExt}; +use std::collections::HashMap; +use std::error::Error; +use std::fmt::{self, Display, Formatter}; +use std::io::{self, Read}; + +pub struct NBTReader { + inner: Box, +} + +type NBTResult = Result; + +impl NBTReader +where + R: io::Read, +{ + pub fn new(inner: R) -> Self { + Self { + inner: Box::new(inner), + } + } + + /// Parses the contents of the reader + pub fn parse(&mut self) -> NBTResult> { + let tag = self.inner.read_u8()?; + + if tag != 10 { + return Err(NBTError::MissingRootTag); + } + let _ = self.parse_string()?; + + self.parse_compound() + } + + /// Parses a compound tag + fn parse_compound(&mut self) -> NBTResult> { + let mut root_value = HashMap::new(); + loop { + let tag = self.inner.read_u8()?; + if tag == 0 { + break; + } + let name = self.parse_string()?; + + let value = match tag { + 1 => NBTValue::Byte(self.inner.read_u8()?), + 2 => NBTValue::Short(self.inner.read_i16::()?), + 3 => NBTValue::Int(self.inner.read_i32::()?), + 4 => NBTValue::Long(self.inner.read_i64::()?), + 5 => NBTValue::Float(self.inner.read_f32::()?), + 6 => NBTValue::Double(self.inner.read_f64::()?), + 7 => NBTValue::ByteArray(self.parse_byte_array()?), + 8 => NBTValue::String(self.parse_string()?), + 9 => NBTValue::List(self.parse_list()?), + 10 => NBTValue::Compound(self.parse_compound()?), + 11 => NBTValue::IntArray(self.parse_int_array()?), + 12 => NBTValue::LongArray(self.parse_long_array()?), + _ => return Err(NBTError::InvalidTag(tag)), + }; + root_value.insert(name, value); + } + + Ok(root_value) + } + + /// Parses an array of bytes + fn parse_byte_array(&mut self) -> NBTResult> { + let length = self.inner.read_u32::()?; + let mut buf = Vec::with_capacity(length as usize); + self.inner.read_exact(&mut buf)?; + + Ok(buf) + } + + /// Parses a string value + fn parse_string(&mut self) -> NBTResult { + let length = self.inner.read_u16::()?; + if length == 0 { + return Ok(String::new()); + } + let mut buf = Vec::with_capacity(length as usize); + for _ in 0..length { + buf.push(self.inner.read_u8()?); + } + + String::from_utf8(buf).map_err(|_| NBTError::InvalidName) + } + + /// Parses a list of nbt values + fn parse_list(&mut self) -> NBTResult> { + let tag = self.inner.read_u8()?; + let length = self.inner.read_u32::()?; + + let parse_fn: Box NBTResult> = match tag { + 0 => Box::new(|_| Ok(NBTValue::Null)), + 1 => Box::new(|nbt| Ok(NBTValue::Byte(nbt.inner.read_u8()?))), + 2 => Box::new(|nbt| Ok(NBTValue::Short(nbt.inner.read_i16::()?))), + 3 => Box::new(|nbt| Ok(NBTValue::Int(nbt.inner.read_i32::()?))), + 4 => Box::new(|nbt| Ok(NBTValue::Long(nbt.inner.read_i64::()?))), + 5 => Box::new(|nbt| Ok(NBTValue::Float(nbt.inner.read_f32::()?))), + 6 => Box::new(|nbt| Ok(NBTValue::Double(nbt.inner.read_f64::()?))), + 7 => Box::new(|nbt| Ok(NBTValue::ByteArray(nbt.parse_byte_array()?))), + 8 => Box::new(|nbt| Ok(NBTValue::String(nbt.parse_string()?))), + 9 => Box::new(|nbt| Ok(NBTValue::List(nbt.parse_list()?))), + 11 => Box::new(|nbt| Ok(NBTValue::IntArray(nbt.parse_int_array()?))), + 10 => Box::new(|nbt| Ok(NBTValue::Compound(nbt.parse_compound()?))), + 12 => Box::new(|nbt| Ok(NBTValue::LongArray(nbt.parse_long_array()?))), + _ => return Err(NBTError::InvalidTag(tag)), + }; + let mut items = Vec::new(); + for _ in 0..length { + items.push(parse_fn(self)?); + } + + Ok(items) + } + + /// Parses an array of 32 bit integers + fn parse_int_array(&mut self) -> NBTResult> { + let length = self.inner.read_u32::()?; + let mut items = Vec::new(); + for _ in 0..length { + items.push(self.inner.read_i32::()?); + } + + Ok(items) + } + + /// Parses an array of 64 bit integers + fn parse_long_array(&mut self) -> NBTResult> { + let length = self.inner.read_u32::()?; + let mut items = Vec::new(); + for _ in 0..length { + items.push(self.inner.read_i64::()?); + } + + Ok(items) + } +} + +#[derive(Clone, Debug)] +pub enum NBTValue { + Null, + Byte(u8), + Short(i16), + Int(i32), + Long(i64), + Float(f32), + Double(f64), + ByteArray(Vec), + String(String), + List(Vec), + Compound(HashMap), + IntArray(Vec), + LongArray(Vec), +} + +#[derive(Debug)] +pub enum NBTError { + IO(io::Error), + MissingRootTag, + InvalidTag(u8), + InvalidName, +} + +impl Display for NBTError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Self::IO(io) => write!(f, "IO Error: {}", io), + Self::InvalidTag(tag) => write!(f, "Invalid Tag: 0x{:x}", tag), + Self::MissingRootTag => write!(f, "Missing root tag!"), + Self::InvalidName => write!(f, "Encountered invalid tag name"), + } + } +} + +impl Error for NBTError {} + +impl From for NBTError { + fn from(io_err: io::Error) -> Self { + Self::IO(io_err) + } +} diff --git a/src/region_file.rs b/src/region_file.rs index 3104066..f4ae776 100644 --- a/src/region_file.rs +++ b/src/region_file.rs @@ -1,10 +1,11 @@ +use crate::chunk::{Chunk, ChunkScanError}; use crate::scan::ScanStatistics; -use byteorder::{BigEndian, ByteOrder, ReadBytesExt, WriteBytesExt}; +use byteorder::{BigEndian, ByteOrder, WriteBytesExt}; use std::fs::{File, OpenOptions}; use std::io::{BufReader, BufWriter, Read, Result, Seek, SeekFrom, Write}; use std::path::PathBuf; -const BLOCK_SIZE: usize = 4096; +pub const BLOCK_SIZE: usize = 4096; pub struct RegionFile { reader: BufReader, @@ -51,8 +52,8 @@ impl RegionFile { let reader_offset = *offset as u64 * BLOCK_SIZE as u64; self.reader.seek(SeekFrom::Start(reader_offset))?; - match self.read_chunk() { - Ok(chunk) => { + match Chunk::from_buf_reader(&mut self.reader, true) { + Ok(mut chunk) => { let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil(); if chunk.compression_type > 3 { @@ -61,6 +62,23 @@ impl RegionFile { self.writer.seek(SeekFrom::Start(reader_offset + 4))?; self.writer.write_u8(1)?; } + } else { + if let Err(e) = chunk.validate_nbt_data() { + match e { + ChunkScanError::IO(e) => { + log::debug!( + "Compression error when reading chunk {}: {}", + offset, + e + ); + statistic.corrupted_compression += 1; + } + _ => { + log::debug!("Missing nbt for chunk {}: {}", offset, e); + statistic.missing_nbt += 1; + } + } + } } if *sections != chunk_sections as u8 || chunk.length >= 1_048_576 { @@ -71,6 +89,7 @@ impl RegionFile { } } Err(e) => { + statistic.failed_to_read += 1; log::error!("Failed to read chunk at {}: {}", offset, e); } } @@ -86,25 +105,6 @@ impl RegionFile { Ok(statistic) } - - /// Reads a chunk at the current location - fn read_chunk(&mut self) -> Result { - let mut length_raw = [0u8; 4]; - self.reader.read_exact(&mut length_raw)?; - let length = BigEndian::read_u32(&length_raw); - let compression_type = self.reader.read_u8()?; - - if length > 0 { - self.reader.seek(SeekFrom::Current((length - 1) as i64))?; - } else { - self.reader.seek(SeekFrom::Current((length) as i64))?; - } - - Ok(Chunk { - length, - compression_type, - }) - } } #[derive(Debug)] @@ -182,9 +182,3 @@ impl Timestamps { Self { inner: timestamps } } } - -#[derive(Debug)] -pub struct Chunk { - pub length: u32, - pub compression_type: u8, -} diff --git a/src/scan.rs b/src/scan.rs index 9a2dd0b..9fbd6d4 100644 --- a/src/scan.rs +++ b/src/scan.rs @@ -6,6 +6,9 @@ pub struct ScanStatistics { pub total_chunks: u64, pub invalid_length: u64, pub invalid_compression_method: u64, + pub missing_nbt: u64, + pub failed_to_read: u64, + pub corrupted_compression: u64, } impl ScanStatistics { @@ -14,6 +17,9 @@ impl ScanStatistics { total_chunks: 0, invalid_length: 0, invalid_compression_method: 0, + missing_nbt: 0, + corrupted_compression: 0, + failed_to_read: 0, } } } @@ -25,6 +31,9 @@ impl Add for ScanStatistics { self.invalid_length += rhs.invalid_length; self.total_chunks += rhs.total_chunks; self.invalid_compression_method += rhs.invalid_compression_method; + self.failed_to_read += rhs.failed_to_read; + self.missing_nbt += rhs.missing_nbt; + self.corrupted_compression += rhs.corrupted_compression; self } @@ -34,8 +43,18 @@ impl Display for ScanStatistics { fn fmt(&self, f: &mut Formatter<'_>) -> Result { write!( f, - "Total Chunks: {}\nChunks with invalid length: {}\nChunks with invalid compression method: {}", - self.total_chunks, self.invalid_length, self.invalid_compression_method + "Total Chunks: {} + Failed to Read: {} + Chunks with invalid length: {} + Chunks with invalid compression method: {} + Chunks with missing nbt data: {} + Chunks with corrupted compressed data {}", + self.total_chunks, + self.failed_to_read, + self.invalid_length, + self.invalid_compression_method, + self.missing_nbt, + self.corrupted_compression ) } }