Add scanning of chunk data in nbt format

Signed-off-by: trivernis <trivernis@protonmail.com>
main
trivernis 4 years ago
parent 701991b1ab
commit f5ad767d22
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

81
Cargo.lock generated

@ -1,5 +1,11 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
[[package]]
name = "adler"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "0.7.13" version = "0.7.13"
@ -47,6 +53,12 @@ version = "1.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
[[package]]
name = "cc"
version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef611cc68ff783f18535d77ddd080185275713d852c4f5cbb6122c462a7a825c"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "0.1.10" version = "0.1.10"
@ -68,6 +80,15 @@ dependencies = [
"vec_map", "vec_map",
] ]
[[package]]
name = "cmake"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e56268c17a6248366d66d4a47a3381369d068cce8409bb1716ed77ea32163bb"
dependencies = [
"cc",
]
[[package]] [[package]]
name = "colored" name = "colored"
version = "2.0.0" version = "2.0.0"
@ -96,6 +117,15 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "crc32fast"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.4.4" version = "0.4.4"
@ -168,6 +198,19 @@ dependencies = [
"termcolor", "termcolor",
] ]
[[package]]
name = "flate2"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "766d0e77a2c1502169d4a93ff3b8c15a71fd946cd0126309752104e5f3c46d94"
dependencies = [
"cfg-if",
"crc32fast",
"libc",
"libz-sys",
"miniz_oxide",
]
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.3.1" version = "0.3.1"
@ -219,6 +262,19 @@ version = "0.2.77"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2f96b10ec2560088a8e76961b00d47107b3a625fecb76dedb29ee7ccbf98235" checksum = "f2f96b10ec2560088a8e76961b00d47107b3a625fecb76dedb29ee7ccbf98235"
[[package]]
name = "libz-sys"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "602113192b08db8f38796c4e85c39e960c145965140e918018bcde1952429655"
dependencies = [
"cc",
"cmake",
"libc",
"pkg-config",
"vcpkg",
]
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.11" version = "0.4.11"
@ -251,17 +307,28 @@ dependencies = [
[[package]] [[package]]
name = "minecraft-regions-tool" name = "minecraft-regions-tool"
version = "0.3.0" version = "0.4.0"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"colored", "colored",
"env_logger", "env_logger",
"flate2",
"indicatif", "indicatif",
"log", "log",
"rayon", "rayon",
"structopt", "structopt",
] ]
[[package]]
name = "miniz_oxide"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c60c0dfe32c10b43a144bad8fc83538c52f58302c92300ea7ec7bf7b38d5a7b9"
dependencies = [
"adler",
"autocfg",
]
[[package]] [[package]]
name = "num_cpus" name = "num_cpus"
version = "1.13.0" version = "1.13.0"
@ -278,6 +345,12 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"
[[package]]
name = "pkg-config"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d36492546b6af1463394d46f0c834346f31548646f6ba10849802c9c9a27ac33"
[[package]] [[package]]
name = "proc-macro-error" name = "proc-macro-error"
version = "1.0.4" version = "1.0.4"
@ -480,6 +553,12 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
[[package]]
name = "vcpkg"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6454029bf181f092ad1b853286f23e2c507d8e8194d01d92da4a55c274a5508c"
[[package]] [[package]]
name = "vec_map" name = "vec_map"
version = "0.8.2" version = "0.8.2"

@ -1,6 +1,6 @@
[package] [package]
name = "minecraft-regions-tool" name = "minecraft-regions-tool"
version = "0.3.0" version = "0.4.0"
authors = ["trivernis <trivernis@protonmail.com>"] authors = ["trivernis <trivernis@protonmail.com>"]
edition = "2018" edition = "2018"
license = "GPL-3.0" license = "GPL-3.0"
@ -18,3 +18,4 @@ indicatif = "0.15.0"
log = "0.4.11" log = "0.4.11"
env_logger ="0.7.1" env_logger ="0.7.1"
colored = "2.0.0" colored = "2.0.0"
flate2 = {version = "1.0", features = ["zlib-ng-compat"]}

@ -0,0 +1,120 @@
use crate::nbt::{NBTError, NBTReader, NBTValue};
use crate::region_file::BLOCK_SIZE;
use byteorder::{BigEndian, ByteOrder, ReadBytesExt};
use flate2::bufread::ZlibDecoder;
use std::fmt::{Display, Formatter};
use std::fs::File;
use std::io::{self, BufReader, Error, Read, Seek, SeekFrom};
type IOResult<T> = io::Result<T>;
const TAG_LEVEL: &str = "Level";
const TAG_X_POS: &str = "xPos";
const TAG_Z_POS: &str = "zPos";
/// A chunk entry read from a region file: the two header fields plus,
/// optionally, the payload bytes that follow the header.
#[derive(Debug)]
pub struct Chunk {
/// Length field taken from the first four header bytes (big endian).
pub length: u32,
/// Compression type byte that directly follows the length field.
pub compression_type: u8,
/// Chunk payload bytes; stays empty when the chunk was read without NBT data.
nbt_raw: Vec<u8>,
}
impl Chunk {
pub fn from_buf_reader(reader: &mut BufReader<File>, include_nbt: bool) -> IOResult<Self> {
let mut length_raw = [0u8; 4];
reader.read_exact(&mut length_raw)?;
let length = BigEndian::read_u32(&length_raw);
let compression_type = reader.read_u8()?;
let mut nbt_raw = Vec::new();
if include_nbt {
for _ in 0..((length - 1) as f32 / BLOCK_SIZE as f32).ceil() as u8 {
let mut buffer = [0u8; BLOCK_SIZE];
reader.read(&mut buffer)?;
nbt_raw.append(&mut buffer.to_vec());
}
nbt_raw.truncate((length - 1) as usize);
}
if length > 0 {
reader.seek(SeekFrom::Current((length - 1) as i64))?;
} else {
reader.seek(SeekFrom::Current((length) as i64))?;
}
Ok(Self {
compression_type,
length,
nbt_raw,
})
}
pub fn validate_nbt_data(&mut self) -> Result<(), ChunkScanError> {
if self.compression_type == 2 {
let mut decoder = ZlibDecoder::new(&self.nbt_raw[..]);
let mut data = Vec::new();
decoder.read_to_end(&mut data)?;
self.nbt_raw = data;
}
let mut reader = NBTReader::new(&self.nbt_raw[..]);
let data = reader.parse()?;
if !data.contains_key(TAG_LEVEL) {
Err(ChunkScanError::MissingTag(TAG_LEVEL))
} else {
let lvl_data = &data[TAG_LEVEL];
if let NBTValue::Compound(lvl_data) = lvl_data {
if !lvl_data.contains_key(TAG_X_POS) {
Err(ChunkScanError::MissingTag(TAG_X_POS))
} else if !lvl_data.contains_key(TAG_Z_POS) {
Err(ChunkScanError::MissingTag(TAG_Z_POS))
} else {
Ok(())
}
} else {
Err(ChunkScanError::InvalidFormat(TAG_LEVEL))
}
}
}
}
/// Errors that can be reported while scanning a chunk.
#[derive(Debug)]
pub enum ChunkScanError {
/// A free-form error message.
String(String),
/// An IO error, e.g. raised while decompressing the chunk payload.
IO(io::Error),
/// The chunk payload could not be parsed as NBT data.
NBTError(NBTError),
/// A required NBT tag (named by the payload) is absent.
MissingTag(&'static str),
/// The NBT tag (named by the payload) exists but holds an unexpected kind of value.
InvalidFormat(&'static str),
}
impl Display for ChunkScanError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::String(s) => write!(f, "{}", s),
Self::IO(io) => write!(f, "IO Error: {}", io),
Self::NBTError(nbt) => write!(f, "NBT Error: {}", nbt),
Self::MissingTag(tag) => write!(f, "Missing Tag in NBT Data: {}", tag),
Self::InvalidFormat(tag) => write!(f, "Unexpected data format for NBT Tag {}", tag),
}
}
}
/// Wraps an IO error in [`ChunkScanError::IO`] so that `?` can convert it.
impl From<io::Error> for ChunkScanError {
fn from(io_err: Error) -> Self {
Self::IO(io_err)
}
}
/// Wraps an NBT parsing error in [`ChunkScanError::NBTError`] so that `?` can convert it.
impl From<NBTError> for ChunkScanError {
fn from(nbt: NBTError) -> Self {
Self::NBTError(nbt)
}
}
/// Wraps a plain message in [`ChunkScanError::String`].
impl From<String> for ChunkScanError {
fn from(err: String) -> Self {
Self::String(err)
}
}

@ -1,3 +1,5 @@
pub mod chunk;
pub mod nbt;
pub mod region_file; pub mod region_file;
pub mod scan; pub mod scan;
pub mod world_folder; pub mod world_folder;

@ -0,0 +1,183 @@
use byteorder::{BigEndian, ReadBytesExt};
use std::collections::HashMap;
use std::error::Error;
use std::fmt::{self, Display, Formatter};
use std::io::{self, Read};
/// Parser for NBT data that pulls its bytes from a wrapped source `R`.
pub struct NBTReader<R> {
/// The boxed byte source that all parse methods read from.
inner: Box<R>,
}
type NBTResult<T> = Result<T, NBTError>;
impl<R> NBTReader<R>
where
    R: io::Read,
{
    /// Creates a reader that parses NBT data from `inner`.
    pub fn new(inner: R) -> Self {
        Self {
            inner: Box::new(inner),
        }
    }

    /// Parses the contents of the reader.
    ///
    /// The data has to start with a compound tag (id 10). The name of that root
    /// compound is read and discarded; its entries are returned keyed by tag name.
    ///
    /// # Errors
    ///
    /// Returns [`NBTError::MissingRootTag`] when the first byte is not a compound tag,
    /// and any error raised while parsing the nested values.
    pub fn parse(&mut self) -> NBTResult<HashMap<String, NBTValue>> {
        let tag = self.inner.read_u8()?;
        if tag != 10 {
            return Err(NBTError::MissingRootTag);
        }
        // The root name carries no information that is needed here.
        let _ = self.parse_string()?;

        self.parse_compound()
    }

    /// Parses a compound tag: named entries up to the terminating end tag (id 0).
    fn parse_compound(&mut self) -> NBTResult<HashMap<String, NBTValue>> {
        let mut entries = HashMap::new();

        loop {
            let tag = self.inner.read_u8()?;
            if tag == 0 {
                break;
            }
            let name = self.parse_string()?;
            let value = self.parse_payload(tag)?;
            entries.insert(name, value);
        }

        Ok(entries)
    }

    /// Parses the payload of a single value whose tag id is `tag`.
    ///
    /// Tag id 0 consumes no bytes and yields [`NBTValue::Null`]; it is only
    /// meaningful as the element type of a list.
    fn parse_payload(&mut self, tag: u8) -> NBTResult<NBTValue> {
        let value = match tag {
            0 => NBTValue::Null,
            1 => NBTValue::Byte(self.inner.read_u8()?),
            2 => NBTValue::Short(self.inner.read_i16::<BigEndian>()?),
            3 => NBTValue::Int(self.inner.read_i32::<BigEndian>()?),
            4 => NBTValue::Long(self.inner.read_i64::<BigEndian>()?),
            5 => NBTValue::Float(self.inner.read_f32::<BigEndian>()?),
            6 => NBTValue::Double(self.inner.read_f64::<BigEndian>()?),
            7 => NBTValue::ByteArray(self.parse_byte_array()?),
            8 => NBTValue::String(self.parse_string()?),
            9 => NBTValue::List(self.parse_list()?),
            10 => NBTValue::Compound(self.parse_compound()?),
            11 => NBTValue::IntArray(self.parse_int_array()?),
            12 => NBTValue::LongArray(self.parse_long_array()?),
            _ => return Err(NBTError::InvalidTag(tag)),
        };

        Ok(value)
    }

    /// Parses an array of bytes: a big endian `u32` length followed by that many bytes.
    ///
    /// # Errors
    ///
    /// Returns an IO error of kind `UnexpectedEof` when fewer bytes than announced
    /// are available.
    fn parse_byte_array(&mut self) -> NBTResult<Vec<u8>> {
        let length = u64::from(self.inner.read_u32::<BigEndian>()?);
        let mut buf = Vec::new();
        // The length comes from the (possibly corrupted) data, so the buffer grows
        // with the bytes that actually exist instead of being allocated up front.
        self.inner.by_ref().take(length).read_to_end(&mut buf)?;
        if buf.len() as u64 != length {
            return Err(NBTError::IO(io::Error::from(io::ErrorKind::UnexpectedEof)));
        }

        Ok(buf)
    }

    /// Parses a string value: a big endian `u16` length followed by that many bytes.
    ///
    /// # Errors
    ///
    /// Returns [`NBTError::InvalidName`] when the bytes are not valid UTF-8.
    fn parse_string(&mut self) -> NBTResult<String> {
        let length = self.inner.read_u16::<BigEndian>()?;
        let mut buf = vec![0u8; usize::from(length)];
        self.inner.read_exact(&mut buf)?;

        String::from_utf8(buf).map_err(|_| NBTError::InvalidName)
    }

    /// Parses a list of nbt values: element tag id, big endian `u32` length, then the
    /// unnamed element payloads.
    fn parse_list(&mut self) -> NBTResult<Vec<NBTValue>> {
        let tag = self.inner.read_u8()?;
        let length = self.inner.read_u32::<BigEndian>()?;
        // Reject unknown element types up front so that empty lists are checked as well.
        if tag > 12 {
            return Err(NBTError::InvalidTag(tag));
        }

        let mut items = Vec::new();
        for _ in 0..length {
            items.push(self.parse_payload(tag)?);
        }

        Ok(items)
    }

    /// Parses an array of 32 bit integers
    fn parse_int_array(&mut self) -> NBTResult<Vec<i32>> {
        let length = self.inner.read_u32::<BigEndian>()?;
        let mut items = Vec::new();
        for _ in 0..length {
            items.push(self.inner.read_i32::<BigEndian>()?);
        }

        Ok(items)
    }

    /// Parses an array of 64 bit integers
    fn parse_long_array(&mut self) -> NBTResult<Vec<i64>> {
        let length = self.inner.read_u32::<BigEndian>()?;
        let mut items = Vec::new();
        for _ in 0..length {
            items.push(self.inner.read_i64::<BigEndian>()?);
        }

        Ok(items)
    }
}
/// A parsed NBT value; one variant per tag id handled by `NBTReader`.
#[derive(Clone, Debug)]
pub enum NBTValue {
/// Placeholder produced for elements of a list whose element tag id is 0.
Null,
/// Tag id 1.
Byte(u8),
/// Tag id 2.
Short(i16),
/// Tag id 3.
Int(i32),
/// Tag id 4.
Long(i64),
/// Tag id 5.
Float(f32),
/// Tag id 6.
Double(f64),
/// Tag id 7.
ByteArray(Vec<u8>),
/// Tag id 8.
String(String),
/// Tag id 9: unnamed values that share one element tag.
List(Vec<NBTValue>),
/// Tag id 10: values keyed by their tag name.
Compound(HashMap<String, NBTValue>),
/// Tag id 11.
IntArray(Vec<i32>),
/// Tag id 12.
LongArray(Vec<i64>),
}
/// Errors that can occur while parsing NBT data.
#[derive(Debug)]
pub enum NBTError {
/// Reading from the underlying source failed.
IO(io::Error),
/// The data does not start with a compound tag (id 10).
MissingRootTag,
/// A tag id was encountered that the parser does not know.
InvalidTag(u8),
/// The bytes of a string were not valid UTF-8.
InvalidName,
}
impl Display for NBTError {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Self::IO(io) => write!(f, "IO Error: {}", io),
Self::InvalidTag(tag) => write!(f, "Invalid Tag: 0x{:x}", tag),
Self::MissingRootTag => write!(f, "Missing root tag!"),
Self::InvalidName => write!(f, "Encountered invalid tag name"),
}
}
}
// Relies entirely on the default `Error` methods; no methods are overridden here.
impl Error for NBTError {}
/// Wraps an IO error in [`NBTError::IO`] so that `?` can convert it.
impl From<io::Error> for NBTError {
fn from(io_err: io::Error) -> Self {
Self::IO(io_err)
}
}

@ -1,10 +1,11 @@
use crate::chunk::{Chunk, ChunkScanError};
use crate::scan::ScanStatistics; use crate::scan::ScanStatistics;
use byteorder::{BigEndian, ByteOrder, ReadBytesExt, WriteBytesExt}; use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
use std::fs::{File, OpenOptions}; use std::fs::{File, OpenOptions};
use std::io::{BufReader, BufWriter, Read, Result, Seek, SeekFrom, Write}; use std::io::{BufReader, BufWriter, Read, Result, Seek, SeekFrom, Write};
use std::path::PathBuf; use std::path::PathBuf;
const BLOCK_SIZE: usize = 4096; pub const BLOCK_SIZE: usize = 4096;
pub struct RegionFile { pub struct RegionFile {
reader: BufReader<File>, reader: BufReader<File>,
@ -51,8 +52,8 @@ impl RegionFile {
let reader_offset = *offset as u64 * BLOCK_SIZE as u64; let reader_offset = *offset as u64 * BLOCK_SIZE as u64;
self.reader.seek(SeekFrom::Start(reader_offset))?; self.reader.seek(SeekFrom::Start(reader_offset))?;
match self.read_chunk() { match Chunk::from_buf_reader(&mut self.reader, true) {
Ok(chunk) => { Ok(mut chunk) => {
let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil(); let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
if chunk.compression_type > 3 { if chunk.compression_type > 3 {
@ -61,6 +62,23 @@ impl RegionFile {
self.writer.seek(SeekFrom::Start(reader_offset + 4))?; self.writer.seek(SeekFrom::Start(reader_offset + 4))?;
self.writer.write_u8(1)?; self.writer.write_u8(1)?;
} }
} else {
if let Err(e) = chunk.validate_nbt_data() {
match e {
ChunkScanError::IO(e) => {
log::debug!(
"Compression error when reading chunk {}: {}",
offset,
e
);
statistic.corrupted_compression += 1;
}
_ => {
log::debug!("Missing nbt for chunk {}: {}", offset, e);
statistic.missing_nbt += 1;
}
}
}
} }
if *sections != chunk_sections as u8 || chunk.length >= 1_048_576 { if *sections != chunk_sections as u8 || chunk.length >= 1_048_576 {
@ -71,6 +89,7 @@ impl RegionFile {
} }
} }
Err(e) => { Err(e) => {
statistic.failed_to_read += 1;
log::error!("Failed to read chunk at {}: {}", offset, e); log::error!("Failed to read chunk at {}: {}", offset, e);
} }
} }
@ -86,25 +105,6 @@ impl RegionFile {
Ok(statistic) Ok(statistic)
} }
/// Reads a chunk at the current location
fn read_chunk(&mut self) -> Result<Chunk> {
let mut length_raw = [0u8; 4];
self.reader.read_exact(&mut length_raw)?;
let length = BigEndian::read_u32(&length_raw);
let compression_type = self.reader.read_u8()?;
if length > 0 {
self.reader.seek(SeekFrom::Current((length - 1) as i64))?;
} else {
self.reader.seek(SeekFrom::Current((length) as i64))?;
}
Ok(Chunk {
length,
compression_type,
})
}
} }
#[derive(Debug)] #[derive(Debug)]
@ -182,9 +182,3 @@ impl Timestamps {
Self { inner: timestamps } Self { inner: timestamps }
} }
} }
#[derive(Debug)]
pub struct Chunk {
pub length: u32,
pub compression_type: u8,
}

@ -6,6 +6,9 @@ pub struct ScanStatistics {
pub total_chunks: u64, pub total_chunks: u64,
pub invalid_length: u64, pub invalid_length: u64,
pub invalid_compression_method: u64, pub invalid_compression_method: u64,
pub missing_nbt: u64,
pub failed_to_read: u64,
pub corrupted_compression: u64,
} }
impl ScanStatistics { impl ScanStatistics {
@ -14,6 +17,9 @@ impl ScanStatistics {
total_chunks: 0, total_chunks: 0,
invalid_length: 0, invalid_length: 0,
invalid_compression_method: 0, invalid_compression_method: 0,
missing_nbt: 0,
corrupted_compression: 0,
failed_to_read: 0,
} }
} }
} }
@ -25,6 +31,9 @@ impl Add for ScanStatistics {
self.invalid_length += rhs.invalid_length; self.invalid_length += rhs.invalid_length;
self.total_chunks += rhs.total_chunks; self.total_chunks += rhs.total_chunks;
self.invalid_compression_method += rhs.invalid_compression_method; self.invalid_compression_method += rhs.invalid_compression_method;
self.failed_to_read += rhs.failed_to_read;
self.missing_nbt += rhs.missing_nbt;
self.corrupted_compression += rhs.corrupted_compression;
self self
} }
@ -34,8 +43,18 @@ impl Display for ScanStatistics {
fn fmt(&self, f: &mut Formatter<'_>) -> Result { fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!( write!(
f, f,
"Total Chunks: {}\nChunks with invalid length: {}\nChunks with invalid compression method: {}", "Total Chunks: {}
self.total_chunks, self.invalid_length, self.invalid_compression_method Failed to Read: {}
Chunks with invalid length: {}
Chunks with invalid compression method: {}
Chunks with missing nbt data: {}
Chunks with corrupted compressed data {}",
self.total_chunks,
self.failed_to_read,
self.invalid_length,
self.invalid_compression_method,
self.missing_nbt,
self.corrupted_compression
) )
} }
} }

Loading…
Cancel
Save