diff --git a/Cargo.lock b/Cargo.lock
index 7bc70b8..ff284dd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -185,6 +185,18 @@
 version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 
+[[package]]
+name = "enum-as-inner"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c5f0096a91d210159eceb2ff5e1c4da18388a170e1e3ce948aac9c8fdbbf595"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "env_logger"
 version = "0.7.1"
@@ -307,10 +319,11 @@
 
 [[package]]
 name = "minecraft-regions-tool"
-version = "0.5.3"
+version = "0.5.4"
 dependencies = [
  "byteorder",
  "colored",
+ "enum-as-inner",
  "env_logger",
  "flate2",
  "indicatif",
diff --git a/Cargo.toml b/Cargo.toml
index ebfe3a4..9e9d6b8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "minecraft-regions-tool"
-version = "0.5.3"
+version = "0.5.4"
 authors = ["trivernis "]
 edition = "2018"
 license = "GPL-3.0"
@@ -18,4 +18,5 @@
 indicatif = "0.15.0"
 log = "0.4.11"
 env_logger ="0.7.1"
 colored = "2.0.0"
-flate2 = {version = "1.0", features = ["zlib-ng-compat"]}
\ No newline at end of file
+flate2 = {version = "1.0", features = ["zlib-ng-compat"]}
+enum-as-inner = "0.3.3"
\ No newline at end of file
diff --git a/src/chunk.rs b/src/chunk.rs
index 9cf01e1..506dac9 100644
--- a/src/chunk.rs
+++ b/src/chunk.rs
@@ -1,7 +1,7 @@
 use crate::nbt::{NBTError, NBTReader, NBTValue};
 use byteorder::{BigEndian, ReadBytesExt};
 
-use crate::constants::tags::{LEVEL_TAGS, TAG_LEVEL};
+use crate::constants::tags::{LEVEL_TAGS, TAG_LEVEL, TAG_X_POS, TAG_Z_POS};
 use crate::region_file::BLOCK_SIZE;
 use flate2::read::{GzDecoder, ZlibDecoder};
 use std::fmt::{Display, Formatter};
@@ -13,6 +13,8 @@
 type IOResult<T> = io::Result<T>;
 
 pub struct Chunk {
     pub length: u32,
     pub compression_type: u8,
+    pub x_pos: Option<i32>,
+    pub z_pos: Option<i32>,
 }
 
 impl Chunk {
@@ -26,6 +28,8 @@
         Ok(Self {
             compression_type,
             length,
+            x_pos: None,
+            z_pos: None,
         })
     }
@@ -55,6 +59,9 @@
                     return Err(ChunkScanError::MissingTag(tag));
                 }
             }
+            self.x_pos = lvl_data[TAG_X_POS].as_int().cloned();
+            self.z_pos = lvl_data[TAG_Z_POS].as_int().cloned();
+
             Ok(())
         } else {
             Err(ChunkScanError::InvalidFormat(TAG_LEVEL))
diff --git a/src/constants.rs b/src/constants.rs
index 193a37d..c40be2f 100644
--- a/src/constants.rs
+++ b/src/constants.rs
@@ -26,12 +26,8 @@ pub mod tags {
         TAG_SECTIONS,
         TAG_LAST_UPDATE,
         TAG_INHABITED_TIME,
-        TAG_HEIGHTMAPS,
         TAG_ENTITIES,
         TAG_TILE_ENTITIES,
-        TAG_LIQUID_TICKS,
-        TAG_POST_PROCESSING,
         TAG_STATUS,
-        TAG_STRUCTURES,
     ];
 }
diff --git a/src/nbt.rs b/src/nbt.rs
index 2ac0914..4fcf1fb 100644
--- a/src/nbt.rs
+++ b/src/nbt.rs
@@ -1,5 +1,6 @@
 use crate::utils::ByteArrayCache;
 use byteorder::{BigEndian, ReadBytesExt};
+use enum_as_inner::EnumAsInner;
 use std::collections::HashMap;
 use std::error::Error;
 use std::fmt::{self, Display, Formatter};
@@ -76,6 +77,7 @@ where
     /// Parses an array of bytes
     fn parse_byte_array(&mut self) -> NBTResult<NBTValue> {
         let length = self.inner.read_u32::<BigEndian>()?;
+        // store the data of the byte array in a compressed byte array cache to save memory
         let mut cache = ByteArrayCache::new();
         let mut buf = vec![0u8; length as usize];
         self.inner.read_exact(&mut buf)?;
@@ -148,7 +150,7 @@
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, EnumAsInner)]
 pub enum NBTValue {
     Null,
     Byte(u8),
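Note on the `nbt.rs` change: the `EnumAsInner` derive is what provides the `as_int()` accessor used by `chunk.rs` above. A minimal sketch of the generated API, assuming enum-as-inner 0.3 semantics, with a hypothetical toy enum standing in for `NBTValue`:

```rust
use enum_as_inner::EnumAsInner;

// toy stand-in for NBTValue; only the variant shapes matter here
#[derive(Debug, EnumAsInner)]
enum Tag {
    Int(i32),
    Text(String),
}

fn main() {
    let tag = Tag::Int(7);
    // for a tuple variant Int(i32) the derive generates
    // fn as_int(&self) -> Option<&i32>; .cloned() yields an owned
    // Option<i32>, matching the x_pos/z_pos assignments in chunk.rs
    assert_eq!(tag.as_int().cloned(), Some(7));
    assert!(Tag::Text("hi".into()).as_int().is_none());
}
```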
diff --git a/src/region_file.rs b/src/region_file.rs
index df438ef..3ca836e 100644
--- a/src/region_file.rs
+++ b/src/region_file.rs
@@ -11,17 +11,20 @@
 use std::sync::Arc;
 
 pub const BLOCK_SIZE: usize = 4096;
 
 pub struct RegionFile {
+    path: PathBuf,
     reader: BufReader<File>,
     writer: BufWriter<File>,
     locations: Locations,
     #[allow(dead_code)]
     timestamps: Timestamps,
+    length: u64,
 }
 
 impl RegionFile {
     pub fn new(path: &PathBuf) -> Result<Self> {
         let fr = OpenOptions::new().read(true).open(path)?;
         let fw = OpenOptions::new().write(true).open(path)?;
+        let file_size = fr.metadata()?.len();
         let mut reader = BufReader::with_capacity(BLOCK_SIZE, fr);
         let writer = BufWriter::with_capacity(2 * BLOCK_SIZE, fw);
@@ -31,10 +34,12 @@
         reader.read_exact(&mut timestamps_raw)?;
 
         Ok(Self {
+            path: path.clone(),
            locations: Locations::from_bytes(&locations_raw),
            timestamps: Timestamps::from_bytes(&timestamps_raw),
             reader,
             writer,
+            length: file_size,
         })
     }
@@ -63,20 +68,44 @@
         let mut previous_sections = 0;
 
         for (index, (offset, sections)) in entries {
+            // Calculate and seek to the start of the chunk
             let reader_offset = offset as u64 * BLOCK_SIZE as u64;
             self.reader.seek(SeekFrom::Start(reader_offset))?;
-            let offset_diff = offset - (previous_offset + previous_sections);
+            let offset_diff = offset as i32 - (previous_offset as i32 + previous_sections as i32);
+            // Check if there is wasted space between the chunks.
+            // Since the chunks are iterated in offset order, the previous chunk is the closest one.
             if offset_diff > 0 {
                 statistic.unused_space += (BLOCK_SIZE * offset_diff as usize) as u64;
+                log::debug!(
+                    "Gap of unused {:.2} KiB detected between {} and {}",
+                    (BLOCK_SIZE as f32 * offset_diff as f32) / 1024.0,
+                    previous_offset,
+                    offset
+                );
                 if options.fix {
                     shift_operations.push((offset as usize, -(offset_diff as isize)));
                 }
             }
+            // Check if the chunk points into the header or past the end of the file
+            if offset < 2 || self.length < (offset + sections as u32) as u64 * BLOCK_SIZE as u64 {
+                statistic.invalid_chunk_pointer += 1;
+                log::debug!(
+                    "Invalid chunk offset and sections at index {}: {} + {}",
+                    index,
+                    offset,
+                    sections
+                );
+                if options.fix_delete {
+                    self.delete_chunk(index)?;
+                }
+                continue;
+            }
             match Chunk::from_buf_reader(&mut self.reader) {
                 Ok(chunk) => {
                     let exists =
                         self.scan_chunk(index, offset, sections, chunk, &mut statistic, options)?;
+                    // If scan_chunk returns false the chunk entry was deleted
                     if !exists && options.fix {
                         shift_operations
                             .push((offset as usize + sections as usize, -(sections as isize)))
@@ -84,7 +113,12 @@
                 }
                 Err(e) => {
                     statistic.failed_to_read += 1;
-                    log::error!("Failed to read chunk at {}: {}", offset, e);
+                    log::error!(
+                        "Failed to read chunk at {} in {:?}: {}",
+                        offset,
+                        self.path,
+                        e
+                    );
                     if options.fix_delete {
                         self.delete_chunk(index)?;
                         shift_operations
@@ -98,20 +132,9 @@
         }
 
         if options.fix || options.fix_delete {
-            let mut shifted = 0isize;
+            self.perform_shift_operations(shift_operations)?;
 
-            let mut operations = shift_operations.iter().peekable();
-            while let Some((offset, amount)) = operations.next() {
-                shifted += *amount;
-                let end_offset = if let Some((o, a)) = operations.peek() {
-                    (*o as isize + *a) as usize
-                } else {
-                    self.locations.max_offset() as usize
-                };
-                self.shift_right(*offset, end_offset, shifted)?;
-                self.locations
-                    .shift_entries(*offset as u32, end_offset as u32, shifted as i32);
-            }
+            // The new size of the file is the estimated size based on the highest chunk offset + sections
             statistic.shrunk_size = self.locations.estimated_size();
             self.writer.seek(SeekFrom::Start(0))?;
             self.writer
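The new pointer check above rejects entries whose sectors would start inside the two 4 KiB header blocks (the locations and timestamps tables) or run past the end of the file. A standalone sketch of that bound; the helper function is my own illustration, not part of the patch:

```rust
const BLOCK_SIZE: u64 = 4096;

/// A chunk pointer is valid only if it starts after the two header
/// sectors and its last sector still lies within the file.
fn chunk_pointer_is_valid(offset: u32, sections: u8, file_len: u64) -> bool {
    offset >= 2 && (offset as u64 + sections as u64) * BLOCK_SIZE <= file_len
}

fn main() {
    // a 3-sector chunk at sector 2 fits exactly into a 5-sector file
    assert!(chunk_pointer_is_valid(2, 3, 5 * BLOCK_SIZE));
    // sector 1 would overlap the timestamp table
    assert!(!chunk_pointer_is_valid(1, 3, 5 * BLOCK_SIZE));
    // a chunk running past the end of the file is rejected
    assert!(!chunk_pointer_is_valid(4, 2, 5 * BLOCK_SIZE));
}
```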
@@ -122,6 +145,47 @@
         Ok(statistic)
     }
 
+    /// Performs shift operations defined in the shift_operations vector
+    fn perform_shift_operations(
+        &mut self,
+        mut shift_operations: Vec<(usize, isize)>,
+    ) -> Result<()> {
+        // sort the shift operations by resulting offset to have them in the right order
+        shift_operations.sort_by(|(o1, a1), (o2, a2)| {
+            let to_offset1 = *o1 as isize + *a1;
+            let to_offset2 = *o2 as isize + *a2;
+            if to_offset1 > to_offset2 {
+                Ordering::Greater
+            } else if to_offset1 < to_offset2 {
+                Ordering::Less
+            } else {
+                Ordering::Equal
+            }
+        });
+        let mut shifted = 0isize;
+
+        // perform shifting of chunks to close gaps between them
+        let mut operations = shift_operations.iter().peekable();
+
+        while let Some((offset, amount)) = operations.next() {
+            shifted += *amount;
+            let end_offset = if let Some((o, a)) = operations.peek() {
+                (*o as isize + *a) as usize
+            } else {
+                self.locations.max_offset() as usize
+            };
+            if *offset > end_offset {
+                log::error!("Invalid shift ({} - {}) -> {}", offset, end_offset, shifted);
+                break;
+            }
+            self.shift_right(*offset, end_offset, shifted)?;
+            self.locations
+                .shift_entries(*offset as u32, end_offset as u32, shifted as i32);
+        }
+
+        Ok(())
+    }
+
     /// Scans a single chunk for errors
     fn scan_chunk(
         &mut self,
@@ -135,6 +199,10 @@
         let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
         let reader_offset = offset as u64 * BLOCK_SIZE as u64;
 
+        // Valid compression types are:
+        // 1 - GZip
+        // 2 - Zlib
+        // 3 - uncompressed
         if chunk.compression_type > 3 {
             statistic.invalid_compression_method += 1;
             if options.fix {
@@ -142,7 +210,9 @@
                 self.writer.write_u8(1)?;
             }
         } else {
+            // seek to the start of the actual chunk data
             self.reader.seek(SeekFrom::Start(reader_offset + 5))?;
+
             if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
                 match e {
                     ChunkScanError::IO(e) => {
@@ -162,6 +232,21 @@
                     self.delete_chunk(index)?;
                     return Ok(false);
                 }
+            } else {
+                // validate that the chunk is the one the index should be pointing at
+                if let Some(x) = chunk.x_pos {
+                    if let Some(z) = chunk.z_pos {
+                        if get_chunk_index(x as isize, z as isize) != index {
+                            statistic.invalid_chunk_pointer += 1;
+                            log::debug!("Pointer {} pointing to wrong chunk ({},{})", index, x, z);
+
+                            if options.fix_delete {
+                                // Delete the entry of the chunk from the locations table
+                                self.delete_chunk(index)?;
+                            }
+                        }
+                    }
+                }
             }
         }
@@ -199,13 +284,18 @@
             amount,
             end_offset,
         );
+        // seek to the start of the data to be shifted
         self.reader
             .seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
+        // seek the writer to the same start position
         self.writer
             .seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
+        // then move the writer forward by the amount the data should be shifted
         self.writer
             .seek(SeekFrom::Current(amount as i64 * BLOCK_SIZE as i64))?;
+
         for _ in 0..(end_offset - start_offset) {
+            // since offsets are multiples of the fixed BLOCK_SIZE we can use it as the buffer size
             let mut buf = [0u8; BLOCK_SIZE];
             let read = self.reader.read(&mut buf)?;
             self.writer.write(&buf)?;
@@ -234,6 +324,7 @@
         let mut locations = Vec::new();
 
         for i in (0..BLOCK_SIZE - 1).step_by(4) {
+            // construct a 4-byte big-endian number from the 3-byte offset
             let offset_raw = [0u8, bytes[i], bytes[i + 1], bytes[i + 2]];
             let offset = BigEndian::read_u32(&offset_raw);
             let count = bytes[i + 3];
@@ -258,15 +349,13 @@
         }
     }
 
     /// Returns the offset of a chunk
-    pub fn get_chunk_offset(&self, x: usize, z: usize) -> Option<u32> {
-        let index = x % 32 + (z % 32) * 32;
-        self.inner.get(index).map(|e| (*e).0)
+    pub fn get_chunk_offset(&self, x: isize, z: isize) -> Option<u32> {
+        self.inner.get(get_chunk_index(x, z)).map(|e| (*e).0)
     }
 
     /// Returns the number of sectors for a chunk
-    pub fn get_chunk_sectors(&self, x: usize, z: usize) -> Option<u8> {
-        let index = x % 32 + (z % 32) * 32;
-        self.inner.get(index).map(|e| (*e).1)
+    pub fn get_chunk_sectors(&self, x: isize, z: isize) -> Option<u8> {
+        self.inner.get(get_chunk_index(x, z)).map(|e| (*e).1)
     }
 
     /// Returns chunk entry list
@@ -364,3 +453,17 @@ impl Timestamps {
         Self { inner: timestamps }
     }
 }
+
+#[inline]
+fn get_chunk_index(x: isize, z: isize) -> usize {
+    let mut x = x % 32;
+    let mut z = z % 32;
+    if x < 0 {
+        x += 32;
+    }
+    if z < 0 {
+        z += 32;
+    }
+
+    x as usize + z as usize * 32
+}
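The new `get_chunk_index` wraps negative chunk coordinates into the region-local range 0..32; `rem_euclid` computes the same mapping. A quick equivalence check (illustration only, not part of the patch):

```rust
// rem_euclid-based variant of the manual wrap in get_chunk_index
fn chunk_index(x: isize, z: isize) -> usize {
    (x.rem_euclid(32) + z.rem_euclid(32) * 32) as usize
}

fn main() {
    // chunk (-1, -1) maps to the last slot of the 32x32 location table
    assert_eq!(chunk_index(-1, -1), 31 + 31 * 32);

    // same result as the manual "add 32 if negative" wrap
    let (x, z) = (-33isize, 5isize);
    let manual = {
        let (mut x, mut z) = (x % 32, z % 32);
        if x < 0 { x += 32; }
        if z < 0 { z += 32; }
        (x + z * 32) as usize
    };
    assert_eq!(chunk_index(x, z), manual);
}
```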
diff --git a/src/scan.rs b/src/scan.rs
index 2cfb628..5c83833 100644
--- a/src/scan.rs
+++ b/src/scan.rs
@@ -10,6 +10,7 @@ pub struct ScanStatistics {
     pub corrupted_nbt: u64,
     pub failed_to_read: u64,
     pub corrupted_compression: u64,
+    pub invalid_chunk_pointer: u64,
     pub shrunk_size: u64,
     pub unused_space: u64,
 }
@@ -23,6 +24,7 @@
             missing_nbt: 0,
             corrupted_nbt: 0,
             corrupted_compression: 0,
+            invalid_chunk_pointer: 0,
             failed_to_read: 0,
             shrunk_size: 0,
             unused_space: 0,
@@ -40,6 +42,7 @@ impl Add for ScanStatistics {
         self.failed_to_read += rhs.failed_to_read;
         self.missing_nbt += rhs.missing_nbt;
         self.corrupted_compression += rhs.corrupted_compression;
+        self.invalid_chunk_pointer += rhs.invalid_chunk_pointer;
         self.corrupted_nbt += rhs.corrupted_nbt;
         self.unused_space += rhs.unused_space;
@@ -54,6 +57,7 @@ impl Display for ScanStatistics {
             "
    Total Chunks: {}
    Failed to Read: {}
+   Invalid chunk pointers: {}
    Chunks with invalid length: {}
    Chunks with invalid compression method: {}
    Chunks with missing nbt data: {}
@@ -62,6 +66,7 @@
    Unused space: {} KiB",
             self.total_chunks,
             self.failed_to_read,
+            self.invalid_chunk_pointer,
             self.invalid_length,
             self.invalid_compression_method,
             self.missing_nbt,
diff --git a/src/utils.rs b/src/utils.rs
index b65b6f2..aa3c317 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -25,6 +25,7 @@
         }
     }
 
+    /// Write the data to the inner buffer by using compression
     pub fn write<R: Read>(&mut self, reader: R) -> Result<()> {
         let mut encoder = ZlibEncoder::new(reader, Compression::default());
         let mut buffer = Vec::new();
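For reference, the compression strategy behind `ByteArrayCache` that the new comments allude to: byte arrays are pulled through a read-based zlib encoder and only the compressed bytes are kept. A roundtrip sketch using nothing but flate2, assuming the cache simply stores the encoder's output:

```rust
use flate2::read::{ZlibDecoder, ZlibEncoder};
use flate2::Compression;
use std::io::Read;

fn main() -> std::io::Result<()> {
    let data = vec![0u8; 4096]; // a highly compressible payload

    // compress by reading through the encoder, as utils.rs does
    let mut encoder = ZlibEncoder::new(&data[..], Compression::default());
    let mut compressed = Vec::new();
    encoder.read_to_end(&mut compressed)?;
    assert!(compressed.len() < data.len());

    // decompress to verify the roundtrip
    let mut decoder = ZlibDecoder::new(&compressed[..]);
    let mut restored = Vec::new();
    decoder.read_to_end(&mut restored)?;
    assert_eq!(restored, data);
    Ok(())
}
```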