Fix optional tag checks and shifting of overlapping chunks

Signed-off-by: trivernis <trivernis@protonmail.com>
main
trivernis 4 years ago
parent f72ceb0ed2
commit eac3d24136
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

15
Cargo.lock generated

@ -185,6 +185,18 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "enum-as-inner"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c5f0096a91d210159eceb2ff5e1c4da18388a170e1e3ce948aac9c8fdbbf595"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "env_logger" name = "env_logger"
version = "0.7.1" version = "0.7.1"
@ -307,10 +319,11 @@ dependencies = [
[[package]] [[package]]
name = "minecraft-regions-tool" name = "minecraft-regions-tool"
version = "0.5.3" version = "0.5.4"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"colored", "colored",
"enum-as-inner",
"env_logger", "env_logger",
"flate2", "flate2",
"indicatif", "indicatif",

@ -1,6 +1,6 @@
[package] [package]
name = "minecraft-regions-tool" name = "minecraft-regions-tool"
version = "0.5.3" version = "0.5.4"
authors = ["trivernis <trivernis@protonmail.com>"] authors = ["trivernis <trivernis@protonmail.com>"]
edition = "2018" edition = "2018"
license = "GPL-3.0" license = "GPL-3.0"
@ -18,4 +18,5 @@ indicatif = "0.15.0"
log = "0.4.11" log = "0.4.11"
env_logger ="0.7.1" env_logger ="0.7.1"
colored = "2.0.0" colored = "2.0.0"
flate2 = {version = "1.0", features = ["zlib-ng-compat"]} flate2 = {version = "1.0", features = ["zlib-ng-compat"]}
enum-as-inner = "0.3.3"

@ -1,7 +1,7 @@
use crate::nbt::{NBTError, NBTReader, NBTValue}; use crate::nbt::{NBTError, NBTReader, NBTValue};
use byteorder::{BigEndian, ReadBytesExt}; use byteorder::{BigEndian, ReadBytesExt};
use crate::constants::tags::{LEVEL_TAGS, TAG_LEVEL}; use crate::constants::tags::{LEVEL_TAGS, TAG_LEVEL, TAG_X_POS, TAG_Z_POS};
use crate::region_file::BLOCK_SIZE; use crate::region_file::BLOCK_SIZE;
use flate2::read::{GzDecoder, ZlibDecoder}; use flate2::read::{GzDecoder, ZlibDecoder};
use std::fmt::{Display, Formatter}; use std::fmt::{Display, Formatter};
@ -13,6 +13,8 @@ type IOResult<T> = io::Result<T>;
pub struct Chunk { pub struct Chunk {
pub length: u32, pub length: u32,
pub compression_type: u8, pub compression_type: u8,
pub x_pos: Option<i32>,
pub z_pos: Option<i32>,
} }
impl Chunk { impl Chunk {
@ -26,6 +28,8 @@ impl Chunk {
Ok(Self { Ok(Self {
compression_type, compression_type,
length, length,
x_pos: None,
z_pos: None,
}) })
} }
@ -55,6 +59,9 @@ impl Chunk {
return Err(ChunkScanError::MissingTag(tag)); return Err(ChunkScanError::MissingTag(tag));
} }
} }
self.x_pos = lvl_data[TAG_X_POS].as_int().cloned();
self.z_pos = lvl_data[TAG_Z_POS].as_int().cloned();
Ok(()) Ok(())
} else { } else {
Err(ChunkScanError::InvalidFormat(TAG_LEVEL)) Err(ChunkScanError::InvalidFormat(TAG_LEVEL))

@ -26,12 +26,8 @@ pub mod tags {
TAG_SECTIONS, TAG_SECTIONS,
TAG_LAST_UPDATE, TAG_LAST_UPDATE,
TAG_INHABITED_TIME, TAG_INHABITED_TIME,
TAG_HEIGHTMAPS,
TAG_ENTITIES, TAG_ENTITIES,
TAG_TILE_ENTITIES, TAG_TILE_ENTITIES,
TAG_LIQUID_TICKS,
TAG_POST_PROCESSING,
TAG_STATUS, TAG_STATUS,
TAG_STRUCTURES,
]; ];
} }

@ -1,5 +1,6 @@
use crate::utils::ByteArrayCache; use crate::utils::ByteArrayCache;
use byteorder::{BigEndian, ReadBytesExt}; use byteorder::{BigEndian, ReadBytesExt};
use enum_as_inner::EnumAsInner;
use std::collections::HashMap; use std::collections::HashMap;
use std::error::Error; use std::error::Error;
use std::fmt::{self, Display, Formatter}; use std::fmt::{self, Display, Formatter};
@ -76,6 +77,7 @@ where
/// Parses an array of bytes /// Parses an array of bytes
fn parse_byte_array(&mut self) -> NBTResult<ByteArrayCache> { fn parse_byte_array(&mut self) -> NBTResult<ByteArrayCache> {
let length = self.inner.read_u32::<BigEndian>()?; let length = self.inner.read_u32::<BigEndian>()?;
// store the data of the byte array in a compressed byte array cache to save memory
let mut cache = ByteArrayCache::new(); let mut cache = ByteArrayCache::new();
let mut buf = vec![0u8; length as usize]; let mut buf = vec![0u8; length as usize];
self.inner.read_exact(&mut buf)?; self.inner.read_exact(&mut buf)?;
@ -148,7 +150,7 @@ where
} }
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, EnumAsInner)]
pub enum NBTValue { pub enum NBTValue {
Null, Null,
Byte(u8), Byte(u8),

@ -11,17 +11,20 @@ use std::sync::Arc;
pub const BLOCK_SIZE: usize = 4096; pub const BLOCK_SIZE: usize = 4096;
pub struct RegionFile { pub struct RegionFile {
path: PathBuf,
reader: BufReader<File>, reader: BufReader<File>,
writer: BufWriter<File>, writer: BufWriter<File>,
locations: Locations, locations: Locations,
#[allow(dead_code)] #[allow(dead_code)]
timestamps: Timestamps, timestamps: Timestamps,
length: u64,
} }
impl RegionFile { impl RegionFile {
pub fn new(path: &PathBuf) -> Result<Self> { pub fn new(path: &PathBuf) -> Result<Self> {
let fr = OpenOptions::new().read(true).open(path)?; let fr = OpenOptions::new().read(true).open(path)?;
let fw = OpenOptions::new().write(true).open(path)?; let fw = OpenOptions::new().write(true).open(path)?;
let file_size = fr.metadata()?.len();
let mut reader = BufReader::with_capacity(BLOCK_SIZE, fr); let mut reader = BufReader::with_capacity(BLOCK_SIZE, fr);
let writer = BufWriter::with_capacity(2 * BLOCK_SIZE, fw); let writer = BufWriter::with_capacity(2 * BLOCK_SIZE, fw);
@ -31,10 +34,12 @@ impl RegionFile {
reader.read_exact(&mut timestamps_raw)?; reader.read_exact(&mut timestamps_raw)?;
Ok(Self { Ok(Self {
path: path.clone(),
locations: Locations::from_bytes(&locations_raw), locations: Locations::from_bytes(&locations_raw),
timestamps: Timestamps::from_bytes(&timestamps_raw), timestamps: Timestamps::from_bytes(&timestamps_raw),
reader, reader,
writer, writer,
length: file_size,
}) })
} }
@ -63,20 +68,44 @@ impl RegionFile {
let mut previous_sections = 0; let mut previous_sections = 0;
for (index, (offset, sections)) in entries { for (index, (offset, sections)) in entries {
// Calculate and seek to the start of the chunk
let reader_offset = offset as u64 * BLOCK_SIZE as u64; let reader_offset = offset as u64 * BLOCK_SIZE as u64;
self.reader.seek(SeekFrom::Start(reader_offset))?; self.reader.seek(SeekFrom::Start(reader_offset))?;
let offset_diff = offset - (previous_offset + previous_sections); let offset_diff = offset as i32 - (previous_offset as i32 + previous_sections as i32);
// Check if there is wasted space between the chunks
// since the chunks are iterated ordered by offset the previous chunk is the closest
if offset_diff > 0 { if offset_diff > 0 {
statistic.unused_space += (BLOCK_SIZE * offset_diff as usize) as u64; statistic.unused_space += (BLOCK_SIZE * offset_diff as usize) as u64;
log::debug!(
"Gap of unused {:.2} KiB detected between {} and {}",
(BLOCK_SIZE as f32 * offset_diff as f32) / 1024.0,
previous_offset,
offset
);
if options.fix { if options.fix {
shift_operations.push((offset as usize, -(offset_diff as isize))); shift_operations.push((offset as usize, -(offset_diff as isize)));
} }
} }
// Check if the chunk is longer than the file
if offset < 2 || self.length < (offset + sections as u32) as u64 * BLOCK_SIZE as u64 {
statistic.invalid_chunk_pointer += 1;
log::debug!(
"Invalid chunk offset and sections at index {}: {} + {}",
index,
offset,
sections
);
if options.fix_delete {
self.delete_chunk(index)?;
}
continue;
}
match Chunk::from_buf_reader(&mut self.reader) { match Chunk::from_buf_reader(&mut self.reader) {
Ok(chunk) => { Ok(chunk) => {
let exists = let exists =
self.scan_chunk(index, offset, sections, chunk, &mut statistic, options)?; self.scan_chunk(index, offset, sections, chunk, &mut statistic, options)?;
// If scan_chunk returns false the chunk entry was deleted
if !exists && options.fix { if !exists && options.fix {
shift_operations shift_operations
.push((offset as usize + sections as usize, -(sections as isize))) .push((offset as usize + sections as usize, -(sections as isize)))
@ -84,7 +113,12 @@ impl RegionFile {
} }
Err(e) => { Err(e) => {
statistic.failed_to_read += 1; statistic.failed_to_read += 1;
log::error!("Failed to read chunk at {}: {}", offset, e); log::error!(
"Failed to read chunk at {} in {:?}: {}",
offset,
self.path,
e
);
if options.fix_delete { if options.fix_delete {
self.delete_chunk(index)?; self.delete_chunk(index)?;
shift_operations shift_operations
@ -98,20 +132,9 @@ impl RegionFile {
} }
if options.fix || options.fix_delete { if options.fix || options.fix_delete {
let mut shifted = 0isize; self.perform_shift_operations(shift_operations)?;
let mut operations = shift_operations.iter().peekable(); // The new size of the file is the estimated size based on the highest chunk offset + sections
while let Some((offset, amount)) = operations.next() {
shifted += *amount;
let end_offset = if let Some((o, a)) = operations.peek() {
(*o as isize + *a) as usize
} else {
self.locations.max_offset() as usize
};
self.shift_right(*offset, end_offset, shifted)?;
self.locations
.shift_entries(*offset as u32, end_offset as u32, shifted as i32);
}
statistic.shrunk_size = self.locations.estimated_size(); statistic.shrunk_size = self.locations.estimated_size();
self.writer.seek(SeekFrom::Start(0))?; self.writer.seek(SeekFrom::Start(0))?;
self.writer self.writer
@ -122,6 +145,47 @@ impl RegionFile {
Ok(statistic) Ok(statistic)
} }
/// Performs the shift operations collected during the scan.
///
/// Each entry is `(block_offset, shift_amount)`: the chunk data starting at
/// `block_offset` (in BLOCK_SIZE units) is moved by the accumulated shift
/// amount, closing the gaps left by deleted chunks or unused space.
fn perform_shift_operations(
    &mut self,
    mut shift_operations: Vec<(usize, isize)>,
) -> Result<()> {
    // Sort by the resulting target offset (offset + amount) so the moves are
    // applied left-to-right and never overwrite data that still has to move.
    // NOTE: sorting on the computed key fixes the previous comparator, which
    // compared `to_offset1` against itself and therefore never returned
    // `Ordering::Greater`, yielding an inconsistent (and wrong) order.
    shift_operations.sort_by_key(|(offset, amount)| *offset as isize + *amount);

    // The shifts are cumulative: every later run moves by the sum of all
    // amounts seen so far.
    let mut shifted = 0isize;
    let mut operations = shift_operations.iter().peekable();

    while let Some((offset, amount)) = operations.next() {
        shifted += *amount;
        // This run of chunks ends where the next operation's shifted data
        // begins, or at the end of the used region for the last operation.
        let end_offset = if let Some((o, a)) = operations.peek() {
            (*o as isize + *a) as usize
        } else {
            self.locations.max_offset() as usize
        };
        // Guard against inconsistent operations that would produce a
        // negative-length range instead of panicking inside shift_right.
        if *offset > end_offset {
            log::error!("Invalid shift ({} - {}) -> {}", offset, end_offset, shifted);
            break;
        }
        self.shift_right(*offset, end_offset, shifted)?;
        self.locations
            .shift_entries(*offset as u32, end_offset as u32, shifted as i32);
    }

    Ok(())
}
/// Scans a single chunk for errors /// Scans a single chunk for errors
fn scan_chunk( fn scan_chunk(
&mut self, &mut self,
@ -135,6 +199,10 @@ impl RegionFile {
let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil(); let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
let reader_offset = offset as u64 * BLOCK_SIZE as u64; let reader_offset = offset as u64 * BLOCK_SIZE as u64;
// Valid compression types are:
// 0 - uncompressed
// 1 - GZIP
// 2 - ZLIB
if chunk.compression_type > 3 { if chunk.compression_type > 3 {
statistic.invalid_compression_method += 1; statistic.invalid_compression_method += 1;
if options.fix { if options.fix {
@ -142,7 +210,9 @@ impl RegionFile {
self.writer.write_u8(1)?; self.writer.write_u8(1)?;
} }
} else { } else {
// seek to the start of the actual chunk data
self.reader.seek(SeekFrom::Start(reader_offset + 5))?; self.reader.seek(SeekFrom::Start(reader_offset + 5))?;
if let Err(e) = chunk.validate_nbt_data(&mut self.reader) { if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
match e { match e {
ChunkScanError::IO(e) => { ChunkScanError::IO(e) => {
@ -162,6 +232,21 @@ impl RegionFile {
self.delete_chunk(index)?; self.delete_chunk(index)?;
return Ok(false); return Ok(false);
} }
} else {
// validate that the chunk is the one the index should be pointing at
if let Some(x) = chunk.x_pos {
if let Some(z) = chunk.z_pos {
if get_chunk_index(x as isize, z as isize) != index {
statistic.invalid_chunk_pointer += 1;
log::debug!("Pointer {} pointing to wrong chunk ({},{})", index, x, z);
if options.fix_delete {
// Delete the entry of the chunk from the locations table
self.delete_chunk(index)?;
}
}
}
}
} }
} }
@ -199,13 +284,18 @@ impl RegionFile {
amount, amount,
end_offset, end_offset,
); );
// seek to the start of the data to be shifted
self.reader self.reader
.seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?; .seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
// seek to the start of the data to be shifted
self.writer self.writer
.seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?; .seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
// seek the amount the data should be shifted
self.writer self.writer
.seek(SeekFrom::Current(amount as i64 * BLOCK_SIZE as i64))?; .seek(SeekFrom::Current(amount as i64 * BLOCK_SIZE as i64))?;
for _ in 0..(end_offset - start_offset) { for _ in 0..(end_offset - start_offset) {
// since the offset is based on the fixed BLOCK_SIZE we can use that as our buffer size
let mut buf = [0u8; BLOCK_SIZE]; let mut buf = [0u8; BLOCK_SIZE];
let read = self.reader.read(&mut buf)?; let read = self.reader.read(&mut buf)?;
self.writer.write(&buf)?; self.writer.write(&buf)?;
@ -234,6 +324,7 @@ impl Locations {
let mut locations = Vec::new(); let mut locations = Vec::new();
for i in (0..BLOCK_SIZE - 1).step_by(4) { for i in (0..BLOCK_SIZE - 1).step_by(4) {
// construct a 4-byte number from 3 bytes
let offset_raw = [0u8, bytes[i], bytes[i + 1], bytes[i + 2]]; let offset_raw = [0u8, bytes[i], bytes[i + 1], bytes[i + 2]];
let offset = BigEndian::read_u32(&offset_raw); let offset = BigEndian::read_u32(&offset_raw);
let count = bytes[i + 3]; let count = bytes[i + 3];
@ -258,15 +349,13 @@ impl Locations {
} }
/// Returns the offset of a chunk /// Returns the offset of a chunk
pub fn get_chunk_offset(&self, x: usize, z: usize) -> Option<u32> { pub fn get_chunk_offset(&self, x: isize, z: isize) -> Option<u32> {
let index = x % 32 + (z % 32) * 32; self.inner.get(get_chunk_index(x, z)).map(|e| (*e).0)
self.inner.get(index).map(|e| (*e).0)
} }
/// Returns the number of sectors for a chunk /// Returns the number of sectors for a chunk
pub fn get_chunk_sectors(&self, x: usize, z: usize) -> Option<u8> { pub fn get_chunk_sectors(&self, x: isize, z: isize) -> Option<u8> {
let index = x % 32 + (z % 32) * 32; self.inner.get(get_chunk_index(x, z)).map(|e| (*e).1)
self.inner.get(index).map(|e| (*e).1)
} }
/// Returns chunk entry list /// Returns chunk entry list
@ -364,3 +453,17 @@ impl Timestamps {
Self { inner: timestamps } Self { inner: timestamps }
} }
} }
/// Returns the index of a chunk entry in the locations table for the given
/// chunk coordinates.
///
/// Coordinates are reduced to the 32x32 region-local grid. `rem_euclid`
/// yields the non-negative remainder directly, so negative chunk coordinates
/// map to the correct slot without the manual `+ 32` adjustment.
#[inline]
fn get_chunk_index(x: isize, z: isize) -> usize {
    let x = x.rem_euclid(32) as usize;
    let z = z.rem_euclid(32) as usize;
    x + z * 32
}

@ -10,6 +10,7 @@ pub struct ScanStatistics {
pub corrupted_nbt: u64, pub corrupted_nbt: u64,
pub failed_to_read: u64, pub failed_to_read: u64,
pub corrupted_compression: u64, pub corrupted_compression: u64,
pub invalid_chunk_pointer: u64,
pub shrunk_size: u64, pub shrunk_size: u64,
pub unused_space: u64, pub unused_space: u64,
} }
@ -23,6 +24,7 @@ impl ScanStatistics {
missing_nbt: 0, missing_nbt: 0,
corrupted_nbt: 0, corrupted_nbt: 0,
corrupted_compression: 0, corrupted_compression: 0,
invalid_chunk_pointer: 0,
failed_to_read: 0, failed_to_read: 0,
shrunk_size: 0, shrunk_size: 0,
unused_space: 0, unused_space: 0,
@ -40,6 +42,7 @@ impl Add for ScanStatistics {
self.failed_to_read += rhs.failed_to_read; self.failed_to_read += rhs.failed_to_read;
self.missing_nbt += rhs.missing_nbt; self.missing_nbt += rhs.missing_nbt;
self.corrupted_compression += rhs.corrupted_compression; self.corrupted_compression += rhs.corrupted_compression;
self.invalid_chunk_pointer += rhs.invalid_chunk_pointer;
self.corrupted_nbt += rhs.corrupted_nbt; self.corrupted_nbt += rhs.corrupted_nbt;
self.unused_space += rhs.unused_space; self.unused_space += rhs.unused_space;
@ -54,6 +57,7 @@ impl Display for ScanStatistics {
" "
Total Chunks: {} Total Chunks: {}
Failed to Read: {} Failed to Read: {}
Invalid chunk pointers: {}
Chunks with invalid length: {} Chunks with invalid length: {}
Chunks with invalid compression method: {} Chunks with invalid compression method: {}
Chunks with missing nbt data: {} Chunks with missing nbt data: {}
@ -62,6 +66,7 @@ impl Display for ScanStatistics {
Unused space: {} KiB", Unused space: {} KiB",
self.total_chunks, self.total_chunks,
self.failed_to_read, self.failed_to_read,
self.invalid_chunk_pointer,
self.invalid_length, self.invalid_length,
self.invalid_compression_method, self.invalid_compression_method,
self.missing_nbt, self.missing_nbt,

@ -25,6 +25,7 @@ impl ByteArrayCache {
} }
} }
/// Write the data to the inner buffer by using compression
pub fn write<R: Read>(&mut self, reader: R) -> Result<()> { pub fn write<R: Read>(&mut self, reader: R) -> Result<()> {
let mut encoder = ZlibEncoder::new(reader, Compression::default()); let mut encoder = ZlibEncoder::new(reader, Compression::default());
let mut buffer = Vec::new(); let mut buffer = Vec::new();

Loading…
Cancel
Save