Fix optional tag checks and shifting of overlapping chunks

Signed-off-by: trivernis <trivernis@protonmail.com>
Branch: main
Commit: eac3d24136 (parent f72ceb0ed2)
Author: trivernis, 4 years ago
Signed by: Trivernis (GPG Key ID: DFFFCC2C7A02DB45)

Cargo.lock (generated)

@@ -185,6 +185,18 @@ version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 
+[[package]]
+name = "enum-as-inner"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c5f0096a91d210159eceb2ff5e1c4da18388a170e1e3ce948aac9c8fdbbf595"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "env_logger"
 version = "0.7.1"

@@ -307,10 +319,11 @@ dependencies = [
 [[package]]
 name = "minecraft-regions-tool"
-version = "0.5.3"
+version = "0.5.4"
 dependencies = [
  "byteorder",
  "colored",
+ "enum-as-inner",
  "env_logger",
  "flate2",
  "indicatif",

@@ -1,6 +1,6 @@
 [package]
 name = "minecraft-regions-tool"
-version = "0.5.3"
+version = "0.5.4"
 authors = ["trivernis <trivernis@protonmail.com>"]
 edition = "2018"
 license = "GPL-3.0"

@@ -18,4 +18,5 @@ indicatif = "0.15.0"
 log = "0.4.11"
 env_logger = "0.7.1"
 colored = "2.0.0"
-flate2 = {version = "1.0", features = ["zlib-ng-compat"]}
+flate2 = {version = "1.0", features = ["zlib-ng-compat"]}
+enum-as-inner = "0.3.3"

@ -1,7 +1,7 @@
use crate::nbt::{NBTError, NBTReader, NBTValue};
use byteorder::{BigEndian, ReadBytesExt};
use crate::constants::tags::{LEVEL_TAGS, TAG_LEVEL};
use crate::constants::tags::{LEVEL_TAGS, TAG_LEVEL, TAG_X_POS, TAG_Z_POS};
use crate::region_file::BLOCK_SIZE;
use flate2::read::{GzDecoder, ZlibDecoder};
use std::fmt::{Display, Formatter};
@ -13,6 +13,8 @@ type IOResult<T> = io::Result<T>;
pub struct Chunk {
pub length: u32,
pub compression_type: u8,
pub x_pos: Option<i32>,
pub z_pos: Option<i32>,
}
impl Chunk {
@ -26,6 +28,8 @@ impl Chunk {
Ok(Self {
compression_type,
length,
x_pos: None,
z_pos: None,
})
}
@ -55,6 +59,9 @@ impl Chunk {
return Err(ChunkScanError::MissingTag(tag));
}
}
self.x_pos = lvl_data[TAG_X_POS].as_int().cloned();
self.z_pos = lvl_data[TAG_Z_POS].as_int().cloned();
Ok(())
} else {
Err(ChunkScanError::InvalidFormat(TAG_LEVEL))

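The x and z position tags are optional in the chunk NBT, so they are read into Option<i32> through the accessors generated by the EnumAsInner derive added to NBTValue below. A minimal self-contained sketch of that pattern, using a hypothetical Value enum in place of NBTValue and assuming enum-as-inner = "0.3":

use enum_as_inner::EnumAsInner;
use std::collections::HashMap;

// Hypothetical stand-in for NBTValue; only the accessor pattern matters here.
#[derive(Clone, Debug, EnumAsInner)]
enum Value {
    Int(i32),
    String(String),
}

fn main() {
    let mut compound: HashMap<String, Value> = HashMap::new();
    compound.insert("xPos".into(), Value::Int(-3));

    // as_int() returns Option<&i32>; cloned() maps it to Option<i32>,
    // so a missing or non-integer tag yields None instead of an error.
    let x_pos: Option<i32> = compound.get("xPos").and_then(|v| v.as_int()).cloned();
    let z_pos: Option<i32> = compound.get("zPos").and_then(|v| v.as_int()).cloned();
    assert_eq!(x_pos, Some(-3));
    assert_eq!(z_pos, None);
}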
@@ -26,12 +26,8 @@ pub mod tags {
         TAG_SECTIONS,
         TAG_LAST_UPDATE,
         TAG_INHABITED_TIME,
-        TAG_HEIGHTMAPS,
-        TAG_ENTITIES,
-        TAG_TILE_ENTITIES,
-        TAG_LIQUID_TICKS,
-        TAG_POST_PROCESSING,
         TAG_STATUS,
         TAG_STRUCTURES,
     ];
 }

@ -1,5 +1,6 @@
use crate::utils::ByteArrayCache;
use byteorder::{BigEndian, ReadBytesExt};
use enum_as_inner::EnumAsInner;
use std::collections::HashMap;
use std::error::Error;
use std::fmt::{self, Display, Formatter};
@ -76,6 +77,7 @@ where
/// Parses an array of bytes
fn parse_byte_array(&mut self) -> NBTResult<ByteArrayCache> {
let length = self.inner.read_u32::<BigEndian>()?;
// store the data of the byte array in a compressed byte array cache to save memory
let mut cache = ByteArrayCache::new();
let mut buf = vec![0u8; length as usize];
self.inner.read_exact(&mut buf)?;
@ -148,7 +150,7 @@ where
}
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, EnumAsInner)]
pub enum NBTValue {
Null,
Byte(u8),

@@ -11,17 +11,20 @@ use std::sync::Arc;
 pub const BLOCK_SIZE: usize = 4096;
 
 pub struct RegionFile {
+    path: PathBuf,
     reader: BufReader<File>,
     writer: BufWriter<File>,
     locations: Locations,
     #[allow(dead_code)]
     timestamps: Timestamps,
+    length: u64,
 }
 
 impl RegionFile {
     pub fn new(path: &PathBuf) -> Result<Self> {
         let fr = OpenOptions::new().read(true).open(path)?;
         let fw = OpenOptions::new().write(true).open(path)?;
+        let file_size = fr.metadata()?.len();
         let mut reader = BufReader::with_capacity(BLOCK_SIZE, fr);
         let writer = BufWriter::with_capacity(2 * BLOCK_SIZE, fw);

@@ -31,10 +34,12 @@ impl RegionFile {
         reader.read_exact(&mut timestamps_raw)?;
 
         Ok(Self {
+            path: path.clone(),
             locations: Locations::from_bytes(&locations_raw),
             timestamps: Timestamps::from_bytes(&timestamps_raw),
             reader,
             writer,
+            length: file_size,
         })
     }

@@ -63,20 +68,44 @@ impl RegionFile {
         let mut previous_sections = 0;
 
         for (index, (offset, sections)) in entries {
             // Calculate and seek to the start of the chunk
             let reader_offset = offset as u64 * BLOCK_SIZE as u64;
             self.reader.seek(SeekFrom::Start(reader_offset))?;
-            let offset_diff = offset - (previous_offset + previous_sections);
+            let offset_diff = offset as i32 - (previous_offset as i32 + previous_sections as i32);
 
             // Check if there is wasted space between the chunks
+            // since the chunks are iterated ordered by offset the previous chunk is the closest
             if offset_diff > 0 {
                 statistic.unused_space += (BLOCK_SIZE * offset_diff as usize) as u64;
+                log::debug!(
+                    "Gap of unused {:.2} KiB detected between {} and {}",
+                    (BLOCK_SIZE as f32 * offset_diff as f32) / 1024.0,
+                    previous_offset,
+                    offset
+                );
                 if options.fix {
                     shift_operations.push((offset as usize, -(offset_diff as isize)));
                 }
             }
+
+            // Check if the pointer reaches into the header or past the end of the file
+            if offset < 2 || self.length < (offset + sections as u32) as u64 * BLOCK_SIZE as u64 {
+                statistic.invalid_chunk_pointer += 1;
+                log::debug!(
+                    "Invalid chunk offset and sections at index {}: {} + {}",
+                    index,
+                    offset,
+                    sections
+                );
+                if options.fix_delete {
+                    self.delete_chunk(index)?;
+                }
+                continue;
+            }
+
             match Chunk::from_buf_reader(&mut self.reader) {
                 Ok(chunk) => {
                     let exists =
                         self.scan_chunk(index, offset, sections, chunk, &mut statistic, options)?;
+                    // If scan_chunk returns false the chunk entry was deleted
                     if !exists && options.fix {
                         shift_operations
                             .push((offset as usize + sections as usize, -(sections as isize)))
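The new guard rejects pointers that reach into the 8 KiB header (blocks 0 and 1 hold the location and timestamp tables) or past the end of the file. A sketch of the same arithmetic, with a hypothetical helper name:

const BLOCK_SIZE: u64 = 4096;

// Hypothetical helper mirroring the check above: data offsets below 2 point
// into the header tables, and offset + sections must stay within the file.
fn chunk_pointer_is_valid(offset: u32, sections: u8, file_len: u64) -> bool {
    offset >= 2 && (offset as u64 + sections as u64) * BLOCK_SIZE <= file_len
}

fn main() {
    assert!(chunk_pointer_is_valid(2, 1, 3 * 4096));
    assert!(!chunk_pointer_is_valid(1, 1, 3 * 4096)); // points into the header tables
    assert!(!chunk_pointer_is_valid(2, 2, 3 * 4096)); // extends past the end of the file
}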
@@ -84,7 +113,12 @@ impl RegionFile {
                 }
                 Err(e) => {
                     statistic.failed_to_read += 1;
-                    log::error!("Failed to read chunk at {}: {}", offset, e);
+                    log::error!(
+                        "Failed to read chunk at {} in {:?}: {}",
+                        offset,
+                        self.path,
+                        e
+                    );
                     if options.fix_delete {
                         self.delete_chunk(index)?;
                         shift_operations

@@ -98,20 +132,9 @@ impl RegionFile {
         }
 
         if options.fix || options.fix_delete {
-            let mut shifted = 0isize;
-            let mut operations = shift_operations.iter().peekable();
-            while let Some((offset, amount)) = operations.next() {
-                shifted += *amount;
-                let end_offset = if let Some((o, a)) = operations.peek() {
-                    (*o as isize + *a) as usize
-                } else {
-                    self.locations.max_offset() as usize
-                };
-                self.shift_right(*offset, end_offset, shifted)?;
-                self.locations
-                    .shift_entries(*offset as u32, end_offset as u32, shifted as i32);
-            }
+            self.perform_shift_operations(shift_operations)?;
+
+            // The new size of the file is the estimated size based on the highest chunk offset + sections
             statistic.shrunk_size = self.locations.estimated_size();
             self.writer.seek(SeekFrom::Start(0))?;
             self.writer

@@ -122,6 +145,47 @@ impl RegionFile {
         Ok(statistic)
     }
 
+    /// Performs shift operations defined in the shift_operations vector
+    fn perform_shift_operations(
+        &mut self,
+        mut shift_operations: Vec<(usize, isize)>,
+    ) -> Result<()> {
+        // sort the shift operations by resulting offset to have them in the right order
+        shift_operations.sort_by(|(o1, a1), (o2, a2)| {
+            let to_offset1 = *o1 as isize + *a1;
+            let to_offset2 = *o2 as isize + *a2;
+
+            if to_offset1 > to_offset2 {
+                Ordering::Greater
+            } else if to_offset1 < to_offset2 {
+                Ordering::Less
+            } else {
+                Ordering::Equal
+            }
+        });
+        let mut shifted = 0isize;
+
+        // perform shifting of chunks to close gaps between them
+        let mut operations = shift_operations.iter().peekable();
+
+        while let Some((offset, amount)) = operations.next() {
+            shifted += *amount;
+            let end_offset = if let Some((o, a)) = operations.peek() {
+                (*o as isize + *a) as usize
+            } else {
+                self.locations.max_offset() as usize
+            };
+            if *offset > end_offset {
+                log::error!("Invalid shift ({} - {}) -> {}", offset, end_offset, shifted);
+                break;
+            }
+            self.shift_right(*offset, end_offset, shifted)?;
+            self.locations
+                .shift_entries(*offset as u32, end_offset as u32, shifted as i32);
+        }
+
+        Ok(())
+    }
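perform_shift_operations sorts by the target offset (offset + amount) and accumulates the shift while walking the list, so each run of blocks is moved exactly once by the total of all gaps before it. A minimal sketch of that ordering logic on plain numbers (assumed data, no file I/O):

fn main() {
    // (offset, amount): negative amounts move chunk data towards the file start
    let mut ops: Vec<(usize, isize)> = vec![(40, -2), (10, -1), (25, -3)];

    // sort by the offset each block range will end up at
    ops.sort_by_key(|(o, a)| *o as isize + *a);

    let mut shifted = 0isize;
    for (offset, amount) in &ops {
        // the shift accumulates: later ranges move by the sum of all gaps so far
        shifted += *amount;
        println!("move blocks starting at {} by {} blocks", offset, shifted);
    }
    // prints shifts of -1, -4 and -6 blocks for offsets 10, 25 and 40
}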
     /// Scans a single chunk for errors
     fn scan_chunk(
         &mut self,

@@ -135,6 +199,10 @@ impl RegionFile {
         let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
         let reader_offset = offset as u64 * BLOCK_SIZE as u64;
 
+        // Valid compression types are:
+        // 1 - GZip
+        // 2 - Zlib
+        // 3 - uncompressed
         if chunk.compression_type > 3 {
             statistic.invalid_compression_method += 1;
             if options.fix {

@@ -142,7 +210,9 @@ impl RegionFile {
                 self.writer.write_u8(1)?;
             }
         } else {
+            // seek to the start of the actual chunk data
             self.reader.seek(SeekFrom::Start(reader_offset + 5))?;
+
             if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
                 match e {
                     ChunkScanError::IO(e) => {

@@ -162,6 +232,21 @@ impl RegionFile {
                         self.delete_chunk(index)?;
                         return Ok(false);
                     }
+                }
+            } else {
+                // validate that the chunk is the one the index should be pointing at
+                if let Some(x) = chunk.x_pos {
+                    if let Some(z) = chunk.z_pos {
+                        if get_chunk_index(x as isize, z as isize) != index {
+                            statistic.invalid_chunk_pointer += 1;
+                            log::debug!("Pointer {} pointing to wrong chunk ({},{})", index, x, z);
+                            if options.fix_delete {
+                                // Delete the entry of the chunk from the locations table
+                                self.delete_chunk(index)?;
+                            }
+                        }
+                    }
+                }
             }
         }
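For reference, the compression byte documented above selects the decoder when chunk data is read; a sketch using the flate2 decoders this crate already imports (the decompress_chunk helper itself is hypothetical, not part of the commit):

use flate2::read::{GzDecoder, ZlibDecoder};
use std::io::{self, Read};

// Hypothetical helper: map the chunk's compression byte to a decoder.
// 1 = GZip, 2 = Zlib, 3 = uncompressed.
fn decompress_chunk(compression_type: u8, raw: &[u8]) -> io::Result<Vec<u8>> {
    let mut data = Vec::new();
    match compression_type {
        1 => { GzDecoder::new(raw).read_to_end(&mut data)?; }
        2 => { ZlibDecoder::new(raw).read_to_end(&mut data)?; }
        3 => data.extend_from_slice(raw),
        t => {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("invalid compression type {}", t),
            ))
        }
    }
    Ok(data)
}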
@@ -199,13 +284,18 @@ impl RegionFile {
             amount,
             end_offset,
         );
+        // seek to the start of the data to be shifted
         self.reader
             .seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
+        // seek the writer to the same start position
         self.writer
             .seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
+        // then seek forward by the amount the data should be shifted
        self.writer
             .seek(SeekFrom::Current(amount as i64 * BLOCK_SIZE as i64))?;
+
         for _ in 0..(end_offset - start_offset) {
+            // since the offset is based on the fixed BLOCK_SIZE we can use that as our buffer size
             let mut buf = [0u8; BLOCK_SIZE];
             let read = self.reader.read(&mut buf)?;
             self.writer.write_all(&buf[..read])?;

@@ -234,6 +324,7 @@ impl Locations {
         let mut locations = Vec::new();
 
         for i in (0..BLOCK_SIZE - 1).step_by(4) {
+            // construct a 4-byte number from 3 bytes
             let offset_raw = [0u8, bytes[i], bytes[i + 1], bytes[i + 2]];
             let offset = BigEndian::read_u32(&offset_raw);
             let count = bytes[i + 3];
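Each location entry packs a 24-bit block offset and a 1-byte sector count into 4 bytes; the 3 offset bytes are widened to a u32 by prepending a zero byte, as in the loop above. A small standalone check (assumes the byteorder crate):

use byteorder::{BigEndian, ByteOrder};

fn main() {
    // 24-bit big-endian offset 0x000002 -> block 2 (the first data block)
    assert_eq!(BigEndian::read_u32(&[0u8, 0x00, 0x00, 0x02]), 2);
    // 0x010000 -> block 65536
    assert_eq!(BigEndian::read_u32(&[0u8, 0x01, 0x00, 0x00]), 65536);
}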
@@ -258,15 +349,13 @@ impl Locations {
     }
 
     /// Returns the offset of a chunk
-    pub fn get_chunk_offset(&self, x: usize, z: usize) -> Option<u32> {
-        let index = x % 32 + (z % 32) * 32;
-        self.inner.get(index).map(|e| (*e).0)
+    pub fn get_chunk_offset(&self, x: isize, z: isize) -> Option<u32> {
+        self.inner.get(get_chunk_index(x, z)).map(|e| (*e).0)
     }
 
     /// Returns the number of sectors for a chunk
-    pub fn get_chunk_sectors(&self, x: usize, z: usize) -> Option<u8> {
-        let index = x % 32 + (z % 32) * 32;
-        self.inner.get(index).map(|e| (*e).1)
+    pub fn get_chunk_sectors(&self, x: isize, z: isize) -> Option<u8> {
+        self.inner.get(get_chunk_index(x, z)).map(|e| (*e).1)
     }
 
     /// Returns chunk entry list

@@ -364,3 +453,17 @@ impl Timestamps {
         Self { inner: timestamps }
     }
 }
+
+#[inline]
+fn get_chunk_index(x: isize, z: isize) -> usize {
+    let mut x = x % 32;
+    let mut z = z % 32;
+    if x < 0 {
+        x += 32;
+    }
+    if z < 0 {
+        z += 32;
+    }
+
+    x as usize + z as usize * 32
+}
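get_chunk_index normalizes negative chunk coordinates into the 0..32 range before computing the table index; the manual branching is equivalent to isize::rem_euclid, as this sketch (a hypothetical alternative, not part of the commit) illustrates:

fn get_chunk_index_euclid(x: isize, z: isize) -> usize {
    // rem_euclid always yields a result in 0..32, even for negative inputs
    (x.rem_euclid(32) + z.rem_euclid(32) * 32) as usize
}

fn main() {
    assert_eq!(get_chunk_index_euclid(0, 0), 0);
    assert_eq!(get_chunk_index_euclid(31, 31), 1023);
    assert_eq!(get_chunk_index_euclid(-1, 0), 31); // chunk x = -1 wraps to column 31
    assert_eq!(get_chunk_index_euclid(-32, -32), 0);
}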

@@ -10,6 +10,7 @@ pub struct ScanStatistics {
     pub corrupted_nbt: u64,
     pub failed_to_read: u64,
     pub corrupted_compression: u64,
+    pub invalid_chunk_pointer: u64,
     pub shrunk_size: u64,
     pub unused_space: u64,
 }

@@ -23,6 +24,7 @@ impl ScanStatistics {
             missing_nbt: 0,
             corrupted_nbt: 0,
             corrupted_compression: 0,
+            invalid_chunk_pointer: 0,
             failed_to_read: 0,
             shrunk_size: 0,
             unused_space: 0,

@@ -40,6 +42,7 @@ impl Add for ScanStatistics {
         self.failed_to_read += rhs.failed_to_read;
         self.missing_nbt += rhs.missing_nbt;
         self.corrupted_compression += rhs.corrupted_compression;
+        self.invalid_chunk_pointer += rhs.invalid_chunk_pointer;
         self.corrupted_nbt += rhs.corrupted_nbt;
         self.unused_space += rhs.unused_space;

@@ -54,6 +57,7 @@ impl Display for ScanStatistics {
             "
            Total Chunks: {}
            Failed to Read: {}
+           Invalid chunk pointers: {}
            Chunks with invalid length: {}
            Chunks with invalid compression method: {}
            Chunks with missing nbt data: {}

@@ -62,6 +66,7 @@ impl Display for ScanStatistics {
            Unused space: {} KiB",
             self.total_chunks,
             self.failed_to_read,
+            self.invalid_chunk_pointer,
             self.invalid_length,
             self.invalid_compression_method,
             self.missing_nbt,

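Since ScanStatistics implements Add, per-region-file results can be folded into a single total. A reduced sketch of that design (hypothetical Stats struct carrying only two of the counters):

use std::ops::Add;

#[derive(Default, Clone, Copy)]
struct Stats {
    total_chunks: u64,
    invalid_chunk_pointer: u64,
}

impl Add for Stats {
    type Output = Self;

    fn add(mut self, rhs: Self) -> Self {
        self.total_chunks += rhs.total_chunks;
        self.invalid_chunk_pointer += rhs.invalid_chunk_pointer;
        self
    }
}

fn main() {
    let per_file = vec![
        Stats { total_chunks: 1024, invalid_chunk_pointer: 2 },
        Stats { total_chunks: 512, invalid_chunk_pointer: 0 },
    ];
    // fold the per-file statistics into one aggregate, like summing scan results
    let total = per_file.into_iter().fold(Stats::default(), Add::add);
    assert_eq!(total.total_chunks, 1536);
    assert_eq!(total.invalid_chunk_pointer, 2);
}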
@@ -25,6 +25,7 @@ impl ByteArrayCache {
         }
     }
 
+    /// Write the data to the inner buffer by using compression
     pub fn write<R: Read>(&mut self, reader: R) -> Result<()> {
         let mut encoder = ZlibEncoder::new(reader, Compression::default());
         let mut buffer = Vec::new();

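ByteArrayCache keeps NBT byte arrays Zlib-compressed in memory (see the comment added in the parser above). A minimal round-trip sketch with flate2's read-based encoder, independent of the cache type itself:

use flate2::read::{ZlibDecoder, ZlibEncoder};
use flate2::Compression;
use std::io::Read;

fn main() -> std::io::Result<()> {
    let data = vec![42u8; 4096]; // repetitive data, compresses well

    // the encoder wraps any reader and yields compressed bytes on read
    let mut compressed = Vec::new();
    ZlibEncoder::new(&data[..], Compression::default()).read_to_end(&mut compressed)?;
    assert!(compressed.len() < data.len());

    // decompress on demand, as the cache would when the value is accessed
    let mut restored = Vec::new();
    ZlibDecoder::new(&compressed[..]).read_to_end(&mut restored)?;
    assert_eq!(restored, data);
    Ok(())
}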