From eb88a39b9a092d311d0c2362ae1724571d188d54 Mon Sep 17 00:00:00 2001
From: trivernis
Date: Sun, 27 Sep 2020 13:03:58 +0200
Subject: [PATCH] Add optional deletion of corrupted chunks and region files

Signed-off-by: trivernis

---
 Cargo.lock          |   2 +-
 Cargo.toml          |   2 +-
 src/chunk.rs        |  10 ++-
 src/lib.rs          |   1 +
 src/main.rs         |  17 +++-
 src/nbt.rs          |  20 ++---
 src/region_file.rs  | 192 ++++++++++++++++++++++++++++++++------------
 src/scan.rs         |  27 +++++++
 src/utils.rs        |  45 +++++++++++
 src/world_folder.rs |  15 +++-
 10 files changed, 261 insertions(+), 70 deletions(-)
 create mode 100644 src/utils.rs

diff --git a/Cargo.lock b/Cargo.lock
index f7e6e15..b02cbeb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -307,7 +307,7 @@ dependencies = [
 
 [[package]]
 name = "minecraft-regions-tool"
-version = "0.4.1"
+version = "0.5.0"
 dependencies = [
  "byteorder",
  "colored",
diff --git a/Cargo.toml b/Cargo.toml
index 0a514d2..d20cc67 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "minecraft-regions-tool"
-version = "0.4.1"
+version = "0.5.0"
 authors = ["trivernis "]
 edition = "2018"
 license = "GPL-3.0"
diff --git a/src/chunk.rs b/src/chunk.rs
index 6d6970a..69b643d 100644
--- a/src/chunk.rs
+++ b/src/chunk.rs
@@ -11,6 +11,7 @@ type IOResult<T> = io::Result<T>;
 const TAG_LEVEL: &str = "Level";
 const TAG_X_POS: &str = "xPos";
 const TAG_Z_POS: &str = "zPos";
+const TAG_SECTIONS: &str = "Sections";
 
 #[derive(Debug)]
 pub struct Chunk {
@@ -54,8 +55,15 @@ impl Chunk {
             Err(ChunkScanError::MissingTag(TAG_X_POS))
         } else if !lvl_data.contains_key(TAG_Z_POS) {
             Err(ChunkScanError::MissingTag(TAG_Z_POS))
+        } else if !lvl_data.contains_key(TAG_SECTIONS) {
+            Err(ChunkScanError::MissingTag(TAG_SECTIONS))
         } else {
-            Ok(())
+            let sections = &lvl_data[TAG_SECTIONS];
+            if let NBTValue::List(_) = sections {
+                Ok(())
+            } else {
+                Err(ChunkScanError::InvalidFormat(TAG_SECTIONS))
+            }
         }
     } else {
         Err(ChunkScanError::InvalidFormat(TAG_LEVEL))
diff --git a/src/lib.rs b/src/lib.rs
index 504fb3f..acfc1e4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,4 +2,5 @@ pub mod chunk;
 pub mod nbt;
 pub mod region_file;
 pub mod scan;
+pub mod utils;
 pub mod world_folder;
diff --git a/src/main.rs b/src/main.rs
index a061786..890139c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
 use colored::*;
 use env_logger::Env;
 use log::Level;
+use minecraft_regions_tool::scan::ScanOptions;
 use minecraft_regions_tool::world_folder::WorldFolder;
 use std::path::PathBuf;
 use structopt::StructOpt;
@@ -26,14 +27,19 @@ enum SubCommand {
     Count,
 
     /// Scan for errors in the region files and optionally fix them
-    Scan(ScanOptions),
+    Scan(ScanArgs),
 }
 
 #[derive(StructOpt, Debug)]
 #[structopt()]
-struct ScanOptions {
+struct ScanArgs {
+    /// Fixes errors that can be fixed without deleting data
     #[structopt(short, long)]
     fix: bool,
+
+    /// Deletes corrupted chunks and region files
+    #[structopt(short, long)]
+    delete: bool,
 }
 
 fn main() {
@@ -47,7 +53,12 @@
                 log::info!("Fixing fixable errors.");
             }
             log::info!("Scanning Region files for errors...");
-            log::info!("Scan Results:\n{}", world.scan_files(opt.fix).unwrap())
+            log::info!(
+                "Scan Results:\n{}",
+                world
+                    .scan_files(ScanOptions::new().fix(opt.fix).fix_delete(opt.delete))
+                    .unwrap()
+            )
         }
     }
 }
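
Reviewer note (not part of the diff): with the renamed ScanArgs in place, the new flag surfaces in the CLI roughly as below. The exact position of the world path depends on the Opt struct, which this patch does not touch, so treat the argument order as an assumption:

    # report errors only
    minecraft-regions-tool <world-path> scan
    # additionally repair fixable headers and delete corrupted chunks/region files
    minecraft-regions-tool <world-path> scan --fix --delete
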
diff --git a/src/nbt.rs b/src/nbt.rs
index 4b16f9a..2ac0914 100644
--- a/src/nbt.rs
+++ b/src/nbt.rs
@@ -1,13 +1,14 @@
+use crate::utils::ByteArrayCache;
 use byteorder::{BigEndian, ReadBytesExt};
 use std::collections::HashMap;
 use std::error::Error;
 use std::fmt::{self, Display, Formatter};
-use std::io::{self, Read};
+use std::io::{self};
 
 const MAX_RECURSION: u64 = 100;
 
 pub struct NBTReader<R> {
-    inner: Box<R>,
+    inner: R,
     recursion: u64,
 }
 
@@ -19,7 +20,7 @@ where
 {
     pub fn new(inner: R) -> Self {
         Self {
-            inner: Box::new(inner),
+            inner,
             recursion: 0,
         }
     }
@@ -73,13 +74,14 @@
     }
 
     /// Parses an array of bytes
-    fn parse_byte_array(&mut self) -> NBTResult<Vec<u8>> {
+    fn parse_byte_array(&mut self) -> NBTResult<ByteArrayCache> {
         let length = self.inner.read_u32::<BigEndian>()?;
-        for _ in 0..length {
-            self.inner.read_u8()?;
-        }
+        let mut cache = ByteArrayCache::new();
+        let mut buf = vec![0u8; length as usize];
+        self.inner.read_exact(&mut buf)?;
+        cache.write(&buf[..])?;
 
-        Ok(Vec::with_capacity(0))
+        Ok(cache)
     }
 
     /// Parses a string value
@@ -155,7 +157,7 @@ pub enum NBTValue {
     Long(i64),
     Float(f32),
     Double(f64),
-    ByteArray(Vec<u8>),
+    ByteArray(ByteArrayCache),
     String(String),
     List(Vec<NBTValue>),
     Compound(HashMap<String, NBTValue>),
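
Reviewer note (not part of the diff): parse_byte_array now zlib-compresses the tag payload into a ByteArrayCache (added in src/utils.rs below) instead of discarding it. The cache's Read impl yields the compressed bytes, so a consumer that wants the raw array back has to wrap it in a decoder. A minimal sketch, assuming flate2's ZlibDecoder as the counterpart of the ZlibEncoder used in utils.rs:

    use flate2::read::ZlibDecoder;
    use minecraft_regions_tool::utils::ByteArrayCache;
    use std::io::Read;

    // Round-trip: compress raw bytes into the cache, then decode them again.
    fn roundtrip(raw: &[u8]) -> std::io::Result<Vec<u8>> {
        let mut cache = ByteArrayCache::new();
        cache.write(raw)?; // zlib-compresses `raw` into the cache
        let mut decoded = Vec::new();
        ZlibDecoder::new(cache).read_to_end(&mut decoded)?;
        Ok(decoded)
    }
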
diff --git a/src/region_file.rs b/src/region_file.rs
index 18967ad..fb94360 100644
--- a/src/region_file.rs
+++ b/src/region_file.rs
@@ -1,9 +1,12 @@
 use crate::chunk::{Chunk, ChunkScanError};
+use crate::scan::ScanOptions;
 use crate::scan::ScanStatistics;
 use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
+use std::cmp::Ordering;
 use std::fs::{File, OpenOptions};
 use std::io::{BufReader, BufWriter, Read, Result, Seek, SeekFrom, Write};
 use std::path::PathBuf;
+use std::sync::Arc;
 
 pub const BLOCK_SIZE: usize = 4096;
 
@@ -37,61 +40,34 @@ impl RegionFile {
 
     /// Returns the number of chunks in the file
     pub fn count_chunks(&self) -> usize {
-        return self.locations.valid_entries().len();
+        return self.locations.valid_entries_enumerate().len();
     }
 
     /// Scans the chunk entries for possible errors
-    pub fn scan_chunks(&mut self, fix: bool) -> Result<ScanStatistics> {
+    pub fn scan_chunks(&mut self, options: &Arc<ScanOptions>) -> Result<ScanStatistics> {
         let mut statistic = ScanStatistics::new();
-        let entries = self.locations.valid_entries();
-        let mut corrected_entries = Vec::new();
+        let mut entries = self.locations.valid_entries_enumerate();
+        entries.sort_by(|(_, (a, _)), (_, (b, _))| {
+            if a > b {
+                Ordering::Greater
+            } else if a < b {
+                Ordering::Less
+            } else {
+                Ordering::Equal
+            }
+        });
         statistic.total_chunks = entries.len() as u64;
 
-        for (offset, sections) in &entries {
-            let reader_offset = *offset as u64 * BLOCK_SIZE as u64;
+        for (index, _) in entries {
+            // re-read the entry each iteration: an earlier delete_chunk may have shifted it
+            let (offset, sections) = self.locations.get_chunk_entry_unchecked(index);
+            let reader_offset = offset as u64 * BLOCK_SIZE as u64;
             self.reader.seek(SeekFrom::Start(reader_offset))?;
 
             match Chunk::from_buf_reader(&mut self.reader) {
-                Ok(mut chunk) => {
-                    let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
-
-                    if chunk.compression_type > 3 {
-                        statistic.invalid_compression_method += 1;
-                        if fix {
-                            self.writer.seek(SeekFrom::Start(reader_offset + 4))?;
-                            self.writer.write_u8(1)?;
-                        }
-                    } else {
-                        self.reader.seek(SeekFrom::Start(reader_offset + 5))?;
-                        if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
-                            match e {
-                                ChunkScanError::IO(e) => {
-                                    log::debug!(
-                                        "Compression error when reading chunk {}: {}",
-                                        offset,
-                                        e
-                                    );
-                                    statistic.corrupted_compression += 1;
-                                }
-                                ChunkScanError::NBTError(e) => {
-                                    log::debug!("Corrupted nbt data for chunk {}: {}", offset, e);
-                                    statistic.corrupted_nbt += 1;
-                                }
-                                _ => {
-                                    log::debug!("Missing nbt data for chunk {}: {}", offset, e);
-                                    statistic.missing_nbt += 1;
-                                }
-                            }
-                        }
-                    }
-
-                    if *sections != chunk_sections as u8 || chunk.length >= 1_048_576 {
-                        statistic.invalid_length += 1;
-                        corrected_entries.push((*offset, chunk_sections as u8));
-                    } else {
-                        corrected_entries.push((*offset, *sections));
-                    }
+                Ok(chunk) => {
+                    self.scan_chunk(index, offset, sections, chunk, &mut statistic, options)?;
                 }
                 Err(e) => {
                     statistic.failed_to_read += 1;
@@ -99,9 +75,8 @@ impl RegionFile {
                 }
             }
         }
-        self.locations.set_entries(corrected_entries);
 
-        if fix {
+        if options.fix || options.fix_delete {
             self.writer.seek(SeekFrom::Start(0))?;
             self.writer
                 .write_all(self.locations.to_bytes().as_slice())?;
@@ -110,6 +85,86 @@ impl RegionFile {
 
         Ok(statistic)
     }
+
+    /// Scans a single chunk for errors
+    fn scan_chunk(
+        &mut self,
+        index: usize,
+        offset: u32,
+        sections: u8,
+        mut chunk: Chunk,
+        statistic: &mut ScanStatistics,
+        options: &Arc<ScanOptions>,
+    ) -> Result<()> {
+        let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
+        let reader_offset = offset as u64 * BLOCK_SIZE as u64;
+
+        if chunk.compression_type > 3 {
+            statistic.invalid_compression_method += 1;
+            if options.fix {
+                self.writer.seek(SeekFrom::Start(reader_offset + 4))?;
+                self.writer.write_u8(1)?;
+            }
+        } else {
+            self.reader.seek(SeekFrom::Start(reader_offset + 5))?;
+            if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
+                match e {
+                    ChunkScanError::IO(e) => {
+                        log::debug!("Compression error at chunk {}: {}", offset, e);
+                        statistic.corrupted_compression += 1;
+                    }
+                    ChunkScanError::NBTError(e) => {
+                        log::debug!("Corrupted nbt data for chunk {}: {}", offset, e);
+                        statistic.corrupted_nbt += 1;
+                    }
+                    _ => {
+                        log::debug!("Missing nbt data for chunk {}: {}", offset, e);
+                        statistic.missing_nbt += 1;
+                    }
+                }
+                if options.fix_delete {
+                    self.delete_chunk(index)?;
+                }
+            }
+        }
+
+        if sections != chunk_sections as u8 || chunk.length >= 1_048_576 {
+            statistic.invalid_length += 1;
+            self.locations
+                .replace_entry_unchecked(index, (offset, chunk_sections as u8));
+        }
+
+        Ok(())
+    }
+
+    /// Deletes a chunk and shifts all following chunks to the left
+    pub fn delete_chunk(&mut self, index: usize) -> Result<()> {
+        let (offset, sections) = self.locations.get_chunk_entry_unchecked(index);
+        self.reader.seek(SeekFrom::Start(
+            (offset as usize * BLOCK_SIZE + sections as usize * BLOCK_SIZE) as u64,
+        ))?;
+        self.writer
+            .seek(SeekFrom::Start((offset as usize * BLOCK_SIZE) as u64))?;
+        log::debug!(
+            "Shifting chunk entries starting from {} by {} to the left",
+            offset,
+            sections as u32
+        );
+        loop {
+            let mut buf = [0u8; BLOCK_SIZE];
+            let read = self.reader.read(&mut buf)?;
+            self.writer.write_all(&buf[..read])?;
+            if read == 0 {
+                break;
+            }
+        }
+        self.locations.delete_chunk_entry_unchecked(index);
+        self.locations.shift_entries(offset, -(sections as i32));
+
+        Ok(())
+    }
 }
 
 #[derive(Debug)]
@@ -158,16 +213,55 @@ impl Locations {
     }
 
     /// Returns chunk entry list
-    pub fn valid_entries(&self) -> Vec<(u32, u8)> {
+    pub fn valid_entries_enumerate(&self) -> Vec<(usize, (u32, u8))> {
         self.inner
             .iter()
-            .filter_map(|e| if (*e).0 >= 2 { Some(*e) } else { None })
+            .enumerate()
+            .filter_map(|e| {
+                if (*e.1).0 >= 2 {
+                    Some((e.0, *e.1))
+                } else {
+                    None
+                }
+            })
             .collect()
     }
 
-    /// Replaces the entry list with a new one
-    pub fn set_entries(&mut self, entries: Vec<(u32, u8)>) {
-        self.inner = entries;
+    /// Replaces an entry with a new one. Panics if the index doesn't exist
+    pub fn replace_entry_unchecked(&mut self, index: usize, entry: (u32, u8)) {
+        self.inner[index] = entry;
+    }
+
+    /// Returns a chunk entry for an index. Panics if it doesn't exist.
+    pub fn get_chunk_entry_unchecked(&self, index: usize) -> (u32, u8) {
+        self.inner[index]
+    }
+
+    /// Sets a chunk entry to not generated. Panics if the index doesn't exist
+    pub fn delete_chunk_entry_unchecked(&mut self, index: usize) {
+        self.inner[index] = (0, 0);
+    }
+
+    /// Shifts all entries starting from `start_offset` by `amount`
+    pub fn shift_entries(&mut self, start_offset: u32, amount: i32) {
+        log::debug!(
+            "Shifting location entries starting from {} by {}",
+            start_offset,
+            amount
+        );
+        self.inner = self
+            .inner
+            .iter()
+            .map(|e| {
+                let mut entry = *e;
+
+                if e.0 >= start_offset {
+                    entry.0 = (entry.0 as i32 + amount) as u32;
+                }
+
+                entry
+            })
+            .collect();
+    }
 }
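
Reviewer note (not part of the diff): the interplay of delete_chunk_entry_unchecked and shift_entries is easiest to see on a small location table. Locations has no public constructor, so this standalone sketch mirrors the logic rather than calling it:

    // Each entry is (offset in 4 KiB blocks, section count).
    fn delete(entries: &mut [(u32, u8)], index: usize) {
        let (offset, sections) = entries[index];
        // delete_chunk_entry_unchecked: mark the entry as not generated
        entries[index] = (0, 0);
        // shift_entries(offset, -(sections as i32)): move later chunks left
        for e in entries.iter_mut() {
            if e.0 >= offset {
                e.0 = (e.0 as i32 - sections as i32) as u32;
            }
        }
    }

    fn main() {
        // Deleting the chunk at offset 4 (2 sections) shifts the chunk at 6 to 4.
        let mut entries = [(2, 2), (4, 2), (6, 1)];
        delete(&mut entries, 1);
        assert_eq!(entries, [(2, 2), (0, 0), (4, 1)]);
    }
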
diff --git a/src/scan.rs b/src/scan.rs
index 3a4f8be..98877cb 100644
--- a/src/scan.rs
+++ b/src/scan.rs
@@ -64,3 +64,30 @@ impl Display for ScanStatistics {
         )
     }
 }
+
+#[derive(Clone, Debug)]
+pub struct ScanOptions {
+    pub fix: bool,
+    pub fix_delete: bool,
+}
+
+impl ScanOptions {
+    pub fn new() -> Self {
+        ScanOptions {
+            fix: false,
+            fix_delete: false,
+        }
+    }
+
+    pub fn fix(mut self, fix: bool) -> Self {
+        self.fix = fix;
+
+        self
+    }
+
+    pub fn fix_delete(mut self, fix_delete: bool) -> Self {
+        self.fix_delete = fix_delete;
+
+        self
+    }
+}
diff --git a/src/utils.rs b/src/utils.rs
new file mode 100644
index 0000000..b65b6f2
--- /dev/null
+++ b/src/utils.rs
@@ -0,0 +1,45 @@
+use flate2::read::ZlibEncoder;
+use flate2::Compression;
+use std::io::{Read, Result};
+
+#[derive(Clone, Debug)]
+pub struct ByteArrayCache {
+    inner: Vec<u8>,
+    position: usize,
+}
+
+impl ByteArrayCache {
+    /// Creates a new byte array cache
+    pub fn new() -> Self {
+        Self {
+            inner: Vec::new(),
+            position: 0,
+        }
+    }
+
+    /// Creates a new byte array cache with the given capacity
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            inner: Vec::with_capacity(capacity),
+            position: 0,
+        }
+    }
+
+    pub fn write<R: Read>(&mut self, reader: R) -> Result<()> {
+        let mut encoder = ZlibEncoder::new(reader, Compression::default());
+        let mut buffer = Vec::new();
+        encoder.read_to_end(&mut buffer)?;
+        self.inner.append(&mut buffer);
+
+        Ok(())
+    }
+}
+
+impl Read for ByteArrayCache {
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+        let read = (&self.inner[self.position..]).read(buf)?;
+        self.position += read;
+
+        Ok(read)
+    }
+}
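
Reviewer note (not part of the diff): ScanOptions is also the entry point for library consumers. A minimal sketch of a scan driven from code rather than the CLI; WorldFolder::new is assumed here, since its signature is outside this diff:

    use minecraft_regions_tool::scan::ScanOptions;
    use minecraft_regions_tool::world_folder::WorldFolder;
    use std::path::PathBuf;

    fn run(world_path: PathBuf) -> std::io::Result<()> {
        let world = WorldFolder::new(world_path); // assumed constructor
        let options = ScanOptions::new().fix(true).fix_delete(false);
        // scan_files takes the options by value and wraps them in an Arc internally
        let stats = world.scan_files(options)?;
        println!("{}", stats); // ScanStatistics implements Display
        Ok(())
    }
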
diff --git a/src/world_folder.rs b/src/world_folder.rs
index e9e62f9..d2d3714 100644
--- a/src/world_folder.rs
+++ b/src/world_folder.rs
@@ -1,4 +1,5 @@
 use crate::region_file::RegionFile;
+use crate::scan::ScanOptions;
 use crate::scan::ScanStatistics;
 use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
 use log::LevelFilter;
@@ -7,6 +8,7 @@ use std::fs;
 use std::io;
 use std::ops::Add;
 use std::path::PathBuf;
+use std::sync::Arc;
 
 pub struct WorldFolder {
     path: PathBuf,
@@ -29,9 +31,11 @@ impl WorldFolder {
         Ok(count)
     }
 
-    pub fn scan_files(&self, fix: bool) -> io::Result<ScanStatistics> {
+    /// Scans all region files for potential errors
+    pub fn scan_files(&self, options: ScanOptions) -> io::Result<ScanStatistics> {
         let paths = self.region_file_paths();
         let bar = ProgressBar::new(paths.len() as u64);
+        let options = Arc::new(options);
         bar.set_style(
             ProgressStyle::default_bar().template("\r[{eta_precise}] {wide_bar} {pos}/{len} "),
         );
@@ -46,13 +50,18 @@
             log::debug!("Opening and scanning region file {:?}", path);
             let mut region_file = RegionFile::new(path)
                 .map_err(|e| {
-                    log::error!("Failed to open region file {:?}: {}", path, e);
+                    log::error!("Failed to open region file {:?}: {}\n", path, e);
+                    if options.fix_delete {
+                        if let Err(e) = fs::remove_file(path) {
+                            return e;
+                        }
+                    }
                     e
                 })
                 .ok()?;
-            let result = region_file.scan_chunks(fix).ok()?;
+            let result = region_file.scan_chunks(&options).ok()?;
             bar.inc(1);
 
             log::debug!("Statistics for {:?}:\n{}", path, result);