Compare commits


5 Commits
v3.1 ... main

All five commits are by trivernis (4 years ago), each Signed-off-by: trivernis <trivernis@protonmail.com>.

eac3d24136  Fix optional tag checks and shifting of overlapping chunks
f72ceb0ed2  Add more nbt tags to be checked
3f6063b24a  Improve shifting by partial shifting on multiple operations
4842b9f346  Fix shifting of chunks to occupy unused space
eb88a39b9a  Add optional deletion of corrupted chunks and region files

Cargo.lock (generated)

@ -185,6 +185,18 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "enum-as-inner"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c5f0096a91d210159eceb2ff5e1c4da18388a170e1e3ce948aac9c8fdbbf595"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "env_logger"
version = "0.7.1"
@ -307,10 +319,11 @@ dependencies = [
[[package]]
name = "minecraft-regions-tool"
version = "0.4.1"
version = "0.5.4"
dependencies = [
"byteorder",
"colored",
"enum-as-inner",
"env_logger",
"flate2",
"indicatif",

Cargo.toml
@ -1,6 +1,6 @@
[package]
name = "minecraft-regions-tool"
version = "0.4.1"
version = "0.5.4"
authors = ["trivernis <trivernis@protonmail.com>"]
edition = "2018"
license = "GPL-3.0"
@ -18,4 +18,5 @@ indicatif = "0.15.0"
log = "0.4.11"
env_logger = "0.7.1"
colored = "2.0.0"
flate2 = {version = "1.0", features = ["zlib-ng-compat"]}
flate2 = {version = "1.0", features = ["zlib-ng-compat"]}
enum-as-inner = "0.3.3"

src/chunk.rs
@ -1,27 +1,26 @@
use crate::nbt::{NBTError, NBTReader, NBTValue};
use byteorder::{BigEndian, ReadBytesExt};
use crate::constants::tags::{LEVEL_TAGS, TAG_LEVEL, TAG_X_POS, TAG_Z_POS};
use crate::region_file::BLOCK_SIZE;
use flate2::read::ZlibDecoder;
use flate2::read::{GzDecoder, ZlibDecoder};
use std::fmt::{Display, Formatter};
use std::io::{self, BufReader, Error};
type IOResult<T> = io::Result<T>;
const TAG_LEVEL: &str = "Level";
const TAG_X_POS: &str = "xPos";
const TAG_Z_POS: &str = "zPos";
#[derive(Debug)]
pub struct Chunk {
pub length: u32,
pub compression_type: u8,
pub x_pos: Option<i32>,
pub z_pos: Option<i32>,
}
impl Chunk {
pub fn from_buf_reader<R: io::Read + io::Seek>(reader: &mut R) -> IOResult<Self> {
let length = reader.read_u32::<BigEndian>()?;
if length > 128 * BLOCK_SIZE as u32 {
if length > 128 * BLOCK_SIZE as u32 || length == 0 {
return Err(io::Error::from(io::ErrorKind::InvalidData));
}
let compression_type = reader.read_u8()?;
@ -29,6 +28,8 @@ impl Chunk {
Ok(Self {
compression_type,
length,
x_pos: None,
z_pos: None,
})
}
@ -36,7 +37,10 @@ impl Chunk {
&mut self,
reader: &mut R,
) -> Result<(), ChunkScanError> {
let data = if self.compression_type == 2 {
let data = if self.compression_type == 1 {
let mut nbt_reader = NBTReader::new(BufReader::new(GzDecoder::new(reader)));
nbt_reader.parse()?
} else if self.compression_type == 2 {
let mut nbt_reader = NBTReader::new(BufReader::new(ZlibDecoder::new(reader)));
nbt_reader.parse()?
} else {
@ -50,13 +54,15 @@ impl Chunk {
let lvl_data = &data[TAG_LEVEL];
if let NBTValue::Compound(lvl_data) = lvl_data {
if !lvl_data.contains_key(TAG_X_POS) {
Err(ChunkScanError::MissingTag(TAG_X_POS))
} else if !lvl_data.contains_key(TAG_Z_POS) {
Err(ChunkScanError::MissingTag(TAG_Z_POS))
} else {
Ok(())
for tag in LEVEL_TAGS {
if !lvl_data.contains_key(*tag) {
return Err(ChunkScanError::MissingTag(tag));
}
}
self.x_pos = lvl_data[TAG_X_POS].as_int().cloned();
self.z_pos = lvl_data[TAG_Z_POS].as_int().cloned();
Ok(())
} else {
Err(ChunkScanError::InvalidFormat(TAG_LEVEL))
}

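The change in chunk.rs above dispatches on the chunk's compression byte: 1 selects a GzDecoder, 2 a ZlibDecoder, and the length check now also rejects zero-length chunks. Below is a minimal, self-contained sketch of that header layout and dispatch; read_chunk_payload and the in-memory sample chunk are illustrative helpers, not part of the crate.

use std::io::{self, Cursor, Read};

use byteorder::{BigEndian, ReadBytesExt};
use flate2::read::{GzDecoder, ZlibDecoder};

/// Reads one chunk payload: a 4-byte big-endian length, a 1-byte compression
/// type, then `length - 1` bytes of (possibly compressed) NBT data.
fn read_chunk_payload<R: Read>(reader: &mut R) -> io::Result<Vec<u8>> {
    let length = reader.read_u32::<BigEndian>()?;
    if length == 0 {
        return Err(io::Error::from(io::ErrorKind::InvalidData));
    }
    let compression_type = reader.read_u8()?;
    let mut raw = vec![0u8; (length - 1) as usize];
    reader.read_exact(&mut raw)?;

    let mut data = Vec::new();
    match compression_type {
        1 => { GzDecoder::new(&raw[..]).read_to_end(&mut data)?; }   // GZip
        2 => { ZlibDecoder::new(&raw[..]).read_to_end(&mut data)?; } // Zlib
        // other values (including uncompressed) are not handled in this sketch
        _ => return Err(io::Error::from(io::ErrorKind::InvalidData)),
    }
    Ok(data)
}

fn main() -> io::Result<()> {
    use flate2::{write::ZlibEncoder, Compression};
    use std::io::Write;

    // Build a tiny fake chunk in memory: zlib-compressed payload, compression type 2.
    let mut enc = ZlibEncoder::new(Vec::new(), Compression::default());
    enc.write_all(b"not real NBT, just payload bytes")?;
    let compressed = enc.finish()?;

    let mut chunk = Vec::new();
    chunk.extend_from_slice(&(compressed.len() as u32 + 1).to_be_bytes());
    chunk.push(2); // compression type 2 selects the ZlibDecoder path
    chunk.extend_from_slice(&compressed);

    let payload = read_chunk_payload(&mut Cursor::new(chunk))?;
    assert_eq!(&payload[..], &b"not real NBT, just payload bytes"[..]);
    Ok(())
}
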
src/constants.rs
@ -0,0 +1,33 @@
/// NBT tag constants
pub mod tags {
pub const TAG_LEVEL: &str = "Level";
pub const TAG_X_POS: &str = "xPos";
pub const TAG_Z_POS: &str = "zPos";
pub const TAG_SECTIONS: &str = "Sections";
pub const TAG_LAST_UPDATE: &str = "LastUpdate";
pub const TAG_INHABITED_TIME: &str = "InhabitedTime";
pub const TAG_HEIGHTMAPS: &str = "Heightmaps";
pub const TAG_CARVING_MASKS: &str = "CarvingMasks";
pub const TAG_ENTITIES: &str = "Entities";
pub const TAG_TILE_ENTITIES: &str = "TileEntities";
pub const TAG_TILE_TICKS: &str = "TileTicks";
pub const TAG_LIQUID_TICKS: &str = "LiquidTicks";
pub const TAG_LIGHTS: &str = "Lights";
pub const TAG_LIQUIDS_TO_BE_TICKED: &str = "LiquidsToBeTicked";
pub const TAG_TO_BE_TICKED: &str = "ToBeTicked";
pub const TAG_POST_PROCESSING: &str = "PostProcessing";
pub const TAG_STATUS: &str = "Status";
pub const TAG_STRUCTURES: &str = "Structures";
/// A list of required tags stored in the level tag
pub const LEVEL_TAGS: &[&'static str] = &[
TAG_X_POS,
TAG_Z_POS,
TAG_SECTIONS,
TAG_LAST_UPDATE,
TAG_INHABITED_TIME,
TAG_ENTITIES,
TAG_TILE_ENTITIES,
TAG_STATUS,
];
}

src/lib.rs
@ -1,5 +1,7 @@
pub mod chunk;
pub mod constants;
pub mod nbt;
pub mod region_file;
pub mod scan;
pub mod utils;
pub mod world_folder;

src/main.rs
@ -1,6 +1,7 @@
use colored::*;
use env_logger::Env;
use log::Level;
use minecraft_regions_tool::scan::ScanOptions;
use minecraft_regions_tool::world_folder::WorldFolder;
use std::path::PathBuf;
use structopt::StructOpt;
@ -26,14 +27,19 @@ enum SubCommand {
Count,
/// Scan for errors in the region files and optionally fix them
Scan(ScanOptions),
Scan(ScanArgs),
}
#[derive(StructOpt, Debug)]
#[structopt()]
struct ScanOptions {
struct ScanArgs {
/// Fixes errors that can be fixed without problems
#[structopt(short, long)]
fix: bool,
/// Deletes corrupted data
#[structopt(short, long)]
delete: bool,
}
fn main() {
@ -47,7 +53,12 @@ fn main() {
log::info!("Fixing fixable errors.");
}
log::info!("Scanning Region files for errors...");
log::info!("Scan Results:\n{}", world.scan_files(opt.fix).unwrap())
log::info!(
"Scan Results:\n{}",
world
.scan_files(ScanOptions::new().fix(opt.fix).fix_delete(opt.delete))
.unwrap()
)
}
}
}

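The CLI now keeps its flags in a local ScanArgs struct and translates them into the library's ScanOptions builder before calling scan_files. A small sketch of that hand-off, assuming the crate is used as a library dependency; the flag values are hard-coded stand-ins for the parsed --fix/--delete options.

// Assumes minecraft-regions-tool is available as a library dependency.
use minecraft_regions_tool::scan::ScanOptions;

fn main() {
    // Stand-ins for the parsed ScanArgs flags (--fix, --delete).
    let (fix, delete) = (true, false);

    let options = ScanOptions::new().fix(fix).fix_delete(delete);
    assert!(options.fix);
    assert!(!options.fix_delete);
}
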
src/nbt.rs
@ -1,13 +1,15 @@
use crate::utils::ByteArrayCache;
use byteorder::{BigEndian, ReadBytesExt};
use enum_as_inner::EnumAsInner;
use std::collections::HashMap;
use std::error::Error;
use std::fmt::{self, Display, Formatter};
use std::io::{self, Read};
use std::io::{self};
const MAX_RECURSION: u64 = 100;
pub struct NBTReader<R> {
inner: Box<R>,
inner: R,
recursion: u64,
}
@ -19,7 +21,7 @@ where
{
pub fn new(inner: R) -> Self {
Self {
inner: Box::new(inner),
inner,
recursion: 0,
}
}
@ -73,13 +75,15 @@ where
}
/// Parses an array of bytes
fn parse_byte_array(&mut self) -> NBTResult<Vec<u8>> {
fn parse_byte_array(&mut self) -> NBTResult<ByteArrayCache> {
let length = self.inner.read_u32::<BigEndian>()?;
for _ in 0..length {
self.inner.read_u8()?;
}
// store the data of the byte array in a compressed byte array cache to save memory
let mut cache = ByteArrayCache::new();
let mut buf = vec![0u8; length as usize];
self.inner.read_exact(&mut buf)?;
cache.write(&buf[..])?;
Ok(Vec::with_capacity(0))
Ok(cache)
}
/// Parses a string value
@ -146,7 +150,7 @@ where
}
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, EnumAsInner)]
pub enum NBTValue {
Null,
Byte(u8),
@ -155,7 +159,7 @@ pub enum NBTValue {
Long(i64),
Float(f32),
Double(f64),
ByteArray(Vec<u8>),
ByteArray(ByteArrayCache),
String(String),
List(Vec<NBTValue>),
Compound(HashMap<String, NBTValue>),

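NBTValue now derives EnumAsInner, which is what provides the as_int() accessor used in chunk.rs to pull xPos/zPos out of the parsed compound without hand-written match arms. A stand-in example of the generated accessors; the enum below is illustrative, in the crate the derive sits on NBTValue.

use enum_as_inner::EnumAsInner; // enum-as-inner = "0.3.3", as added to Cargo.toml

// Illustrative stand-in for the crate's NBTValue.
#[derive(Clone, Debug, EnumAsInner)]
enum Value {
    Int(i32),
    String(String),
}

fn main() {
    let v = Value::Int(7);
    // The derive generates `as_int(&self) -> Option<&i32>` and friends,
    // which is what `as_int().cloned()` in chunk.rs relies on.
    assert_eq!(v.as_int().cloned(), Some(7));
    assert!(v.as_string().is_none());
}
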
src/region_file.rs
@ -1,24 +1,30 @@
use crate::chunk::{Chunk, ChunkScanError};
use crate::scan::ScanOptions;
use crate::scan::ScanStatistics;
use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
use std::cmp::Ordering;
use std::fs::{File, OpenOptions};
use std::io::{BufReader, BufWriter, Read, Result, Seek, SeekFrom, Write};
use std::path::PathBuf;
use std::sync::Arc;
pub const BLOCK_SIZE: usize = 4096;
pub struct RegionFile {
path: PathBuf,
reader: BufReader<File>,
writer: BufWriter<File>,
locations: Locations,
#[allow(dead_code)]
timestamps: Timestamps,
length: u64,
}
impl RegionFile {
pub fn new(path: &PathBuf) -> Result<Self> {
let fr = OpenOptions::new().read(true).open(path)?;
let fw = OpenOptions::new().write(true).open(path)?;
let file_size = fr.metadata()?.len();
let mut reader = BufReader::with_capacity(BLOCK_SIZE, fr);
let writer = BufWriter::with_capacity(2 * BLOCK_SIZE, fw);
@ -28,80 +34,108 @@ impl RegionFile {
reader.read_exact(&mut timestamps_raw)?;
Ok(Self {
path: path.clone(),
locations: Locations::from_bytes(&locations_raw),
timestamps: Timestamps::from_bytes(&timestamps_raw),
reader,
writer,
length: file_size,
})
}
/// Returns the number of chunks in the file
pub fn count_chunks(&self) -> usize {
return self.locations.valid_entries().len();
return self.locations.valid_entries_enumerate().len();
}
/// Scans the chunk entries for possible errors
pub fn scan_chunks(&mut self, fix: bool) -> Result<ScanStatistics> {
pub fn scan_chunks(&mut self, options: &Arc<ScanOptions>) -> Result<ScanStatistics> {
let mut statistic = ScanStatistics::new();
let mut shift_operations: Vec<(usize, isize)> = Vec::new();
let entries = self.locations.valid_entries();
let mut corrected_entries = Vec::new();
let mut entries = self.locations.valid_entries_enumerate();
entries.sort_by(|(_, (a, _)), (_, (b, _))| {
if a > b {
Ordering::Greater
} else if a < b {
Ordering::Less
} else {
Ordering::Equal
}
});
statistic.total_chunks = entries.len() as u64;
let mut previous_offset = 2;
let mut previous_sections = 0;
for (offset, sections) in &entries {
let reader_offset = *offset as u64 * BLOCK_SIZE as u64;
for (index, (offset, sections)) in entries {
// Calculate and seek to the start of the chunk
let reader_offset = offset as u64 * BLOCK_SIZE as u64;
self.reader.seek(SeekFrom::Start(reader_offset))?;
let offset_diff = offset as i32 - (previous_offset as i32 + previous_sections as i32);
// Check if there is wasted space between the chunks
// since the chunks are iterated in order of offset, the previous chunk is the closest one
if offset_diff > 0 {
statistic.unused_space += (BLOCK_SIZE * offset_diff as usize) as u64;
log::debug!(
"Gap of unused {:.2} KiB detected between {} and {}",
(BLOCK_SIZE as f32 * offset_diff as f32) / 1024.0,
previous_offset,
offset
);
if options.fix {
shift_operations.push((offset as usize, -(offset_diff as isize)));
}
}
// Check if the chunk is longer than the file
if offset < 2 || self.length < (offset + sections as u32) as u64 * BLOCK_SIZE as u64 {
statistic.invalid_chunk_pointer += 1;
log::debug!(
"Invalid chunk offset and sections at index {}: {} + {}",
index,
offset,
sections
);
if options.fix_delete {
self.delete_chunk(index)?;
}
continue;
}
match Chunk::from_buf_reader(&mut self.reader) {
Ok(mut chunk) => {
let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
if chunk.compression_type > 3 {
statistic.invalid_compression_method += 1;
if fix {
self.writer.seek(SeekFrom::Start(reader_offset + 4))?;
self.writer.write_u8(1)?;
}
} else {
self.reader.seek(SeekFrom::Start(reader_offset + 5))?;
if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
match e {
ChunkScanError::IO(e) => {
log::debug!(
"Compression error when reading chunk {}: {}",
offset,
e
);
statistic.corrupted_compression += 1;
}
ChunkScanError::NBTError(e) => {
log::debug!("Corrupted nbt data for chunk {}: {}", offset, e);
statistic.corrupted_nbt += 1;
}
_ => {
log::debug!("Missing nbt data for chunk {}: {}", offset, e);
statistic.missing_nbt += 1;
}
}
}
}
if *sections != chunk_sections as u8 || chunk.length >= 1_048_576 {
statistic.invalid_length += 1;
corrected_entries.push((*offset, chunk_sections as u8));
} else {
corrected_entries.push((*offset, *sections));
Ok(chunk) => {
let exists =
self.scan_chunk(index, offset, sections, chunk, &mut statistic, options)?;
// If scan_chunk returns false the chunk entry was deleted
if !exists && options.fix {
shift_operations
.push((offset as usize + sections as usize, -(sections as isize)))
}
}
Err(e) => {
statistic.failed_to_read += 1;
log::error!("Failed to read chunk at {}: {}", offset, e);
log::error!(
"Failed to read chunk at {} in {:?}: {}",
offset,
self.path,
e
);
if options.fix_delete {
self.delete_chunk(index)?;
shift_operations
.push((offset as usize + sections as usize, -(sections as isize)));
}
}
}
previous_offset = offset;
previous_sections = sections as u32;
}
self.locations.set_entries(corrected_entries);
if fix {
if options.fix || options.fix_delete {
self.perform_shift_operations(shift_operations)?;
// The new size of the file is the estimated size based on the highest chunk offset + sections
statistic.shrunk_size = self.locations.estimated_size();
self.writer.seek(SeekFrom::Start(0))?;
self.writer
.write_all(self.locations.to_bytes().as_slice())?;
@ -110,6 +144,174 @@ impl RegionFile {
Ok(statistic)
}
/// Performs shift operations defined in the shift_operations vector
fn perform_shift_operations(
&mut self,
mut shift_operations: Vec<(usize, isize)>,
) -> Result<()> {
// sort the shift operations by resulting offset to have them in the right order
shift_operations.sort_by(|(o1, a1), (o2, a2)| {
let to_offset1 = *o1 as isize + *a1;
let to_offset2 = *o2 as isize + *a2;
if to_offset1 > to_offset2 {
Ordering::Greater
} else if to_offset1 < to_offset2 {
Ordering::Less
} else {
Ordering::Equal
}
});
let mut shifted = 0isize;
// perform shifting of chunks to close gaps between them
let mut operations = shift_operations.iter().peekable();
while let Some((offset, amount)) = operations.next() {
shifted += *amount;
let end_offset = if let Some((o, a)) = operations.peek() {
(*o as isize + *a) as usize
} else {
self.locations.max_offset() as usize
};
if *offset > end_offset {
log::error!("Invalid shift ({} - {}) -> {}", offset, end_offset, shifted);
break;
}
self.shift_right(*offset, end_offset, shifted)?;
self.locations
.shift_entries(*offset as u32, end_offset as u32, shifted as i32);
}
Ok(())
}
/// Scans a single chunk for errors
fn scan_chunk(
&mut self,
index: usize,
offset: u32,
sections: u8,
mut chunk: Chunk,
statistic: &mut ScanStatistics,
options: &Arc<ScanOptions>,
) -> Result<bool> {
let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
let reader_offset = offset as u64 * BLOCK_SIZE as u64;
// Valid compression types are:
// 1 - GZip
// 2 - Zlib
// 3 - uncompressed
if chunk.compression_type > 3 {
statistic.invalid_compression_method += 1;
if options.fix {
self.writer.seek(SeekFrom::Start(reader_offset + 4))?;
self.writer.write_u8(1)?;
}
} else {
// seek to the start of the actual chunk data
self.reader.seek(SeekFrom::Start(reader_offset + 5))?;
if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
match e {
ChunkScanError::IO(e) => {
log::debug!("Compression error at chunk {}: {}", offset, e);
statistic.corrupted_compression += 1;
}
ChunkScanError::NBTError(e) => {
log::debug!("Corrupted nbt data for chunk {}: {}", offset, e);
statistic.corrupted_nbt += 1;
}
_ => {
log::debug!("Missing nbt data for chunk {}: {}", offset, e);
statistic.missing_nbt += 1;
}
}
if options.fix_delete {
self.delete_chunk(index)?;
return Ok(false);
}
} else {
// validate that the chunk is the one the index should be pointing at
if let Some(x) = chunk.x_pos {
if let Some(z) = chunk.z_pos {
if get_chunk_index(x as isize, z as isize) != index {
statistic.invalid_chunk_pointer += 1;
log::debug!("Pointer {} pointing to wrong chunk ({},{})", index, x, z);
if options.fix_delete {
// Delete the entry of the chunk from the locations table
self.delete_chunk(index)?;
}
}
}
}
}
}
if sections != chunk_sections as u8 || chunk.length >= 1_048_576 {
statistic.invalid_length += 1;
if options.fix {
self.locations
.replace_entry_unchecked(index, (offset, chunk_sections as u8));
}
}
Ok(true)
}
/// Deletes a chunk by clearing its entry in the locations table
pub fn delete_chunk(&mut self, index: usize) -> Result<()> {
log::debug!(
"Deleting chunk at {}",
self.locations.get_chunk_entry_unchecked(index).0
);
self.locations.delete_chunk_entry_unchecked(index);
Ok(())
}
/// Shifts the file contents between `start_offset` and `end_offset` by `amount` blocks (negative amounts shift towards the start)
pub fn shift_right(
&mut self,
start_offset: usize,
end_offset: usize,
amount: isize,
) -> Result<()> {
log::debug!(
"Shifting chunk blocks starting from {} by {} until {}",
start_offset,
amount,
end_offset,
);
// seek to the start of the data to be shifted
self.reader
.seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
// seek the writer to the same starting position
self.writer
.seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
// then move the write position by the shift amount
self.writer
.seek(SeekFrom::Current(amount as i64 * BLOCK_SIZE as i64))?;
for _ in 0..(end_offset - start_offset) {
// since the offsets are multiples of the fixed BLOCK_SIZE, it also serves as the buffer size
let mut buf = [0u8; BLOCK_SIZE];
let read = self.reader.read(&mut buf)?;
self.writer.write_all(&buf)?;
if read < BLOCK_SIZE {
break;
}
}
Ok(())
}
/// Closes the region file by flushing the writer
pub fn close(&mut self) -> Result<()> {
self.writer.flush()
}
}
#[derive(Debug)]
@ -122,6 +324,7 @@ impl Locations {
let mut locations = Vec::new();
for i in (0..BLOCK_SIZE - 1).step_by(4) {
// construct a 4-byte number from 3 bytes
let offset_raw = [0u8, bytes[i], bytes[i + 1], bytes[i + 2]];
let offset = BigEndian::read_u32(&offset_raw);
let count = bytes[i + 3];
@ -146,28 +349,91 @@ impl Locations {
}
/// Returns the offset of a chunk
pub fn get_chunk_offset(&self, x: usize, z: usize) -> Option<u32> {
let index = x % 32 + (z % 32) * 32;
self.inner.get(index).map(|e| (*e).0)
pub fn get_chunk_offset(&self, x: isize, z: isize) -> Option<u32> {
self.inner.get(get_chunk_index(x, z)).map(|e| (*e).0)
}
/// Returns the number of sectors for a chunk
pub fn get_chunk_sectors(&self, x: usize, z: usize) -> Option<u8> {
let index = x % 32 + (z % 32) * 32;
self.inner.get(index).map(|e| (*e).1)
pub fn get_chunk_sectors(&self, x: isize, z: isize) -> Option<u8> {
self.inner.get(get_chunk_index(x, z)).map(|e| (*e).1)
}
/// Returns the valid chunk entries together with their indices
pub fn valid_entries(&self) -> Vec<(u32, u8)> {
pub fn valid_entries_enumerate(&self) -> Vec<(usize, (u32, u8))> {
self.inner
.iter()
.filter_map(|e| if (*e).0 >= 2 { Some(*e) } else { None })
.enumerate()
.filter_map(|e| {
if (*e.1).0 >= 2 {
Some((e.0, *e.1))
} else {
None
}
})
.collect()
}
/// Replaces the entry list with a new one
pub fn set_entries(&mut self, entries: Vec<(u32, u8)>) {
self.inner = entries;
/// Returns the highest used block offset in the file (largest chunk offset plus its sector count)
pub fn max_offset(&self) -> u32 {
let largest = self
.inner
.iter()
.max_by(|(a, _), (b, _)| {
if a > b {
Ordering::Greater
} else if a < b {
Ordering::Less
} else {
Ordering::Equal
}
})
.cloned()
.unwrap_or((2, 0));
largest.0 + largest.1 as u32
}
/// Returns the estimated size of all chunks combined, including the header
pub fn estimated_size(&self) -> u64 {
self.max_offset() as u64 * BLOCK_SIZE as u64
}
/// Replaces an entry with a new one. Panics if the index doesn't exist
pub fn replace_entry_unchecked(&mut self, index: usize, entry: (u32, u8)) {
self.inner[index] = entry;
}
/// Returns a chunk entry for an index. Panics if it doesn't exist.
pub fn get_chunk_entry_unchecked(&self, index: usize) -> (u32, u8) {
self.inner[index]
}
/// Sets a chunk entry to not generated. Panics if the index doesn't exist
pub fn delete_chunk_entry_unchecked(&mut self, index: usize) {
self.inner[index] = (0, 0);
}
/// Shifts all entries with offsets between `start_offset` and `end_offset` by `amount`
pub fn shift_entries(&mut self, start_offset: u32, end_offset: u32, amount: i32) {
log::debug!(
"Shifting location entries starting from {} by {} until {}",
start_offset,
amount,
end_offset
);
self.inner = self
.inner
.iter()
.map(|e| {
let mut entry = *e;
if e.0 >= start_offset && e.0 <= end_offset {
entry.0 = (entry.0 as i32 + amount) as u32;
}
entry
})
.collect();
}
}
@ -187,3 +453,17 @@ impl Timestamps {
Self { inner: timestamps }
}
}
#[inline]
fn get_chunk_index(x: isize, z: isize) -> usize {
let mut x = x % 32;
let mut z = z % 32;
if x < 0 {
x += 32;
}
if z < 0 {
z += 32;
}
x as usize + z as usize * 32
}

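Two small pieces of arithmetic carry most of the logic above: each 4-byte location entry packs a 3-byte big-endian block offset plus a 1-byte sector count (hence the zero-padding before read_u32), and get_chunk_index wraps negative chunk coordinates back into the 32x32 grid. A short sketch of both, with made-up sample values:

use byteorder::{BigEndian, ByteOrder};

/// Decodes one location entry: 3 bytes of big-endian block offset,
/// then 1 byte with the number of 4 KiB sectors the chunk occupies.
fn decode_location(entry: &[u8; 4]) -> (u32, u8) {
    // pad the 3-byte offset with a leading zero so it can be read as a u32
    let offset_raw = [0u8, entry[0], entry[1], entry[2]];
    (BigEndian::read_u32(&offset_raw), entry[3])
}

/// Same wrap-around index math as `get_chunk_index` above.
fn chunk_index(x: isize, z: isize) -> usize {
    let (mut x, mut z) = (x % 32, z % 32);
    if x < 0 { x += 32; }
    if z < 0 { z += 32; }
    x as usize + z as usize * 32
}

fn main() {
    // offset 0x000102 = 258 blocks into the file, 2 sectors long
    assert_eq!(decode_location(&[0x00, 0x01, 0x02, 0x02]), (258, 2));

    // chunk (-1, -1) maps to the last slot of the 32x32 table
    assert_eq!(chunk_index(-1, -1), 1023);
    assert_eq!(chunk_index(0, 0), 0);
}
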
src/scan.rs
@ -10,6 +10,9 @@ pub struct ScanStatistics {
pub corrupted_nbt: u64,
pub failed_to_read: u64,
pub corrupted_compression: u64,
pub invalid_chunk_pointer: u64,
pub shrunk_size: u64,
pub unused_space: u64,
}
impl ScanStatistics {
@ -21,7 +24,10 @@ impl ScanStatistics {
missing_nbt: 0,
corrupted_nbt: 0,
corrupted_compression: 0,
invalid_chunk_pointer: 0,
failed_to_read: 0,
shrunk_size: 0,
unused_space: 0,
}
}
}
@ -36,7 +42,9 @@ impl Add for ScanStatistics {
self.failed_to_read += rhs.failed_to_read;
self.missing_nbt += rhs.missing_nbt;
self.corrupted_compression += rhs.corrupted_compression;
self.invalid_chunk_pointer += rhs.invalid_chunk_pointer;
self.corrupted_nbt += rhs.corrupted_nbt;
self.unused_space += rhs.unused_space;
self
}
@ -49,18 +57,49 @@ impl Display for ScanStatistics {
"
Total Chunks: {}
Failed to Read: {}
Invalid chunk pointers: {}
Chunks with invalid length: {}
Chunks with invalid compression method: {}
Chunks with missing nbt data: {}
Chunks with corrupted nbt data: {}
Chunks with corrupted compressed data {}",
Chunks with corrupted compressed data: {}
Unused space: {} KiB",
self.total_chunks,
self.failed_to_read,
self.invalid_chunk_pointer,
self.invalid_length,
self.invalid_compression_method,
self.missing_nbt,
self.corrupted_nbt,
self.corrupted_compression
self.corrupted_compression,
self.unused_space / 1024,
)
}
}
#[derive(Clone, Debug)]
pub struct ScanOptions {
pub fix: bool,
pub fix_delete: bool,
}
impl ScanOptions {
pub fn new() -> Self {
ScanOptions {
fix: false,
fix_delete: false,
}
}
pub fn fix(mut self, fix: bool) -> Self {
self.fix = fix;
self
}
pub fn fix_delete(mut self, fix_delete: bool) -> Self {
self.fix_delete = fix_delete;
self
}
}

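ScanStatistics implements Add so the per-file results can be folded into a single report, which is also what the parallel reduction over region files relies on. A cut-down sketch of that pattern; the Stats struct here is a stand-in carrying only two of the counters.

use std::ops::Add;

// Cut-down stand-in for the crate's ScanStatistics.
#[derive(Default, Clone, Copy)]
struct Stats {
    total_chunks: u64,
    invalid_length: u64,
}

impl Add for Stats {
    type Output = Self;

    fn add(mut self, rhs: Self) -> Self {
        self.total_chunks += rhs.total_chunks;
        self.invalid_length += rhs.invalid_length;
        self
    }
}

fn main() {
    // One Stats value per region file, folded into one summary.
    let per_file = vec![
        Stats { total_chunks: 1024, invalid_length: 3 },
        Stats { total_chunks: 980, invalid_length: 0 },
    ];
    let summary = per_file.into_iter().fold(Stats::default(), Add::add);
    assert_eq!(summary.total_chunks, 2004);
    assert_eq!(summary.invalid_length, 3);
}
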
src/utils.rs
@ -0,0 +1,46 @@
use flate2::read::ZlibEncoder;
use flate2::Compression;
use std::io::{Read, Result};
#[derive(Clone, Debug)]
pub struct ByteArrayCache {
inner: Vec<u8>,
position: usize,
}
impl ByteArrayCache {
/// Creates a new byte array cache
pub fn new() -> Self {
Self {
inner: Vec::new(),
position: 0,
}
}
/// Creates a new byte array cache with capacity
pub fn with_capacity(capacity: usize) -> Self {
Self {
inner: Vec::with_capacity(capacity),
position: 0,
}
}
/// Writes the data from the reader into the inner buffer, compressing it with zlib
pub fn write<R: Read>(&mut self, reader: R) -> Result<()> {
let mut encoder = ZlibEncoder::new(reader, Compression::default());
let mut buffer = Vec::new();
encoder.read_to_end(&mut buffer)?;
self.inner.append(&mut buffer);
Ok(())
}
}
impl Read for ByteArrayCache {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
let read = (&self.inner[self.position..]).read(buf)?;
self.position += read;
Ok(read)
}
}

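ByteArrayCache keeps the byte-array payloads zlib-compressed in memory and only hands the compressed bytes back out through its Read impl, so recovering the original data means wrapping the cache in a ZlibDecoder again. A small round-trip sketch, assuming the type is reachable as minecraft_regions_tool::utils::ByteArrayCache (the diff only shows it as crate::utils):

use std::io::{Read, Result};

use flate2::read::ZlibDecoder;
// Assumed path; the diff only shows the module as `crate::utils`.
use minecraft_regions_tool::utils::ByteArrayCache;

fn main() -> Result<()> {
    let payload = b"a long-ish byte array that would otherwise sit uncompressed in memory";

    // `write` pulls from any reader and stores the zlib-compressed bytes.
    let mut cache = ByteArrayCache::new();
    cache.write(&payload[..])?;

    // Reading the cache yields compressed bytes, so decode through ZlibDecoder.
    let mut restored = Vec::new();
    ZlibDecoder::new(cache).read_to_end(&mut restored)?;
    assert_eq!(restored, &payload[..]);
    Ok(())
}
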
src/world_folder.rs
@ -1,12 +1,15 @@
use crate::region_file::RegionFile;
use crate::scan::ScanOptions;
use crate::scan::ScanStatistics;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use log::LevelFilter;
use rayon::prelude::*;
use std::fs;
use std::fs::OpenOptions;
use std::io;
use std::ops::Add;
use std::path::PathBuf;
use std::sync::Arc;
pub struct WorldFolder {
path: PathBuf,
@ -29,9 +32,11 @@ impl WorldFolder {
Ok(count)
}
pub fn scan_files(&self, fix: bool) -> io::Result<ScanStatistics> {
/// Scans all region files for potential errors
pub fn scan_files(&self, options: ScanOptions) -> io::Result<ScanStatistics> {
let paths = self.region_file_paths();
let bar = ProgressBar::new(paths.len() as u64);
let options = Arc::new(options);
bar.set_style(
ProgressStyle::default_bar().template("\r[{eta_precise}] {wide_bar} {pos}/{len} "),
);
@ -46,13 +51,22 @@ impl WorldFolder {
log::debug!("Opening and scanning region file {:?}", path);
let mut region_file = RegionFile::new(path)
.map_err(|e| {
log::error!("Failed to open region file {:?}: {}", path, e);
log::error!("Failed to open region file {:?}: {}\n", path, e);
if options.fix_delete {
if let Err(e) = fs::remove_file(path) {
return e;
}
}
e
})
.ok()?;
let result = region_file.scan_chunks(fix).ok()?;
let result = region_file.scan_chunks(&options).ok()?;
if options.fix && result.shrunk_size > 0 {
let f = OpenOptions::new().read(true).write(true).open(path).ok()?;
f.set_len(result.shrunk_size).ok()?;
}
bar.inc(1);
log::debug!("Statistics for {:?}:\n{}", path, result);

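scan_files shares the ScanOptions across worker threads through an Arc and lets rayon scan the region files in parallel, skipping files that fail to open and summing the per-file statistics. A schematic sketch of that shape, with a plain chunk count standing in for ScanStatistics so it stays self-contained; the file names and the count are made up.

use std::sync::Arc;

use rayon::prelude::*; // rayon is already a dependency of the crate

// Stand-in for ScanOptions: shared read-only across rayon workers through an Arc.
struct Options {
    fix: bool,
}

fn main() {
    let options = Arc::new(Options { fix: true });
    let region_files = vec!["r.0.0.mca", "r.0.1.mca", "r.-1.0.mca"]; // made-up names

    // Every file yields a per-file result (a plain chunk count here); files that
    // fail to open would be skipped via filter_map, and the rest are summed.
    let total_chunks: u64 = region_files
        .par_iter()
        .filter_map(|path| {
            let _shared = options.fix; // each worker reads the shared options
            if path.is_empty() {
                return None; // stands in for "failed to open, skip this file"
            }
            Some(1024u64) // stands in for a per-file ScanStatistics
        })
        .sum();

    assert_eq!(total_chunks, 3 * 1024);
}
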