minecraft-regions-tool/src/region_file.rs

use crate::chunk::{Chunk, ChunkScanError};
use crate::scan::{ScanOptions, ScanStatistics};
use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
use std::cmp::Ordering;
use std::fs::{File, OpenOptions};
use std::io::{BufReader, BufWriter, Read, Result, Seek, SeekFrom, Write};
use std::path::PathBuf;
use std::sync::Arc;
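
/// Region files are organized in fixed 4 KiB sectors: the first sector holds
/// the chunk location table, the second the chunk timestamps, and the
/// remaining sectors hold the chunk data.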
pub const BLOCK_SIZE: usize = 4096;

pub struct RegionFile {
    path: PathBuf,
    reader: BufReader<File>,
    writer: BufWriter<File>,
    locations: Locations,
    #[allow(dead_code)]
    timestamps: Timestamps,
    length: u64,
}

impl RegionFile {
    pub fn new(path: &PathBuf) -> Result<Self> {
        let fr = OpenOptions::new().read(true).open(path)?;
        let fw = OpenOptions::new().write(true).open(path)?;
        let file_size = fr.metadata()?.len();
        let mut reader = BufReader::with_capacity(BLOCK_SIZE, fr);
        let writer = BufWriter::with_capacity(2 * BLOCK_SIZE, fw);

        // the first two sectors are the location and timestamp tables
        let mut locations_raw = [0u8; BLOCK_SIZE];
        let mut timestamps_raw = [0u8; BLOCK_SIZE];
        reader.read_exact(&mut locations_raw)?;
        reader.read_exact(&mut timestamps_raw)?;

        Ok(Self {
            path: path.clone(),
            locations: Locations::from_bytes(&locations_raw),
            timestamps: Timestamps::from_bytes(&timestamps_raw),
            reader,
            writer,
            length: file_size,
        })
    }
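
    // A minimal usage sketch (hypothetical file name; error handling elided):
    //
    //     let mut region = RegionFile::new(&PathBuf::from("r.0.0.mca"))?;
    //     println!("chunks: {}", region.count_chunks());
    //     region.close()?;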

    /// Returns the number of chunks in the file
    pub fn count_chunks(&self) -> usize {
        self.locations.valid_entries_enumerate().len()
    }

    /// Scans the chunk entries for possible errors
    pub fn scan_chunks(&mut self, options: &Arc<ScanOptions>) -> Result<ScanStatistics> {
        let mut statistic = ScanStatistics::new();
        let mut shift_operations: Vec<(usize, isize)> = Vec::new();
        let mut entries = self.locations.valid_entries_enumerate();
        entries.sort_by(|(_, (a, _)), (_, (b, _))| a.cmp(b));
        statistic.total_chunks = entries.len() as u64;

        let mut previous_offset = 2;
        let mut previous_sections = 0;

        for (index, (offset, sections)) in entries {
            // Calculate and seek to the start of the chunk
            let reader_offset = offset as u64 * BLOCK_SIZE as u64;
            self.reader.seek(SeekFrom::Start(reader_offset))?;
            let offset_diff = offset as i32 - (previous_offset as i32 + previous_sections as i32);

            // Check if there is wasted space between the chunks; since the
            // entries are iterated in offset order, the previous chunk is the
            // closest one
            if offset_diff > 0 {
                statistic.unused_space += (BLOCK_SIZE * offset_diff as usize) as u64;
                log::debug!(
                    "Gap of unused {:.2} KiB detected between {} and {}",
                    (BLOCK_SIZE as f32 * offset_diff as f32) / 1024.0,
                    previous_offset,
                    offset
                );
                if options.fix {
                    shift_operations.push((offset as usize, -(offset_diff as isize)));
                }
            }
            // Check if the entry points into the header or past the end of the file
            if offset < 2 || self.length < (offset + sections as u32) as u64 * BLOCK_SIZE as u64 {
                statistic.invalid_chunk_pointer += 1;
                log::debug!(
                    "Invalid chunk offset and sections at index {}: {} + {}",
                    index,
                    offset,
                    sections
                );
                if options.fix_delete {
                    self.delete_chunk(index)?;
                }
                continue;
            }
            match Chunk::from_buf_reader(&mut self.reader) {
                Ok(chunk) => {
                    let exists =
                        self.scan_chunk(index, offset, sections, chunk, &mut statistic, options)?;
                    // If scan_chunk returns false the chunk entry was deleted
                    if !exists && options.fix {
                        shift_operations
                            .push((offset as usize + sections as usize, -(sections as isize)))
                    }
                }
                Err(e) => {
                    statistic.failed_to_read += 1;
                    log::error!(
                        "Failed to read chunk at {} in {:?}: {}",
                        offset,
                        self.path,
                        e
                    );
                    if options.fix_delete {
                        self.delete_chunk(index)?;
                        shift_operations
                            .push((offset as usize + sections as usize, -(sections as isize)));
                    }
                }
            }
            previous_offset = offset;
            previous_sections = sections as u32;
        }
        if options.fix || options.fix_delete {
            self.perform_shift_operations(shift_operations)?;
            // The new size of the file is the estimated size based on the
            // highest chunk offset plus its sections
            statistic.shrunk_size = self.locations.estimated_size();
            self.writer.seek(SeekFrom::Start(0))?;
            self.writer
                .write_all(self.locations.to_bytes().as_slice())?;
            self.writer.flush()?;
        }

        Ok(statistic)
    }
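
    // Sketch of a scan-and-fix pass (the ScanOptions construction is
    // hypothetical; its fields are inferred from their use above):
    //
    //     let options = Arc::new(ScanOptions { fix: true, fix_delete: false });
    //     let stats = region.scan_chunks(&options)?;
    //     region.close()?;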

    /// Performs shift operations defined in the shift_operations vector
    fn perform_shift_operations(
        &mut self,
        mut shift_operations: Vec<(usize, isize)>,
    ) -> Result<()> {
        // sort the shift operations by resulting offset to have them in the right order
        shift_operations.sort_by(|(o1, a1), (o2, a2)| {
            let to_offset1 = *o1 as isize + *a1;
            let to_offset2 = *o2 as isize + *a2;
            to_offset1.cmp(&to_offset2)
        });

        let mut shifted = 0isize;
        // perform shifting of chunks to close gaps between them
        let mut operations = shift_operations.iter().peekable();

        while let Some((offset, amount)) = operations.next() {
            shifted += *amount;
            // shift up to the start of the next operation's region, or up to
            // the end of the used data for the last operation
            let end_offset = if let Some((o, a)) = operations.peek() {
                (*o as isize + *a) as usize
            } else {
                self.locations.max_offset() as usize
            };
            if *offset > end_offset {
                log::error!("Invalid shift ({} - {}) -> {}", offset, end_offset, shifted);
                break;
            }
            self.shift_right(*offset, end_offset, shifted)?;
            self.locations
                .shift_entries(*offset as u32, end_offset as u32, shifted as i32);
        }

        Ok(())
    }

    /// Scans a single chunk for errors
    fn scan_chunk(
        &mut self,
        index: usize,
        offset: u32,
        sections: u8,
        mut chunk: Chunk,
        statistic: &mut ScanStatistics,
        options: &Arc<ScanOptions>,
    ) -> Result<bool> {
        let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
        let reader_offset = offset as u64 * BLOCK_SIZE as u64;

        // Valid compression types are:
        // 1 - GZip
        // 2 - Zlib
        // 3 - uncompressed
        if chunk.compression_type > 3 {
            statistic.invalid_compression_method += 1;
            if options.fix {
                self.writer.seek(SeekFrom::Start(reader_offset + 4))?;
                self.writer.write_u8(1)?;
            }
        } else {
            // seek to the start of the actual chunk data
            self.reader.seek(SeekFrom::Start(reader_offset + 5))?;

            if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
                match e {
                    ChunkScanError::IO(e) => {
                        log::debug!("Compression error at chunk {}: {}", offset, e);
                        statistic.corrupted_compression += 1;
                    }
                    ChunkScanError::NBTError(e) => {
                        log::debug!("Corrupted nbt data for chunk {}: {}", offset, e);
                        statistic.corrupted_nbt += 1;
                    }
                    _ => {
                        log::debug!("Missing nbt data for chunk {}: {}", offset, e);
                        statistic.missing_nbt += 1;
                    }
                }
                if options.fix_delete {
                    self.delete_chunk(index)?;
                    return Ok(false);
                }
            } else {
                // validate that the chunk is the one the index should be pointing at
                if let Some(x) = chunk.x_pos {
                    if let Some(z) = chunk.z_pos {
                        if get_chunk_index(x as isize, z as isize) != index {
                            statistic.invalid_chunk_pointer += 1;
                            log::debug!("Pointer {} pointing to wrong chunk ({},{})", index, x, z);
                            if options.fix_delete {
                                // Delete the entry of the chunk from the locations table
                                self.delete_chunk(index)?;
                            }
                        }
                    }
                }
            }
        }
        if sections != chunk_sections as u8 || chunk.length >= 1_048_576 {
            statistic.invalid_length += 1;
            if options.fix {
                self.locations
                    .replace_entry_unchecked(index, (offset, chunk_sections as u8));
            }
        }

        Ok(true)
    }

    /// Deletes a chunk entry from the locations table; the freed space is
    /// reclaimed by the shift operations queued during the scan
    pub fn delete_chunk(&mut self, index: usize) -> Result<()> {
        log::debug!(
            "Deleting chunk at {}",
            self.locations.get_chunk_entry_unchecked(index).0
        );
        self.locations.delete_chunk_entry_unchecked(index);

        Ok(())
    }

    /// Shifts the file contents between `start_offset` and `end_offset` by
    /// `amount` blocks (a negative `amount` shifts to the left)
    pub fn shift_right(
        &mut self,
        start_offset: usize,
        end_offset: usize,
        amount: isize,
    ) -> Result<()> {
        log::debug!(
            "Shifting chunk blocks starting from {} by {} until {}",
            start_offset,
            amount,
            end_offset,
        );
        // seek to the start of the data to be shifted
        self.reader
            .seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
        // seek the writer to the same position, then apply the shift amount
        self.writer
            .seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
        self.writer
            .seek(SeekFrom::Current(amount as i64 * BLOCK_SIZE as i64))?;

        for _ in 0..(end_offset - start_offset) {
            // since the offset is based on the fixed BLOCK_SIZE we can use
            // that as our buffer size; when shifting left the reader always
            // stays ahead of the writer, so copying block by block is safe
            let mut buf = [0u8; BLOCK_SIZE];
            let read = self.reader.read(&mut buf)?;
            self.writer.write_all(&buf[..read])?;
            if read < BLOCK_SIZE {
                break;
            }
        }

        Ok(())
    }

    /// Closes the region file by flushing the writer
    pub fn close(&mut self) -> Result<()> {
        self.writer.flush()
    }
}
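
/// The chunk location table from the first header sector: one
/// `(offset, sector count)` pair for each of the 1024 chunks of the region.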
#[derive(Debug)]
pub struct Locations {
    inner: Vec<(u32, u8)>,
}

impl Locations {
    pub fn from_bytes(bytes: &[u8; BLOCK_SIZE]) -> Self {
        let mut locations = Vec::new();

        for i in (0..BLOCK_SIZE).step_by(4) {
            // construct a 4-byte big-endian number from the 3 offset bytes
            let offset_raw = [0u8, bytes[i], bytes[i + 1], bytes[i + 2]];
            let offset = BigEndian::read_u32(&offset_raw);
            let count = bytes[i + 3];
            locations.push((offset, count));
        }

        Self { inner: locations }
    }

    /// Returns the byte representation of the locations table
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut bytes = Vec::new();

        for (offset, sections) in &self.inner {
            let mut offset_raw = [0u8; 4];
            BigEndian::write_u32(&mut offset_raw, *offset);
            // only the lower 3 bytes of the offset are stored
            bytes.extend_from_slice(&offset_raw[1..4]);
            bytes.push(*sections);
        }

        bytes
    }

    /// Returns the offset of a chunk
    pub fn get_chunk_offset(&self, x: isize, z: isize) -> Option<u32> {
        self.inner.get(get_chunk_index(x, z)).map(|e| e.0)
    }

    /// Returns the number of sectors for a chunk
    pub fn get_chunk_sectors(&self, x: isize, z: isize) -> Option<u8> {
        self.inner.get(get_chunk_index(x, z)).map(|e| e.1)
    }

    /// Returns the list of valid chunk entries with their indices; entries
    /// with an offset below 2 point into the header and mark chunks that
    /// were never generated
    pub fn valid_entries_enumerate(&self) -> Vec<(usize, (u32, u8))> {
        self.inner
            .iter()
            .enumerate()
            .filter_map(|(i, e)| if e.0 >= 2 { Some((i, *e)) } else { None })
            .collect()
    }

    /// The maximum offset in the file
    pub fn max_offset(&self) -> u32 {
        let largest = self
            .inner
            .iter()
            .max_by(|(a, _), (b, _)| a.cmp(b))
            .cloned()
            .unwrap_or((2, 0));

        largest.0 + largest.1 as u32
    }

    /// Returns the estimated size of all chunks combined, including the header
    pub fn estimated_size(&self) -> u64 {
        self.max_offset() as u64 * BLOCK_SIZE as u64
    }

    /// Replaces an entry with a new one. Panics if the index doesn't exist
    pub fn replace_entry_unchecked(&mut self, index: usize, entry: (u32, u8)) {
        self.inner[index] = entry;
    }

    /// Returns a chunk entry for an index. Panics if it doesn't exist
    pub fn get_chunk_entry_unchecked(&self, index: usize) -> (u32, u8) {
        self.inner[index]
    }

    /// Sets a chunk entry to not generated. Panics if the index doesn't exist
    pub fn delete_chunk_entry_unchecked(&mut self, index: usize) {
        self.inner[index] = (0, 0);
    }

    /// Shifts all entries with offsets between `start_offset` and
    /// `end_offset` by `amount`
    pub fn shift_entries(&mut self, start_offset: u32, end_offset: u32, amount: i32) {
        log::debug!(
            "Shifting location entries starting from {} by {} until {}",
            start_offset,
            amount,
            end_offset
        );
        for entry in self.inner.iter_mut() {
            if entry.0 >= start_offset && entry.0 <= end_offset {
                entry.0 = (entry.0 as i32 + amount) as u32;
            }
        }
    }
}
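
/// The chunk timestamp table from the second header sector: one
/// last-modification time (as big-endian epoch seconds) per chunk.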
#[derive(Debug)]
pub struct Timestamps {
    inner: Vec<u32>,
}

impl Timestamps {
    pub fn from_bytes(bytes: &[u8; BLOCK_SIZE]) -> Self {
        let mut timestamps = Vec::new();

        for i in (0..BLOCK_SIZE).step_by(4) {
            timestamps.push(BigEndian::read_u32(&bytes[i..i + 4]))
        }

        Self { inner: timestamps }
    }
}

#[inline]
fn get_chunk_index(x: isize, z: isize) -> usize {
    // wrap the chunk coordinates into the region's 32x32 chunk grid
    let mut x = x % 32;
    let mut z = z % 32;
    if x < 0 {
        x += 32;
    }
    if z < 0 {
        z += 32;
    }

    x as usize + z as usize * 32
}
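
// A small sanity check for the index math above; this test module is a
// hypothetical addition, not part of the original file.
#[cfg(test)]
mod tests {
    use super::get_chunk_index;

    #[test]
    fn chunk_index_wraps_negative_coordinates() {
        // the table is 32x32, row-major in z
        assert_eq!(get_chunk_index(0, 0), 0);
        assert_eq!(get_chunk_index(31, 0), 31);
        assert_eq!(get_chunk_index(0, 1), 32);
        // coordinates outside 0..32 wrap back into the region grid
        assert_eq!(get_chunk_index(33, 0), 1);
        assert_eq!(get_chunk_index(-1, -1), 1023);
    }
}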