You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
470 lines
16 KiB
Rust
470 lines
16 KiB
Rust
use crate::chunk::{Chunk, ChunkScanError};
|
|
use crate::scan::ScanOptions;
|
|
use crate::scan::ScanStatistics;
|
|
use byteorder::{BigEndian, ByteOrder, WriteBytesExt};
|
|
use std::cmp::Ordering;
|
|
use std::fs::{File, OpenOptions};
|
|
use std::io::{BufReader, BufWriter, Read, Result, Seek, SeekFrom, Write};
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
|
|
/// Size in bytes of one block of a region file. Chunk offsets are multiplied by this value to
/// obtain byte positions, and each of the two header tables is read as exactly one block.
pub const BLOCK_SIZE: usize = 4096;
|
|
|
|
pub struct RegionFile {
|
|
path: PathBuf,
|
|
reader: BufReader<File>,
|
|
writer: BufWriter<File>,
|
|
locations: Locations,
|
|
#[allow(dead_code)]
|
|
timestamps: Timestamps,
|
|
length: u64,
|
|
}
|
|
|
|
impl RegionFile {
|
|
pub fn new(path: &PathBuf) -> Result<Self> {
|
|
let fr = OpenOptions::new().read(true).open(path)?;
|
|
let fw = OpenOptions::new().write(true).open(path)?;
|
|
let file_size = fr.metadata()?.len();
|
|
let mut reader = BufReader::with_capacity(BLOCK_SIZE, fr);
|
|
let writer = BufWriter::with_capacity(2 * BLOCK_SIZE, fw);
|
|
|
|
let mut locations_raw = [0u8; BLOCK_SIZE];
|
|
let mut timestamps_raw = [0u8; BLOCK_SIZE];
|
|
reader.read_exact(&mut locations_raw)?;
|
|
reader.read_exact(&mut timestamps_raw)?;
|
|
|
|
Ok(Self {
|
|
path: path.clone(),
|
|
locations: Locations::from_bytes(&locations_raw),
|
|
timestamps: Timestamps::from_bytes(×tamps_raw),
|
|
reader,
|
|
writer,
|
|
length: file_size,
|
|
})
|
|
}
|
|
|
|
/// Returns the number of chunks in the file
|
|
pub fn count_chunks(&self) -> usize {
|
|
return self.locations.valid_entries_enumerate().len();
|
|
}
|
|
|
|
/// Scans the chunk entries for possible errors
|
|
pub fn scan_chunks(&mut self, options: &Arc<ScanOptions>) -> Result<ScanStatistics> {
|
|
let mut statistic = ScanStatistics::new();
|
|
let mut shift_operations: Vec<(usize, isize)> = Vec::new();
|
|
|
|
let mut entries = self.locations.valid_entries_enumerate();
|
|
entries.sort_by(|(_, (a, _)), (_, (b, _))| {
|
|
if a > b {
|
|
Ordering::Greater
|
|
} else if a < b {
|
|
Ordering::Less
|
|
} else {
|
|
Ordering::Equal
|
|
}
|
|
});
|
|
statistic.total_chunks = entries.len() as u64;
|
|
let mut previous_offset = 2;
|
|
let mut previous_sections = 0;
|
|
|
|
for (index, (offset, sections)) in entries {
|
|
// Calculate and seek to the start of the chunk
|
|
let reader_offset = offset as u64 * BLOCK_SIZE as u64;
|
|
self.reader.seek(SeekFrom::Start(reader_offset))?;
|
|
|
|
let offset_diff = offset as i32 - (previous_offset as i32 + previous_sections as i32);
|
|
// Check if there is wasted space between the chunks
|
|
// since the chunks are iterated ordered by offset the previous chunk is the closest
|
|
if offset_diff > 0 {
|
|
statistic.unused_space += (BLOCK_SIZE * offset_diff as usize) as u64;
|
|
log::debug!(
|
|
"Gap of unused {:.2} KiB detected between {} and {}",
|
|
(BLOCK_SIZE as f32 * offset_diff as f32) / 1024.0,
|
|
previous_offset,
|
|
offset
|
|
);
|
|
if options.fix {
|
|
shift_operations.push((offset as usize, -(offset_diff as isize)));
|
|
}
|
|
}
|
|
// Check if the chunk is longer than the file
|
|
if offset < 2 || self.length < (offset + sections as u32) as u64 * BLOCK_SIZE as u64 {
|
|
statistic.invalid_chunk_pointer += 1;
|
|
log::debug!(
|
|
"Invalid chunk offset and sections at index {}: {} + {}",
|
|
index,
|
|
offset,
|
|
sections
|
|
);
|
|
if options.fix_delete {
|
|
self.delete_chunk(index)?;
|
|
}
|
|
continue;
|
|
}
|
|
match Chunk::from_buf_reader(&mut self.reader) {
|
|
Ok(chunk) => {
|
|
let exists =
|
|
self.scan_chunk(index, offset, sections, chunk, &mut statistic, options)?;
|
|
// If scan_chunk returns false the chunk entry was deleted
|
|
if !exists && options.fix {
|
|
shift_operations
|
|
.push((offset as usize + sections as usize, -(sections as isize)))
|
|
}
|
|
}
|
|
Err(e) => {
|
|
statistic.failed_to_read += 1;
|
|
log::error!(
|
|
"Failed to read chunk at {} in {:?}: {}",
|
|
offset,
|
|
self.path,
|
|
e
|
|
);
|
|
if options.fix_delete {
|
|
self.delete_chunk(index)?;
|
|
shift_operations
|
|
.push((offset as usize + sections as usize, -(sections as isize)));
|
|
}
|
|
}
|
|
}
|
|
|
|
previous_offset = offset;
|
|
previous_sections = sections as u32;
|
|
}
|
|
|
|
if options.fix || options.fix_delete {
|
|
self.perform_shift_operations(shift_operations)?;
|
|
|
|
// The new size of the file is the estimated size based on the highest chunk offset + sections
|
|
statistic.shrunk_size = self.locations.estimated_size();
|
|
self.writer.seek(SeekFrom::Start(0))?;
|
|
self.writer
|
|
.write_all(self.locations.to_bytes().as_slice())?;
|
|
self.writer.flush()?;
|
|
}
|
|
|
|
Ok(statistic)
|
|
}
|
|
|
|
/// Performs shift operations defined in the shift_operations vector
|
|
fn perform_shift_operations(
|
|
&mut self,
|
|
mut shift_operations: Vec<(usize, isize)>,
|
|
) -> Result<()> {
|
|
// sort the shift operations by resulting offset to have them in the right order
|
|
shift_operations.sort_by(|(o1, a1), (o2, a2)| {
|
|
let to_offset1 = *o1 as isize + *a1;
|
|
let to_offset2 = *o2 as isize + *a2;
|
|
if to_offset1 > to_offset1 {
|
|
Ordering::Greater
|
|
} else if to_offset1 < to_offset2 {
|
|
Ordering::Less
|
|
} else {
|
|
Ordering::Equal
|
|
}
|
|
});
|
|
let mut shifted = 0isize;
|
|
|
|
// perform shifting of chunks to close gaps between them
|
|
let mut operations = shift_operations.iter().peekable();
|
|
|
|
while let Some((offset, amount)) = operations.next() {
|
|
shifted += *amount;
|
|
let end_offset = if let Some((o, a)) = operations.peek() {
|
|
(*o as isize + *a) as usize
|
|
} else {
|
|
self.locations.max_offset() as usize
|
|
};
|
|
if *offset > end_offset {
|
|
log::error!("Invalid shift ({} - {}) -> {}", offset, end_offset, shifted);
|
|
break;
|
|
}
|
|
self.shift_right(*offset, end_offset, shifted)?;
|
|
self.locations
|
|
.shift_entries(*offset as u32, end_offset as u32, shifted as i32);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Scans a single chunk for errors
|
|
fn scan_chunk(
|
|
&mut self,
|
|
index: usize,
|
|
offset: u32,
|
|
sections: u8,
|
|
mut chunk: Chunk,
|
|
statistic: &mut ScanStatistics,
|
|
options: &Arc<ScanOptions>,
|
|
) -> Result<bool> {
|
|
let chunk_sections = ((chunk.length + 4) as f64 / BLOCK_SIZE as f64).ceil();
|
|
let reader_offset = offset as u64 * BLOCK_SIZE as u64;
|
|
|
|
// Valid compression types are:
|
|
// 0 - uncompressed
|
|
// 1 - GZIP
|
|
// 2 - ZLIB
|
|
if chunk.compression_type > 3 {
|
|
statistic.invalid_compression_method += 1;
|
|
if options.fix {
|
|
self.writer.seek(SeekFrom::Start(reader_offset + 4))?;
|
|
self.writer.write_u8(1)?;
|
|
}
|
|
} else {
|
|
// seek to the start of the actual chunk data
|
|
self.reader.seek(SeekFrom::Start(reader_offset + 5))?;
|
|
|
|
if let Err(e) = chunk.validate_nbt_data(&mut self.reader) {
|
|
match e {
|
|
ChunkScanError::IO(e) => {
|
|
log::debug!("Compression error at chunk {}: {}", offset, e);
|
|
statistic.corrupted_compression += 1;
|
|
}
|
|
ChunkScanError::NBTError(e) => {
|
|
log::debug!("Corrupted nbt data for chunk {}: {}", offset, e);
|
|
statistic.corrupted_nbt += 1;
|
|
}
|
|
_ => {
|
|
log::debug!("Missing nbt data for chunk {}: {}", offset, e);
|
|
statistic.missing_nbt += 1;
|
|
}
|
|
}
|
|
if options.fix_delete {
|
|
self.delete_chunk(index)?;
|
|
return Ok(false);
|
|
}
|
|
} else {
|
|
// validate that the chunk is the one the index should be pointing at
|
|
if let Some(x) = chunk.x_pos {
|
|
if let Some(z) = chunk.z_pos {
|
|
if get_chunk_index(x as isize, z as isize) != index {
|
|
statistic.invalid_chunk_pointer += 1;
|
|
log::debug!("Pointer {} pointing to wrong chunk ({},{})", index, x, z);
|
|
|
|
if options.fix_delete {
|
|
// Delete the entry of the chunk from the locations table
|
|
self.delete_chunk(index)?;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if sections != chunk_sections as u8 || chunk.length >= 1_048_576 {
|
|
statistic.invalid_length += 1;
|
|
if options.fix {
|
|
self.locations
|
|
.replace_entry_unchecked(index, (offset, chunk_sections as u8));
|
|
}
|
|
}
|
|
|
|
Ok(true)
|
|
}
|
|
|
|
/// Deletes a chunk and shifts all other chunks
|
|
pub fn delete_chunk(&mut self, index: usize) -> Result<()> {
|
|
log::debug!(
|
|
"Deleting chunk at {}",
|
|
self.locations.get_chunk_entry_unchecked(index).0
|
|
);
|
|
self.locations.delete_chunk_entry_unchecked(index);
|
|
Ok(())
|
|
}
|
|
|
|
/// Shifts the file from the `offset` position `amount` blocks to the right
|
|
pub fn shift_right(
|
|
&mut self,
|
|
start_offset: usize,
|
|
end_offset: usize,
|
|
amount: isize,
|
|
) -> Result<()> {
|
|
log::debug!(
|
|
"Shifting chunk blocks starting from {} by {} until {}",
|
|
start_offset,
|
|
amount,
|
|
end_offset,
|
|
);
|
|
// seek to the start of the data to be shifted
|
|
self.reader
|
|
.seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
|
|
// seek to the start of the data to be shifted
|
|
self.writer
|
|
.seek(SeekFrom::Start((start_offset * BLOCK_SIZE) as u64))?;
|
|
// seek the amount the data should be shifted
|
|
self.writer
|
|
.seek(SeekFrom::Current(amount as i64 * BLOCK_SIZE as i64))?;
|
|
|
|
for _ in 0..(end_offset - start_offset) {
|
|
// since the offset is based on the fixed BLOCK_SIZE we can use that as our buffer size
|
|
let mut buf = [0u8; BLOCK_SIZE];
|
|
let read = self.reader.read(&mut buf)?;
|
|
self.writer.write(&buf)?;
|
|
|
|
if read < BLOCK_SIZE {
|
|
break;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Closes the region file by flushing the writer
|
|
pub fn close(&mut self) -> Result<()> {
|
|
self.writer.flush()
|
|
}
|
|
}
|
|
|
|
/// The chunk location table of a region file.
///
/// Each entry is an `(offset, sections)` pair; `offset` is multiplied by `BLOCK_SIZE` to get
/// the byte position of a chunk, and entries with an offset below 2 are not treated as valid.
#[derive(Debug)]
pub struct Locations {
    /// Entries in table order; the position of an entry is the chunk index.
    inner: Vec<(u32, u8)>,
}
|
|
|
|
impl Locations {
|
|
pub fn from_bytes(bytes: &[u8; BLOCK_SIZE]) -> Self {
|
|
let mut locations = Vec::new();
|
|
|
|
for i in (0..BLOCK_SIZE - 1).step_by(4) {
|
|
// construct a 4-byte number from 3 bytes
|
|
let offset_raw = [0u8, bytes[i], bytes[i + 1], bytes[i + 2]];
|
|
let offset = BigEndian::read_u32(&offset_raw);
|
|
let count = bytes[i + 3];
|
|
locations.push((offset, count));
|
|
}
|
|
|
|
Self { inner: locations }
|
|
}
|
|
|
|
/// Returns the byte representation of the locations table
|
|
pub fn to_bytes(&self) -> Vec<u8> {
|
|
let mut bytes = Vec::new();
|
|
|
|
for (offset, sections) in &self.inner {
|
|
let mut offset_raw = [0u8; 4];
|
|
BigEndian::write_u32(&mut offset_raw, *offset);
|
|
bytes.append(&mut offset_raw[1..4].to_vec());
|
|
bytes.push(*sections);
|
|
}
|
|
|
|
bytes
|
|
}
|
|
|
|
/// Returns the offset of a chunk
|
|
pub fn get_chunk_offset(&self, x: isize, z: isize) -> Option<u32> {
|
|
self.inner.get(get_chunk_index(x, z)).map(|e| (*e).0)
|
|
}
|
|
|
|
/// Returns the number of sectors for a chunk
|
|
pub fn get_chunk_sectors(&self, x: isize, z: isize) -> Option<u8> {
|
|
self.inner.get(get_chunk_index(x, z)).map(|e| (*e).1)
|
|
}
|
|
|
|
/// Returns chunk entry list
|
|
pub fn valid_entries_enumerate(&self) -> Vec<(usize, (u32, u8))> {
|
|
self.inner
|
|
.iter()
|
|
.enumerate()
|
|
.filter_map(|e| {
|
|
if (*e.1).0 >= 2 {
|
|
Some((e.0, *e.1))
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
/// The maximum offset in the file
|
|
pub fn max_offset(&self) -> u32 {
|
|
let largest = self
|
|
.inner
|
|
.iter()
|
|
.max_by(|(a, _), (b, _)| {
|
|
if a > b {
|
|
Ordering::Greater
|
|
} else if a < b {
|
|
Ordering::Less
|
|
} else {
|
|
Ordering::Equal
|
|
}
|
|
})
|
|
.cloned()
|
|
.unwrap_or((2, 0));
|
|
|
|
largest.0 + largest.1 as u32
|
|
}
|
|
|
|
/// Returns the estimated of all chunks combined including the header
|
|
pub fn estimated_size(&self) -> u64 {
|
|
self.max_offset() as u64 * BLOCK_SIZE as u64
|
|
}
|
|
|
|
/// Replaces an entry with a new one. Panics if the index doesn't exist
|
|
pub fn replace_entry_unchecked(&mut self, index: usize, entry: (u32, u8)) {
|
|
self.inner[index] = entry;
|
|
}
|
|
|
|
/// Returns a chunk entry for an index. Panics if it doesn't exist.
|
|
pub fn get_chunk_entry_unchecked(&self, index: usize) -> (u32, u8) {
|
|
self.inner[index]
|
|
}
|
|
|
|
/// Sets a chunk entry to not generated. Panics if the index doesn't exists
|
|
pub fn delete_chunk_entry_unchecked(&mut self, index: usize) {
|
|
self.inner[index] = (0, 0);
|
|
}
|
|
|
|
/// Shifts all entries starting from `start_index` by `amount`
|
|
pub fn shift_entries(&mut self, start_offset: u32, end_offset: u32, amount: i32) {
|
|
log::debug!(
|
|
"Shifting location entries starting from {} by {} until {}",
|
|
start_offset,
|
|
amount,
|
|
end_offset
|
|
);
|
|
self.inner = self
|
|
.inner
|
|
.iter()
|
|
.map(|e| {
|
|
let mut entry = *e;
|
|
|
|
if e.0 >= start_offset && e.0 <= end_offset {
|
|
entry.0 = (entry.0 as i32 + amount) as u32;
|
|
}
|
|
|
|
entry
|
|
})
|
|
.collect();
|
|
}
|
|
}
|
|
|
|
/// The timestamp table of a region file, holding one 32-bit value per table entry.
#[derive(Debug)]
pub struct Timestamps {
    /// Values in table order, decoded as big-endian integers.
    inner: Vec<u32>,
}
|
|
|
|
impl Timestamps {
|
|
pub fn from_bytes(bytes: &[u8; BLOCK_SIZE]) -> Self {
|
|
let mut timestamps = Vec::new();
|
|
|
|
for i in (0..BLOCK_SIZE - 1).step_by(4) {
|
|
timestamps.push(BigEndian::read_u32(&bytes[i..i + 4]))
|
|
}
|
|
|
|
Self { inner: timestamps }
|
|
}
|
|
}
|
|
|
|
/// Maps chunk coordinates to the index of the corresponding entry in the header tables.
///
/// Both coordinates are wrapped into the range `0..32` using the Euclidean remainder, so
/// negative coordinates wrap around instead of producing a negative value. The index is then
/// `x + z * 32`.
#[inline]
fn get_chunk_index(x: isize, z: isize) -> usize {
    let local_x = x.rem_euclid(32) as usize;
    let local_z = z.rem_euclid(32) as usize;

    local_x + local_z * 32
}
|