From 2c77342398b5a951875fc01fad5011a7f2ff8330 Mon Sep 17 00:00:00 2001 From: trivernis Date: Mon, 2 May 2022 22:25:37 +0200 Subject: [PATCH] Rename executable to uarch and add in-memory tar + xz unpacking Signed-off-by: trivernis --- Cargo.lock | 44 +-------------------- Cargo.toml | 3 +- README.md | 6 +-- src/bin/{ua.rs => uarch.rs} | 0 src/format/tar.rs | 78 ++++++++++++------------------------- src/lib.rs | 1 + src/utils/channel_sink.rs | 59 ++++++++++++++++++++++++++++ src/utils/mod.rs | 2 + src/utils/xz_decoder.rs | 52 +++++++++++++++++++++++++ 9 files changed, 144 insertions(+), 101 deletions(-) rename src/bin/{ua.rs => uarch.rs} (100%) create mode 100644 src/utils/channel_sink.rs create mode 100644 src/utils/mod.rs create mode 100644 src/utils/xz_decoder.rs diff --git a/Cargo.lock b/Cargo.lock index 25550e2..775a405 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -300,15 +300,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "fastrand" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" -dependencies = [ - "instant", -] - [[package]] name = "filetime" version = "0.2.16" @@ -395,15 +386,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - [[package]] name = "itoa" version = "1.0.1" @@ -652,15 +634,6 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - [[package]] name = "rle-decode-fast" version = "1.0.3" @@ -744,20 +717,6 @@ dependencies = [ "xattr", ] -[[package]] -name = "tempfile" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" -dependencies = [ - "cfg-if", - "fastrand", - "libc", - "redox_syscall", - "remove_dir_all", - "winapi", -] - [[package]] name = "termcolor" version = "1.1.3" @@ -886,7 +845,7 @@ checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" [[package]] name = "universal-archiver" -version = "0.1.1" +version = "0.2.0" dependencies = [ "anyhow", "clap", @@ -894,7 +853,6 @@ dependencies = [ "libflate", "lzma-rs", "tar", - "tempfile", "tracing", "tracing-subscriber", "zip", diff --git a/Cargo.toml b/Cargo.toml index cc25d0d..694881b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "universal-archiver" description = "A tool to unpack archives without having to specify the archive type." repository = "https://github.com/Trivernis/universal-archiver.git" authors = ["Trivernis "] -version = "0.1.1" +version = "0.2.0" edition = "2021" keywords = ["compression", "extract-archive", "archiver", "cli"] license = "MIT" @@ -19,7 +19,6 @@ zip = "0.6.2" lzma-rs = "0.2.0" libflate = "1.2.0" tar = "0.4.38" -tempfile = "3.3.0" tracing-subscriber = {version = "0.3.11", features = ["env-filter"]} [dependencies.clap] diff --git a/README.md b/README.md index 22c5bf0..48250aa 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,14 @@ cargo install universal-archiver ```sh USAGE: - ua + uarch OPTIONS: -h, --help Print help information -V, --version Print version information SUBCOMMANDS: - extract Extracts a given file + x, extract Extracts a given file help Print this message or the help of the given subcommand(s) ``` @@ -37,7 +37,7 @@ SUBCOMMANDS: Extracts a given file USAGE: - ua extract [OUTPUT] + uarch extract [OUTPUT] ARGS: The file to extract diff --git a/src/bin/ua.rs b/src/bin/uarch.rs similarity index 100% rename from src/bin/ua.rs rename to src/bin/uarch.rs diff --git a/src/format/tar.rs b/src/format/tar.rs index a397184..c7c6834 100644 --- a/src/format/tar.rs +++ b/src/format/tar.rs @@ -1,20 +1,20 @@ use crate::format::gzip::GZipFormat; use crate::format::xz::XZFormat; -use crate::format::{get_file_header, FileFormat, FileObject}; +use crate::format::{FileFormat, FileObject}; +use crate::utils::xz_decoder::XzDecoder; use anyhow::{bail, Context}; +use libflate::gzip; use std::fs::File; -use std::io::BufReader; -use std::path::{Path, PathBuf}; -use std::time::{SystemTime, UNIX_EPOCH}; +use std::io::{BufReader, BufWriter, Read, Write}; +use std::path::Path; use std::{fs, io}; use tar::{Archive, EntryType}; -use tempfile::{tempdir, TempDir}; const TAR_HEADER: &[u8] = &[0x75, 0x73, 0x74, 0x61, 0x72]; pub enum TarFormat { - Xz(XZFormat), - Gz(GZipFormat), + Xz, + Gz, Uncompressed, } @@ -25,14 +25,14 @@ impl FileFormat for TarFormat { Ok(Self::Uncompressed) } else if file.ext.contains(".tar.") { - if let Ok(xz) = XZFormat::parse(file) { + if let Ok(_xz) = XZFormat::parse(file) { tracing::info!("Detected tar file compressed with xz"); - Ok(Self::Xz(xz)) - } else if let Ok(gz) = GZipFormat::parse(file) { + Ok(Self::Xz) + } else if let Ok(_gz) = GZipFormat::parse(file) { tracing::info!("Detected tarfile compressed with gz"); - Ok(Self::Gz(gz)) + Ok(Self::Gz) } else { bail!("Not a tar file or a tar with unknown compression"); } @@ -45,36 +45,25 @@ impl FileFormat for TarFormat { } fn extract(&self, file: &Path, output: &Path) -> anyhow::Result<()> { + let mut reader = BufReader::new(File::open(file).context("Opening input")?); match self { - TarFormat::Xz(xz) => { - let (tmp, _h) = create_tempfile()?; - xz.extract(file, &tmp).context("Decompress with xz")?; - check_extract_tar(&tmp, output) + TarFormat::Xz => { + let mut decoder = XzDecoder::new(reader); + extract_tar(&mut decoder, output) } - TarFormat::Gz(gz) => { - let (tmp, _h) = create_tempfile()?; - gz.extract(file, &tmp).context("Decompress with gz")?; - check_extract_tar(&tmp, output) + TarFormat::Gz => { + let mut decoder = gzip::Decoder::new(&mut reader).context("Creating decoder")?; + extract_tar(&mut decoder, output) } - TarFormat::Uncompressed => extract_tar(file, output).context("Extract tar"), + TarFormat::Uncompressed => extract_tar(&mut reader, output).context("Extract tar"), } } } - -/// Checks if the given tar has a valid tar signature and extracts it if that's the case -fn check_extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> { - if !has_tar_header(file)? { - tracing::debug!("The extracted tar doesn't have a valid tar signature. This is normal for non POSIX compliant tars."); - } - extract_tar(file, output).context("Extract tar") -} - /// Extracts a tar file to the given output directory -fn extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> { +fn extract_tar(reader: &mut R, output: &Path) -> anyhow::Result<()> { if output.is_file() { bail!("The output must be a directory."); } - let reader = BufReader::new(File::open(file).context("Opening input file")?); let mut archive = Archive::new(reader); for file in archive.entries().context("Reading tar entries")? { @@ -92,9 +81,12 @@ fn extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> { } } tracing::debug!("Decompressing entry to {output_path:?}"); - let mut output_file = File::create(&output_path) - .with_context(|| format!("Create output file {output_path:?}"))?; + let mut output_file = BufWriter::new( + File::create(&output_path) + .with_context(|| format!("Create output file {output_path:?}"))?, + ); io::copy(&mut file, &mut output_file).context("writing tar entry to output")?; + output_file.flush()?; } EntryType::Directory => { tracing::debug!("Creating output directory {output_path:?}"); @@ -109,23 +101,3 @@ fn extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> { Ok(()) } - -fn create_tempfile() -> anyhow::Result<(PathBuf, TempDir)> { - let tmp_dir = tempdir().context("Create tempdir")?; - let tmp_file = tmp_dir.path().join(format!( - ".extract-file-{}", - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() - )); - - Ok((tmp_file, tmp_dir)) -} - -/// Reads the header of the given file to check if it's a tar file -fn has_tar_header(file: &Path) -> anyhow::Result { - let header = get_file_header(file).context("Get file header")?; - - Ok(header.starts_with(TAR_HEADER)) -} diff --git a/src/lib.rs b/src/lib.rs index db7b59d..139deef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,2 @@ pub mod format; +pub mod utils; diff --git a/src/utils/channel_sink.rs b/src/utils/channel_sink.rs new file mode 100644 index 0000000..aba1b27 --- /dev/null +++ b/src/utils/channel_sink.rs @@ -0,0 +1,59 @@ +use std::io::{ErrorKind, Write}; +use std::sync::mpsc::{sync_channel, Receiver, SyncSender}; +use std::{io, mem}; + +pub struct ChannelSink { + buffer: Vec, + block_size: usize, + tx: SyncSender>, +} + +impl ChannelSink { + /// Creates a new sink with a channel to send the data to + pub fn new(block_size: usize) -> (Self, Receiver>) { + let (tx, rx) = sync_channel(1); + ( + Self { + buffer: Vec::new(), + block_size, + tx, + }, + rx, + ) + } +} + +impl Write for ChannelSink { + #[tracing::instrument(skip_all, level = "trace")] + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.buffer.append(&mut buf.to_vec()); + if self.buffer.len() >= self.block_size { + tracing::trace!("Block size reached. Sending buffer..."); + self.tx + .send(mem::take(&mut self.buffer)) + .map_err(|e| io::Error::new(ErrorKind::BrokenPipe, e))?; + } + + Ok(buf.len()) + } + + #[tracing::instrument(skip_all, level = "trace")] + fn flush(&mut self) -> std::io::Result<()> { + if !self.buffer.is_empty() { + self.tx + .send(mem::take(&mut self.buffer)) + .map_err(|e| io::Error::new(ErrorKind::BrokenPipe, e))?; + } + + Ok(()) + } +} + +impl Drop for ChannelSink { + #[tracing::instrument(skip_all, level = "trace")] + fn drop(&mut self) { + if let Err(e) = self.flush() { + tracing::debug!("Error while trying to flush buffer during drop {e}") + } + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs new file mode 100644 index 0000000..a375c82 --- /dev/null +++ b/src/utils/mod.rs @@ -0,0 +1,2 @@ +pub mod xz_decoder; +pub mod channel_sink; diff --git a/src/utils/xz_decoder.rs b/src/utils/xz_decoder.rs new file mode 100644 index 0000000..a9f6151 --- /dev/null +++ b/src/utils/xz_decoder.rs @@ -0,0 +1,52 @@ +use crate::utils::channel_sink::ChannelSink; +use lzma_rs::xz_decompress; +use std::cmp::min; +use std::io; +use std::io::{BufRead, Read, Write}; +use std::sync::mpsc::Receiver; + +pub struct XzDecoder { + buffer: Vec, + rx: Receiver>, +} + +impl XzDecoder { + pub fn new(mut reader: R) -> Self { + let (mut sink, rx) = ChannelSink::new(1024); + std::thread::spawn(move || { + tracing::debug!("Async decompression thread running"); + if let Err(e) = xz_decompress(&mut reader, &mut sink) { + tracing::error!("Async decompressing finished with error {e}"); + } else { + tracing::debug!("async decompressing succeeded"); + } + }); + Self { + rx, + buffer: Vec::new(), + } + } +} + +impl Read for XzDecoder { + #[tracing::instrument(skip_all, level = "trace")] + fn read(&mut self, mut buf: &mut [u8]) -> io::Result { + self.buffer.reverse(); + if self.buffer.is_empty() { + tracing::trace!("Receiving chunk from channel"); + if let Ok(chunk) = self.rx.recv() { + self.buffer = chunk; + } else { + tracing::debug!("Receiving timed out"); + } + } + + let max_write = min(self.buffer.len(), buf.len()); + tracing::trace!("Wrote {max_write} bytes"); + buf.write_all(&self.buffer[0..max_write])?; + self.buffer.reverse(); + self.buffer.truncate(self.buffer.len() - max_write); + + Ok(max_write) + } +}