Rename executable to uarch and add in-memory tar + xz unpacking

Signed-off-by: trivernis <trivernis@protonmail.com>
main
trivernis 3 years ago
parent 37899b2296
commit 2c77342398
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

44
Cargo.lock generated

@ -300,15 +300,6 @@ dependencies = [
"once_cell", "once_cell",
] ]
[[package]]
name = "fastrand"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
dependencies = [
"instant",
]
[[package]] [[package]]
name = "filetime" name = "filetime"
version = "0.2.16" version = "0.2.16"
@ -395,15 +386,6 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.1" version = "1.0.1"
@ -652,15 +634,6 @@ version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "rle-decode-fast" name = "rle-decode-fast"
version = "1.0.3" version = "1.0.3"
@ -744,20 +717,6 @@ dependencies = [
"xattr", "xattr",
] ]
[[package]]
name = "tempfile"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
dependencies = [
"cfg-if",
"fastrand",
"libc",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]] [[package]]
name = "termcolor" name = "termcolor"
version = "1.1.3" version = "1.1.3"
@ -886,7 +845,7 @@ checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]] [[package]]
name = "universal-archiver" name = "universal-archiver"
version = "0.1.1" version = "0.2.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
@ -894,7 +853,6 @@ dependencies = [
"libflate", "libflate",
"lzma-rs", "lzma-rs",
"tar", "tar",
"tempfile",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"zip", "zip",

@ -3,7 +3,7 @@ name = "universal-archiver"
description = "A tool to unpack archives without having to specify the archive type." description = "A tool to unpack archives without having to specify the archive type."
repository = "https://github.com/Trivernis/universal-archiver.git" repository = "https://github.com/Trivernis/universal-archiver.git"
authors = ["Trivernis <trivernis@pm.me>"] authors = ["Trivernis <trivernis@pm.me>"]
version = "0.1.1" version = "0.2.0"
edition = "2021" edition = "2021"
keywords = ["compression", "extract-archive", "archiver", "cli"] keywords = ["compression", "extract-archive", "archiver", "cli"]
license = "MIT" license = "MIT"
@ -19,7 +19,6 @@ zip = "0.6.2"
lzma-rs = "0.2.0" lzma-rs = "0.2.0"
libflate = "1.2.0" libflate = "1.2.0"
tar = "0.4.38" tar = "0.4.38"
tempfile = "3.3.0"
tracing-subscriber = {version = "0.3.11", features = ["env-filter"]} tracing-subscriber = {version = "0.3.11", features = ["env-filter"]}
[dependencies.clap] [dependencies.clap]

@ -20,14 +20,14 @@ cargo install universal-archiver
```sh ```sh
USAGE: USAGE:
ua <SUBCOMMAND> uarch <SUBCOMMAND>
OPTIONS: OPTIONS:
-h, --help Print help information -h, --help Print help information
-V, --version Print version information -V, --version Print version information
SUBCOMMANDS: SUBCOMMANDS:
extract Extracts a given file x, extract Extracts a given file
help Print this message or the help of the given subcommand(s) help Print this message or the help of the given subcommand(s)
``` ```
@ -37,7 +37,7 @@ SUBCOMMANDS:
Extracts a given file Extracts a given file
USAGE: USAGE:
ua extract <FILE> [OUTPUT] uarch extract <FILE> [OUTPUT]
ARGS: ARGS:
<FILE> The file to extract <FILE> The file to extract

@ -1,20 +1,20 @@
use crate::format::gzip::GZipFormat; use crate::format::gzip::GZipFormat;
use crate::format::xz::XZFormat; use crate::format::xz::XZFormat;
use crate::format::{get_file_header, FileFormat, FileObject}; use crate::format::{FileFormat, FileObject};
use crate::utils::xz_decoder::XzDecoder;
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use libflate::gzip;
use std::fs::File; use std::fs::File;
use std::io::BufReader; use std::io::{BufReader, BufWriter, Read, Write};
use std::path::{Path, PathBuf}; use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
use std::{fs, io}; use std::{fs, io};
use tar::{Archive, EntryType}; use tar::{Archive, EntryType};
use tempfile::{tempdir, TempDir};
const TAR_HEADER: &[u8] = &[0x75, 0x73, 0x74, 0x61, 0x72]; const TAR_HEADER: &[u8] = &[0x75, 0x73, 0x74, 0x61, 0x72];
pub enum TarFormat { pub enum TarFormat {
Xz(XZFormat), Xz,
Gz(GZipFormat), Gz,
Uncompressed, Uncompressed,
} }
@ -25,14 +25,14 @@ impl FileFormat for TarFormat {
Ok(Self::Uncompressed) Ok(Self::Uncompressed)
} else if file.ext.contains(".tar.") { } else if file.ext.contains(".tar.") {
if let Ok(xz) = XZFormat::parse(file) { if let Ok(_xz) = XZFormat::parse(file) {
tracing::info!("Detected tar file compressed with xz"); tracing::info!("Detected tar file compressed with xz");
Ok(Self::Xz(xz)) Ok(Self::Xz)
} else if let Ok(gz) = GZipFormat::parse(file) { } else if let Ok(_gz) = GZipFormat::parse(file) {
tracing::info!("Detected tarfile compressed with gz"); tracing::info!("Detected tarfile compressed with gz");
Ok(Self::Gz(gz)) Ok(Self::Gz)
} else { } else {
bail!("Not a tar file or a tar with unknown compression"); bail!("Not a tar file or a tar with unknown compression");
} }
@ -45,36 +45,25 @@ impl FileFormat for TarFormat {
} }
fn extract(&self, file: &Path, output: &Path) -> anyhow::Result<()> { fn extract(&self, file: &Path, output: &Path) -> anyhow::Result<()> {
let mut reader = BufReader::new(File::open(file).context("Opening input")?);
match self { match self {
TarFormat::Xz(xz) => { TarFormat::Xz => {
let (tmp, _h) = create_tempfile()?; let mut decoder = XzDecoder::new(reader);
xz.extract(file, &tmp).context("Decompress with xz")?; extract_tar(&mut decoder, output)
check_extract_tar(&tmp, output)
} }
TarFormat::Gz(gz) => { TarFormat::Gz => {
let (tmp, _h) = create_tempfile()?; let mut decoder = gzip::Decoder::new(&mut reader).context("Creating decoder")?;
gz.extract(file, &tmp).context("Decompress with gz")?; extract_tar(&mut decoder, output)
check_extract_tar(&tmp, output)
} }
TarFormat::Uncompressed => extract_tar(file, output).context("Extract tar"), TarFormat::Uncompressed => extract_tar(&mut reader, output).context("Extract tar"),
} }
} }
} }
/// Checks if the given tar has a valid tar signature and extracts it if that's the case
fn check_extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> {
if !has_tar_header(file)? {
tracing::debug!("The extracted tar doesn't have a valid tar signature. This is normal for non POSIX compliant tars.");
}
extract_tar(file, output).context("Extract tar")
}
/// Extracts a tar file to the given output directory /// Extracts a tar file to the given output directory
fn extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> { fn extract_tar<R: Read>(reader: &mut R, output: &Path) -> anyhow::Result<()> {
if output.is_file() { if output.is_file() {
bail!("The output must be a directory."); bail!("The output must be a directory.");
} }
let reader = BufReader::new(File::open(file).context("Opening input file")?);
let mut archive = Archive::new(reader); let mut archive = Archive::new(reader);
for file in archive.entries().context("Reading tar entries")? { for file in archive.entries().context("Reading tar entries")? {
@ -92,9 +81,12 @@ fn extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> {
} }
} }
tracing::debug!("Decompressing entry to {output_path:?}"); tracing::debug!("Decompressing entry to {output_path:?}");
let mut output_file = File::create(&output_path) let mut output_file = BufWriter::new(
.with_context(|| format!("Create output file {output_path:?}"))?; File::create(&output_path)
.with_context(|| format!("Create output file {output_path:?}"))?,
);
io::copy(&mut file, &mut output_file).context("writing tar entry to output")?; io::copy(&mut file, &mut output_file).context("writing tar entry to output")?;
output_file.flush()?;
} }
EntryType::Directory => { EntryType::Directory => {
tracing::debug!("Creating output directory {output_path:?}"); tracing::debug!("Creating output directory {output_path:?}");
@ -109,23 +101,3 @@ fn extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> {
Ok(()) Ok(())
} }
fn create_tempfile() -> anyhow::Result<(PathBuf, TempDir)> {
let tmp_dir = tempdir().context("Create tempdir")?;
let tmp_file = tmp_dir.path().join(format!(
".extract-file-{}",
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs()
));
Ok((tmp_file, tmp_dir))
}
/// Reads the header of the given file to check if it's a tar file
fn has_tar_header(file: &Path) -> anyhow::Result<bool> {
let header = get_file_header(file).context("Get file header")?;
Ok(header.starts_with(TAR_HEADER))
}

@ -1 +1,2 @@
pub mod format; pub mod format;
pub mod utils;

@ -0,0 +1,59 @@
use std::io::{ErrorKind, Write};
use std::sync::mpsc::{sync_channel, Receiver, SyncSender};
use std::{io, mem};
/// A sink that collects written bytes and hands them on in chunks through a
/// bounded channel.
pub struct ChannelSink {
    /// Bytes that have been written to the sink but not yet sent.
    buffer: Vec<u8>,
    /// Number of buffered bytes at which the buffer is sent as one chunk.
    block_size: usize,
    /// Sending half of the channel the chunks are pushed into.
    tx: SyncSender<Vec<u8>>,
}

impl ChannelSink {
    /// Builds a sink and returns it together with the receiving half of its channel.
    ///
    /// `block_size` is the number of buffered bytes at which a chunk is sent.
    /// The channel holds at most one pending chunk.
    pub fn new(block_size: usize) -> (Self, Receiver<Vec<u8>>) {
        let (tx, rx) = sync_channel(1);
        let sink = ChannelSink {
            buffer: Vec::new(),
            block_size,
            tx,
        };

        (sink, rx)
    }
}
impl Write for ChannelSink {
    /// Appends `buf` to the internal buffer and, once at least `block_size` bytes
    /// have accumulated, sends the whole buffer through the channel as one chunk.
    ///
    /// The full length of `buf` is always reported as written. Sending blocks
    /// while the bounded channel has no room for another chunk.
    ///
    /// # Errors
    ///
    /// Returns an error of kind [`ErrorKind::BrokenPipe`] when the chunk cannot be
    /// sent because the receiving half of the channel has been dropped.
    #[tracing::instrument(skip_all, level = "trace")]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        // Copy straight out of the borrowed slice; no temporary `Vec` is needed.
        self.buffer.extend_from_slice(buf);

        if self.buffer.len() >= self.block_size {
            tracing::trace!("Block size reached. Sending buffer...");
            // `mem::take` moves the filled buffer out and leaves an empty one behind.
            self.tx
                .send(mem::take(&mut self.buffer))
                .map_err(|e| io::Error::new(ErrorKind::BrokenPipe, e))?;
        }

        Ok(buf.len())
    }

    /// Sends whatever is currently buffered, regardless of `block_size`.
    ///
    /// Does nothing when the buffer is empty, so no empty chunk is ever sent
    /// from here.
    ///
    /// # Errors
    ///
    /// Returns an error of kind [`ErrorKind::BrokenPipe`] when the receiving half
    /// of the channel has been dropped.
    #[tracing::instrument(skip_all, level = "trace")]
    fn flush(&mut self) -> std::io::Result<()> {
        if !self.buffer.is_empty() {
            self.tx
                .send(mem::take(&mut self.buffer))
                .map_err(|e| io::Error::new(ErrorKind::BrokenPipe, e))?;
        }

        Ok(())
    }
}
impl Drop for ChannelSink {
#[tracing::instrument(skip_all, level = "trace")]
fn drop(&mut self) {
if let Err(e) = self.flush() {
tracing::debug!("Error while trying to flush buffer during drop {e}")
}
}
}

@ -0,0 +1,2 @@
pub mod xz_decoder;
pub mod channel_sink;

@ -0,0 +1,52 @@
use crate::utils::channel_sink::ChannelSink;
use lzma_rs::xz_decompress;
use std::cmp::min;
use std::io;
use std::io::{BufRead, Read, Write};
use std::sync::mpsc::Receiver;
/// Reader over xz-compressed input that is decompressed on a separate thread.
pub struct XzDecoder {
    /// Decompressed bytes that were received but not yet handed to the caller.
    buffer: Vec<u8>,
    /// Receiving half of the channel that the decompression thread writes into.
    rx: Receiver<Vec<u8>>,
}

impl XzDecoder {
    /// Starts decompressing `reader` on a new thread and returns a decoder from
    /// which the decompressed bytes can be read.
    ///
    /// The decompressed data is passed over a channel in chunks, using a sink
    /// created with a block size of 1024 bytes.
    ///
    /// NOTE(review): a decompression error is only logged on the worker thread.
    /// It is not passed on to whoever reads from the decoder.
    pub fn new<R: BufRead + Send + 'static>(mut reader: R) -> Self {
        let (mut sink, rx) = ChannelSink::new(1024);

        let decompress = move || {
            tracing::debug!("Async decompression thread running");
            match xz_decompress(&mut reader, &mut sink) {
                Ok(_) => {
                    tracing::debug!("async decompressing succeeded");
                }
                Err(e) => {
                    tracing::error!("Async decompressing finished with error {e}");
                }
            }
        };
        // The join handle is discarded, so the thread runs detached.
        std::thread::spawn(decompress);

        Self {
            buffer: Vec::new(),
            rx,
        }
    }
}
impl Read for XzDecoder {
    /// Copies decompressed bytes into `buf`, blocking until a chunk is available
    /// whenever the internal buffer is empty.
    ///
    /// Returns the number of bytes copied. `Ok(0)` is returned when `buf` is
    /// empty, or when the sending side of the channel has been dropped and no
    /// data is left, which signals the end of the stream.
    ///
    /// # Errors
    ///
    /// Propagates an error from copying into `buf`.
    #[tracing::instrument(skip_all, level = "trace")]
    fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
        // Nothing can be copied into an empty target, so don't block on the channel.
        if buf.is_empty() {
            return Ok(0);
        }

        // Keep receiving until there is data to hand out. An empty chunk must not
        // be turned into `Ok(0)`, because the caller would read that as end of stream.
        while self.buffer.is_empty() {
            tracing::trace!("Receiving chunk from channel");
            match self.rx.recv() {
                Ok(chunk) => self.buffer = chunk,
                Err(_) => {
                    // `recv` fails only when the sender is gone; it has no timeout.
                    tracing::debug!("Sender disconnected, no more chunks to receive");
                    return Ok(0);
                }
            }
        }

        let max_write = min(self.buffer.len(), buf.len());
        tracing::trace!("Wrote {max_write} bytes");
        buf.write_all(&self.buffer[..max_write])?;
        // Drop the consumed prefix; the rest stays in order for the next call.
        self.buffer.drain(..max_write);

        Ok(max_write)
    }
}
Loading…
Cancel
Save