diff --git a/.gitignore b/.gitignore index b888e11..3a15b4e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ *.gz *.xz *.zip +*.7z +*.tar test.txt out out.txt \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index fbdee7b..002ca7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + [[package]] name = "aes" version = "0.7.5" @@ -284,19 +290,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "extract-archive" -version = "0.1.0" -dependencies = [ - "anyhow", - "clap", - "color-eyre", - "lzma-rs", - "tracing", - "tracing-subscriber", - "zip", -] - [[package]] name = "eyre" version = "0.6.8" @@ -307,6 +300,27 @@ dependencies = [ "once_cell", ] +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "filetime" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0408e2626025178a6a7f7ffc05a25bc47103229f19c113755de7bf63816290c" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "winapi", +] + [[package]] name = "flate2" version = "1.0.23" @@ -381,6 +395,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + [[package]] name = "itoa" version = "1.0.1" @@ -408,6 +431,26 @@ version = "0.2.125" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b" +[[package]] +name = "libflate" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093" +dependencies = [ + "adler32", + "crc32fast", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a" +dependencies = [ + "rle-decode-fast", +] + [[package]] name = "log" version = "0.4.16" @@ -576,6 +619,15 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +[[package]] +name = "redox_syscall" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.5.5" @@ -600,6 +652,21 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + [[package]] name = "rustc-demangle" version = "0.1.21" @@ -666,6 +733,31 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "tar" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + [[package]] name = "termcolor" version = "1.1.3" @@ -792,6 +884,22 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "universal-archiver" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "color-eyre", + "libflate", + "lzma-rs", + "tar", + "tempfile", + "tracing", + "tracing-subscriber", + "zip", +] + [[package]] name = "valuable" version = "0.1.0" @@ -835,6 +943,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "xattr" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c" +dependencies = [ + "libc", +] + [[package]] name = "zip" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 84d4fc2..948c97c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,16 @@ [package] -name = "extract-archive" +name = "universal-archiver" +description = "A tool to unpack archives without having to specify the archive type." +repository = "https://github.com/Trivernis/universal-archiver.git" +authors = ["Trivernis "] version = "0.1.0" edition = "2021" +keywords = ["compression", "extract-archive", "archiver", "cli"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [[bin]] -name = "extract-archive" +name = "universal-archiver" [dependencies] anyhow = "1.0.57" @@ -14,6 +18,9 @@ color-eyre = "0.6.1" tracing = "0.1.34" zip = "0.6.2" lzma-rs = "0.2.0" +libflate = "1.2.0" +tar = "0.4.38" +tempfile = "3.3.0" tracing-subscriber = {version = "0.3.11", features = ["env-filter"]} [dependencies.clap] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b7f149d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Trivernis + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..9cd3f14 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# Universal Archiver + +Universal Archiver is a tool to easily extract well known archive files +based on their signature. The type of the file doesn't need to be specified. + + +## Why + +Because it's annoying to learn all the tar and zip commands. + + +## Usage + +```sh +USAGE: + universal-archiver + +OPTIONS: + -h, --help Print help information + -V, --version Print version information + +SUBCOMMANDS: + extract Extracts a given file + help Print this message or the help of the given subcommand(s) +``` + +## License + +MIT \ No newline at end of file diff --git a/src/format/gzip.rs b/src/format/gzip.rs new file mode 100644 index 0000000..933810b --- /dev/null +++ b/src/format/gzip.rs @@ -0,0 +1,35 @@ +use crate::format::{FileFormat, FileObject}; +use anyhow::{bail, Context}; +use libflate::gzip::Decoder; +use std::fs::File; +use std::io; +use std::io::BufReader; +use std::path::Path; + +pub const GZIP_HEADER: &[u8] = &[0x1f, 0x8b]; + +pub struct GZipFormat; + +impl FileFormat for GZipFormat { + fn parse(file: &FileObject) -> anyhow::Result { + if file.header.starts_with(GZIP_HEADER) { + if !file.ext.ends_with(".gz") && !file.ext.ends_with(".gzip") { + tracing::error!("The file has a valid gzip signature but not a gzip extension"); + } + Ok(Self) + } else { + bail!("Not a gzip file") + } + } + + fn extract(&self, file: &Path, output: &Path) -> anyhow::Result<()> { + let mut reader = BufReader::new(File::open(file).context("Opening input")?); + let mut decoder = Decoder::new(&mut reader).context("Creating decoder")?; + let mut output_file = + File::create(output).with_context(|| format!("Creating output file {output:?}"))?; + tracing::debug!("Extracting to {output:?}"); + io::copy(&mut decoder, &mut output_file).context("Deompressing file to output")?; + + Ok(()) + } +} diff --git a/src/format/mod.rs b/src/format/mod.rs index fade35f..e5f0f24 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -1,7 +1,10 @@ +mod gzip; mod tar; mod xz; mod zip; +use crate::format::gzip::GZipFormat; +use crate::format::tar::TarFormat; use crate::format::xz::XZFormat; use crate::format::zip::ZipFormat; use anyhow::{bail, Result}; @@ -12,8 +15,8 @@ use std::path::Path; pub enum Format { Zip(ZipFormat), Xz(XZFormat), - Gz, - Tar, + Gz(GZipFormat), + Tar(TarFormat), } pub struct FileObject { @@ -31,9 +34,15 @@ impl FileFormat for Format { if let Ok(zip) = ZipFormat::parse(file) { tracing::info!("Detected zip format"); Ok(Self::Zip(zip)) + } else if let Ok(tar) = TarFormat::parse(file) { + tracing::info!("Detected tar format"); + Ok(Self::Tar(tar)) } else if let Ok(xz) = XZFormat::parse(file) { tracing::info!("Detected xz format"); Ok(Self::Xz(xz)) + } else if let Ok(gz) = GZipFormat::parse(file) { + tracing::info!("Detected gzip format"); + Ok(Self::Gz(gz)) } else { bail!("Unknown file format"); } @@ -43,7 +52,8 @@ impl FileFormat for Format { match self { Format::Zip(zip) => zip.extract(file, output), Format::Xz(xz) => xz.extract(file, output), - _ => bail!("Not implemented"), + Format::Gz(gz) => gz.extract(file, output), + Format::Tar(tar) => tar.extract(file, output), } } } @@ -64,9 +74,12 @@ pub fn parse_format(file: &Path) -> Result { /// just using the extensions for format detection that behaviour isn't a problem. fn get_file_extensions(path: &Path) -> Option { let name = path.file_name()?.to_string_lossy(); - let extensions: Vec<&str> = name.split('.').skip(1).collect(); + let extensions: String = name + .split('.') + .skip(1) + .fold(String::new(), |acc, val| format!("{acc}.{val}")); - Some(extensions.join(".")) + Some(extensions) } /// Returns the first 32 bytes of the file that can be used to detect diff --git a/src/format/tar.rs b/src/format/tar.rs index 8b13789..a397184 100644 --- a/src/format/tar.rs +++ b/src/format/tar.rs @@ -1 +1,131 @@ +use crate::format::gzip::GZipFormat; +use crate::format::xz::XZFormat; +use crate::format::{get_file_header, FileFormat, FileObject}; +use anyhow::{bail, Context}; +use std::fs::File; +use std::io::BufReader; +use std::path::{Path, PathBuf}; +use std::time::{SystemTime, UNIX_EPOCH}; +use std::{fs, io}; +use tar::{Archive, EntryType}; +use tempfile::{tempdir, TempDir}; +const TAR_HEADER: &[u8] = &[0x75, 0x73, 0x74, 0x61, 0x72]; + +pub enum TarFormat { + Xz(XZFormat), + Gz(GZipFormat), + Uncompressed, +} + +impl FileFormat for TarFormat { + fn parse(file: &FileObject) -> anyhow::Result { + if file.header.starts_with(TAR_HEADER) { + tracing::info!("Detected uncompressed tar file"); + + Ok(Self::Uncompressed) + } else if file.ext.contains(".tar.") { + if let Ok(xz) = XZFormat::parse(file) { + tracing::info!("Detected tar file compressed with xz"); + + Ok(Self::Xz(xz)) + } else if let Ok(gz) = GZipFormat::parse(file) { + tracing::info!("Detected tarfile compressed with gz"); + + Ok(Self::Gz(gz)) + } else { + bail!("Not a tar file or a tar with unknown compression"); + } + } else if file.ext.ends_with(".tar") { + tracing::info!("Assuming tar based on the file extension"); + Ok(Self::Uncompressed) + } else { + bail!("Not a tar file"); + } + } + + fn extract(&self, file: &Path, output: &Path) -> anyhow::Result<()> { + match self { + TarFormat::Xz(xz) => { + let (tmp, _h) = create_tempfile()?; + xz.extract(file, &tmp).context("Decompress with xz")?; + check_extract_tar(&tmp, output) + } + TarFormat::Gz(gz) => { + let (tmp, _h) = create_tempfile()?; + gz.extract(file, &tmp).context("Decompress with gz")?; + check_extract_tar(&tmp, output) + } + TarFormat::Uncompressed => extract_tar(file, output).context("Extract tar"), + } + } +} + +/// Checks if the given tar has a valid tar signature and extracts it if that's the case +fn check_extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> { + if !has_tar_header(file)? { + tracing::debug!("The extracted tar doesn't have a valid tar signature. This is normal for non POSIX compliant tars."); + } + extract_tar(file, output).context("Extract tar") +} + +/// Extracts a tar file to the given output directory +fn extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> { + if output.is_file() { + bail!("The output must be a directory."); + } + let reader = BufReader::new(File::open(file).context("Opening input file")?); + let mut archive = Archive::new(reader); + + for file in archive.entries().context("Reading tar entries")? { + let mut file = file.context("Retrieving tar file entry")?; + let header = file.header(); + let file_path = header.path().context("Retrieving path of file")?; + let output_path = output.join(file_path); + + match header.entry_type() { + EntryType::Regular => { + if let Some(parent) = output_path.parent() { + if !parent.exists() { + tracing::debug!("Creating parent path {parent:?}"); + fs::create_dir_all(parent)?; + } + } + tracing::debug!("Decompressing entry to {output_path:?}"); + let mut output_file = File::create(&output_path) + .with_context(|| format!("Create output file {output_path:?}"))?; + io::copy(&mut file, &mut output_file).context("writing tar entry to output")?; + } + EntryType::Directory => { + tracing::debug!("Creating output directory {output_path:?}"); + fs::create_dir_all(&output_path) + .with_context(|| format!("Failed to create output directory {output_path:?}"))? + } + other => { + tracing::debug!("Ignoring entry of type {other:?}"); + } + } + } + + Ok(()) +} + +fn create_tempfile() -> anyhow::Result<(PathBuf, TempDir)> { + let tmp_dir = tempdir().context("Create tempdir")?; + let tmp_file = tmp_dir.path().join(format!( + ".extract-file-{}", + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() + )); + + Ok((tmp_file, tmp_dir)) +} + +/// Reads the header of the given file to check if it's a tar file +fn has_tar_header(file: &Path) -> anyhow::Result { + let header = get_file_header(file).context("Get file header")?; + + Ok(header.starts_with(TAR_HEADER)) +} diff --git a/src/format/xz.rs b/src/format/xz.rs index 05de7ea..37521e7 100644 --- a/src/format/xz.rs +++ b/src/format/xz.rs @@ -1,7 +1,6 @@ use crate::format::{FileFormat, FileObject}; use anyhow::{bail, Context}; use std::fs::File; -use std::io; use std::io::BufReader; use std::path::Path; @@ -12,8 +11,7 @@ pub struct XZFormat; impl FileFormat for XZFormat { fn parse(file: &FileObject) -> anyhow::Result { if file.header.starts_with(XZ_HEADER) { - if !file.ext.ends_with("xz") && !file.ext.ends_with("lzma") && !file.ext.ends_with("7z") - { + if !file.ext.ends_with(".xz") && !file.ext.ends_with(".lzma") { tracing::warn!("The file has a xz signature but not a xz extension."); } Ok(Self) diff --git a/src/format/zip.rs b/src/format/zip.rs index b1c455b..62d29c4 100644 --- a/src/format/zip.rs +++ b/src/format/zip.rs @@ -12,7 +12,7 @@ pub struct ZipFormat; impl FileFormat for ZipFormat { fn parse(file: &FileObject) -> Result { if file.header.starts_with(ZIP_HEADER) { - if !file.ext.ends_with("zip") { + if !file.ext.ends_with(".zip") { tracing::warn!("The file has a zip signature but no zip extension."); } Ok(Self) diff --git a/src/headers.rs b/src/headers.rs deleted file mode 100644 index 42b2cf1..0000000 --- a/src/headers.rs +++ /dev/null @@ -1,4 +0,0 @@ -/// Container Formats - -/// Compression Methods -pub const GZIP_HEADER: &[u8] = &[0x1f, 0x8b]; diff --git a/src/lib.rs b/src/lib.rs index f09da69..db7b59d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1 @@ pub mod format; -mod headers; diff --git a/src/main.rs b/src/main.rs index 749b1a8..627fb83 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,9 @@ use clap::{Parser, Subcommand}; -use extract_archive::format::{parse_format, FileFormat}; -use std::fs; use std::path::PathBuf; use std::str::FromStr; use tracing_subscriber::fmt::format::FmtSpan; use tracing_subscriber::EnvFilter; +use universal_archiver::format::{parse_format, FileFormat}; #[derive(Debug, Clone, Parser)] #[clap(name="extract-archive", version=env!("CARGO_PKG_VERSION"), about=env!("CARGO_PKG_DESCRIPTION"))] @@ -22,7 +21,7 @@ pub enum Operation { file: PathBuf, /// The output folder for the given file - output: PathBuf, + output: Option, }, } @@ -33,6 +32,11 @@ fn main() { match args.operation { Operation::Extract { output, file } => { let format = parse_format(&file).expect("Failed to parse file format"); + let output = output.unwrap_or_else(|| { + file.file_stem() + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("out")) + }); format .extract(&file, &output) .expect("Failed to extract file");