Add README, LICENSE, handling of tars and stuff

Signed-off-by: trivernis <trivernis@protonmail.com>
main
trivernis 2 years ago
parent ad83f38056
commit c0c532f0c0
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

2
.gitignore vendored

@ -4,6 +4,8 @@
*.gz
*.xz
*.zip
*.7z
*.tar
test.txt
out
out.txt

143
Cargo.lock generated

@ -17,6 +17,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "adler32"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
[[package]]
name = "aes"
version = "0.7.5"
@ -284,19 +290,6 @@ dependencies = [
"subtle",
]
[[package]]
name = "extract-archive"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"color-eyre",
"lzma-rs",
"tracing",
"tracing-subscriber",
"zip",
]
[[package]]
name = "eyre"
version = "0.6.8"
@ -307,6 +300,27 @@ dependencies = [
"once_cell",
]
[[package]]
name = "fastrand"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
dependencies = [
"instant",
]
[[package]]
name = "filetime"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0408e2626025178a6a7f7ffc05a25bc47103229f19c113755de7bf63816290c"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"winapi",
]
[[package]]
name = "flate2"
version = "1.0.23"
@ -381,6 +395,15 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "itoa"
version = "1.0.1"
@ -408,6 +431,26 @@ version = "0.2.125"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b"
[[package]]
name = "libflate"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093"
dependencies = [
"adler32",
"crc32fast",
"libflate_lz77",
]
[[package]]
name = "libflate_lz77"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a"
dependencies = [
"rle-decode-fast",
]
[[package]]
name = "log"
version = "0.4.16"
@ -576,6 +619,15 @@ version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
[[package]]
name = "redox_syscall"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.5.5"
@ -600,6 +652,21 @@ version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "rle-decode-fast"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
[[package]]
name = "rustc-demangle"
version = "0.1.21"
@ -666,6 +733,31 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "tar"
version = "0.4.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6"
dependencies = [
"filetime",
"libc",
"xattr",
]
[[package]]
name = "tempfile"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
dependencies = [
"cfg-if",
"fastrand",
"libc",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]]
name = "termcolor"
version = "1.1.3"
@ -792,6 +884,22 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "universal-archiver"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"color-eyre",
"libflate",
"lzma-rs",
"tar",
"tempfile",
"tracing",
"tracing-subscriber",
"zip",
]
[[package]]
name = "valuable"
version = "0.1.0"
@ -835,6 +943,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "xattr"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c"
dependencies = [
"libc",
]
[[package]]
name = "zip"
version = "0.6.2"

@ -1,12 +1,16 @@
[package]
name = "extract-archive"
name = "universal-archiver"
description = "A tool to unpack archives without having to specify the archive type."
repository = "https://github.com/Trivernis/universal-archiver.git"
authors = ["Trivernis <trivernis@pm.me>"]
version = "0.1.0"
edition = "2021"
keywords = ["compression", "extract-archive", "archiver", "cli"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[[bin]]
name = "extract-archive"
name = "universal-archiver"
[dependencies]
anyhow = "1.0.57"
@ -14,6 +18,9 @@ color-eyre = "0.6.1"
tracing = "0.1.34"
zip = "0.6.2"
lzma-rs = "0.2.0"
libflate = "1.2.0"
tar = "0.4.38"
tempfile = "3.3.0"
tracing-subscriber = {version = "0.3.11", features = ["env-filter"]}
[dependencies.clap]

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Trivernis
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,29 @@
# Universal Archiver
Universal Archiver is a tool to easily extract well-known archive formats
based on their signature. The type of the file doesn't need to be specified.
## Why
Because it's annoying to learn all the tar and zip commands.
## Usage
```sh
USAGE:
universal-archiver <SUBCOMMAND>
OPTIONS:
-h, --help Print help information
-V, --version Print version information
SUBCOMMANDS:
extract Extracts a given file
help Print this message or the help of the given subcommand(s)
```
## License
MIT

@ -0,0 +1,35 @@
use crate::format::{FileFormat, FileObject};
use anyhow::{bail, Context};
use libflate::gzip::Decoder;
use std::fs::File;
use std::io;
use std::io::BufReader;
use std::path::Path;
/// Magic bytes a file header must start with to be treated as gzip.
pub const GZIP_HEADER: &[u8] = &[0x1f, 0x8b];

/// Format handler for gzip-compressed files.
pub struct GZipFormat;

impl FileFormat for GZipFormat {
    /// Accepts the file when its header starts with [`GZIP_HEADER`].
    ///
    /// A file with a gzip signature but without a `.gz`/`.gzip` extension is
    /// still accepted; the mismatch is only logged as a warning, matching the
    /// behaviour of the xz and zip parsers.
    ///
    /// # Errors
    /// Returns an error when the header does not start with the gzip signature.
    fn parse(file: &FileObject) -> anyhow::Result<Self> {
        if !file.header.starts_with(GZIP_HEADER) {
            bail!("Not a gzip file");
        }
        if !file.ext.ends_with(".gz") && !file.ext.ends_with(".gzip") {
            // The signature is authoritative, so this is not an error condition.
            tracing::warn!("The file has a valid gzip signature but not a gzip extension");
        }
        Ok(Self)
    }

    /// Decompresses `file` and writes the result to the file at `output`.
    ///
    /// # Errors
    /// Returns an error when the input cannot be opened, the gzip decoder
    /// cannot be created, the output file cannot be created, or copying the
    /// decompressed data fails.
    fn extract(&self, file: &Path, output: &Path) -> anyhow::Result<()> {
        let mut reader = BufReader::new(File::open(file).context("Opening input")?);
        // The decoder is created before the output file so that an invalid
        // input does not leave an empty output file behind.
        let mut decoder = Decoder::new(&mut reader).context("Creating decoder")?;
        let mut output_file =
            File::create(output).with_context(|| format!("Creating output file {output:?}"))?;
        tracing::debug!("Extracting to {output:?}");
        io::copy(&mut decoder, &mut output_file).context("Decompressing file to output")?;

        Ok(())
    }
}

@ -1,7 +1,10 @@
mod gzip;
mod tar;
mod xz;
mod zip;
use crate::format::gzip::GZipFormat;
use crate::format::tar::TarFormat;
use crate::format::xz::XZFormat;
use crate::format::zip::ZipFormat;
use anyhow::{bail, Result};
@ -12,8 +15,8 @@ use std::path::Path;
pub enum Format {
Zip(ZipFormat),
Xz(XZFormat),
Gz,
Tar,
Gz(GZipFormat),
Tar(TarFormat),
}
pub struct FileObject {
@ -31,9 +34,15 @@ impl FileFormat for Format {
if let Ok(zip) = ZipFormat::parse(file) {
tracing::info!("Detected zip format");
Ok(Self::Zip(zip))
} else if let Ok(tar) = TarFormat::parse(file) {
tracing::info!("Detected tar format");
Ok(Self::Tar(tar))
} else if let Ok(xz) = XZFormat::parse(file) {
tracing::info!("Detected xz format");
Ok(Self::Xz(xz))
} else if let Ok(gz) = GZipFormat::parse(file) {
tracing::info!("Detected gzip format");
Ok(Self::Gz(gz))
} else {
bail!("Unknown file format");
}
@ -43,7 +52,8 @@ impl FileFormat for Format {
match self {
Format::Zip(zip) => zip.extract(file, output),
Format::Xz(xz) => xz.extract(file, output),
_ => bail!("Not implemented"),
Format::Gz(gz) => gz.extract(file, output),
Format::Tar(tar) => tar.extract(file, output),
}
}
}
@ -64,9 +74,12 @@ pub fn parse_format(file: &Path) -> Result<Format> {
/// just using the extensions for format detection that behaviour isn't a problem.
///
/// The returned string keeps the leading dot of every extension, so
/// `archive.tar.gz` yields `.tar.gz` and a name without any dot yields an
/// empty string. Returns `None` when the path has no file name.
fn get_file_extensions(path: &Path) -> Option<String> {
    let name = path.file_name()?.to_string_lossy();
    // Everything from the first dot onwards is exactly the dot-prefixed
    // concatenation of all segments after the first one, so a single slice
    // replaces the per-segment string rebuilding.
    let extensions = match name.find('.') {
        Some(first_dot) => name[first_dot..].to_owned(),
        None => String::new(),
    };

    Some(extensions)
}
/// Returns the first 32 bytes of the file that can be used to detect

@ -1 +1,131 @@
use crate::format::gzip::GZipFormat;
use crate::format::xz::XZFormat;
use crate::format::{get_file_header, FileFormat, FileObject};
use anyhow::{bail, Context};
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use std::{fs, io};
use tar::{Archive, EntryType};
use tempfile::{tempdir, TempDir};
/// ASCII bytes of `ustar`, used as the tar signature.
const TAR_HEADER: &[u8] = &[0x75, 0x73, 0x74, 0x61, 0x72];

/// A tar archive, optionally wrapped in a compression layer.
pub enum TarFormat {
    /// Tar compressed with xz.
    Xz(XZFormat),
    /// Tar compressed with gzip.
    Gz(GZipFormat),
    /// Plain tar without compression.
    Uncompressed,
}

impl FileFormat for TarFormat {
    /// Detects tar archives from the file header and the file extensions.
    ///
    /// Checks, in order: the header prefix, a `.tar.` infix combined with an
    /// xz or gzip signature, and finally a trailing `.tar` extension.
    ///
    /// NOTE(review): in the ustar layout the `ustar` magic sits at byte offset
    /// 257, so the prefix test on the file header will rarely match and
    /// detection effectively relies on the extension checks — confirm.
    fn parse(file: &FileObject) -> anyhow::Result<Self> {
        if file.header.starts_with(TAR_HEADER) {
            tracing::info!("Detected uncompressed tar file");
            return Ok(Self::Uncompressed);
        }

        if file.ext.contains(".tar.") {
            // xz is probed first; gzip is only probed when xz does not match.
            if let Ok(xz) = XZFormat::parse(file) {
                tracing::info!("Detected tar file compressed with xz");
                return Ok(Self::Xz(xz));
            }
            if let Ok(gz) = GZipFormat::parse(file) {
                tracing::info!("Detected tarfile compressed with gz");
                return Ok(Self::Gz(gz));
            }
            bail!("Not a tar file or a tar with unknown compression");
        }

        if file.ext.ends_with(".tar") {
            tracing::info!("Assuming tar based on the file extension");
            return Ok(Self::Uncompressed);
        }

        bail!("Not a tar file");
    }

    /// Extracts the archive at `file` into the directory `output`.
    ///
    /// Compressed archives are first decompressed into a temporary file and
    /// the resulting tar is unpacked from there.
    fn extract(&self, file: &Path, output: &Path) -> anyhow::Result<()> {
        match self {
            Self::Uncompressed => extract_tar(file, output).context("Extract tar"),
            Self::Xz(xz) => {
                // `_guard` is held until the end of the arm so the temporary
                // directory handle outlives the extraction.
                let (decompressed, _guard) = create_tempfile()?;
                xz.extract(file, &decompressed)
                    .context("Decompress with xz")?;
                check_extract_tar(&decompressed, output)
            }
            Self::Gz(gz) => {
                let (decompressed, _guard) = create_tempfile()?;
                gz.extract(file, &decompressed)
                    .context("Decompress with gz")?;
                check_extract_tar(&decompressed, output)
            }
        }
    }
}
/// Extracts the tar at `file` into `output`, logging a debug note beforehand
/// when the file does not carry the tar signature. Extraction is attempted in
/// either case.
fn check_extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> {
    let signature_found = has_tar_header(file)?;
    if !signature_found {
        tracing::debug!("The extracted tar doesn't have a valid tar signature. This is normal for non POSIX compliant tars.");
    }

    let extraction = extract_tar(file, output);
    extraction.context("Extract tar")
}
/// Extracts a tar file to the given output directory
fn extract_tar(file: &Path, output: &Path) -> anyhow::Result<()> {
if output.is_file() {
bail!("The output must be a directory.");
}
let reader = BufReader::new(File::open(file).context("Opening input file")?);
let mut archive = Archive::new(reader);
for file in archive.entries().context("Reading tar entries")? {
let mut file = file.context("Retrieving tar file entry")?;
let header = file.header();
let file_path = header.path().context("Retrieving path of file")?;
let output_path = output.join(file_path);
match header.entry_type() {
EntryType::Regular => {
if let Some(parent) = output_path.parent() {
if !parent.exists() {
tracing::debug!("Creating parent path {parent:?}");
fs::create_dir_all(parent)?;
}
}
tracing::debug!("Decompressing entry to {output_path:?}");
let mut output_file = File::create(&output_path)
.with_context(|| format!("Create output file {output_path:?}"))?;
io::copy(&mut file, &mut output_file).context("writing tar entry to output")?;
}
EntryType::Directory => {
tracing::debug!("Creating output directory {output_path:?}");
fs::create_dir_all(&output_path)
.with_context(|| format!("Failed to create output directory {output_path:?}"))?
}
other => {
tracing::debug!("Ignoring entry of type {other:?}");
}
}
}
Ok(())
}
/// Creates a fresh temporary directory and returns a file path inside it
/// together with the directory handle.
///
/// The file itself is not created. The caller must keep the returned
/// [`TempDir`] alive for as long as the path is used, because the directory is
/// removed when the handle is dropped.
///
/// # Errors
/// Returns an error when the temporary directory cannot be created.
fn create_tempfile() -> anyhow::Result<(PathBuf, TempDir)> {
    let tmp_dir = tempdir().context("Create tempdir")?;
    // The timestamp only decorates the file name (the directory is already
    // unique), so a clock set before the epoch falls back to 0 instead of
    // panicking.
    let timestamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs())
        .unwrap_or(0);
    let tmp_file = tmp_dir.path().join(format!(".extract-file-{timestamp}"));

    Ok((tmp_file, tmp_dir))
}
/// Reads the header of the given file to check if it's a tar file
fn has_tar_header(file: &Path) -> anyhow::Result<bool> {
let header = get_file_header(file).context("Get file header")?;
Ok(header.starts_with(TAR_HEADER))
}

@ -1,7 +1,6 @@
use crate::format::{FileFormat, FileObject};
use anyhow::{bail, Context};
use std::fs::File;
use std::io;
use std::io::BufReader;
use std::path::Path;
@ -12,8 +11,7 @@ pub struct XZFormat;
impl FileFormat for XZFormat {
fn parse(file: &FileObject) -> anyhow::Result<Self> {
if file.header.starts_with(XZ_HEADER) {
if !file.ext.ends_with("xz") && !file.ext.ends_with("lzma") && !file.ext.ends_with("7z")
{
if !file.ext.ends_with(".xz") && !file.ext.ends_with(".lzma") {
tracing::warn!("The file has a xz signature but not a xz extension.");
}
Ok(Self)

@ -12,7 +12,7 @@ pub struct ZipFormat;
impl FileFormat for ZipFormat {
fn parse(file: &FileObject) -> Result<Self> {
if file.header.starts_with(ZIP_HEADER) {
if !file.ext.ends_with("zip") {
if !file.ext.ends_with(".zip") {
tracing::warn!("The file has a zip signature but no zip extension.");
}
Ok(Self)

@ -1,4 +0,0 @@
/// Container Formats
/// Compression Methods
pub const GZIP_HEADER: &[u8] = &[0x1f, 0x8b];

@ -1,2 +1 @@
pub mod format;
mod headers;

@ -1,10 +1,9 @@
use clap::{Parser, Subcommand};
use extract_archive::format::{parse_format, FileFormat};
use std::fs;
use std::path::PathBuf;
use std::str::FromStr;
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_subscriber::EnvFilter;
use universal_archiver::format::{parse_format, FileFormat};
#[derive(Debug, Clone, Parser)]
#[clap(name="extract-archive", version=env!("CARGO_PKG_VERSION"), about=env!("CARGO_PKG_DESCRIPTION"))]
@ -22,7 +21,7 @@ pub enum Operation {
file: PathBuf,
/// The output folder for the given file
output: PathBuf,
output: Option<PathBuf>,
},
}
@ -33,6 +32,11 @@ fn main() {
match args.operation {
Operation::Extract { output, file } => {
let format = parse_format(&file).expect("Failed to parse file format");
let output = output.unwrap_or_else(|| {
file.file_stem()
.map(PathBuf::from)
.unwrap_or_else(|| PathBuf::from("out"))
});
format
.extract(&file, &output)
.expect("Failed to extract file");

Loading…
Cancel
Save