1 //! Our representation of all the supported compression formats.
3 use std::{ffi::OsStr, fmt, path::Path};
7 use self::CompressionFormat::*;
8 use crate::{error::Error, warning};
10 /// A wrapper around `CompressionFormat` that allows combinations like `tgz`
11 #[derive(Debug, Clone, Eq)]
13 pub struct Extension {
14 /// One extension like "tgz" can be made of multiple CompressionFormats ([Tar, Gz])
15 pub compression_formats: &'static [CompressionFormat],
16 /// The input text for this extension, like "tgz", "tar" or "xz"
20 // The display_text should be ignored when comparing extensions
21 impl PartialEq for Extension {
22 fn eq(&self, other: &Self) -> bool {
23 self.compression_formats == other.compression_formats
29 /// Will panic if `formats` is empty
30 pub fn new(formats: &'static [CompressionFormat], text: impl ToString) -> Self {
31 assert!(!formats.is_empty());
33 compression_formats: formats,
34 display_text: text.to_string(),
38 /// Checks if the first format in `compression_formats` is an archive
39 pub fn is_archive(&self) -> bool {
40 // Safety: we check that `compression_formats` is not empty in `Self::new`
41 self.compression_formats[0].is_archive_format()
45 impl fmt::Display for Extension {
46 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
47 self.display_text.fmt(f)
/// Accepted extensions for input and output
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum CompressionFormat {
    /// .gz
    Gzip,
    /// .bz, .bz2
    Bzip,
    /// .lz4
    Lz4,
    /// .xz, .lzma
    Lzma,
    /// .sz
    Snappy,
    /// tar, tgz, tbz, tbz2, txz, tlz4, tlzma, tsz, tzst
    Tar,
    /// .zst
    Zstd,
    /// .zip
    Zip,
}

impl CompressionFormat {
    /// Currently supported archive formats are .tar (and aliases to it) and .zip
    fn is_archive_format(&self) -> bool {
        // Keep this match like that without a wildcard `_` so we don't forget to update it
        match self {
            Self::Tar | Self::Zip => true,
            Self::Gzip | Self::Bzip | Self::Lz4 | Self::Lzma | Self::Snappy | Self::Zstd => false,
        }
    }
}
/// Extension spellings recognised by this module.
///
/// Checked against a file's stem to warn when the stem itself is an extension name.
/// Every byte-string arm of `to_extension` must also be listed here.
pub const SUPPORTED_EXTENSIONS: &[&str] = &[
    "tar", "tgz", "tbz", "tbz2", "tlz4", "txz", "tlzma", "tsz", "tzst", "zip", "bz", "bz2", "gz", "lz4", "xz",
    "lzma", "sz", "zst",
];
93 fn to_extension(ext: &[u8]) -> Option<Extension> {
97 b"tgz" => &[Tar, Gzip],
98 b"tbz" | b"tbz2" => &[Tar, Bzip],
99 b"tlz4" => &[Tar, Lz4],
100 b"txz" | b"tlzma" => &[Tar, Lzma],
101 b"tsz" => &[Tar, Snappy],
102 b"tzst" => &[Tar, Zstd],
104 b"bz" | b"bz2" => &[Bzip],
107 b"xz" | b"lzma" => &[Lzma],
116 fn split_extension<'a>(name: &mut &'a [u8]) -> Option<&'a [u8]> {
117 let (new_name, ext) = name.rsplit_once_str(b".")?;
118 if matches!(new_name, b"" | b"." | b"..") {
125 pub fn parse_format(fmt: &OsStr) -> crate::Result<Vec<Extension>> {
126 let fmt = <[u8] as ByteSlice>::from_os_str(fmt).ok_or_else(|| Error::InvalidFormat {
127 reason: "Invalid UTF-8".into(),
130 let mut extensions = Vec::new();
131 for extension in fmt.split_str(b".") {
132 let extension = to_extension(extension).ok_or_else(|| Error::InvalidFormat {
133 reason: format!("Unsupported extension: {}", extension.to_str_lossy()),
135 extensions.push(extension);
141 /// Extracts extensions from a path.
143 /// Returns both the remaining path and the list of extension objects
144 pub fn separate_known_extensions_from_name(path: &Path) -> (&Path, Vec<Extension>) {
145 let mut extensions = vec![];
147 let Some(mut name) = path.file_name().and_then(<[u8] as ByteSlice>::from_os_str) else {
148 return (path, extensions);
151 // While there is known extensions at the tail, grab them
152 while let Some(extension) = split_extension(&mut name).and_then(to_extension) {
153 extensions.insert(0, extension);
156 if let Ok(name) = name.to_str() {
157 let file_stem = name.trim_matches('.');
158 if SUPPORTED_EXTENSIONS.contains(&file_stem) {
159 warning!("Received a file with name '{file_stem}', but {file_stem} was expected as the extension.");
163 (name.to_path().unwrap(), extensions)
166 /// Extracts extensions from a path, return only the list of extension objects
167 pub fn extensions_from_path(path: &Path) -> Vec<Extension> {
168 let (_, extensions) = separate_known_extensions_from_name(path);
/// Checks that chained, aliased and absent extensions are read from a file name.
#[test]
fn test_extensions_from_path() {
    use CompressionFormat::*;

    // Chained extensions come back in reading order
    let path = Path::new("bolovo.tar.gz");
    let extensions: Vec<Extension> = extensions_from_path(path);
    let formats: Vec<CompressionFormat> = flatten_compression_formats(&extensions);
    assert_eq!(formats, vec![Tar, Gzip]);

    // A single alias expands to several formats
    let extensions = extensions_from_path(Path::new("bolovo.tbz2"));
    assert_eq!(flatten_compression_formats(&extensions), vec![Tar, Bzip]);

    // No extension at all
    assert!(extensions_from_path(Path::new("bolovo")).is_empty());
}
188 // Panics if formats has an empty list of compression formats
189 pub fn split_first_compression_format(formats: &[Extension]) -> (CompressionFormat, Vec<CompressionFormat>) {
190 let mut extensions: Vec<CompressionFormat> = flatten_compression_formats(formats);
191 let first_extension = extensions.remove(0);
192 (first_extension, extensions)
195 pub fn flatten_compression_formats(extensions: &[Extension]) -> Vec<CompressionFormat> {
198 .flat_map(|extension| extension.compression_formats.iter())