//! Core of the crate, where the `compress_files` and `decompress_file` functions are implemented.
//!
//! This is also where the appropriate function is called based on the detected `Subcommand`.
6 io::{self, BufReader, BufWriter, Read, Write},
19 CompressionFormat::{self, *},
23 list::{self, ListOptions},
25 self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf, try_infer_extension,
26 user_wants_to_continue_decompressing,
28 warning, Opts, QuestionPolicy, Subcommand,
/// Capacity, in bytes, handed to every `BufReader` and `BufWriter` created in this module.
///
/// A bigger buffer means fewer syscalls while streaming file contents.
const BUFFER_CAPACITY: usize = 64 * 1024;
34 fn represents_several_files(files: &[PathBuf]) -> bool {
35 let is_non_empty_dir = |path: &PathBuf| {
36 let is_non_empty = || !dir_is_empty(path);
38 path.is_dir().then(is_non_empty).unwrap_or_default()
41 files.iter().any(is_non_empty_dir) || files.len() > 1
44 /// Entrypoint of ouch, receives cli options and matches Subcommand to decide what to do
45 pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
47 Subcommand::Compress { files, output: output_path } => {
48 // Formats from path extension, like "file.tar.gz.xz" -> vec![Tar, Gzip, Lzma]
49 let mut formats = extension::extensions_from_path(&output_path);
51 if formats.is_empty() {
52 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
53 .detail("You shall supply the compression format")
54 .hint("Try adding supported extensions (see --help):")
55 .hint(format!(" ouch compress <FILES>... {}.tar.gz", to_utf(&output_path)))
56 .hint(format!(" ouch compress <FILES>... {}.zip", to_utf(&output_path)))
58 .hint("Alternatively, you can overwrite this option by using the '--format' flag:")
59 .hint(format!(" ouch compress <FILES>... {} --format tar.gz", to_utf(&output_path)));
61 return Err(error.into());
64 if !formats.get(0).map(Extension::is_archive).unwrap_or(false) && represents_several_files(&files) {
65 // This piece of code creates a suggestion for compressing multiple files
67 // Change from file.bz.xz
69 let extensions_text: String = formats.iter().map(|format| format.to_string()).collect();
71 let output_path = to_utf(output_path);
73 // Breaks if Lzma is .lz or .lzma and not .xz
74 // Or if Bzip is .bz2 and not .bz
75 let extensions_start_position = output_path.rfind(&extensions_text).unwrap();
76 let pos = extensions_start_position;
77 let empty_range = pos..pos;
78 let mut suggested_output_path = output_path.clone();
79 suggested_output_path.replace_range(empty_range, ".tar");
81 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
82 .detail("You are trying to compress multiple files.")
83 .detail(format!("The compression format '{}' cannot receive multiple files.", &formats[0]))
84 .detail("The only supported formats that archive files into an archive are .tar and .zip.")
85 .hint(format!("Try inserting '.tar' or '.zip' before '{}'.", &formats[0]))
86 .hint(format!("From: {}", output_path))
87 .hint(format!("To: {}", suggested_output_path));
89 return Err(error.into());
92 if let Some(format) = formats.iter().skip(1).find(|format| format.is_archive()) {
93 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
94 .detail(format!("Found the format '{}' in an incorrect position.", format))
95 .detail(format!("'{}' can only be used at the start of the file extension.", format))
96 .hint(format!("If you wish to compress multiple files, start the extension with '{}'.", format))
97 .hint(format!("Otherwise, remove the last '{}' from '{}'.", format, to_utf(&output_path)));
99 return Err(error.into());
102 if output_path.exists() && !utils::user_wants_to_overwrite(&output_path, question_policy)? {
103 // User does not want to overwrite this file, skip and return without any errors
107 let output_file = fs::File::create(&output_path)?;
109 if !represents_several_files(&files) {
110 // It's possible the file is already partially compressed so we don't want to compress it again
111 // `ouch compress file.tar.gz file.tar.gz.xz` should produce `file.tar.gz.xz` and not `file.tar.gz.tar.gz.xz`
112 let input_extensions = extension::extensions_from_path(&files[0]);
114 // We calculate the formats that are left if we filter out a sublist at the start of what we have that's the same as the input formats
115 let mut new_formats = Vec::with_capacity(formats.len());
116 for (inp_ext, out_ext) in input_extensions.iter().zip(&formats) {
117 if inp_ext.compression_formats == out_ext.compression_formats {
118 new_formats.push(out_ext.clone());
122 .zip(out_ext.compression_formats.iter())
123 .all(|(inp, out)| inp == out)
125 let new_ext = Extension::new(
126 &out_ext.compression_formats[..inp_ext.compression_formats.len()],
127 &out_ext.display_text,
129 new_formats.push(new_ext);
133 // If the input is a sublist at the start of `formats` then remove the extensions
134 // Note: If input_extensions is empty then it will make `formats` empty too, which we don't want
135 if !input_extensions.is_empty() && new_formats != formats {
137 // We checked above that input_extensions isn't empty, so files[0] has an extension.
139 // Path::extension says: "if there is no file_name, then there is no extension".
140 // Using DeMorgan's law: "if there is extension, then there is file_name".
142 "Partial compression detected. Compressing {} into {}",
143 to_utf(files[0].as_path().file_name().unwrap()),
146 formats = new_formats;
149 let compress_result = compress_files(files, formats, output_file);
151 // If any error occurred, delete incomplete file
152 if compress_result.is_err() {
153 // Print an extra alert message pointing out that we left a possibly
154 // CORRUPTED FILE at `output_path`
155 if let Err(err) = fs::remove_file(&output_path) {
156 eprintln!("{red}FATAL ERROR:\n", red = *colors::RED);
157 eprintln!(" Please manually delete '{}'.", to_utf(&output_path));
158 eprintln!(" Compression failed and we could not delete '{}'.", to_utf(&output_path),);
159 eprintln!(" Error:{reset} {}{red}.{reset}\n", err, reset = *colors::RESET, red = *colors::RED);
162 info!("Successfully compressed '{}'.", to_utf(output_path));
167 Subcommand::Decompress { files, output_dir } => {
168 let mut output_paths = vec![];
169 let mut formats = vec![];
171 for path in files.iter() {
172 let (file_output_path, file_formats) = extension::separate_known_extensions_from_name(path);
173 output_paths.push(file_output_path);
174 formats.push(file_formats);
177 if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
181 let files_missing_format: Vec<PathBuf> = files
184 .filter(|(_, formats)| formats.is_empty())
185 .map(|(input_path, _)| PathBuf::from(input_path))
188 if !files_missing_format.is_empty() {
189 let error = FinalError::with_title("Cannot decompress files without extensions")
191 "Files without supported extensions: {}",
192 concatenate_list_of_os_str(&files_missing_format)
194 .detail("Decompression formats are detected automatically by the file extension")
195 .hint("Provide a file with a supported extension:")
196 .hint(" ouch decompress example.tar.gz")
198 .hint("Or overwrite this option with the '--format' flag:")
199 .hint(format!(" ouch decompress {} --format tar.gz", to_utf(&files_missing_format[0])));
201 return Err(error.into());
204 // From Option<PathBuf> to Option<&Path>
205 let output_dir = output_dir.as_ref().map(|path| path.as_ref());
207 for ((input_path, formats), file_name) in files.iter().zip(formats).zip(output_paths) {
208 decompress_file(input_path, formats, output_dir, file_name, question_policy)?;
211 Subcommand::List { archives: files, tree } => {
212 let mut formats = vec![];
214 for path in files.iter() {
215 let (_, file_formats) = extension::separate_known_extensions_from_name(path);
216 formats.push(file_formats);
219 if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
223 let not_archives: Vec<PathBuf> = files
226 .filter(|(_, formats)| !formats.get(0).map(Extension::is_archive).unwrap_or(false))
227 .map(|(path, _)| path.clone())
231 if !not_archives.is_empty() {
232 eprintln!("Some file you asked ouch to list the contents of is not an archive.");
233 for file in ¬_archives {
234 eprintln!("Could not list {}.", to_utf(file));
237 "Dev note: add this error variant and pass the Vec to it, all the files \
238 lacking extension shall be shown: {:#?}.",
243 let list_options = ListOptions { tree };
245 for (i, (archive_path, formats)) in files.iter().zip(formats).enumerate() {
249 let formats = formats.iter().flat_map(Extension::iter).map(Clone::clone).collect();
250 list_archive_contents(archive_path, formats, list_options)?;
257 // Compress files into an `output_file`
259 // files are the list of paths to be compressed: ["dir/file1.txt", "dir/file2.txt"]
260 // formats contains each format necessary for compression, example: [Tar, Gz] (in compression order)
261 // output_file is the resulting compressed file name, example: "compressed.tar.gz"
262 fn compress_files(files: Vec<PathBuf>, formats: Vec<Extension>, output_file: fs::File) -> crate::Result<()> {
263 let file_writer = BufWriter::with_capacity(BUFFER_CAPACITY, output_file);
265 let mut writer: Box<dyn Write> = Box::new(file_writer);
267 // Grab previous encoder and wrap it inside of a new one
268 let chain_writer_encoder = |format: &CompressionFormat, encoder: Box<dyn Write>| -> crate::Result<Box<dyn Write>> {
269 let encoder: Box<dyn Write> = match format {
270 Gzip => Box::new(flate2::write::GzEncoder::new(encoder, Default::default())),
271 Bzip => Box::new(bzip2::write::BzEncoder::new(encoder, Default::default())),
272 Lz4 => Box::new(lzzzz::lz4f::WriteCompressor::new(encoder, Default::default())?),
273 Lzma => Box::new(xz2::write::XzEncoder::new(encoder, 6)),
275 let zstd_encoder = zstd::stream::write::Encoder::new(encoder, Default::default());
277 // Encoder::new() can only fail if `level` is invalid, but Default::default()
278 // is guaranteed to be valid
279 Box::new(zstd_encoder.unwrap().auto_finish())
281 Tar | Zip => unreachable!(),
286 for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
287 writer = chain_writer_encoder(format, writer)?;
290 match formats[0].compression_formats[0] {
291 Gzip | Bzip | Lz4 | Lzma | Zstd => {
292 writer = chain_writer_encoder(&formats[0].compression_formats[0], writer)?;
293 let mut reader = fs::File::open(&files[0]).unwrap();
294 io::copy(&mut reader, &mut writer)?;
297 let mut writer = archive::tar::build_archive_from_paths(&files, writer)?;
301 eprintln!("{yellow}Warning:{reset}", yellow = *colors::YELLOW, reset = *colors::RESET);
302 eprintln!("\tCompressing .zip entirely in memory.");
303 eprintln!("\tIf the file is too big, your PC might freeze!");
305 "\tThis is a limitation for formats like '{}'.",
306 formats.iter().map(|format| format.to_string()).collect::<String>()
308 eprintln!("\tThe design of .zip makes it impossible to compress via stream.");
310 let mut vec_buffer = io::Cursor::new(vec![]);
311 archive::zip::build_archive_from_paths(&files, &mut vec_buffer)?;
312 let vec_buffer = vec_buffer.into_inner();
313 io::copy(&mut vec_buffer.as_slice(), &mut writer)?;
322 // File at input_file_path is opened for reading, example: "archive.tar.gz"
323 // formats contains each format necessary for decompression, example: [Gz, Tar] (in decompression order)
324 // output_dir it's where the file will be decompressed to
325 // file_name is only used when extracting single file formats, no archive formats like .tar or .zip
327 input_file_path: &Path,
328 formats: Vec<Extension>,
329 output_dir: Option<&Path>,
331 question_policy: QuestionPolicy,
332 ) -> crate::Result<()> {
333 // TODO: improve error message
334 let reader = fs::File::open(&input_file_path)?;
336 // Output path is used by single file formats
338 if let Some(output_dir) = output_dir { output_dir.join(file_name) } else { file_name.to_path_buf() };
340 // Output folder is used by archive file formats (zip and tar)
341 let output_dir = output_dir.unwrap_or_else(|| Path::new("."));
343 // Zip archives are special, because they require io::Seek, so it requires it's logic separated
344 // from decoder chaining.
346 // This is the only case where we can read and unpack it directly, without having to do
347 // in-memory decompression/copying first.
349 // Any other Zip decompression done can take up the whole RAM and freeze ouch.
350 if formats.len() == 1 && *formats[0].compression_formats == [Zip] {
351 utils::create_dir_if_non_existent(output_dir)?;
352 let zip_archive = zip::ZipArchive::new(reader)?;
353 let _files = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?;
354 info!("Successfully decompressed archive in {}.", nice_directory_display(output_dir));
358 // Will be used in decoder chaining
359 let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader);
360 let mut reader: Box<dyn Read> = Box::new(reader);
362 // Grab previous decoder and wrap it inside of a new one
363 let chain_reader_decoder = |format: &CompressionFormat, decoder: Box<dyn Read>| -> crate::Result<Box<dyn Read>> {
364 let decoder: Box<dyn Read> = match format {
365 Gzip => Box::new(flate2::read::GzDecoder::new(decoder)),
366 Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)),
367 Lz4 => Box::new(lzzzz::lz4f::ReadDecompressor::new(decoder)?),
368 Lzma => Box::new(xz2::read::XzDecoder::new(decoder)),
369 Zstd => Box::new(zstd::stream::Decoder::new(decoder)?),
370 Tar | Zip => unreachable!(),
375 for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
376 reader = chain_reader_decoder(format, reader)?;
379 utils::create_dir_if_non_existent(output_dir)?;
383 match formats[0].compression_formats[0] {
384 Gzip | Bzip | Lz4 | Lzma | Zstd => {
385 reader = chain_reader_decoder(&formats[0].compression_formats[0], reader)?;
387 let writer = utils::create_or_ask_overwrite(&output_path, question_policy)?;
388 if writer.is_none() {
389 // Means that the user doesn't want to overwrite
392 let mut writer = writer.unwrap();
394 io::copy(&mut reader, &mut writer)?;
395 files_unpacked = vec![output_path];
398 files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
401 eprintln!("Compressing first into .zip.");
402 eprintln!("Warning: .zip archives with extra extensions have a downside.");
404 "The only way is loading everything into the RAM while compressing, and then write everything down."
406 eprintln!("this means that by compressing .zip with extra compression formats, you can run out of RAM if the file is too large!");
408 let mut vec = vec![];
409 io::copy(&mut reader, &mut vec)?;
410 let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?;
412 files_unpacked = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?;
416 info!("Successfully decompressed archive in {}.", nice_directory_display(output_dir));
417 info!("Files unpacked: {}", files_unpacked.len());
422 // File at input_file_path is opened for reading, example: "archive.tar.gz"
423 // formats contains each format necessary for decompression, example: [Gz, Tar] (in decompression order)
424 fn list_archive_contents(
426 formats: Vec<CompressionFormat>,
427 list_options: ListOptions,
428 ) -> crate::Result<()> {
429 // TODO: improve error message
430 let reader = fs::File::open(&archive_path)?;
432 // Zip archives are special, because they require io::Seek, so it requires it's logic separated
433 // from decoder chaining.
435 // This is the only case where we can read and unpack it directly, without having to do
436 // in-memory decompression/copying first.
438 // Any other Zip decompression done can take up the whole RAM and freeze ouch.
439 if let [Zip] = *formats.as_slice() {
440 let zip_archive = zip::ZipArchive::new(reader)?;
441 let files = crate::archive::zip::list_archive(zip_archive)?;
442 list::list_files(archive_path, files, list_options);
446 // Will be used in decoder chaining
447 let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader);
448 let mut reader: Box<dyn Read> = Box::new(reader);
450 // Grab previous decoder and wrap it inside of a new one
451 let chain_reader_decoder = |format: &CompressionFormat, decoder: Box<dyn Read>| -> crate::Result<Box<dyn Read>> {
452 let decoder: Box<dyn Read> = match format {
453 Gzip => Box::new(flate2::read::GzDecoder::new(decoder)),
454 Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)),
455 Lz4 => Box::new(lzzzz::lz4f::ReadDecompressor::new(decoder)?),
456 Lzma => Box::new(xz2::read::XzDecoder::new(decoder)),
457 Zstd => Box::new(zstd::stream::Decoder::new(decoder)?),
458 Tar | Zip => unreachable!(),
463 for format in formats.iter().skip(1).rev() {
464 reader = chain_reader_decoder(format, reader)?;
467 let files = match formats[0] {
468 Tar => crate::archive::tar::list_archive(reader)?,
470 eprintln!("Listing files from zip archive.");
471 eprintln!("Warning: .zip archives with extra extensions have a downside.");
472 eprintln!("The only way is loading everything into the RAM while compressing, and then reading the archive contents.");
473 eprintln!("this means that by compressing .zip with extra compression formats, you can run out of RAM if the file is too large!");
475 let mut vec = vec![];
476 io::copy(&mut reader, &mut vec)?;
477 let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?;
479 crate::archive::zip::list_archive(zip_archive)?
481 Gzip | Bzip | Lz4 | Lzma | Zstd => {
482 panic!("Not an archive! This should never happen, if it does, something is wrong with `CompressionFormat::is_archive()`. Please report this error!");
485 list::list_files(archive_path, files, list_options);
491 formats: &mut Vec<Vec<Extension>>,
492 question_policy: QuestionPolicy,
493 ) -> crate::Result<ControlFlow<()>> {
494 for (path, format) in files.iter().zip(formats.iter_mut()) {
495 if format.is_empty() {
496 // File with no extension
497 // Try to detect it automatically and prompt the user about it
498 if let Some(detected_format) = try_infer_extension(path) {
499 info!("Detected file: `{}` extension as `{}`", path.display(), detected_format);
500 if user_wants_to_continue_decompressing(path, question_policy)? {
501 format.push(detected_format);
503 return Ok(ControlFlow::Break(()));
506 } else if let Some(detected_format) = try_infer_extension(path) {
507 // File ending with extension
508 // Try to detect the extension and warn the user if it differs from the written one
509 let outer_ext = format.iter().next().unwrap();
510 if outer_ext != &detected_format {
512 "The file extension: `{}` differ from the detected extension: `{}`",
516 if !user_wants_to_continue_decompressing(path, question_policy)? {
517 return Ok(ControlFlow::Break(()));
521 // NOTE: If this actually produces no false positives, we can upgrade it in the future
522 // to a warning and ask the user if he wants to continue decompressing.
523 info!("Could not detect the extension of `{}`", path.display());
526 Ok(ControlFlow::Continue(()))