1 //! Core of the crate, where the `compress_files` and `decompress_file` functions are implemented
//! It is also where the right function is called based on the detected `Subcommand`.
6 io::{self, BufReader, BufWriter, Read, Write},
19 CompressionFormat::{self, *},
23 list::{self, ListOptions},
25 self, concatenate_os_str_list, dir_is_empty, nice_directory_display, to_utf, try_infer_extension,
26 user_wants_to_continue_decompressing,
28 warning, Opts, QuestionPolicy, Subcommand,
// Used in BufReader and BufWriter to perform fewer syscalls
32 const BUFFER_CAPACITY: usize = 1024 * 64;
34 fn represents_several_files(files: &[PathBuf]) -> bool {
35 let is_non_empty_dir = |path: &PathBuf| {
36 let is_non_empty = || !dir_is_empty(path);
38 path.is_dir().then(is_non_empty).unwrap_or_default()
41 files.iter().any(is_non_empty_dir) || files.len() > 1
44 /// Entrypoint of ouch, receives cli options and matches Subcommand to decide what to do
///
/// The arms visible here handle `Subcommand::Compress`, `Subcommand::Decompress` and
/// `Subcommand::List`.
///
/// # Errors
///
/// Returns a `FinalError`-based error when the input is rejected (no files left to compress,
/// no usable output format, an archive format in the wrong position, inputs without a known
/// format, or non-archives passed for listing), and propagates every error raised through `?`.
//
// NOTE(review): this listing appears to be missing short lines (the `match` header, closing
// braces, a few early returns and builder calls). Confirm against the repository copy before
// changing any code in this function.
45 pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
// --- Compress: validate the extension chain of `output_path`, then write the compressed output.
47 Subcommand::Compress { mut files, output: output_path } => {
48 // If the output_path file exists and is the same as some of the input files, warn the user and skip those inputs (in order to avoid compression recursion)
49 if output_path.exists() {
50 clean_input_files_if_needed(&mut files, &fs::canonicalize(&output_path)?);
52 // After cleaning, if there are no input files left, exit
54 return Err(FinalError::with_title("No files to compress").into());
57 // Formats from path extension, like "file.tar.gz.xz" -> vec![Tar, Gzip, Lzma]
58 let mut formats = extension::extensions_from_path(&output_path);
// Without any recognised extension there is nothing to compress to; explain how to pick one.
60 if formats.is_empty() {
61 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
62 .detail("You shall supply the compression format")
63 .hint("Try adding supported extensions (see --help):")
64 .hint(format!(" ouch compress <FILES>... {}.tar.gz", to_utf(&output_path)))
65 .hint(format!(" ouch compress <FILES>... {}.zip", to_utf(&output_path)))
67 .hint("Alternatively, you can overwrite this option by using the '--format' flag:")
68 .hint(format!(" ouch compress <FILES>... {} --format tar.gz", to_utf(&output_path)));
70 return Err(error.into());
// Several inputs need an archive format first; otherwise reject and suggest inserting `.tar`.
73 if !formats.get(0).map(Extension::is_archive).unwrap_or(false) && represents_several_files(&files) {
74 // This piece of code creates a suggestion for compressing multiple files
76 // Change from file.bz.xz
78 let extensions_text: String = formats.iter().map(|format| format.to_string()).collect();
80 let output_path = to_utf(output_path);
82 // Breaks if Lzma is .lz or .lzma and not .xz
83 // Or if Bzip is .bz2 and not .bz
// NOTE(review): `rfind(..).unwrap()` panics whenever the joined display text of the formats is
// not a substring of the output path (the situation the two comments above describe).
84 let extensions_start_position = output_path.rfind(&extensions_text).unwrap();
85 let pos = extensions_start_position;
86 let empty_range = pos..pos;
87 let mut suggested_output_path = output_path.clone();
// Replacing an empty range inserts ".tar" right before the detected extensions.
88 suggested_output_path.replace_range(empty_range, ".tar");
90 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
91 .detail("You are trying to compress multiple files.")
92 .detail(format!("The compression format '{}' cannot receive multiple files.", &formats[0]))
93 .detail("The only supported formats that archive files into an archive are .tar and .zip.")
94 .hint(format!("Try inserting '.tar' or '.zip' before '{}'.", &formats[0]))
95 .hint(format!("From: {}", output_path))
96 .hint(format!("To: {}", suggested_output_path));
98 return Err(error.into());
// Archive formats are only accepted as the first extension; one found later is an error.
101 if let Some(format) = formats.iter().skip(1).find(|format| format.is_archive()) {
102 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
103 .detail(format!("Found the format '{}' in an incorrect position.", format))
104 .detail(format!("'{}' can only be used at the start of the file extension.", format))
105 .hint(format!("If you wish to compress multiple files, start the extension with '{}'.", format))
106 .hint(format!("Otherwise, remove the last '{}' from '{}'.", format, to_utf(&output_path)));
108 return Err(error.into());
111 if output_path.exists() && !utils::user_wants_to_overwrite(&output_path, question_policy)? {
112 // User does not want to overwrite this file, skip and return without any errors
// The output file is created before compressing; it is removed again below if compression fails.
116 let output_file = fs::File::create(&output_path)?;
118 if !represents_several_files(&files) {
119 // It's possible the file is already partially compressed so we don't want to compress it again
120 // `ouch compress file.tar.gz file.tar.gz.xz` should produce `file.tar.gz.xz` and not `file.tar.gz.tar.gz.xz`
121 let input_extensions = extension::extensions_from_path(&files[0]);
123 // We calculate the formats that are left if we filter out a sublist at the start of what we have that's the same as the input formats
124 let mut new_formats = Vec::with_capacity(formats.len());
// Walk input and output extensions pairwise, in order.
125 for (inp_ext, out_ext) in input_extensions.iter().zip(&formats) {
126 if inp_ext.compression_formats == out_ext.compression_formats {
127 new_formats.push(out_ext.clone());
131 .zip(out_ext.compression_formats.iter())
132 .all(|(inp, out)| inp == out)
134 let new_ext = Extension::new(
135 &out_ext.compression_formats[..inp_ext.compression_formats.len()],
136 &out_ext.display_text,
138 new_formats.push(new_ext);
142 // If the input is a sublist at the start of `formats` then remove the extensions
143 // Note: If input_extensions is empty then it will make `formats` empty too, which we don't want
144 if !input_extensions.is_empty() && new_formats != formats {
146 // We checked above that input_extensions isn't empty, so files[0] has an extension.
148 // Path::extension says: "if there is no file_name, then there is no extension".
149 // Contrapositive statement: "if there is extension, then there is file_name".
151 "Partial compression detected. Compressing {} into {}",
152 to_utf(files[0].as_path().file_name().unwrap()),
155 formats = new_formats;
158 let compress_result = compress_files(files, formats, output_file);
160 // If any error occurred, delete incomplete file
161 if compress_result.is_err() {
162 // Print an extra alert message pointing out that we left a possibly
163 // CORRUPTED FILE at `output_path`
164 if let Err(err) = fs::remove_file(&output_path) {
165 eprintln!("{red}FATAL ERROR:\n", red = *colors::RED);
166 eprintln!(" Please manually delete '{}'.", to_utf(&output_path));
167 eprintln!(" Compression failed and we could not delete '{}'.", to_utf(&output_path),);
168 eprintln!(" Error:{reset} {}{red}.{reset}\n", err, reset = *colors::RESET, red = *colors::RED);
171 info!("Successfully compressed '{}'.", to_utf(output_path));
// --- Decompress: infer the formats of every input from its name, then unpack each one.
176 Subcommand::Decompress { files, output_dir } => {
// `output_paths[i]` and `formats[i]` both belong to `files[i]`.
177 let mut output_paths = vec![];
178 let mut formats = vec![];
180 for path in files.iter() {
181 let (file_output_path, file_formats) = extension::separate_known_extensions_from_name(path);
182 output_paths.push(file_output_path);
183 formats.push(file_formats);
// `check_mime_type` may add a detected format to `formats`.
// NOTE(review): the body of this `if` is not visible here; presumably `Break` ends the command early.
186 if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
// Inputs that still have no format cannot be decompressed; they are reported together.
190 let files_missing_format: Vec<PathBuf> = files
193 .filter(|(_, formats)| formats.is_empty())
194 .map(|(input_path, _)| PathBuf::from(input_path))
197 if !files_missing_format.is_empty() {
198 let error = FinalError::with_title("Cannot decompress files without extensions")
200 "Files without supported extensions: {}",
201 concatenate_os_str_list(&files_missing_format)
203 .detail("Decompression formats are detected automatically by the file extension")
204 .hint("Provide a file with a supported extension:")
205 .hint(" ouch decompress example.tar.gz")
207 .hint("Or overwrite this option with the '--format' flag:")
208 .hint(format!(" ouch decompress {} --format tar.gz", to_utf(&files_missing_format[0])));
210 return Err(error.into());
213 // The directory that will contain the output files
214 // We default to the current directory if the user didn't specify an output directory with --dir
215 let output_dir = if let Some(dir) = output_dir {
216 if !utils::clear_path(&dir, question_policy)? {
217 // User doesn't want to overwrite
220 utils::create_dir_if_non_existent(&dir)?;
// Decompress the inputs one by one; the first failure aborts the whole command through `?`.
226 for ((input_path, formats), file_name) in files.iter().zip(formats).zip(output_paths) {
227 let output_file_path = output_dir.join(file_name); // Path used by single file format archives
228 decompress_file(input_path, formats, &output_dir, output_file_path, question_policy)?;
// --- List: every input must start with an archive format; each one is listed in turn.
231 Subcommand::List { archives: files, tree } => {
232 let mut formats = vec![];
234 for path in files.iter() {
235 let (_, file_formats) = extension::separate_known_extensions_from_name(path);
236 formats.push(file_formats);
239 if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
// Collect the inputs whose first format is missing or is not an archive format.
243 let not_archives: Vec<PathBuf> = files
246 .filter(|(_, formats)| !formats.get(0).map(Extension::is_archive).unwrap_or(false))
247 .map(|(path, _)| path.clone())
250 if !not_archives.is_empty() {
251 let error = FinalError::with_title("Cannot list archive contents")
252 .detail("Only archives can have their contents listed")
253 .detail(format!("Files are not archives: {}", concatenate_os_str_list(¬_archives)));
255 return Err(error.into());
258 let list_options = ListOptions { tree };
260 for (i, (archive_path, formats)) in files.iter().zip(formats).enumerate() {
// Flatten each `Extension` into its individual `CompressionFormat`s before listing.
264 let formats = formats.iter().flat_map(Extension::iter).map(Clone::clone).collect();
265 list_archive_contents(archive_path, formats, list_options)?;
272 // Compress files into an `output_file`
274 // files are the list of paths to be compressed: ["dir/file1.txt", "dir/file2.txt"]
275 // formats contains each format necessary for compression, example: [Tar, Gz] (in compression order)
276 // output_file is the resulting compressed file name, example: "compressed.tar.gz"
277 fn compress_files(files: Vec<PathBuf>, formats: Vec<Extension>, output_file: fs::File) -> crate::Result<()> {
278 let file_writer = BufWriter::with_capacity(BUFFER_CAPACITY, output_file);
280 let mut writer: Box<dyn Write> = Box::new(file_writer);
282 // Grab previous encoder and wrap it inside of a new one
283 let chain_writer_encoder = |format: &CompressionFormat, encoder: Box<dyn Write>| -> crate::Result<Box<dyn Write>> {
284 let encoder: Box<dyn Write> = match format {
285 Gzip => Box::new(flate2::write::GzEncoder::new(encoder, Default::default())),
286 Bzip => Box::new(bzip2::write::BzEncoder::new(encoder, Default::default())),
287 Lz4 => Box::new(lzzzz::lz4f::WriteCompressor::new(encoder, Default::default())?),
288 Lzma => Box::new(xz2::write::XzEncoder::new(encoder, 6)),
290 let zstd_encoder = zstd::stream::write::Encoder::new(encoder, Default::default());
292 // Encoder::new() can only fail if `level` is invalid, but Default::default()
293 // is guaranteed to be valid
294 Box::new(zstd_encoder.unwrap().auto_finish())
296 Tar | Zip => unreachable!(),
301 for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
302 writer = chain_writer_encoder(format, writer)?;
305 match formats[0].compression_formats[0] {
306 Gzip | Bzip | Lz4 | Lzma | Zstd => {
307 writer = chain_writer_encoder(&formats[0].compression_formats[0], writer)?;
308 let mut reader = fs::File::open(&files[0]).unwrap();
309 io::copy(&mut reader, &mut writer)?;
312 let mut writer = archive::tar::build_archive_from_paths(&files, writer)?;
316 eprintln!("{yellow}Warning:{reset}", yellow = *colors::YELLOW, reset = *colors::RESET);
317 eprintln!("\tCompressing .zip entirely in memory.");
318 eprintln!("\tIf the file is too big, your PC might freeze!");
320 "\tThis is a limitation for formats like '{}'.",
321 formats.iter().map(|format| format.to_string()).collect::<String>()
323 eprintln!("\tThe design of .zip makes it impossible to compress via stream.");
325 let mut vec_buffer = io::Cursor::new(vec![]);
326 archive::zip::build_archive_from_paths(&files, &mut vec_buffer)?;
327 let vec_buffer = vec_buffer.into_inner();
328 io::copy(&mut vec_buffer.as_slice(), &mut writer)?;
337 // File at input_file_path is opened for reading, example: "archive.tar.gz"
338 // formats contains each format necessary for decompression, example: [Gz, Tar] (in decompression order)
339 // output_dir it's where the file will be decompressed to, this function assumes that the directory exists
340 // output_file_path is only used when extracting single file formats, not archive formats like .tar or .zip
342 input_file_path: &Path,
343 formats: Vec<Extension>,
345 output_file_path: PathBuf,
346 question_policy: QuestionPolicy,
347 ) -> crate::Result<()> {
348 let reader = fs::File::open(&input_file_path)?;
349 // Zip archives are special, because they require io::Seek, so it requires it's logic separated
350 // from decoder chaining.
352 // This is the only case where we can read and unpack it directly, without having to do
353 // in-memory decompression/copying first.
355 // Any other Zip decompression done can take up the whole RAM and freeze ouch.
356 if formats.len() == 1 && *formats[0].compression_formats == [Zip] {
357 let zip_archive = zip::ZipArchive::new(reader)?;
358 let _files = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?;
359 info!("Successfully decompressed archive in {}.", nice_directory_display(output_dir));
363 // Will be used in decoder chaining
364 let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader);
365 let mut reader: Box<dyn Read> = Box::new(reader);
367 // Grab previous decoder and wrap it inside of a new one
368 let chain_reader_decoder = |format: &CompressionFormat, decoder: Box<dyn Read>| -> crate::Result<Box<dyn Read>> {
369 let decoder: Box<dyn Read> = match format {
370 Gzip => Box::new(flate2::read::GzDecoder::new(decoder)),
371 Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)),
372 Lz4 => Box::new(lzzzz::lz4f::ReadDecompressor::new(decoder)?),
373 Lzma => Box::new(xz2::read::XzDecoder::new(decoder)),
374 Zstd => Box::new(zstd::stream::Decoder::new(decoder)?),
375 Tar | Zip => unreachable!(),
380 for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
381 reader = chain_reader_decoder(format, reader)?;
385 match formats[0].compression_formats[0] {
386 Gzip | Bzip | Lz4 | Lzma | Zstd => {
387 reader = chain_reader_decoder(&formats[0].compression_formats[0], reader)?;
389 let writer = utils::create_or_ask_overwrite(&output_file_path, question_policy)?;
390 if writer.is_none() {
391 // Means that the user doesn't want to overwrite
394 let mut writer = writer.unwrap();
396 io::copy(&mut reader, &mut writer)?;
397 files_unpacked = vec![output_file_path];
400 files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
403 eprintln!("Compressing first into .zip.");
404 eprintln!("Warning: .zip archives with extra extensions have a downside.");
406 "The only way is loading everything into the RAM while compressing, and then write everything down."
408 eprintln!("this means that by compressing .zip with extra compression formats, you can run out of RAM if the file is too large!");
410 let mut vec = vec![];
411 io::copy(&mut reader, &mut vec)?;
412 let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?;
414 files_unpacked = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?;
418 info!("Successfully decompressed archive in {}.", nice_directory_display(output_dir));
419 info!("Files unpacked: {}", files_unpacked.len());
424 // File at input_file_path is opened for reading, example: "archive.tar.gz"
425 // formats contains each format necessary for decompression, example: [Gz, Tar] (in decompression order)
426 fn list_archive_contents(
428 formats: Vec<CompressionFormat>,
429 list_options: ListOptions,
430 ) -> crate::Result<()> {
431 let reader = fs::File::open(&archive_path)?;
433 // Zip archives are special, because they require io::Seek, so it requires it's logic separated
434 // from decoder chaining.
436 // This is the only case where we can read and unpack it directly, without having to do
437 // in-memory decompression/copying first.
439 // Any other Zip decompression done can take up the whole RAM and freeze ouch.
440 if let [Zip] = *formats.as_slice() {
441 let zip_archive = zip::ZipArchive::new(reader)?;
442 let files = crate::archive::zip::list_archive(zip_archive)?;
443 list::list_files(archive_path, files, list_options);
447 // Will be used in decoder chaining
448 let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader);
449 let mut reader: Box<dyn Read> = Box::new(reader);
451 // Grab previous decoder and wrap it inside of a new one
452 let chain_reader_decoder = |format: &CompressionFormat, decoder: Box<dyn Read>| -> crate::Result<Box<dyn Read>> {
453 let decoder: Box<dyn Read> = match format {
454 Gzip => Box::new(flate2::read::GzDecoder::new(decoder)),
455 Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)),
456 Lz4 => Box::new(lzzzz::lz4f::ReadDecompressor::new(decoder)?),
457 Lzma => Box::new(xz2::read::XzDecoder::new(decoder)),
458 Zstd => Box::new(zstd::stream::Decoder::new(decoder)?),
459 Tar | Zip => unreachable!(),
464 for format in formats.iter().skip(1).rev() {
465 reader = chain_reader_decoder(format, reader)?;
468 let files = match formats[0] {
469 Tar => crate::archive::tar::list_archive(reader)?,
471 eprintln!("Listing files from zip archive.");
472 eprintln!("Warning: .zip archives with extra extensions have a downside.");
473 eprintln!("The only way is loading everything into the RAM while compressing, and then reading the archive contents.");
474 eprintln!("this means that by compressing .zip with extra compression formats, you can run out of RAM if the file is too large!");
476 let mut vec = vec![];
477 io::copy(&mut reader, &mut vec)?;
478 let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?;
480 crate::archive::zip::list_archive(zip_archive)?
482 Gzip | Bzip | Lz4 | Lzma | Zstd => {
483 panic!("Not an archive! This should never happen, if it does, something is wrong with `CompressionFormat::is_archive()`. Please report this error!");
486 list::list_files(archive_path, files, list_options);
492 formats: &mut Vec<Vec<Extension>>,
493 question_policy: QuestionPolicy,
494 ) -> crate::Result<ControlFlow<()>> {
495 for (path, format) in files.iter().zip(formats.iter_mut()) {
496 if format.is_empty() {
497 // File with no extension
498 // Try to detect it automatically and prompt the user about it
499 if let Some(detected_format) = try_infer_extension(path) {
500 info!("Detected file: `{}` extension as `{}`", path.display(), detected_format);
501 if user_wants_to_continue_decompressing(path, question_policy)? {
502 format.push(detected_format);
504 return Ok(ControlFlow::Break(()));
507 } else if let Some(detected_format) = try_infer_extension(path) {
508 // File ending with extension
509 // Try to detect the extension and warn the user if it differs from the written one
510 let outer_ext = format.iter().next_back().unwrap();
511 if outer_ext != &detected_format {
513 "The file extension: `{}` differ from the detected extension: `{}`",
517 if !user_wants_to_continue_decompressing(path, question_policy)? {
518 return Ok(ControlFlow::Break(()));
522 // NOTE: If this actually produces no false positives, we can upgrade it in the future
523 // to a warning and ask the user if he wants to continue decompressing.
524 info!("Could not detect the extension of `{}`", path.display());
527 Ok(ControlFlow::Continue(()))
530 fn clean_input_files_if_needed(files: &mut Vec<PathBuf>, output_path: &Path) {
532 while idx < files.len() {
533 if files[idx] == output_path {
534 warning!("The output file and the input file are the same: `{}`, skipping...", output_path.display());