1 //! Core of the crate, where the `compress_files` and `decompress_file` functions are implemented
3 //! Also where the appropriate function is called based on the detected `Subcommand`.
6 io::{self, BufReader, BufWriter, Read, Write},
19 CompressionFormat::{self, *},
23 list::{self, ListOptions},
25 self, concatenate_os_str_list, dir_is_empty, nice_directory_display, to_utf, try_infer_extension,
26 user_wants_to_continue_decompressing,
28 warning, Opts, QuestionPolicy, Subcommand,
31 // Buffer capacity in bytes (1024 * 64) passed to `BufReader::with_capacity` and
// `BufWriter::with_capacity` so that file I/O needs fewer syscalls
32 const BUFFER_CAPACITY: usize = 1024 * 64;
// Reports whether `files` stands for more than a single file: true when any path is a
// directory for which `dir_is_empty` returns false, or when more than one path is given.
34 fn represents_several_files(files: &[PathBuf]) -> bool {
35 let is_non_empty_dir = |path: &PathBuf| {
// Kept as a closure so `dir_is_empty` is only called when `path.is_dir()` is true
36 let is_non_empty = || !dir_is_empty(path);
// `bool::then` yields `None` for non-directories; `unwrap_or_default` turns that into `false`
38 path.is_dir().then(is_non_empty).unwrap_or_default()
// The directory scan runs first; the length check is only evaluated when it found nothing
41 files.iter().any(is_non_empty_dir) || files.len() > 1
44 /// Entrypoint of ouch, receives cli options and matches Subcommand to decide what to do
45 pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
47 Subcommand::Compress { files, output: output_path } => {
48 // Formats from path extension, like "file.tar.gz.xz" -> vec![Tar, Gzip, Lzma]
49 let mut formats = extension::extensions_from_path(&output_path);
51 if formats.is_empty() {
52 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
53 .detail("You shall supply the compression format")
54 .hint("Try adding supported extensions (see --help):")
55 .hint(format!(" ouch compress <FILES>... {}.tar.gz", to_utf(&output_path)))
56 .hint(format!(" ouch compress <FILES>... {}.zip", to_utf(&output_path)))
58 .hint("Alternatively, you can overwrite this option by using the '--format' flag:")
59 .hint(format!(" ouch compress <FILES>... {} --format tar.gz", to_utf(&output_path)));
61 return Err(error.into());
64 if !formats.get(0).map(Extension::is_archive).unwrap_or(false) && represents_several_files(&files) {
65 // This piece of code creates a suggestion for compressing multiple files
67 // Change from file.bz.xz
69 let extensions_text: String = formats.iter().map(|format| format.to_string()).collect();
71 let output_path = to_utf(output_path);
73 // Breaks if Lzma is .lz or .lzma and not .xz
74 // Or if Bzip is .bz2 and not .bz
75 let extensions_start_position = output_path.rfind(&extensions_text).unwrap();
76 let pos = extensions_start_position;
77 let empty_range = pos..pos;
78 let mut suggested_output_path = output_path.clone();
79 suggested_output_path.replace_range(empty_range, ".tar");
81 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
82 .detail("You are trying to compress multiple files.")
83 .detail(format!("The compression format '{}' cannot receive multiple files.", &formats[0]))
84 .detail("The only supported formats that archive files into an archive are .tar and .zip.")
85 .hint(format!("Try inserting '.tar' or '.zip' before '{}'.", &formats[0]))
86 .hint(format!("From: {}", output_path))
87 .hint(format!("To: {}", suggested_output_path));
89 return Err(error.into());
92 if let Some(format) = formats.iter().skip(1).find(|format| format.is_archive()) {
93 let error = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
94 .detail(format!("Found the format '{}' in an incorrect position.", format))
95 .detail(format!("'{}' can only be used at the start of the file extension.", format))
96 .hint(format!("If you wish to compress multiple files, start the extension with '{}'.", format))
97 .hint(format!("Otherwise, remove the last '{}' from '{}'.", format, to_utf(&output_path)));
99 return Err(error.into());
102 if output_path.exists() && !utils::user_wants_to_overwrite(&output_path, question_policy)? {
103 // User does not want to overwrite this file, skip and return without any errors
107 let output_file = fs::File::create(&output_path)?;
109 if !represents_several_files(&files) {
110 // It's possible the file is already partially compressed so we don't want to compress it again
111 // `ouch compress file.tar.gz file.tar.gz.xz` should produce `file.tar.gz.xz` and not `file.tar.gz.tar.gz.xz`
112 let input_extensions = extension::extensions_from_path(&files[0]);
114 // We calculate the formats that are left if we filter out a sublist at the start of what we have that's the same as the input formats
115 let mut new_formats = Vec::with_capacity(formats.len());
116 for (inp_ext, out_ext) in input_extensions.iter().zip(&formats) {
117 if inp_ext.compression_formats == out_ext.compression_formats {
118 new_formats.push(out_ext.clone());
122 .zip(out_ext.compression_formats.iter())
123 .all(|(inp, out)| inp == out)
125 let new_ext = Extension::new(
126 &out_ext.compression_formats[..inp_ext.compression_formats.len()],
127 &out_ext.display_text,
129 new_formats.push(new_ext);
133 // If the input is a sublist at the start of `formats` then remove the extensions
134 // Note: If input_extensions is empty then it will make `formats` empty too, which we don't want
135 if !input_extensions.is_empty() && new_formats != formats {
137 // We checked above that input_extensions isn't empty, so files[0] has an extension.
139 // Path::extension says: "if there is no file_name, then there is no extension".
140 // Contrapositive statement: "if there is extension, then there is file_name".
142 "Partial compression detected. Compressing {} into {}",
143 to_utf(files[0].as_path().file_name().unwrap()),
146 formats = new_formats;
149 let compress_result = compress_files(files, formats, output_file);
151 // If any error occurred, delete incomplete file
152 if compress_result.is_err() {
153 // Print an extra alert message pointing out that we left a possibly
154 // CORRUPTED FILE at `output_path`
155 if let Err(err) = fs::remove_file(&output_path) {
156 eprintln!("{red}FATAL ERROR:\n", red = *colors::RED);
157 eprintln!(" Please manually delete '{}'.", to_utf(&output_path));
158 eprintln!(" Compression failed and we could not delete '{}'.", to_utf(&output_path),);
159 eprintln!(" Error:{reset} {}{red}.{reset}\n", err, reset = *colors::RESET, red = *colors::RED);
162 info!("Successfully compressed '{}'.", to_utf(output_path));
167 Subcommand::Decompress { files, output_dir } => {
168 let mut output_paths = vec![];
169 let mut formats = vec![];
171 for path in files.iter() {
172 let (file_output_path, file_formats) = extension::separate_known_extensions_from_name(path);
173 output_paths.push(file_output_path);
174 formats.push(file_formats);
177 if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
181 let files_missing_format: Vec<PathBuf> = files
184 .filter(|(_, formats)| formats.is_empty())
185 .map(|(input_path, _)| PathBuf::from(input_path))
188 if !files_missing_format.is_empty() {
189 let error = FinalError::with_title("Cannot decompress files without extensions")
191 "Files without supported extensions: {}",
192 concatenate_os_str_list(&files_missing_format)
194 .detail("Decompression formats are detected automatically by the file extension")
195 .hint("Provide a file with a supported extension:")
196 .hint(" ouch decompress example.tar.gz")
198 .hint("Or overwrite this option with the '--format' flag:")
199 .hint(format!(" ouch decompress {} --format tar.gz", to_utf(&files_missing_format[0])));
201 return Err(error.into());
204 // From Option<PathBuf> to Option<&Path>
205 let output_dir = output_dir.as_ref().map(|path| path.as_ref());
207 for ((input_path, formats), file_name) in files.iter().zip(formats).zip(output_paths) {
208 decompress_file(input_path, formats, output_dir, file_name, question_policy)?;
211 Subcommand::List { archives: files, tree } => {
212 let mut formats = vec![];
214 for path in files.iter() {
215 let (_, file_formats) = extension::separate_known_extensions_from_name(path);
216 formats.push(file_formats);
219 if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
223 let not_archives: Vec<PathBuf> = files
226 .filter(|(_, formats)| !formats.get(0).map(Extension::is_archive).unwrap_or(false))
227 .map(|(path, _)| path.clone())
230 if !not_archives.is_empty() {
231 let error = FinalError::with_title("Cannot list archive contents")
232 .detail("Only archives can have their contents listed")
233 .detail(format!("Files are not archives: {}", concatenate_os_str_list(¬_archives)));
235 return Err(error.into());
238 let list_options = ListOptions { tree };
240 for (i, (archive_path, formats)) in files.iter().zip(formats).enumerate() {
244 let formats = formats.iter().flat_map(Extension::iter).map(Clone::clone).collect();
245 list_archive_contents(archive_path, formats, list_options)?;
252 // Compress files into an `output_file`
254 // files are the list of paths to be compressed: ["dir/file1.txt", "dir/file2.txt"]
255 // formats contains each format necessary for compression, example: [Tar, Gz] (in compression order)
256 // output_file is the resulting compressed file name, example: "compressed.tar.gz"
//
// Returns an error when an encoder cannot be created, when the input file of a single-file
// format cannot be opened, or when building the archive or copying data fails.
257 fn compress_files(files: Vec<PathBuf>, formats: Vec<Extension>, output_file: fs::File) -> crate::Result<()> {
258 let file_writer = BufWriter::with_capacity(BUFFER_CAPACITY, output_file);
// Boxed so encoders of different concrete types can be layered over the same variable
260 let mut writer: Box<dyn Write> = Box::new(file_writer);
262 // Grab previous encoder and wrap it inside of a new one
263 let chain_writer_encoder = |format: &CompressionFormat, encoder: Box<dyn Write>| -> crate::Result<Box<dyn Write>> {
264 let encoder: Box<dyn Write> = match format {
265 Gzip => Box::new(flate2::write::GzEncoder::new(encoder, Default::default())),
266 Bzip => Box::new(bzip2::write::BzEncoder::new(encoder, Default::default())),
267 Lz4 => Box::new(lzzzz::lz4f::WriteCompressor::new(encoder, Default::default())?),
268 Lzma => Box::new(xz2::write::XzEncoder::new(encoder, 6)),
270 let zstd_encoder = zstd::stream::write::Encoder::new(encoder, Default::default());
272 // Encoder::new() can only fail if `level` is invalid, but Default::default()
273 // is guaranteed to be valid
274 Box::new(zstd_encoder.unwrap().auto_finish())
// Archive formats are never passed to this closure; they are handled by the `match` below
276 Tar | Zip => unreachable!(),
// Every format except the first gets an encoder layered over `writer`, visited in reverse order
281 for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
282 writer = chain_writer_encoder(format, writer)?;
// The first format decides how the input is fed into the encoder chain
285 match formats[0].compression_formats[0] {
286 Gzip | Bzip | Lz4 | Lzma | Zstd => {
287 writer = chain_writer_encoder(&formats[0].compression_formats[0], writer)?;
// Single-file formats read only `files[0]`; an open failure is returned to the caller
288 let mut reader = fs::File::open(&files[0])?;
289 io::copy(&mut reader, &mut writer)?;
292 let mut writer = archive::tar::build_archive_from_paths(&files, writer)?;
296 eprintln!("{yellow}Warning:{reset}", yellow = *colors::YELLOW, reset = *colors::RESET);
297 eprintln!("\tCompressing .zip entirely in memory.");
298 eprintln!("\tIf the file is too big, your PC might freeze!");
300 "\tThis is a limitation for formats like '{}'.",
301 formats.iter().map(|format| format.to_string()).collect::<String>()
303 eprintln!("\tThe design of .zip makes it impossible to compress via stream.");
// The zip archive is built in an in-memory cursor and then copied into the encoder chain
305 let mut vec_buffer = io::Cursor::new(vec![]);
306 archive::zip::build_archive_from_paths(&files, &mut vec_buffer)?;
307 let vec_buffer = vec_buffer.into_inner();
308 io::copy(&mut vec_buffer.as_slice(), &mut writer)?;
// Decompress a single input file
//
317 // File at input_file_path is opened for reading, example: "archive.tar.gz"
318 // formats contains each format necessary for decompression, example: [Gz, Tar] (in decompression order)
319 // output_dir is where the file will be decompressed to
320 // file_name is only used when extracting single file formats, not archive formats like .tar or .zip
// question_policy is forwarded to the helpers that may ask before overwriting
322 input_file_path: &Path,
323 formats: Vec<Extension>,
324 output_dir: Option<&Path>,
326 question_policy: QuestionPolicy,
327 ) -> crate::Result<()> {
328 let reader = fs::File::open(&input_file_path)?;
330 // Output path is used by single file formats
// `file_name` is joined onto `output_dir` when one was given, otherwise used as is
332 if let Some(output_dir) = output_dir { output_dir.join(file_name) } else { file_name.to_path_buf() };
334 // Output folder is used by archive file formats (zip and tar)
// Falls back to "." when no output directory was given
335 let output_dir = output_dir.unwrap_or_else(|| Path::new("."));
337 // Zip archives are special, because they require io::Seek, so their logic is kept separate
338 // from decoder chaining.
340 // This is the only case where we can read and unpack it directly, without having to do
341 // in-memory decompression/copying first.
343 // Any other Zip decompression done can take up the whole RAM and freeze ouch.
344 if formats.len() == 1 && *formats[0].compression_formats == [Zip] {
345 if !utils::clear_path(output_dir, question_policy)? {
346 // User doesn't want to overwrite
349 utils::create_dir_if_non_existent(output_dir)?;
// The file handle itself is passed to `ZipArchive::new` here, without buffering in memory
350 let zip_archive = zip::ZipArchive::new(reader)?;
351 let _files = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?;
352 info!("Successfully decompressed archive in {}.", nice_directory_display(output_dir));
356 // Will be used in decoder chaining
357 let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader);
// Boxed so decoders of different concrete types can be layered over the same variable
358 let mut reader: Box<dyn Read> = Box::new(reader);
360 // Grab previous decoder and wrap it inside of a new one
361 let chain_reader_decoder = |format: &CompressionFormat, decoder: Box<dyn Read>| -> crate::Result<Box<dyn Read>> {
362 let decoder: Box<dyn Read> = match format {
363 Gzip => Box::new(flate2::read::GzDecoder::new(decoder)),
364 Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)),
365 Lz4 => Box::new(lzzzz::lz4f::ReadDecompressor::new(decoder)?),
366 Lzma => Box::new(xz2::read::XzDecoder::new(decoder)),
367 Zstd => Box::new(zstd::stream::Decoder::new(decoder)?),
// Archive formats are never passed to this closure; they are handled by the `match` below
368 Tar | Zip => unreachable!(),
// Every format except the first gets a decoder layered over `reader`, visited in reverse order
373 for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
374 reader = chain_reader_decoder(format, reader)?;
377 if !utils::clear_path(&output_path, question_policy)? {
378 // User doesn't want to overwrite
381 utils::create_dir_if_non_existent(output_dir)?;
// The first format decides how the decoded stream is consumed
385 match formats[0].compression_formats[0] {
386 Gzip | Bzip | Lz4 | Lzma | Zstd => {
387 reader = chain_reader_decoder(&formats[0].compression_formats[0], reader)?;
389 let writer = utils::create_or_ask_overwrite(&output_path, question_policy)?;
390 if writer.is_none() {
391 // Means that the user doesn't want to overwrite
394 let mut writer = writer.unwrap();
// Single-file format: the decoded stream is copied into the file at `output_path`
396 io::copy(&mut reader, &mut writer)?;
397 files_unpacked = vec![output_path];
// Tar is unpacked directly from the decoder chain into `output_dir`
400 files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
// NOTE(review): the messages below say "compressing" although this is the decompression path
403 eprintln!("Compressing first into .zip.");
404 eprintln!("Warning: .zip archives with extra extensions have a downside.");
406 "The only way is loading everything into the RAM while compressing, and then write everything down."
408 eprintln!("this means that by compressing .zip with extra compression formats, you can run out of RAM if the file is too large!");
// The whole decoded stream is buffered in memory and wrapped in an `io::Cursor` for `ZipArchive::new`
410 let mut vec = vec![];
411 io::copy(&mut reader, &mut vec)?;
412 let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?;
414 files_unpacked = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?;
418 info!("Successfully decompressed archive in {}.", nice_directory_display(output_dir));
419 info!("Files unpacked: {}", files_unpacked.len());
// List the contents of a single archive through `list::list_files`
//
424 // File at input_file_path is opened for reading, example: "archive.tar.gz"
425 // formats contains each format necessary for decompression, example: [Gz, Tar] (in decompression order)
// list_options is forwarded unchanged to `list::list_files`
426 fn list_archive_contents(
428 formats: Vec<CompressionFormat>,
429 list_options: ListOptions,
430 ) -> crate::Result<()> {
431 let reader = fs::File::open(&archive_path)?;
433 // Zip archives are special, because they require io::Seek, so their logic is kept separate
434 // from decoder chaining.
436 // This is the only case where we can read and unpack it directly, without having to do
437 // in-memory decompression/copying first.
439 // Any other Zip decompression done can take up the whole RAM and freeze ouch.
// Matches only when `formats` holds exactly one element and that element is `Zip`
440 if let [Zip] = *formats.as_slice() {
441 let zip_archive = zip::ZipArchive::new(reader)?;
442 let files = crate::archive::zip::list_archive(zip_archive)?;
443 list::list_files(archive_path, files, list_options);
447 // Will be used in decoder chaining
448 let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader);
// Boxed so decoders of different concrete types can be layered over the same variable
449 let mut reader: Box<dyn Read> = Box::new(reader);
451 // Grab previous decoder and wrap it inside of a new one
452 let chain_reader_decoder = |format: &CompressionFormat, decoder: Box<dyn Read>| -> crate::Result<Box<dyn Read>> {
453 let decoder: Box<dyn Read> = match format {
454 Gzip => Box::new(flate2::read::GzDecoder::new(decoder)),
455 Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)),
456 Lz4 => Box::new(lzzzz::lz4f::ReadDecompressor::new(decoder)?),
457 Lzma => Box::new(xz2::read::XzDecoder::new(decoder)),
458 Zstd => Box::new(zstd::stream::Decoder::new(decoder)?),
// Archive formats are never passed to this closure; they are handled by the `match` below
459 Tar | Zip => unreachable!(),
// Every format except the first gets a decoder layered over `reader`, visited in reverse order
464 for format in formats.iter().skip(1).rev() {
465 reader = chain_reader_decoder(format, reader)?;
// The first format must be an archive format; it decides how the entries are read
468 let files = match formats[0] {
469 Tar => crate::archive::tar::list_archive(reader)?,
// NOTE(review): the messages below say "compressing" although nothing is compressed here
471 eprintln!("Listing files from zip archive.");
472 eprintln!("Warning: .zip archives with extra extensions have a downside.");
473 eprintln!("The only way is loading everything into the RAM while compressing, and then reading the archive contents.");
474 eprintln!("this means that by compressing .zip with extra compression formats, you can run out of RAM if the file is too large!");
// The whole decoded stream is buffered in memory and wrapped in an `io::Cursor` for `ZipArchive::new`
476 let mut vec = vec![];
477 io::copy(&mut reader, &mut vec)?;
478 let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?;
480 crate::archive::zip::list_archive(zip_archive)?
// Panics rather than returning an error: a non-archive first format is treated as a bug
482 Gzip | Bzip | Lz4 | Lzma | Zstd => {
483 panic!("Not an archive! This should never happen, if it does, something is wrong with `CompressionFormat::is_archive()`. Please report this error!");
486 list::list_files(archive_path, files, list_options);
// Compares each file's extension-derived formats with what `try_infer_extension` detects.
// `formats` is walked in lockstep with `files`; an entry that is empty may receive the
// detected format. Returns `ControlFlow::Break(())` to tell the caller to stop, and
// `ControlFlow::Continue(())` once every file has been checked.
492 formats: &mut Vec<Vec<Extension>>,
493 question_policy: QuestionPolicy,
494 ) -> crate::Result<ControlFlow<()>> {
495 for (path, format) in files.iter().zip(formats.iter_mut()) {
496 if format.is_empty() {
497 // File with no extension
498 // Try to detect it automatically and prompt the user about it
499 if let Some(detected_format) = try_infer_extension(path) {
500 info!("Detected file: `{}` extension as `{}`", path.display(), detected_format);
501 if user_wants_to_continue_decompressing(path, question_policy)? {
// Record the detected format in this file's entry of the caller's `formats`
502 format.push(detected_format);
// Presumably the branch taken when the user declines (the `else` line is not visible here)
504 return Ok(ControlFlow::Break(()));
507 } else if let Some(detected_format) = try_infer_extension(path) {
508 // File ending with extension
509 // Try to detect the extension and warn the user if it differs from the written one
// `format` is non-empty in this branch, so taking the first element cannot fail
510 let outer_ext = format.iter().next().unwrap();
511 if outer_ext != &detected_format {
513 "The file extension: `{}` differ from the detected extension: `{}`",
517 if !user_wants_to_continue_decompressing(path, question_policy)? {
518 return Ok(ControlFlow::Break(()));
522 // NOTE: If this actually produces no false positives, we can upgrade it in the future
523 // to a warning and ask the user if they want to continue decompressing.
524 info!("Could not detect the extension of `{}`", path.display());
527 Ok(ControlFlow::Continue(()))