1 /* $NetBSD: unzip.c,v 1.9 2009/09/30 10:04:54 wiz Exp $ */
4 * Copyright (c) 2009 Joerg Sonnenberger <joerg@NetBSD.org>
5 * Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * $FreeBSD: revision 180124$
32 * This file would be much shorter if we didn't care about command-line
33 * compatibility with Info-ZIP's UnZip, which requires us to duplicate
34 * parts of libarchive in order to gain more detailed control of its
35 * behaviour for the purpose of implementing the -n, -o, -L and -a
39 #include <sys/cdefs.h>
40 __RCSID("$NetBSD: unzip.c,v 1.9 2009/09/30 10:04:54 wiz Exp $");
42 #include <sys/queue.h>
56 #include <archive_entry.h>
58 /* command-line options */
/*
 * File-scope option state, one variable per command-line flag.  All have
 * static storage and no initializer, so they start out as 0 / NULL.
 * v_opt is compared against 2 elsewhere in this file to select the long
 * listing format.
 * NOTE(review): the leading numbers and the split ';' lines are artifacts
 * of the listing this text was taken from, not part of the program.
 */
59 static int a_opt
; /* convert EOL */
60 static int C_opt
; /* match case-insensitively */
61 static int c_opt
; /* extract to stdout */
62 static const char *d_arg
; /* directory */
63 static int f_opt
; /* update existing files only */
64 static int j_opt
; /* junk directories */
65 static int L_opt
; /* lowercase names */
66 static int n_opt
; /* never overwrite */
67 static int o_opt
; /* always overwrite */
68 static int p_opt
; /* extract to stdout, quiet */
69 static int q_opt
; /* quiet */
70 static int t_opt
; /* test */
71 static int u_opt
; /* update */
72 static int v_opt
; /* verbose/list */
74 /* time when unzip started */
/* NOTE(review): the declaration the comment above describes is missing. */
/* Debug flag; info() keeps printing under -q while this is non-zero. */
78 static int unzip_debug
;
83 /* convenience macro */
84 /* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
88 if (acret != ARCHIVE_OK) \
89 errorx("%s", archive_error_string(a)); \
93 * Indicates that last info() did not end with EOL. This helps error() et
94 * al. avoid printing an error message on the same line as an incomplete
95 * informational message.
99 /* fatal error message + errno */
/*
 * error(fmt, ...): printf-style message reported as
 * "unzip: <message>: <strerror(errno)>" on stderr, preceded by a newline
 * on stdout.
 * NOTE(review): lines are missing from this listing (return type, braces,
 * va_start/va_end, presumably a guard on the stdout newline, and whatever
 * makes the message "fatal"); restore them from a pristine copy.
 */
101 error(const char *fmt
, ...)
106 fprintf(stdout
, "\n");
108 fprintf(stderr
, "unzip: ");
110 vfprintf(stderr
, fmt
, ap
);
112 fprintf(stderr
, ": %s\n", strerror(errno
));
116 /* fatal error message, no errno */
/*
 * errorx(fmt, ...): like error() but without the errno text; writes
 * "unzip: <message>\n" to stderr after a newline on stdout.
 * NOTE(review): lines are missing from this listing (return type, braces,
 * va_start/va_end, presumably a guard on the stdout newline, and the
 * termination step); restore them from a pristine copy.
 */
118 errorx(const char *fmt
, ...)
123 fprintf(stdout
, "\n");
125 fprintf(stderr
, "unzip: ");
127 vfprintf(stderr
, fmt
, ap
);
129 fprintf(stderr
, "\n");
134 /* non-fatal error message + errno */
/*
 * warning(fmt, ...): writes "unzip: <message>: <strerror(errno)>" to
 * stderr after a newline on stdout.
 * NOTE(review): lines are missing from this listing (return type, braces,
 * va_start/va_end, presumably a guard on the stdout newline); restore
 * them from a pristine copy.
 */
136 warning(const char *fmt
, ...)
141 fprintf(stdout
, "\n");
143 fprintf(stderr
, "unzip: ");
145 vfprintf(stderr
, fmt
, ap
);
147 fprintf(stderr
, ": %s\n", strerror(errno
));
151 /* non-fatal error message, no errno */
/*
 * warningx(fmt, ...): writes "unzip: <message>\n" to stderr after a
 * newline on stdout; no errno text is appended.
 * NOTE(review): lines are missing from this listing (return type, braces,
 * va_start/va_end, presumably a guard on the stdout newline); restore
 * them from a pristine copy.
 */
153 warningx(const char *fmt
, ...)
158 fprintf(stdout
, "\n");
160 fprintf(stderr
, "unzip: ");
162 vfprintf(stderr
, fmt
, ap
);
164 fprintf(stderr
, "\n");
167 /* informational message (if not -q) */
/*
 * info(fmt, ...): printf-style progress message on stdout.  The test on
 * q_opt && !unzip_debug selects the "quiet and not debugging" case
 * (presumably an early return; the statement it guards is missing).
 * Afterwards noeol records whether fmt lacked a trailing '\n'.
 * NOTE(review): fmt[strlen(fmt) - 1] reads before the string for an empty
 * fmt unless one of the missing lines guards it -- confirm.
 */
169 info(const char *fmt
, ...)
173 if (q_opt
&& !unzip_debug
)
176 vfprintf(stdout
, fmt
, ap
);
183 noeol
= fmt
[strlen(fmt
) - 1] != '\n';
186 /* debug message (if unzip_debug) */
/*
 * debug(fmt, ...): printf-style message on stderr; updates noeol the same
 * way info() does.
 * NOTE(review): the check that restricts output to debug mode is on a
 * missing line, as is any guard for an empty fmt before
 * fmt[strlen(fmt) - 1] -- confirm both.
 */
188 debug(const char *fmt
, ...)
195 vfprintf(stderr
, fmt
, ap
);
202 noeol
= fmt
[strlen(fmt
) - 1] != '\n';
205 /* duplicate a path name, possibly converting to lower case */
/*
 * pathdup(path): allocate a copy of path.  The while loop walks len back
 * over trailing '/' characters, len + 1 bytes are allocated, and the
 * name is copied either through tolower() or verbatim with memcpy().
 * NOTE(review): the branch choosing between the two copies (presumably
 * L_opt), the NUL termination, the malloc failure handling and the return
 * are on lines missing from this listing.
 */
207 pathdup(const char *path
)
213 while (len
&& path
[len
- 1] == '/')
215 if ((str
= malloc(len
+ 1)) == NULL
) {
220 for (i
= 0; i
< len
; ++i
)
221 str
[i
] = tolower((unsigned char)path
[i
]);
223 memcpy(str
, path
, len
);
230 /* concatenate two path names */
/*
 * pathcat(prefix, path): allocate "prefix/path".  prelen counts the
 * prefix plus one byte (0 when prefix is NULL); that byte first receives
 * the prefix's NUL and is then overwritten with '/'.  path is copied
 * behind it together with its terminator.
 * NOTE(review): with a NULL prefix prelen is 0, so the first memcpy and
 * the str[prelen - 1] store must be skipped; the guard is presumably on a
 * missing line -- confirm.
 */
232 pathcat(const char *prefix
, const char *path
)
237 prelen
= prefix
? strlen(prefix
) + 1 : 0;
238 len
= strlen(path
) + 1;
239 if ((str
= malloc(prelen
+ len
)) == NULL
) {
244 memcpy(str
, prefix
, prelen
); /* includes zero */
245 str
[prelen
- 1] = '/'; /* splat zero */
247 memcpy(str
+ prelen
, path
, len
); /* includes zero */
253 * Pattern lists for include / exclude processing
/*
 * Singly-linked tail queues (<sys/queue.h>) of glob patterns: "include"
 * and "exclude" both start out empty.  Each node carries its queue
 * linkage in the member named link.
 * NOTE(review): the struct pattern definition around the STAILQ_ENTRY
 * line is incomplete in this listing.
 */
256 STAILQ_ENTRY(pattern
) link
;
260 STAILQ_HEAD(pattern_list
, pattern
);
261 static struct pattern_list include
= STAILQ_HEAD_INITIALIZER(include
);
262 static struct pattern_list exclude
= STAILQ_HEAD_INITIALIZER(exclude
);
265 * Add an entry to a pattern list
/*
 * add_pattern(list, pattern): append a copy of pattern to list.  The node
 * and the string share one allocation (sizeof *entry plus the string and
 * its NUL), and the node is linked at the tail so patterns keep the order
 * in which they were added.
 * NOTE(review): the malloc failure branch body is missing from this
 * listing.
 */
268 add_pattern(struct pattern_list
*list
, const char *pattern
)
270 struct pattern
*entry
;
273 debug("adding pattern '%s'\n", pattern
);
274 len
= strlen(pattern
);
275 if ((entry
= malloc(sizeof *entry
+ len
+ 1)) == NULL
) {
279 memcpy(entry
->pattern
, pattern
, len
+ 1);
280 STAILQ_INSERT_TAIL(list
, entry
, link
);
284 * Match a string against a list of patterns
/*
 * match_pattern(list, str): test str against each pattern in list with
 * fnmatch(3), passing FNM_CASEFOLD when C_opt is set.
 * NOTE(review): the statements returning the result are missing from this
 * listing; callers treat a non-zero result as "matched".
 */
287 match_pattern(struct pattern_list
*list
, const char *str
)
289 struct pattern
*entry
;
291 STAILQ_FOREACH(entry
, list
, link
) {
292 if (fnmatch(entry
->pattern
, str
, C_opt
? FNM_CASEFOLD
: 0) == 0)
299 * Verify that a given pathname is in the include list and not in the
/*
 * accept_pathname(pathname): filter an entry name.  The first test fires
 * when an include list exists and pathname matches none of it; the second
 * fires when pathname matches the exclude list.
 * NOTE(review): the return statements are missing from this listing;
 * callers skip the entry when the result is zero.
 */
303 accept_pathname(const char *pathname
)
306 if (!STAILQ_EMPTY(&include
) && !match_pattern(&include
, pathname
))
308 if (!STAILQ_EMPTY(&exclude
) && match_pattern(&exclude
, pathname
))
314 * Create the specified directory with the specified mode, taking certain
315 * precautions on the way.
/*
 * make_dir(path, mode): make sure a directory exists at path.  lstat() is
 * used, so a symlink at path is examined itself rather than followed.
 * mkdir() failing with EEXIST is tolerated; any other failure is reported
 * through error().
 * NOTE(review): the statements for the "already a directory" case and the
 * unlink() call the second comment refers to are missing from this
 * listing.
 */
318 make_dir(const char *path
, int mode
)
322 if (lstat(path
, &sb
) == 0) {
323 if (S_ISDIR(sb
.st_mode
))
326 * Normally, we should either ask the user about removing
327 * the non-directory of the same name as a directory we
328 * wish to create, or respect the -n or -o command-line
329 * options. However, this may lead to a later failure or
330 * even compromise (if this non-directory happens to be a
331 * symlink to somewhere unsafe), so we don't.
335 * Don't check unlink() result; failure will cause mkdir()
336 * to fail later, which we will catch.
340 if (mkdir(path
, mode
) != 0 && errno
!= EEXIST
)
341 error("mkdir('%s')", path
);
345 * Ensure that all directories leading up to (but not including) the
346 * specified path exist.
348 * XXX inefficient + temporarily modifies the path string in place
/*
 * make_parent(path): create the directories leading up to path.  The last
 * '/' is located with strrchr(); nothing is done when there is none or
 * when it is the leading character.  path is taken as non-const char *.
 * NOTE(review): most of the body is missing from this listing, including
 * the stores through sep and the code between the lstat() block and the
 * strchr() loop; whether that loop is live code cannot be told from here.
 */
351 make_parent(char *path
)
356 sep
= strrchr(path
, '/');
357 if (sep
== NULL
|| sep
== path
)
360 if (lstat(path
, &sb
) == 0) {
361 if (S_ISDIR(sb
.st_mode
)) {
372 for (sep
= path
; (sep
= strchr(sep
, '/')) != NULL
; sep
++) {
373 /* root in case of absolute d_arg */
377 make_dir(path
, 0755);
384 * Extract a directory.
/*
 * Create the directory `path` for entry `e` and skip the entry's data in
 * archive `a`.
 *
 * a:    archive being read; used by ac() for error reporting.
 * e:    header of the directory entry.
 * path: destination path, already prefixed and sanitised by the caller.
 */
static void
extract_dir(struct archive *a, struct archive_entry *e, const char *path)
{
	int mode;

	/*
	 * The permission bits live in archive_entry_mode();
	 * archive_entry_filetype() returns only the file-type bits, so
	 * masking it with 0777 always produced 0.
	 */
	mode = archive_entry_mode(e) & 0777;
	if (mode == 0)
		mode = 0755;

	/*
	 * Some zipfiles contain directories with weird permissions such
	 * as 0644 or 0444.  This can cause strange issues such as being
	 * unable to extract files into the directory we just created, or
	 * the user being unable to remove the directory later without
	 * first manually changing its permissions.  Therefore, we whack
	 * the permissions into shape, assuming that the user wants full
	 * access and that anyone who gets read access also gets execute
	 * access.
	 */
	mode |= 0700;
	if (mode & 0040)
		mode |= 0010;
	if (mode & 0004)
		mode |= 0001;

	info(" creating: %s/\n", path);
	make_dir(path, mode);
	ac(archive_read_data_skip(a));
}
/* Shared 8 KiB scratch buffer passed to archive_read_data(). */
416 static unsigned char buffer
[8192];
/* Progress indicator frames; indexed modulo sizeof spinner while extracting. */
417 static char spinner
[] = { '|', '/', '-', '\\' };
420 handle_existing_file(char **path
)
428 "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
430 fgets(buf
, 4, stdin
);
446 printf("New name: ");
451 len
= getline(path
, &alen
, stdin
);
452 if ((*path
)[len
- 1] != '\n')
453 (*path
)[len
- 1] = '\0';
462 * Extract a regular file.
465 extract_file(struct archive
*a
, struct archive_entry
*e
, char **path
)
470 struct timeval tv
[2];
471 int cr
, fd
, text
, warn
, check
;
473 unsigned char *p
, *q
, *end
;
475 mode
= archive_entry_filetype(e
) & 0777;
478 mtime
= archive_entry_mtime(e
);
480 /* look for existing file of same name */
482 if (lstat(*path
, &sb
) == 0) {
483 if (u_opt
|| f_opt
) {
484 /* check if up-to-date */
485 if (S_ISREG(sb
.st_mode
) && sb
.st_mtime
>= mtime
)
492 /* do not overwrite */
495 check
= handle_existing_file(path
);
499 return; /* do not overwrite */
506 if ((fd
= open(*path
, O_RDWR
|O_CREAT
|O_TRUNC
, mode
)) < 0)
507 error("open('%s')", *path
);
509 /* loop over file contents and write to disk */
510 info(" extracting: %s", *path
);
514 for (int n
= 0; ; n
++) {
515 if (tty
&& (n
% 4) == 0)
516 info(" %c\b\b", spinner
[(n
/ 4) % sizeof spinner
]);
518 len
= archive_read_data(a
, buffer
, sizeof buffer
);
523 /* left over CR from previous buffer */
525 if (len
== 0 || buffer
[0] != '\n')
526 if (write(fd
, "\r", 1) != 1)
527 error("write('%s')", *path
);
537 * Detect whether this is a text file. The correct way to
538 * do this is to check the least significant bit of the
539 * "internal file attributes" field of the corresponding
540 * file header in the central directory, but libarchive
541 * does not read the central directory, so we have to
542 * guess by looking for non-ASCII characters in the
543 * buffer. Hopefully we won't guess wrong. If we do
544 * guess wrong, we print a warning message later.
546 if (a_opt
&& n
== 0) {
547 for (p
= buffer
; p
< end
; ++p
) {
548 if (!isascii((unsigned char)*p
)) {
556 if (!a_opt
|| !text
) {
557 if (write(fd
, buffer
, len
) != len
)
558 error("write('%s')", path
);
562 /* hard case: convert \r\n to \n (sigh...) */
563 for (p
= buffer
; p
< end
; p
= q
+ 1) {
564 for (q
= p
; q
< end
; q
++) {
565 if (!warn
&& !isascii(*q
)) {
566 warningx("%s may be corrupted due"
567 " to weak text file detection"
568 " heuristic", *path
);
580 if (write(fd
, p
, q
- p
) != q
- p
)
581 error("write('%s')", *path
);
590 /* set access and modification time */
593 tv
[1].tv_sec
= mtime
;
595 if (futimes(fd
, tv
) != 0)
596 error("utimes('%s')", *path
);
598 error("close('%s')", *path
);
602 * Extract a zipfile entry: first perform some sanity checks to ensure
603 * that it is either a directory or a regular file and that the path is
604 * not absolute and does not try to break out of the current directory;
605 * then call either extract_dir() or extract_file() as appropriate.
607 * This is complicated a bit by the various ways in which we need to
608 * manipulate the path name. Case conversion (if requested by the -L
609 * option) happens first, but the include / exclude patterns are applied
610 * to the full converted path name, before the directory part of the path
611 * is removed in accordance with the -j option. Sanity checks are
612 * intentionally done earlier than they need to be, so the user will get a
613 * warning about insecure paths even for files or directories which
614 * wouldn't be extracted anyway.
/*
 * extract(a, e): vet one entry and hand it to extract_dir() or
 * extract_file().  Visible steps, in order: duplicate the entry name with
 * pathdup(); reject absolute names and names starting with "../" or
 * containing "/../"; reject anything that is neither a directory nor a
 * regular file; skip directories under -j; apply the include / exclude
 * patterns; build the destination with pathcat(d_arg, ...); create the
 * parent directories; dispatch on the file type.
 * NOTE(review): the insecure-path test does not catch a name that is
 * exactly ".." or that ends in "/.." -- worth tightening.
 * NOTE(review): the return / cleanup statements of each skip branch and
 * the body of the -j loop are missing from this listing.
 */
617 extract(struct archive
*a
, struct archive_entry
*e
)
619 char *pathname
, *realpathname
;
623 pathname
= pathdup(archive_entry_pathname(e
));
624 filetype
= archive_entry_filetype(e
);
627 if (pathname
[0] == '/' ||
628 strncmp(pathname
, "../", 3) == 0 ||
629 strstr(pathname
, "/../") != NULL
) {
630 warningx("skipping insecure entry '%s'", pathname
);
631 ac(archive_read_data_skip(a
));
636 /* I don't think this can happen in a zipfile.. */
637 if (!S_ISDIR(filetype
) && !S_ISREG(filetype
)) {
638 warningx("skipping non-regular entry '%s'", pathname
);
639 ac(archive_read_data_skip(a
));
644 /* skip directories in -j case */
645 if (S_ISDIR(filetype
) && j_opt
) {
646 ac(archive_read_data_skip(a
));
651 /* apply include / exclude patterns */
652 if (!accept_pathname(pathname
)) {
653 ac(archive_read_data_skip(a
));
658 /* apply -j and -d */
660 for (p
= q
= pathname
; *p
; ++p
)
663 realpathname
= pathcat(d_arg
, q
);
665 realpathname
= pathcat(d_arg
, pathname
);
668 /* ensure that parent directory exists */
669 make_parent(realpathname
);
671 if (S_ISDIR(filetype
))
672 extract_dir(a
, e
, realpathname
);
674 extract_file(a
, e
, &realpathname
);
681 extract_stdout(struct archive
*a
, struct archive_entry
*e
)
687 unsigned char *p
, *q
, *end
;
689 pathname
= pathdup(archive_entry_pathname(e
));
690 filetype
= archive_entry_filetype(e
);
692 /* I don't think this can happen in a zipfile.. */
693 if (!S_ISDIR(filetype
) && !S_ISREG(filetype
)) {
694 warningx("skipping non-regular entry '%s'", pathname
);
695 ac(archive_read_data_skip(a
));
700 /* skip directories in -j case */
701 if (S_ISDIR(filetype
)) {
702 ac(archive_read_data_skip(a
));
707 /* apply include / exclude patterns */
708 if (!accept_pathname(pathname
)) {
709 ac(archive_read_data_skip(a
));
715 info("x %s\n", pathname
);
720 for (int n
= 0; ; n
++) {
721 len
= archive_read_data(a
, buffer
, sizeof buffer
);
726 /* left over CR from previous buffer */
728 if (len
== 0 || buffer
[0] != '\n') {
729 if (fwrite("\r", 1, 1, stderr
) != 1)
730 error("write('%s')", pathname
);
741 * Detect whether this is a text file. The correct way to
742 * do this is to check the least significant bit of the
743 * "internal file attributes" field of the corresponding
744 * file header in the central directory, but libarchive
745 * does not read the central directory, so we have to
746 * guess by looking for non-ASCII characters in the
747 * buffer. Hopefully we won't guess wrong. If we do
748 * guess wrong, we print a warning message later.
750 if (a_opt
&& n
== 0) {
751 for (p
= buffer
; p
< end
; ++p
) {
752 if (!isascii((unsigned char)*p
)) {
760 if (!a_opt
|| !text
) {
761 if (fwrite(buffer
, 1, len
, stdout
) != (size_t)len
)
762 error("write('%s')", pathname
);
766 /* hard case: convert \r\n to \n (sigh...) */
767 for (p
= buffer
; p
< end
; p
= q
+ 1) {
768 for (q
= p
; q
< end
; q
++) {
769 if (!warn
&& !isascii(*q
)) {
770 warningx("%s may be corrupted due"
771 " to weak text file detection"
772 " heuristic", pathname
);
784 if (fwrite(p
, 1, q
- p
, stdout
) != (size_t)(q
- p
))
785 error("write('%s')", pathname
);
793 * Print the name of an entry to stdout.
796 list(struct archive
*a
, struct archive_entry
*e
)
801 mtime
= archive_entry_mtime(e
);
802 strftime(buf
, sizeof(buf
), "%m-%d-%g %R", localtime(&mtime
));
805 printf(" %8ju %s %s\n",
806 (uintmax_t)archive_entry_size(e
),
807 buf
, archive_entry_pathname(e
));
808 } else if (v_opt
== 2) {
809 printf("%8ju Stored %7ju 0%% %s %08x %s\n",
810 (uintmax_t)archive_entry_size(e
),
811 (uintmax_t)archive_entry_size(e
),
814 archive_entry_pathname(e
));
816 ac(archive_read_data_skip(a
));
820 * Extract to memory to check CRC
/*
 * test(a, e): read an entry's data through archive_read_data() into the
 * shared buffer until it stops returning a positive length, reporting
 * progress through info().  The caller adds the return value to its error
 * count.
 * NOTE(review): the directory early-out, the loop body, the condition
 * around the archive_error_string() report and the return statements are
 * missing from this listing.
 */
823 test(struct archive
*a
, struct archive_entry
*e
)
829 if (S_ISDIR(archive_entry_filetype(e
)))
832 info(" testing: %s\t", archive_entry_pathname(e
));
833 while ((len
= archive_read_data(a
, buffer
, sizeof buffer
)) > 0)
836 info(" %s\n", archive_error_string(a
));
842 /* shouldn't be necessary, but it doesn't hurt */
843 ac(archive_read_data_skip(a
));
850 * Main loop: open the zipfile, iterate over its contents and decide what
851 * to do with each entry.
854 unzip(const char *fn
)
857 struct archive_entry
*e
;
859 uintmax_t total_size
, file_count
, error_count
;
861 if ((fd
= open(fn
, O_RDONLY
)) < 0)
864 a
= archive_read_new();
865 ac(archive_read_support_format_zip(a
));
866 ac(archive_read_open_fd(a
, fd
, 8192));
869 printf("Archive: %s\n", fn
);
872 printf(" Length Date Time Name\n");
873 printf(" -------- ---- ---- ----\n");
874 } else if (v_opt
== 2) {
875 printf(" Length Method Size Ratio Date Time CRC-32 Name\n");
876 printf("-------- ------ ------- ----- ---- ---- ------ ----\n");
883 ret
= archive_read_next_header(a
, &e
);
884 if (ret
== ARCHIVE_EOF
)
888 error_count
+= test(a
, e
);
891 else if (p_opt
|| c_opt
)
892 extract_stdout(a
, e
);
896 total_size
+= archive_entry_size(e
);
901 printf(" -------- -------\n");
902 printf(" %8ju %ju file%s\n",
903 total_size
, file_count
, file_count
!= 1 ? "s" : "");
904 } else if (v_opt
== 2) {
905 printf("-------- ------- --- -------\n");
906 printf("%8ju %7ju 0%% %ju file%s\n",
907 total_size
, total_size
, file_count
,
908 file_count
!= 1 ? "s" : "");
911 ac(archive_read_close(a
));
912 (void)archive_read_finish(a
);
918 if (error_count
> 0) {
919 errorx("%d checksum error(s) found.", error_count
);
922 printf("No errors detected in compressed data of %s.\n",
/*
 * usage(): print the command synopsis on stderr.
 * NOTE(review): the function header and whatever follows the message are
 * missing from this listing.
 */
932 fprintf(stderr
, "usage: unzip [-aCcfjLlnopqtuv] [-d dir] [-x pattern] zipfile\n");
/*
 * getopts(argc, argv): run getopt(3) over argv.  optreset and optind are
 * reset first so the function can be called more than once (main() calls
 * it twice).  Options d and x take an argument; -x adds its argument to
 * the exclude list.  main() uses the return value as an index into argv.
 * NOTE(review): every other case of the option switch and the return
 * statement are missing from this listing.
 */
937 getopts(int argc
, char *argv
[])
941 optreset
= optind
= 1;
942 while ((opt
= getopt(argc
, argv
, "aCcd:fjLlnopqtuvx:")) != -1)
992 add_pattern(&exclude
, optarg
);
/*
 * main(argc, argv): program entry.  Visible steps: probe whether stdout
 * is a terminal and whether UNZIP_DEBUG is set; echo the command line
 * through debug(); parse the leading options; take the next argument as
 * the zipfile; collect following arguments that do not start with '-' as
 * include patterns; parse the trailing options with a second getopts()
 * call; reject combinations of -n, -o and -u.
 * NOTE(review): the statements guarded by the isatty() and getenv()
 * tests, the check that a zipfile argument exists, and everything after
 * the option check are missing from this listing.
 */
1002 main(int argc
, char *argv
[])
1004 const char *zipfile
;
1007 if (isatty(STDOUT_FILENO
))
1010 if (getenv("UNZIP_DEBUG") != NULL
)
1012 for (int i
= 0; i
< argc
; ++i
)
1013 debug("%s%c", argv
[i
], (i
< argc
- 1) ? ' ' : '\n');
1016 * Info-ZIP's unzip(1) expects certain options to come before the
1017 * zipfile name, and others to come after - though it does not
1018 * enforce this. For simplicity, we accept *all* options both
1019 * before and after the zipfile name.
1021 nopts
= getopts(argc
, argv
);
1025 zipfile
= argv
[nopts
++];
1027 while (nopts
< argc
&& *argv
[nopts
] != '-')
1028 add_pattern(&include
, argv
[nopts
++]);
/* getopt() skips element 0, so step back to give the second pass one */
1030 nopts
--; /* fake argv[0] */
1031 nopts
+= getopts(argc
- nopts
, argv
+ nopts
);
1033 if (n_opt
+ o_opt
+ u_opt
> 1)
1034 errorx("-n, -o and -u are contradictory");