1 /* $NetBSD: unzip.c,v 1.19 2011/09/06 18:43:41 joerg Exp $ */
4 * Copyright (c) 2009, 2010 Joerg Sonnenberger <joerg@NetBSD.org>
5 * Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * $FreeBSD: revision 180124$
32 * This file would be much shorter if we didn't care about command-line
33 * compatibility with Info-ZIP's UnZip, which requires us to duplicate
34 * parts of libarchive in order to gain more detailed control of its
35 * behaviour for the purpose of implementing the -n, -o, -L and -a
39 #include <sys/cdefs.h>
40 __RCSID("$NetBSD: unzip.c,v 1.19 2011/09/06 18:43:41 joerg Exp $");
42 #include <sys/queue.h>
56 #include <archive_entry.h>
58 /* command-line options */
59 static int a_opt
; /* convert EOL */
60 static int C_opt
; /* match case-insensitively */
61 static int c_opt
; /* extract to stdout */
62 static const char *d_arg
; /* directory */
63 static int f_opt
; /* update existing files only */
64 static int j_opt
; /* junk directories */
65 static int L_opt
; /* lowercase names */
66 static int n_opt
; /* never overwrite */
67 static int o_opt
; /* always overwrite */
68 static int p_opt
; /* extract to stdout, quiet */
69 static int q_opt
; /* quiet */
70 static int t_opt
; /* test */
71 static int u_opt
; /* update */
72 static int v_opt
; /* verbose/list */
73 static const char * y_str
= ""; /* 4 digit year */
75 /* time when unzip started */
79 static int unzip_debug
;
84 /* convenience macro */
85 /* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
89 if (acret != ARCHIVE_OK) \
90 errorx("%s", archive_error_string(a)); \
94 * Indicates that last info() did not end with EOL. This helps error() et
95 * al. avoid printing an error message on the same line as an incomplete
96 * informational message.
100 /* fatal error message + errno */
101 __dead
__printflike(1, 2) static void
102 error(const char *fmt
, ...)
107 fprintf(stdout
, "\n");
109 fprintf(stderr
, "unzip: ");
111 vfprintf(stderr
, fmt
, ap
);
113 fprintf(stderr
, ": %s\n", strerror(errno
));
117 /* fatal error message, no errno */
118 __dead
__printflike(1, 2) static void
119 errorx(const char *fmt
, ...)
124 fprintf(stdout
, "\n");
126 fprintf(stderr
, "unzip: ");
128 vfprintf(stderr
, fmt
, ap
);
130 fprintf(stderr
, "\n");
135 /* non-fatal error message + errno */
136 __printflike(1, 2) static void
137 warning(const char *fmt
, ...)
142 fprintf(stdout
, "\n");
144 fprintf(stderr
, "unzip: ");
146 vfprintf(stderr
, fmt
, ap
);
148 fprintf(stderr
, ": %s\n", strerror(errno
));
151 /* non-fatal error message, no errno */
152 __printflike(1, 2) static void
153 warningx(const char *fmt
, ...)
158 fprintf(stdout
, "\n");
160 fprintf(stderr
, "unzip: ");
162 vfprintf(stderr
, fmt
, ap
);
164 fprintf(stderr
, "\n");
167 /* informational message (if not -q) */
168 __printflike(1, 2) static void
169 info(const char *fmt
, ...)
173 if (q_opt
&& !unzip_debug
)
176 vfprintf(stdout
, fmt
, ap
);
183 noeol
= fmt
[strlen(fmt
) - 1] != '\n';
186 /* debug message (if unzip_debug) */
187 __printflike(1, 2) static void
188 debug(const char *fmt
, ...)
195 vfprintf(stderr
, fmt
, ap
);
202 noeol
= fmt
[strlen(fmt
) - 1] != '\n';
205 /* duplicate a path name, possibly converting to lower case */
207 pathdup(const char *path
)
213 while (len
&& path
[len
- 1] == '/')
215 if ((str
= malloc(len
+ 1)) == NULL
) {
220 for (i
= 0; i
< len
; ++i
)
221 str
[i
] = tolower((unsigned char)path
[i
]);
223 memcpy(str
, path
, len
);
230 /* concatenate two path names */
232 pathcat(const char *prefix
, const char *path
)
237 prelen
= prefix
? strlen(prefix
) + 1 : 0;
238 len
= strlen(path
) + 1;
239 if ((str
= malloc(prelen
+ len
)) == NULL
) {
244 memcpy(str
, prefix
, prelen
); /* includes zero */
245 str
[prelen
- 1] = '/'; /* splat zero */
247 memcpy(str
+ prelen
, path
, len
); /* includes zero */
253 * Pattern lists for include / exclude processing
256 STAILQ_ENTRY(pattern
) link
;
260 STAILQ_HEAD(pattern_list
, pattern
);
261 static struct pattern_list include
= STAILQ_HEAD_INITIALIZER(include
);
262 static struct pattern_list exclude
= STAILQ_HEAD_INITIALIZER(exclude
);
265 * Add an entry to a pattern list
268 add_pattern(struct pattern_list
*list
, const char *pattern
)
270 struct pattern
*entry
;
273 debug("adding pattern '%s'\n", pattern
);
274 len
= strlen(pattern
);
275 if ((entry
= malloc(sizeof *entry
+ len
+ 1)) == NULL
) {
279 memcpy(entry
->pattern
, pattern
, len
+ 1);
280 STAILQ_INSERT_TAIL(list
, entry
, link
);
284 * Match a string against a list of patterns
287 match_pattern(struct pattern_list
*list
, const char *str
)
289 struct pattern
*entry
;
291 STAILQ_FOREACH(entry
, list
, link
) {
292 if (fnmatch(entry
->pattern
, str
, C_opt
? FNM_CASEFOLD
: 0) == 0)
299 * Verify that a given pathname is in the include list and not in the
303 accept_pathname(const char *pathname
)
306 if (!STAILQ_EMPTY(&include
) && !match_pattern(&include
, pathname
))
308 if (!STAILQ_EMPTY(&exclude
) && match_pattern(&exclude
, pathname
))
314 * Create the specified directory with the specified mode, taking certain
315 * precautions on the way.
318 make_dir(const char *path
, int mode
)
322 if (lstat(path
, &sb
) == 0) {
323 if (S_ISDIR(sb
.st_mode
))
326 * Normally, we should either ask the user about removing
327 * the non-directory of the same name as a directory we
328 * wish to create, or respect the -n or -o command-line
329 * options. However, this may lead to a later failure or
330 * even compromise (if this non-directory happens to be a
331 * symlink to somewhere unsafe), so we don't.
335 * Don't check unlink() result; failure will cause mkdir()
336 * to fail later, which we will catch.
340 if (mkdir(path
, mode
) != 0 && errno
!= EEXIST
)
341 error("mkdir('%s')", path
);
345 * Ensure that all directories leading up to (but not including) the
346 * specified path exist.
348 * XXX inefficient + modifies the path argument in-place
351 make_parent(char *path
)
356 sep
= strrchr(path
, '/');
357 if (sep
== NULL
|| sep
== path
)
360 if (lstat(path
, &sb
) == 0) {
361 if (S_ISDIR(sb
.st_mode
)) {
372 for (sep
= path
; (sep
= strchr(sep
, '/')) != NULL
; sep
++) {
373 /* root in case of absolute d_arg */
377 make_dir(path
, 0755);
384 * Extract a directory.
387 extract_dir(struct archive
*a
, struct archive_entry
*e
, const char *path
)
391 mode
= archive_entry_mode(e
) & 0777;
396 * Some zipfiles contain directories with weird permissions such
397 * as 0644 or 0444. This can cause strange issues such as being
398 * unable to extract files into the directory we just created, or
399 * the user being unable to remove the directory later without
400 * first manually changing its permissions. Therefore, we whack
401 * the permissions into shape, assuming that the user wants full
402 * access and that anyone who gets read access also gets execute
411 info(" creating: %s/\n", path
);
412 make_dir(path
, mode
);
413 ac(archive_read_data_skip(a
));
/*
 * Scratch buffer that receives each archive_read_data() chunk; it is shared
 * by extract_file(), extract_stdout() and test().
 */
416 static unsigned char buffer
[8192];
/* Progress-indicator glyphs that extract_file() cycles through via info(). */
417 static char spinner
[] = { '|', '/', '-', '\\' };
/*
 * Ask the user what to do about an already existing *path and read the
 * answer from stdin.  The "New name: " branch (presumably the [r]ename
 * answer) reads a replacement name into *path with getline().
 *
 * NOTE(review): only part of this function is visible in this listing, so
 * the meaning of its return value is not documented here.
 */
420 handle_existing_file(char **path
)
428 "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
/*
 * NOTE(review): the result of fgets() is discarded; on EOF or a read error
 * buf is not refilled, so whatever inspects buf afterwards sees stale data.
 */
430 fgets(buf
, 4, stdin
);
446 printf("New name: ");
/*
 * NOTE(review): getline() returns -1 on EOF or error, yet len is used as an
 * index in the very next statement without being checked; (*path)[len - 1]
 * is then an out-of-bounds access.
 */
451 len
= getline(path
, &alen
, stdin
);
/* drop the trailing newline that getline() keeps in the buffer */
452 if ((*path
)[len
- 1] == '\n')
453 (*path
)[len
- 1] = '\0';
462 * Detect binary files by a combination of character white list and
463 * black list. NUL bytes and other control codes without use in text files
464 * result directly in switching the file to binary mode. Otherwise, at least
465 * one white-listed byte has to be found.
467 * Black-listed: 0..6, 14..25, 28..31
468 * White-listed: 9..10, 13, >= 32
470 * See the proginfo/txtvsbin.txt in the zip sources for a detailed discussion.
/*
 * Bit i of each mask corresponds to byte value i (0..31).  Both macros
 * evaluate (x) more than once, so the argument must be free of side effects.
 * The range test comes first and short-circuits, so the shift count is
 * always below 32.  NOTE(review): assumes x is non-negative (the callers
 * visible here pass unsigned char values) -- confirm for any new caller.
 */
472 #define BYTE_IS_BINARY(x) ((x) < 32 && (0xf3ffc07fU & (1U << (x))))
473 #define BYTE_IS_TEXT(x) ((x) >= 32 || (0x00002600U & (1U << (x))))
476 check_binary(const unsigned char *buf
, size_t len
)
479 for (rv
= 1; len
--; ++buf
) {
480 if (BYTE_IS_BINARY(*buf
))
482 if (BYTE_IS_TEXT(*buf
))
490 * Extract a regular file.
493 extract_file(struct archive
*a
, struct archive_entry
*e
, char **path
)
498 struct timeval tv
[2];
499 int cr
, fd
, text
, warn
, check
;
501 unsigned char *p
, *q
, *end
;
503 mode
= archive_entry_mode(e
) & 0777;
506 mtime
= archive_entry_mtime(e
);
508 /* look for existing file of same name */
510 if (lstat(*path
, &sb
) == 0) {
511 if (u_opt
|| f_opt
) {
512 /* check if up-to-date */
513 if (S_ISREG(sb
.st_mode
) && sb
.st_mtime
>= mtime
)
520 /* do not overwrite */
523 check
= handle_existing_file(path
);
527 return; /* do not overwrite */
534 if ((fd
= open(*path
, O_RDWR
|O_CREAT
|O_TRUNC
, mode
)) < 0)
535 error("open('%s')", *path
);
537 /* loop over file contents and write to disk */
538 info(" extracting: %s", *path
);
542 for (int n
= 0; ; n
++) {
543 if (tty
&& (n
% 4) == 0)
544 info(" %c\b\b", spinner
[(n
/ 4) % sizeof spinner
]);
546 len
= archive_read_data(a
, buffer
, sizeof buffer
);
551 /* left over CR from previous buffer */
553 if (len
== 0 || buffer
[0] != '\n')
554 if (write(fd
, "\r", 1) != 1)
555 error("write('%s')", *path
);
565 * Detect whether this is a text file. The correct way to
566 * do this is to check the least significant bit of the
567 * "internal file attributes" field of the corresponding
568 * file header in the central directory, but libarchive
569 * does not read the central directory, so we have to
570 * guess by running check_binary() over the first buffer read
571 * from the entry. Hopefully we won't guess wrong. If we do
572 * guess wrong, we print a warning message later.
574 if (a_opt
&& n
== 0) {
575 if (check_binary(buffer
, len
))
580 if (!a_opt
|| !text
) {
581 if (write(fd
, buffer
, len
) != len
)
582 error("write('%s')", *path
);
586 /* hard case: convert \r\n to \n (sigh...) */
587 for (p
= buffer
; p
< end
; p
= q
+ 1) {
588 for (q
= p
; q
< end
; q
++) {
589 if (!warn
&& BYTE_IS_BINARY(*q
)) {
590 warningx("%s may be corrupted due"
591 " to weak text file detection"
592 " heuristic", *path
);
604 if (write(fd
, p
, q
- p
) != q
- p
)
605 error("write('%s')", *path
);
614 /* set access and modification time */
617 tv
[1].tv_sec
= mtime
;
619 if (futimes(fd
, tv
) != 0)
620 error("utimes('%s')", *path
);
622 error("close('%s')", *path
);
626 * Extract a zipfile entry: first perform some sanity checks to ensure
627 * that it is either a directory or a regular file and that the path is
628 * not absolute and does not try to break out of the current directory;
629 * then call either extract_dir() or extract_file() as appropriate.
631 * This is complicated a bit by the various ways in which we need to
632 * manipulate the path name. Case conversion (if requested by the -L
633 * option) happens first, but the include / exclude patterns are applied
634 * to the full converted path name, before the directory part of the path
635 * is removed in accordance with the -j option. Sanity checks are
636 * intentionally done earlier than they need to be, so the user will get a
637 * warning about insecure paths even for files or directories which
638 * wouldn't be extracted anyway.
641 extract(struct archive
*a
, struct archive_entry
*e
)
643 char *pathname
, *realpathname
;
647 pathname
= pathdup(archive_entry_pathname(e
));
648 filetype
= archive_entry_filetype(e
);
651 if (pathname
[0] == '/' ||
652 strncmp(pathname
, "../", 3) == 0 ||
653 strstr(pathname
, "/../") != NULL
) {
654 warningx("skipping insecure entry '%s'", pathname
);
655 ac(archive_read_data_skip(a
));
660 /* I don't think this can happen in a zipfile.. */
661 if (!S_ISDIR(filetype
) && !S_ISREG(filetype
)) {
662 warningx("skipping non-regular entry '%s'", pathname
);
663 ac(archive_read_data_skip(a
));
668 /* skip directories in -j case */
669 if (S_ISDIR(filetype
) && j_opt
) {
670 ac(archive_read_data_skip(a
));
675 /* apply include / exclude patterns */
676 if (!accept_pathname(pathname
)) {
677 ac(archive_read_data_skip(a
));
682 /* apply -j and -d */
684 for (p
= q
= pathname
; *p
; ++p
)
687 realpathname
= pathcat(d_arg
, q
);
689 realpathname
= pathcat(d_arg
, pathname
);
692 /* ensure that parent directory exists */
693 make_parent(realpathname
);
695 if (S_ISDIR(filetype
))
696 extract_dir(a
, e
, realpathname
);
698 extract_file(a
, e
, &realpathname
);
705 extract_stdout(struct archive
*a
, struct archive_entry
*e
)
711 unsigned char *p
, *q
, *end
;
713 pathname
= pathdup(archive_entry_pathname(e
));
714 filetype
= archive_entry_filetype(e
);
716 /* I don't think this can happen in a zipfile.. */
717 if (!S_ISDIR(filetype
) && !S_ISREG(filetype
)) {
718 warningx("skipping non-regular entry '%s'", pathname
);
719 ac(archive_read_data_skip(a
));
724 /* always skip directories (unlike extract(), not only with -j) */
725 if (S_ISDIR(filetype
)) {
726 ac(archive_read_data_skip(a
));
731 /* apply include / exclude patterns */
732 if (!accept_pathname(pathname
)) {
733 ac(archive_read_data_skip(a
));
739 info("x %s\n", pathname
);
744 for (int n
= 0; ; n
++) {
745 len
= archive_read_data(a
, buffer
, sizeof buffer
);
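750 /* left over CR from previous buffer */
/*
 * NOTE(review): the CR below is written to stderr, while every other
 * fwrite() in this function targets stdout -- looks like a bug; confirm.
 */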
752 if (len
== 0 || buffer
[0] != '\n') {
753 if (fwrite("\r", 1, 1, stderr
) != 1)
754 error("write('%s')", pathname
);
765 * Detect whether this is a text file. The correct way to
766 * do this is to check the least significant bit of the
767 * "internal file attributes" field of the corresponding
768 * file header in the central directory, but libarchive
769 * does not read the central directory, so we have to
770 * guess by looking for non-ASCII characters in the
771 * buffer. Hopefully we won't guess wrong. If we do
772 * guess wrong, we print a warning message later.
774 if (a_opt
&& n
== 0) {
775 for (p
= buffer
; p
< end
; ++p
) {
776 if (!isascii((unsigned char)*p
)) {
784 if (!a_opt
|| !text
) {
785 if (fwrite(buffer
, 1, len
, stdout
) != (size_t)len
)
786 error("write('%s')", pathname
);
790 /* hard case: convert \r\n to \n (sigh...) */
791 for (p
= buffer
; p
< end
; p
= q
+ 1) {
792 for (q
= p
; q
< end
; q
++) {
793 if (!warn
&& !isascii(*q
)) {
794 warningx("%s may be corrupted due"
795 " to weak text file detection"
796 " heuristic", pathname
);
808 if (fwrite(p
, 1, q
- p
, stdout
) != (size_t)(q
- p
))
809 error("write('%s')", pathname
);
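817 * Print a listing line for an entry to stdout (size, timestamp and name;
 * with v_opt == 2 the long form with the Method, Size, Ratio and CRC-32
 * columns), then skip the entry's data.
 *
 * NOTE(review): the timestamp is formatted with %G / %g, which strftime()
 * defines as the ISO 8601 week-based year; it can differ from the calendar
 * year (%Y / %y) for dates around New Year. Confirm which one is intended.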
820 list(struct archive
*a
, struct archive_entry
*e
)
826 mtime
= archive_entry_mtime(e
);
827 tm
= localtime(&mtime
);
829 strftime(buf
, sizeof(buf
), "%m-%d-%G %R", tm
);
831 strftime(buf
, sizeof(buf
), "%m-%d-%g %R", tm
);
834 printf(" %8ju %s %s\n",
835 (uintmax_t)archive_entry_size(e
),
836 buf
, archive_entry_pathname(e
));
837 } else if (v_opt
== 2) {
838 printf("%8ju Stored %7ju 0%% %s %08x %s\n",
839 (uintmax_t)archive_entry_size(e
),
840 (uintmax_t)archive_entry_size(e
),
843 archive_entry_pathname(e
));
845 ac(archive_read_data_skip(a
));
849 * Extract to memory to check CRC
852 test(struct archive
*a
, struct archive_entry
*e
)
858 if (S_ISDIR(archive_entry_filetype(e
)))
861 info(" testing: %s\t", archive_entry_pathname(e
));
862 while ((len
= archive_read_data(a
, buffer
, sizeof buffer
)) > 0)
865 info(" %s\n", archive_error_string(a
));
871 /* shouldn't be necessary, but it doesn't hurt */
872 ac(archive_read_data_skip(a
));
879 * Main loop: open the zipfile, iterate over its contents and decide what
880 * to do with each entry.
883 unzip(const char *fn
)
886 struct archive_entry
*e
;
888 uintmax_t total_size
, file_count
, error_count
;
890 if ((fd
= open(fn
, O_RDONLY
)) < 0)
893 a
= archive_read_new();
894 ac(archive_read_support_format_zip(a
));
895 ac(archive_read_open_fd(a
, fd
, 8192));
897 if (!q_opt
&& !p_opt
)
898 printf("Archive: %s\n", fn
);
901 printf(" Length %sDate Time Name\n", y_str
);
902 printf(" -------- %s---- ---- ----\n", y_str
);
903 } else if (v_opt
== 2) {
904 printf(" Length Method Size Ratio %sDate Time CRC-32 Name\n", y_str
);
905 printf("-------- ------ ------- ----- %s---- ---- ------ ----\n", y_str
);
912 ret
= archive_read_next_header(a
, &e
);
913 if (ret
== ARCHIVE_EOF
)
917 error_count
+= test(a
, e
);
920 else if (p_opt
|| c_opt
)
921 extract_stdout(a
, e
);
925 total_size
+= archive_entry_size(e
);
930 printf(" -------- %s-------\n", y_str
);
931 printf(" %8ju %s%ju file%s\n",
932 total_size
, y_str
, file_count
, file_count
!= 1 ? "s" : "");
933 } else if (v_opt
== 2) {
934 printf("-------- ------- --- %s-------\n", y_str
);
935 printf("%8ju %7ju 0%% %s%ju file%s\n",
936 total_size
, total_size
, y_str
, file_count
,
937 file_count
!= 1 ? "s" : "");
940 ac(archive_read_close(a
));
941 (void)archive_read_finish(a
);
947 if (error_count
> 0) {
948 errorx("%ju checksum error(s) found.", error_count
);
951 printf("No errors detected in compressed data of %s.\n",
961 fprintf(stderr
, "Usage: %s [-aCcfjLlnopqtuvy] [-d dir] [-x pattern] "
962 "zipfile\n", getprogname());
967 getopts(int argc
, char *argv
[])
971 optreset
= optind
= 1;
972 while ((opt
= getopt(argc
, argv
, "aCcd:fjLlnopqtuvyx:")) != -1)
1022 add_pattern(&exclude
, optarg
);
1035 main(int argc
, char *argv
[])
1037 const char *zipfile
;
1040 if (isatty(STDOUT_FILENO
))
1043 if (getenv("UNZIP_DEBUG") != NULL
)
1045 for (int i
= 0; i
< argc
; ++i
)
1046 debug("%s%c", argv
[i
], (i
< argc
- 1) ? ' ' : '\n');
1049 * Info-ZIP's unzip(1) expects certain options to come before the
1050 * zipfile name, and others to come after - though it does not
1051 * enforce this. For simplicity, we accept *all* options both
1052 * before and after the zipfile name.
1054 nopts
= getopts(argc
, argv
);
1058 zipfile
= argv
[nopts
++];
1060 while (nopts
< argc
&& *argv
[nopts
] != '-')
1061 add_pattern(&include
, argv
[nopts
++]);
1063 nopts
--; /* fake argv[0] */
1064 nopts
+= getopts(argc
- nopts
, argv
+ nopts
);
1066 if (n_opt
+ o_opt
+ u_opt
> 1)
1067 errorx("-n, -o and -u are contradictory");