1 /* gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface
2 * Copyright (C) 1992-1993 Jean-loup Gailly
3 * The unzip code was written and put in the public domain by Mark Adler.
4 * Portions of the lzw code are derived from the public domain 'compress'
5 * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
6 * Ken Turkowski, Dave Mack and Peter Jannesen.
8 * See the license_msg below and the file COPYING for the software license.
9 * See the file algorithm.doc for the compression algorithms and file formats.
12 static char *license_msg
[] = {
13 " Copyright (C) 1992-1993 Jean-loup Gailly",
14 " This program is free software; you can redistribute it and/or modify",
15 " it under the terms of the GNU General Public License as published by",
16 " the Free Software Foundation;\n"
17 " either version 2, or (at your option)",
18 " any later version.",
20 " This program is distributed in the hope that it will be useful,",
21 " but WITHOUT ANY WARRANTY; without even the implied warranty of",
22 " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the",
23 " GNU General Public License for more details.",
25 " You should have received a copy of the GNU General Public License",
26 " along with this program; if not, write to the Free Software",
27 " Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.",
32 static char rcsid
[] = "$Id: gzip.c,v 0.24 1993/06/24 10:52:07 jloup Exp $";
36 #include <sys/types.h>
61 # define O_BINARY 0 /* creation mode for open() */
63 #define RW_USER (S_IRUSR | S_IWUSR) /* creation mode for open() */
68 # define MAX_PATH_LEN 1024 /* max pathname length */
/* Prototype for lseek(), wrapped in the OF() macro like the other
 * declarations in this file. */
77 off_t lseek
OF((int fd
, off_t offset
, int whence
));
/* Global work buffers, declared through the DECLARE macro. The matching
 * ALLOC calls in decompress_bundle() are commented there as allocating
 * "all global buffers (for DYN_ALLOC option)".
 * NOTE(review): the DECLARE for outbuf is commented out below, yet
 * decompress_bundle() still reads outbuf[i] -- presumably outbuf is
 * defined elsewhere for the PADRE variant; confirm. */
83 DECLARE(uch
, inbuf
, INBUFSIZ
+INBUF_EXTRA
);
84 /* DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); */
86 DECLARE(ush
, d_buf
, DIST_BUFSIZE
);
87 DECLARE(uch
, window
, 2L*WSIZE
);
88 DECLARE(ush
, tab_prefix
, 1L<<BITS
);
/* Global option flags and per-file state shared by the routines in this
 * file. Each variable keeps its own one-line description.
 * NOTE(review): no_name and no_time start at -1, and get_method() only
 * stores the header time stamp when !no_time, so something outside this
 * view presumably resets them -- confirm. */
92 int ascii
= 0; /* convert end-of-lines to local OS conventions */
93 int to_stdout
= 0; /* output to stdout (-c) */
94 int decompress
= 0; /* decompress (-d) */
95 int force
= 0; /* don't ask questions, compress links (-f) */
96 int no_name
= -1; /* don't save or restore the original file name */
97 int no_time
= -1; /* don't save or restore the original file time */
98 int recursive
= 0; /* recurse through directories (-r) */
99 int list
= 0; /* list the file contents (-l) */
100 int verbose
= 0; /* be verbose (-v) */
101 int quiet
= 0; /* be very quiet (-q) */
102 int do_lzw
= 0; /* generate output compatible with old compress (-Z) */
103 int test
= 0; /* test .gz file integrity */
104 int foreground
; /* set if program run in foreground */
/* The program name is a fixed string here; it is used as the prefix of the
 * diagnostics printed by treat_file() and get_method(). */
105 char progname
[] = "GUNZIP"; /* program name */
106 int maxbits
= BITS
; /* max bits per code for LZW */
107 int method
= DEFLATED
;/* compression method */
108 int level
= 6; /* compression level */
109 int exit_code
= OK
; /* program exit code */
110 int save_orig_name
; /* set if original name must be saved */
111 int last_member
; /* set for .zip and .Z files */
112 int part_nb
; /* number of parts in .gz file */
113 long time_stamp
; /* original time stamp (modification time) */
114 long ifile_size
; /* input file size, -1 for devices (debug only) */
115 char *env
; /* contents of GZIP env variable */
116 char **args
= NULL
; /* argv pointer if GZIP env variable defined */
117 char z_suffix
[MAX_SUFFIX
+1]; /* default suffix (can be set with --suffix) */
118 int z_len
; /* strlen(z_suffix) */
120 long bytes_in
; /* number of input bytes */
121 long bytes_out
; /* number of output bytes */
122 long total_in
= 0; /* input bytes for all files */
123 long total_out
= 0; /* output bytes for all files */
/* Fixed-size path buffers: both hold at most MAX_PATH_LEN bytes including
 * the terminating NUL. */
124 char ifname
[MAX_PATH_LEN
]; /* input file name */
125 char ofname
[MAX_PATH_LEN
]; /* output file name */
126 int remove_ofname
= 0; /* remove output file on error */
127 struct stat istat
; /* status for input file */
128 int ifd
; /* input file descriptor */
129 int ofd
; /* output file descriptor */
130 unsigned insize
; /* valid bytes in inbuf */
131 unsigned inptr
; /* index of next byte to be processed in inbuf */
132 unsigned outcnt
; /* bytes in output buffer */
133 unsigned outbuflen
; /* added for PADRE */
134 unsigned outbufptr
; /* bytes in output buffer added for PADRE */
/* Table of long command-line options. Each entry is
 * { name, has_arg, flag, val } and maps a long option name to the short
 * option character stored in val; several names share one character
 * (for example "to-stdout"/"stdout" and "quiet"/"silent").
 * NOTE(review): neither the opening brace nor a terminating all-zero entry
 * is visible in this view; getopt_long() requires the latter if this table
 * is passed to it -- confirm against the full file. */
137 struct option longopts
[] =
139 /* { name has_arg *flag val } */
140 {"ascii", 0, 0, 'a'}, /* ascii text mode */
141 {"to-stdout", 0, 0, 'c'}, /* write output on standard output */
142 {"stdout", 0, 0, 'c'}, /* write output on standard output */
143 {"decompress", 0, 0, 'd'}, /* decompress */
144 {"uncompress", 0, 0, 'd'}, /* decompress */
145 /* {"encrypt", 0, 0, 'e'}, encrypt */
146 {"force", 0, 0, 'f'}, /* force overwrite of output file */
147 {"help", 0, 0, 'h'}, /* give help */
148 /* {"pkzip", 0, 0, 'k'}, force output in pkzip format */
149 {"list", 0, 0, 'l'}, /* list .gz file contents */
150 {"license", 0, 0, 'L'}, /* display software license */
151 {"no-name", 0, 0, 'n'}, /* don't save or restore original name & time */
152 {"name", 0, 0, 'N'}, /* save or restore original name & time */
153 {"quiet", 0, 0, 'q'}, /* quiet mode */
154 {"silent", 0, 0, 'q'}, /* quiet mode */
155 {"recursive", 0, 0, 'r'}, /* recurse through directories */
156 {"suffix", 1, 0, 'S'}, /* use given suffix instead of .gz */
157 {"test", 0, 0, 't'}, /* test compressed file integrity */
158 {"no-time", 0, 0, 'T'}, /* don't save or restore the time stamp */
159 {"verbose", 0, 0, 'v'}, /* verbose mode */
160 {"version", 0, 0, 'V'}, /* display version number */
161 {"fast", 0, 0, '1'}, /* compress faster */
162 {"best", 0, 0, '9'}, /* compress better */
163 {"lzw", 0, 0, 'Z'}, /* make output compatible with old compress */
164 {"bits", 1, 0, 'b'}, /* max number of bits per code (implies -Z) */
168 /* local functions */
170 /* local void usage OF((void)); */
171 /* local void help OF((void)); */
172 /* local void license OF((void)); */
173 /* local void version OF((void)); */
174 /* local void treat_stdin OF((void)); */
175 /*local void treat_file OF((char *iname)); */
/* Forward declarations of the file-local helpers, of main(), and of the
 * `work` function pointer. OF() wraps every parameter list.
 * NOTE(review): treat_file() takes `uch *iname` but forwards it to
 * get_istat(), which is declared with `char *iname`; the two pointer types
 * differ in signedness if uch is an unsigned char typedef -- confirm. */
176 local
void treat_file
OF((uch
*iname
, uch
*obuf
, int obuflen
));
177 /* local int create_outfile OF((void)); */
178 local
int do_stat
OF((char *name
, struct stat
*sbuf
));
179 local
char *get_suffix
OF((char *name
));
180 local
int get_istat
OF((char *iname
, struct stat
*sbuf
));
181 local
int make_ofname
OF((void));
182 local
int same_file
OF((struct stat
*stat1
, struct stat
*stat2
));
183 local
int name_too_long
OF((char *name
, struct stat
*statb
));
184 local
void shorten_name
OF((char *name
));
185 local
int get_method
OF((int in
));
186 local
void do_list
OF((int ifd
, int method
));
187 local
int check_ofname
OF((void));
188 /* local void copy_stat OF((struct stat *ifstat)); */
189 local
void do_exit
OF((int exitcode
));
190 int main
OF((int argc
, char **argv
));
/* `work` is the routine treat_file() invokes as (*work)(ifd, ofd); it is
 * initialised to unzip. */
191 int (*work
) OF((int infile
, int outfile
)) = unzip
; /* function to call */
/* strequ(s1, s2): non-zero when the two C strings compare equal with
 * strcmp(). Each argument is parenthesised and evaluated exactly once. */
194 #define strequ(s1, s2) (strcmp((s1),(s2)) == 0)
196 /* ======================================================================== */
/* Decompress one file: allocate the global work buffers, hand
 * cifname/obuf/obuflen to treat_file(), then print the decompressed length
 * (outbufptr) followed by up to the first 200 bytes of outbuf.
 * NOTE(review): the function is declared int, but no return statement is
 * visible in this view -- confirm what callers receive. */
197 int decompress_bundle(uch
*cifname
, uch
*obuf
, int obuflen
)
199 int i
, proglen
; /* length of progname */
/* NOTE(review): proglen is computed here but is not read by any line
 * visible in this function. */
201 proglen
= strlen(progname
);
204 /* Allocate all global buffers (for DYN_ALLOC option) */
205 ALLOC(uch
, inbuf
, INBUFSIZ
+INBUF_EXTRA
);
206 /* ALLOC(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); */
211 ALLOC(ush
, d_buf
, DIST_BUFSIZE
);
212 ALLOC(uch
, window
, 2L*WSIZE
);
213 ALLOC(ush
, tab_prefix
, 1L<<BITS
);
215 /* And get to work */
216 treat_file(cifname
, obuf
, obuflen
);
/* NOTE(review): outbufptr is declared unsigned, so "%d" does not match its
 * type ("%u" would). The preview loop below also compares the int counter
 * i against the unsigned outbufptr. */
218 printf("Decompressed file has %d characters\n", outbufptr
);
219 for (i
= 0; i
< 200; i
++) if (i
>= outbufptr
) break; else putchar(outbuf
[i
]);
228 /* methods 4 to 7 reserved */
/* Number of slots in the method-name tables (indices 0 to 8). */
230 #define MAX_METHODS 9
/* Printable name for each compression method code; slots 4 to 7 are the
 * reserved methods and slot 8 is "DEFLATED".
 * NOTE(review): the elements are string literals stored through `uch *` --
 * confirm uch is compatible with char for this initialiser. */
232 uch
*meth_name
[] = {"STORED", "COMPRESSED", "PACKED", "LZHED", "RESERVED",
233 "RESERVED","RESERVED","RESERVED", "DEFLATED"};
/* treat_file(iname, obuf, obuflen) -- PADRE variant.
 * Steps visible in this view:
 *   1. get_istat(iname, &istat) locates the input; any result other than
 *      OK makes the function return immediately.
 *   2. ifile_size is taken from istat.st_size.
 *   3. The input is opened with OPEN() into ifd and clear_bufs() resets
 *      the buffers.
 *   4. get_method(ifd) reads the member header, then (*work)(ifd, ofd)
 *      processes the member; a result other than OK sets method to -1.
 *   5. Processing stops when last_member is set or inptr == insize;
 *      otherwise get_method() is called again and bytes_out is reset.
 * NOTE(review): obuf and obuflen are not referenced by any line visible
 * here; presumably they are installed as the output buffer in lines
 * missing from this view -- confirm. */
235 /* ========================================================================
236 * decompress the given file
238 local
void treat_file(uch
*iname
, uch
*obuf
, int obuflen
)
241 /* Check if the input file is present, set ifname and istat: */
242 if (get_istat(iname
, &istat
) != OK
) return;
244 ifile_size
= istat
.st_size
;
/* Both arms of the conditional in the OPEN() call are O_RDONLY; the only
 * difference is that O_BINARY is added unless ascii is set while
 * decompress is clear. */
246 /* Open the input file and determine compression method. The mode
247 * parameter is ignored but required by some systems (VMS) and forbidden
248 * on other systems (MacOS).
250 ifd
= OPEN(ifname
, ascii
&& !decompress
? O_RDONLY
: O_RDONLY
| O_BINARY
,
253 fprintf(stderr
, "%s: ", progname
);
258 clear_bufs(); /* clear input and output buffers */
261 method
= get_method(ifd
);
262 /*if (method >= 0 && method < MAX_METHODS)
263 printf(" (%s)\n", meth_name[method]);
264 else printf(" Compression method %d\n", method);*/
267 return; /* error message already emitted */
271 /* Actually do the compression/decompression. Loop over zipped members.
274 if ((*work
)(ifd
, ofd
) != OK
) { /* ofd is ignored in PADRE version */
275 method
= -1; /* force cleanup */
/* Stop after the last member or once the input buffer is exhausted;
 * otherwise read the header of the next member. */
278 if (last_member
|| inptr
== insize
) break;
281 method
= get_method(ifd
);
282 if (method
< 0) break; /* error message already emitted */
283 bytes_out
= 0; /* required for length check */
290 /* ========================================================================
291 * Use lstat if available, except for -c or -f. Use stat otherwise.
292 * This allows links when not removing the original file.
294 local
int do_stat(name
, sbuf
)
299 #if (defined(S_IFLNK) || defined (S_ISLNK)) && !defined(NO_SYMLINK)
300 if (!to_stdout
&& !force
) {
301 return lstat(name
, sbuf
);
304 return stat(name
, sbuf
);
/* get_suffix(name): look for one of the known compressed-file suffixes at
 * the end of name, comparing against a lower-cased copy of its last
 * characters (see the description that follows).
 * NOTE(review): the return statements are not visible in this view; the
 * contract stated below is "pointer to the suffix, or NULL". */
307 /* ========================================================================
308 * Return a pointer to the 'z' suffix of a file name, or NULL. For all
309 * systems, ".gz", ".z", ".Z", ".taz", ".tgz", "-gz", "-z" and "_z" are
310 * accepted suffixes, in addition to the value of the --suffix option.
311 * ".tgz" is a useful convention for tar.z files on systems limited
312 * to 3 characters extensions. On such systems, ".?z" and ".??z" are
313 * also accepted suffixes. For Unix, we do not want to accept any
314 * .??z suffix as indicating a compressed file; some people use .xyz
315 * to denote volume data.
316 * On systems allowing multiple versions of the same file (such as VMS),
317 * this function removes any version suffix in the given name.
319 local
char *get_suffix(name
)
323 char suffix
[MAX_SUFFIX
+3]; /* last chars of name, forced to lower case */
/* Candidate suffixes, with the user-selectable z_suffix first.
 * NOTE(review): the search loop below stops at a NULL entry; that
 * terminator is not visible in this view -- confirm it is present. */
324 static char *known_suffixes
[] =
325 {z_suffix
, ".gz", ".z", ".taz", ".tgz", "-gz", "-z", "_z",
330 char **suf
= known_suffixes
;
332 if (strequ(z_suffix
, "z")) suf
++; /* check long suffixes first */
335 /* strip a version number from the file name */
/* This truncates the caller's string in place at the last SUFFIX_SEP. */
337 char *v
= strrchr(name
, SUFFIX_SEP
);
338 if (v
!= NULL
) *v
= '\0';
/* suffix[] holds MAX_SUFFIX+3 bytes, so either branch copies at most
 * MAX_SUFFIX+2 characters plus the terminator (assuming nlen is
 * strlen(name), which is set in a line not visible here). */
342 if (nlen
<= MAX_SUFFIX
+2) {
343 strcpy(suffix
, name
);
345 strcpy(suffix
, name
+nlen
-MAX_SUFFIX
-2);
348 slen
= strlen(suffix
);
/* A candidate matches only when it is strictly shorter than the tail and
 * the character just before it is not a path separator. */
350 int s
= strlen(*suf
);
351 if (slen
> s
&& suffix
[slen
-s
-1] != PATH_SEP
352 && strequ(suffix
+ slen
- s
, *suf
)) {
355 } while (*++suf
!= NULL
);
/* get_istat(iname, sbuf): copy iname into the global ifname, stat it with
 * do_stat(), and -- when that fails -- retry with each candidate suffix
 * appended (see the description that follows). */
361 /* ========================================================================
362 * Set ifname to the input file name (with a suffix appended if necessary)
363 * and istat to its stats. For decompression, if no file exists with the
364 * original name, try adding successively z_suffix, .gz, .z, -z and .Z.
365 * For MSDOS, we try only z_suffix and z.
366 * Return OK or ERROR.
368 local
int get_istat(iname
, sbuf
)
372 int ilen
; /* strlen(ifname) */
373 static char *suffixes
[] = {z_suffix
, ".gz", ".z", "-z", ".Z", NULL
};
374 char **suf
= suffixes
;
376 #ifdef NO_MULTIPLE_DOTS
377 char *dot
; /* pointer to ifname extension, or NULL */
/* NOTE(review): ifname is a fixed MAX_PATH_LEN buffer and no length check
 * on iname is visible before this strcpy(); a longer name would overflow
 * it. The later strcat() of z_suffix has the same exposure. Confirm
 * whether a check exists in lines missing from this view. */
380 strcpy(ifname
, iname
);
382 /* If input file exists, return OK. */
383 if (do_stat(ifname
, sbuf
) == 0) return OK
;
385 if (!decompress
|| errno
!= ENOENT
) {
390 /* file.ext doesn't exist, try adding a suffix (after removing any
391 * version number for VMS).
393 s
= get_suffix(ifname
);
395 perror(ifname
); /* ifname already has z suffix and does not exist */
399 #ifdef NO_MULTIPLE_DOTS
400 dot
= strrchr(ifname
, '.');
403 dot
= strrchr(ifname
, '.');
406 ilen
= strlen(ifname
);
/* suffixes[1] is already ".gz", so skip the first entry when z_suffix is
 * also ".gz" to avoid trying the same name twice. */
407 if (strequ(z_suffix
, ".gz")) suf
++;
409 /* Search for all suffixes */
412 #ifdef NO_MULTIPLE_DOTS
416 strcpy(ifname
, iname
);
417 /* Needed if the suffixes are not sorted by increasing length */
419 if (*dot
== '\0') strcpy(dot
, ".");
420 dot
[MAX_EXT_CHARS
+1-strlen(s
)] = '\0';
423 if (do_stat(ifname
, sbuf
) == 0) return OK
;
425 } while (*++suf
!= NULL
);
427 /* No suffix found, complain using z_suffix: */
429 strcpy(ifname
, iname
);
430 if (*dot
== '\0') strcpy(dot
, ".");
431 dot
[MAX_EXT_CHARS
+1-z_len
] = '\0';
433 strcat(ifname
, z_suffix
);
/* get_method(in): read the two magic bytes of the next member and classify
 * the input as gzip, pkzip, pack, LZW, LZH or unknown. For gzip members it
 * also consumes the rest of the header: method, flags, time stamp, extra
 * flags, OS type and the optional part number, extra field, original name
 * and comment (see the description that follows). */
440 /* ========================================================================
441 * Check the magic number of the input file and update ofname if an
442 * original name was given and to_stdout is not set.
443 * Return the compression method, -1 for error, -2 for warning.
444 * Set inptr to the offset of the next byte to be processed.
445 * Updates time_stamp if there is one and --no-time is not used.
446 * This function may be called repeatedly for an input file consisting
447 * of several contiguous gzip'ed members.
448 * IN assertions: there is at least one remaining compressed member.
449 * If the member is a zip file, it must be the only one.
451 local
int get_method(in
)
452 int in
; /* input file descriptor */
454 uch flags
; /* compression flags */
455 char magic
[2]; /* magic header */
456 ulg stamp
; /* time stamp */
459 magic
[0] = (char)get_byte();
460 magic
[1] = (char)get_byte();
461 method
= -1; /* unknown yet */
462 part_nb
++; /* number of parts in gzip file */
464 last_member
= RECORD_IO
;
465 /* assume multiple members in gzip file except for record oriented I/O */
467 if (memcmp(magic
, GZIP_MAGIC
, 2) == 0
468 || memcmp(magic
, OLD_GZIP_MAGIC
, 2) == 0) {
470 method
= (int)get_byte();
471 if (method
!= DEFLATED
) {
473 "%s: %s: unknown method %d -- get newer version of gzip\n",
474 progname
, ifname
, method
);
479 flags
= (uch
)get_byte();
481 if ((flags
& ENCRYPTED
) != 0) {
483 "%s: %s is encrypted -- get newer version of gzip\n",
/* NOTE(review): the message below reads "is a a multi-part" (doubled
 * article); it is runtime text and is left unchanged here. */
488 if ((flags
& CONTINUATION
) != 0) {
490 "%s: %s is a a multi-part gzip file -- get newer version of gzip\n",
493 if (force
<= 1) return -1;
495 if ((flags
& RESERVED
) != 0) {
497 "%s: %s has flags 0x%x -- get newer version of gzip\n",
498 progname
, ifname
, flags
);
500 if (force
<= 1) return -1;
/* The 32-bit time stamp is stored least-significant byte first. */
502 stamp
= (ulg
)get_byte();
503 stamp
|= ((ulg
)get_byte()) << 8;
504 stamp
|= ((ulg
)get_byte()) << 16;
505 stamp
|= ((ulg
)get_byte()) << 24;
/* NOTE(review): no_time is initialised to -1, so this assignment only
 * happens if no_time is reset to 0 somewhere outside this view. */
506 if (stamp
!= 0 && !no_time
) time_stamp
= stamp
;
508 (void)get_byte(); /* Ignore extra flags for the moment */
509 (void)get_byte(); /* Ignore OS type for the moment */
511 if ((flags
& CONTINUATION
) != 0) {
512 unsigned part
= (unsigned)get_byte();
513 part
|= ((unsigned)get_byte())<<8;
515 fprintf(stderr
,"%s: %s: part number %u\n",
516 progname
, ifname
, part
);
/* The extra field is a 16-bit little-endian length followed by that many
 * bytes, all of which are skipped. */
519 if ((flags
& EXTRA_FIELD
) != 0) {
520 unsigned len
= (unsigned)get_byte();
521 len
|= ((unsigned)get_byte())<<8;
523 fprintf(stderr
,"%s: %s: extra field of %u bytes ignored\n",
524 progname
, ifname
, len
);
526 while (len
--) (void)get_byte();
529 /* Get original file name if it was truncated */
530 if ((flags
& ORIG_NAME
) != 0) {
531 /* Discard the old name */
532 char c
; /* dummy used for NeXTstep 3.0 cc optimizer bug */
533 do {c
=get_byte();} while (c
!= 0);
537 /* Discard file comment if any */
538 if ((flags
& COMMENT
) != 0) {
539 while (get_char() != 0) /* null */ ;
/* NOTE(review): 2*sizeof(long) equals 8 only where long is 4 bytes, while
 * do_list() reads an 8-byte trailer (crc and size); on platforms with an
 * 8-byte long this adds 16 -- confirm the intent. */
542 header_bytes
= inptr
+ 2*sizeof(long); /* include crc and size */
545 } else if (memcmp(magic
, PKZIP_MAGIC
, 2) == 0 && inptr
== 2
546 && memcmp((char*)inbuf
, PKZIP_MAGIC
, 4) == 0) {
547 /* To simplify the code, we support a zip file when alone only.
548 * We are thus guaranteed that the entire local header fits in inbuf.
552 if (check_zipfile(in
) != OK
) return -1;
553 /* check_zipfile may get ofname from the local header */
556 } else if (memcmp(magic
, PACK_MAGIC
, 2) == 0) {
560 } else if (memcmp(magic
, LZW_MAGIC
, 2) == 0) {
565 } else if (memcmp(magic
, LZH_MAGIC
, 2) == 0) {
570 } else { /* pass input unchanged */
571 /*fprintf(stderr, "Assuming %s is already decompressed.\n", ifname);*/
577 if (method
>= 0) return method
;
580 fprintf(stderr
, "\n%s: %s: not in gzip format\n", progname
, ifname
);
581 fprintf(stderr
, "Assuming %s is already decompressed.\n", ifname
);
588 WARN((stderr
, "\n%s: %s: decompression OK, trailing garbage ignored\n",
/* do_list(ifd, method): print one listing line for the current input, or
 * the accumulated totals when method is negative. On the first call with
 * a non-negative method it prints the column headings. For a DEFLATED
 * input that is not the last member it seeks to the final 8 bytes of the
 * file and takes the uncompressed size from the last 4 of them. */
594 /* ========================================================================
595 * Display the characteristics of the compressed file.
596 * If the given method is < 0, display the accumulated totals.
597 * IN assertions: time_stamp, header_bytes and ifile_size are initialized.
599 local
void do_list(ifd
, method
)
600 int ifd
; /* input file descriptor */
601 int method
; /* compression method */
603 ulg crc
; /* original crc */
604 static int first_time
= 1;
605 static char* methods
[MAX_METHODS
] = {
610 "", "", "", "", /* 4 to 7 reserved */
614 if (first_time
&& method
>= 0) {
617 printf("method crc date time ");
620 printf("compressed uncompr. ratio uncompressed_name\n");
622 } else if (method
< 0) {
623 if (total_in
<= 0 || total_out
<= 0) return;
625 printf(" %9lu %9lu ",
626 total_in
, total_out
);
628 printf("%9ld %9ld ", total_in
, total_out
);
630 display_ratio(total_out
-(total_in
-header_bytes
), total_out
, stdout
);
631 /* header_bytes is not meaningful but used to ensure the same
632 * ratio if there is a single file.
634 printf(" (totals)\n");
637 crc
= (ulg
)~0; /* unknown */
639 bytes_in
= ifile_size
;
642 if (method
== DEFLATED
&& !last_member
) {
643 /* Get the crc and uncompressed size for gzip'ed (not zip'ed) files.
644 * If the lseek fails, we could use read() to get to the end, but
645 * --list is used to get quick results.
646 * Use "gunzip < foo.gz | wc -c" to get the uncompressed size if
647 * you are not concerned about speed.
649 bytes_in
= (long)lseek(ifd
, (off_t
)(-8), SEEK_END
);
650 if (bytes_in
!= -1L) {
653 if (read(ifd
, (char*)buf
, sizeof(buf
)) != sizeof(buf
)) {
657 bytes_out
= LG(buf
+4);
660 #endif /* RECORD_IO */
/* NOTE(review): time_stamp is a long; the cast below assumes time_t has
 * the same size and representation as long -- confirm for the target
 * platforms. */
661 date
= ctime((time_t*)&time_stamp
) + 4; /* skip the day of the week */
662 date
[12] = '\0'; /* suppress the 1/100sec and the year */
/* NOTE(review): methods[] has MAX_METHODS slots and is indexed by method
 * without a range check visible in this view. */
664 printf("%5s %08lx %11s ", methods
[method
], crc
, date
);
666 printf("%9ld %9ld ", bytes_in
, bytes_out
);
/* A size of -1 marks an unknown value: the per-file figures are zeroed so
 * the ratio below is computed from zeros; otherwise the size is added to
 * the running total while that total is non-negative. */
667 if (bytes_in
== -1L) {
669 bytes_in
= bytes_out
= header_bytes
= 0;
670 } else if (total_in
>= 0) {
671 total_in
+= bytes_in
;
673 if (bytes_out
== -1L) {
675 bytes_in
= bytes_out
= header_bytes
= 0;
676 } else if (total_out
>= 0) {
677 total_out
+= bytes_out
;
679 display_ratio(bytes_out
-(bytes_in
-header_bytes
), bytes_out
, stdout
);
680 printf(" %s\n", ofname
);
/* same_file(stat1, stat2): compare two stat records field by field and
 * return non-zero when every compared field matches.
 * NOTE(review): as shown here the st_mode ... st_ctime comparisons follow
 * the inode/device test unconditionally, although the inline comment
 * describes them as a fallback for systems where st_ino and st_dev cannot
 * be relied on; a conditional-compilation guard may be missing from this
 * view -- confirm against the full file. */
683 /* ========================================================================
684 * Return true if the two stat structures correspond to the same file.
686 local
int same_file(stat1
, stat2
)
690 return stat1
->st_ino
== stat2
->st_ino
691 && stat1
->st_dev
== stat2
->st_dev
693 /* Can't rely on st_ino and st_dev, use other fields: */
694 && stat1
->st_mode
== stat2
->st_mode
695 && stat1
->st_uid
== stat2
->st_uid
696 && stat1
->st_gid
== stat2
->st_gid
697 && stat1
->st_size
== stat2
->st_size
698 && stat1
->st_atime
== stat2
->st_atime
699 && stat1
->st_mtime
== stat2
->st_mtime
700 && stat1
->st_ctime
== stat2
->st_ctime
/* name_too_long(name, statb): the visible lines stat `name` into a local
 * copy of *statb and set res to whether that stat succeeded and describes
 * the same file as *statb (via same_file()).
 * NOTE(review): the step that shortens `name` before the stat() call, the
 * declaration of res and the return statement are not visible in this
 * view -- confirm against the full file. */
705 /* ========================================================================
706 * Return true if a file name is ambiguous because the operating system
707 * truncates file names.
709 local
int name_too_long(name
, statb
)
710 char *name
; /* file name to check */
711 struct stat
*statb
; /* stat buf for this file name */
713 int s
= strlen(name
);
715 struct stat tstat
; /* stat for truncated name */
718 tstat
= *statb
; /* Just in case OS does not fill all fields */
720 res
= stat(name
, &tstat
) == 0 && same_file(statb
, &tstat
);
722 Trace((stderr
, " too_long(%s) => %d\n", name
, res
));
726 /* ========================================================================
727 * Shorten the given name by one character, or replace a .tar extension
728 * with .tgz. Truncate the last part of the name which is longer than
729 * MIN_PART characters: 1234.678.012.gz -> 123.678.012.gz. If the name
730 * has only parts shorter than MIN_PART truncate the longest part.
731 * For decompression, just remove the last character of the name.
733 * IN assertion: for compression, the suffix of the given name is z_suffix.
735 local
void shorten_name(name
)
742 /* ========================================================================
743 * Free all dynamically allocated variables and exit with the given code.
745 local
void do_exit(exitcode
)
748 static int in_exit
= 0;
750 if (in_exit
) exit(exitcode
);
752 if (env
!= NULL
) free(env
), env
= NULL
;
753 if (args
!= NULL
) free((char*)args
), args
= NULL
;