3 /* gun.c -- simple gunzip to give an example of the use of inflateBack()
4 * Copyright (C) 2003, 2005 Mark Adler
5 * For conditions of distribution and use, see copyright notice in zlib.h
6 Version 1.3 12 June 2005 Mark Adler */
9 1.0 16 Feb 2003 First version for testing of inflateBack()
10 1.1 21 Feb 2005 Decompress concatenated gzip streams
11 Remove use of "this" variable (C++ keyword)
12 Fix return value for in()
13 Improve allocation failure checking
14 Add typecasting for void * structures
15 Add -h option for command version and usage
16 Add a bunch of comments
17 1.2 20 Mar 2005 Add Unix compress (LZW) decompression
18 Copy file attributes from input file to output file
19 1.3 12 Jun 2005 Add casts for error messages [Oberhumer]
23 gun [ -t ] [ name ... ]
25 decompresses the data in the named gzip files. If no arguments are given,
26 gun will decompress from stdin to stdout. The names must end in .gz, -gz,
27 .z, -z, _z, or .Z. The uncompressed data will be written to a file name
28 with the suffix stripped. On success, the original file is deleted. On
29 failure, the output file is deleted. For most failures, the command will
30 continue to process the remaining names on the command line. A memory
31 allocation failure will abort the command. If -t is specified, then the
32 listed files or stdin will be tested as gzip files for integrity (without
33 checking for a proper suffix), no output will be written, and no files
36 Like gzip, gun allows concatenated gzip streams and will decompress them,
37 writing all of the uncompressed data to the output. Unlike gzip, gun allows
38 an empty file on input, and will produce no error writing an empty output
41 gun will also decompress files made by Unix compress, which uses LZW
42 compression. These files are automatically detected by virtue of their
43 magic header bytes. Since the end of Unix compress stream is marked by the
44 end-of-file, they cannot be concatenated. If a Unix compress stream is
45 encountered in an input file, it is the last stream in that file.
47 Like gunzip and uncompress, the file attributes of the original compressed
48 file are maintained in the final uncompressed file, to the extent that the
49 user permissions allow it.
51 On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
52 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
53 LZW decompression provided by gun is about twice as fast as the standard
54 Unix uncompress command.
57 /* external functions and related types and constants */
58 #include <stdio.h> /* fprintf() */
59 #include <stdlib.h> /* malloc(), free() */
60 #include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
61 #include <errno.h> /* errno */
62 #include <fcntl.h> /* open() */
63 #include <unistd.h> /* read(), write(), close(), chown(), unlink() */
64 #include <sys/types.h>
65 #include <sys/stat.h> /* stat(), chmod() */
66 #include <utime.h> /* utime() */
67 #include "zlib.h" /* inflateBackInit(), inflateBack(), */
68 /* inflateBackEnd(), crc32() */
70 /* function declaration */
73 /* buffer constants */
74 #define SIZE 32768U /* input and output buffer sizes */
75 #define PIECE 16384 /* limits i/o chunks for 16-bit int case */
77 /* structure for inflateBack() to pass to input function in() -- it maintains the
78 input file and a buffer of size SIZE */
84 /* Load input buffer, assumed to be empty, and return bytes loaded and a
85 pointer to them. read() is called until the buffer is full, or until it
86 returns end-of-file or error. Return 0 on error. */
87 local
unsigned in(void *in_desc
, unsigned char **buf
)
92 struct ind
*me
= (struct ind
*)in_desc
;
99 if ((unsigned)ret
> SIZE
- len
)
100 ret
= (int)(SIZE
- len
);
101 ret
= (int)read(me
->infile
, next
, ret
);
108 } while (ret
!= 0 && len
< SIZE
);
112 /* structure for inflateBack() to pass to output function out() -- it maintains the
113 output file, a running CRC-32 check on the output and the total number of
114 bytes output, both for checking against the gzip trailer. (The length in
115 the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
116 the output is greater than 4 GB.) */
119 int check
; /* true if checking crc and total */
124 /* Write output buffer and update the CRC-32 and total bytes written. write()
125 is called until all of the output is written or an error is encountered.
126 On success out() returns 0. For a write failure, out() returns 1. If the
127 output file descriptor is -1, then nothing is written.
129 local
int out(void *out_desc
, unsigned char *buf
, unsigned len
)
132 struct outd
*me
= (struct outd
*)out_desc
;
135 me
->crc
= crc32(me
->crc
, buf
, len
);
138 if (me
->outfile
!= -1)
141 if ((unsigned)ret
> len
)
143 ret
= (int)write(me
->outfile
, buf
, ret
);
152 /* next input byte macro for use inside lunpipe() and gunpipe() */
153 #define NEXT() (have ? 0 : (have = in(indp, &next)), \
154 last = have ? (have--, (int)(*next++)) : -1)
156 /* memory for gunpipe() and lunpipe() --
157 the first 256 entries of prefix[] and suffix[] are never used, could
158 have offset the index, but it's faster to waste the memory */
159 unsigned char inbuf
[SIZE
]; /* input buffer */
160 unsigned char outbuf
[SIZE
]; /* output buffer */
161 unsigned short prefix
[65536]; /* index to LZW prefix string */
162 unsigned char suffix
[65536]; /* one-character LZW suffix */
163 unsigned char match
[65280 + 2]; /* buffer for reversed match or gzip
164 32K sliding window */
166 /* throw out what's left in the current bits byte buffer (this is a vestigial
167 aspect of the compressed data format derived from an implementation that
168 made use of a special VAX machine instruction!) */
169 #define FLUSHCODE() \
173 if (chunk > have) { \
179 if (chunk > have) { \
189 /* Decompress a compress (LZW) file from indp to outfile. The compress magic
190 header (two bytes) has already been read and verified. There are have bytes
191 of buffered input at next. strm is used for passing error information back
194 lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
195 file, read error, or write error (a write error indicated by strm->next_in
196 not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
198 local
int lunpipe(unsigned have
, unsigned char *next
, struct ind
*indp
,
199 int outfile
, z_stream
*strm
)
201 int last
; /* last byte read by NEXT(), or -1 if EOF */
202 int chunk
; /* bytes left in current chunk */
203 int left
; /* bits left in rem */
204 unsigned rem
; /* unused bits from input */
205 int bits
; /* current bits per code */
206 unsigned code
; /* code, table traversal index */
207 unsigned mask
; /* mask for current bits codes */
208 int max
; /* maximum bits per code for this stream */
209 int flags
; /* compress flags, then block compress flag */
210 unsigned end
; /* last valid entry in prefix/suffix tables */
211 unsigned temp
; /* current code */
212 unsigned prev
; /* previous code */
213 unsigned final
; /* last character written for previous code */
214 unsigned stack
; /* next position for reversed string */
215 unsigned outcnt
; /* bytes in output buffer */
216 struct outd outd
; /* output structure */
219 outd
.outfile
= outfile
;
222 /* process remainder of compress header -- a flags byte */
227 strm
->msg
= (char *)"unknown lzw flags set";
231 if (max
< 9 || max
> 16) {
232 strm
->msg
= (char *)"lzw bits out of range";
235 if (max
== 9) /* 9 doesn't really mean 9 */
237 flags
&= 0x80; /* true if block compress */
242 end
= flags
? 256 : 255;
244 /* set up: get first 9-bit code, which is the first decompressed byte, but
245 don't create a table entry until the next code */
246 if (NEXT() == -1) /* no compressed data is ok */
248 final
= prev
= (unsigned)last
; /* low 8 bits of code */
249 if (NEXT() == -1) /* missing a bit */
251 if (last
& 1) { /* code must be < 256 */
252 strm
->msg
= (char *)"invalid lzw code";
255 rem
= (unsigned)last
>> 1; /* remaining 7 bits */
257 chunk
= bits
- 2; /* 7 bytes left in this chunk */
258 outbuf
[0] = (unsigned char)final
; /* write first decompressed byte */
264 /* if the table will be full after this, increment the code size */
265 if (end
>= mask
&& bits
< max
) {
272 /* get a code of length bits */
273 if (chunk
== 0) /* decrement chunk modulo bits */
275 code
= rem
; /* low bits of code */
276 if (NEXT() == -1) { /* EOF is end of compressed data */
277 /* write remaining buffered output */
278 if (outcnt
&& out(&outd
, outbuf
, outcnt
)) {
279 strm
->next_in
= outbuf
; /* signal write error */
284 code
+= (unsigned)last
<< left
; /* middle (or high) bits of code */
287 if (bits
> left
) { /* need more bits */
288 if (NEXT() == -1) /* can't end in middle of code */
290 code
+= (unsigned)last
<< left
; /* high bits of code */
294 code
&= mask
; /* mask to current code length */
295 left
-= bits
; /* number of unused bits */
296 rem
= (unsigned)last
>> (8 - left
); /* unused bits from last byte */
298 /* process clear code (256) */
299 if (code
== 256 && flags
) {
301 bits
= 9; /* initialize bits and mask */
303 end
= 255; /* empty table */
304 continue; /* get next code */
307 /* special code to reuse last match */
308 temp
= code
; /* save the current code */
310 /* Be picky on the allowed code here, and make sure that the code
311 we drop through (prev) will be a valid index so that random
312 input does not cause an exception. The code != end + 1 check is
313 empirically derived, and not checked in the original uncompress
314 code. If this ever causes a problem, that check could be safely
315 removed. Leaving this check in greatly improves gun's ability
316 to detect random or corrupted input after a compress header.
317 In any case, the prev > end check must be retained. */
318 if (code
!= end
+ 1 || prev
> end
) {
319 strm
->msg
= (char *)"invalid lzw code";
322 match
[stack
++] = (unsigned char)final
;
326 /* walk through linked list to generate output in reverse order */
327 while (code
>= 256) {
328 match
[stack
++] = suffix
[code
];
331 match
[stack
++] = (unsigned char)code
;
334 /* link new table entry */
337 prefix
[end
] = (unsigned short)prev
;
338 suffix
[end
] = (unsigned char)final
;
341 /* set previous code for next iteration */
344 /* write output in forward order */
345 while (stack
> SIZE
- outcnt
) {
346 while (outcnt
< SIZE
)
347 outbuf
[outcnt
++] = match
[--stack
];
348 if (out(&outd
, outbuf
, outcnt
)) {
349 strm
->next_in
= outbuf
; /* signal write error */
355 outbuf
[outcnt
++] = match
[--stack
];
358 /* loop for next code with final and prev as the last match, rem and
359 left provide the first 0..7 bits of the next code, end is the last
364 /* Decompress a gzip file from infile to outfile. strm is assumed to have been
365 successfully initialized with inflateBackInit(). The input file may consist
366 of a series of gzip streams, in which case all of them will be decompressed
367 to the output file. If outfile is -1, then the gzip stream(s) integrity is
368 checked and nothing is written.
370 The return value is a zlib error code: Z_MEM_ERROR if out of memory,
371 Z_DATA_ERROR if the header or the compressed data is invalid, or if the
372 trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
373 prematurely or a write error occurs, or Z_ERRNO if junk (not another gzip
374 stream) follows a valid gzip stream.
376 local
int gunpipe(z_stream
*strm
, int infile
, int outfile
)
378 int ret
, first
, last
;
379 unsigned have
, flags
, len
;
381 struct ind ind
, *indp
;
384 /* setup input buffer */
389 /* decompress concatenated gzip streams */
390 have
= 0; /* no input data read in yet */
391 first
= 1; /* looking for first gzip header */
392 strm
->next_in
= Z_NULL
; /* so Z_BUF_ERROR means EOF */
394 /* look for the two magic header bytes for a gzip stream */
397 break; /* empty gzip stream is ok */
399 if (last
!= 31 || (NEXT() != 139 && last
!= 157)) {
400 strm
->msg
= (char *)"incorrect header check";
401 ret
= first
? Z_DATA_ERROR
: Z_ERRNO
;
402 break; /* not a gzip or compress header */
404 first
= 0; /* next non-header is junk */
406 /* process a compress (LZW) file -- can't be concatenated after this */
408 ret
= lunpipe(have
, next
, indp
, outfile
, strm
);
412 /* process remainder of gzip header */
414 if (NEXT() != 8) { /* only deflate method allowed */
415 if (last
== -1) break;
416 strm
->msg
= (char *)"unknown compression method";
420 flags
= NEXT(); /* header flags */
421 NEXT(); /* discard mod time, xflgs, os */
427 if (last
== -1) break;
429 strm
->msg
= (char *)"unknown header flags set";
433 if (flags
& 4) { /* extra field */
435 len
+= (unsigned)(NEXT()) << 8;
436 if (last
== -1) break;
440 if (NEXT() == -1) break;
443 if (last
== -1) break;
447 if (flags
& 8) /* file name */
448 while (NEXT() != 0 && last
!= -1)
450 if (flags
& 16) /* comment */
451 while (NEXT() != 0 && last
!= -1)
453 if (flags
& 2) { /* header crc */
457 if (last
== -1) break;
460 outd
.outfile
= outfile
;
462 outd
.crc
= crc32(0L, Z_NULL
, 0);
465 /* decompress data to output */
466 strm
->next_in
= next
;
467 strm
->avail_in
= have
;
468 ret
= inflateBack(strm
, in
, indp
, out
, &outd
);
469 if (ret
!= Z_STREAM_END
) break;
470 next
= strm
->next_in
;
471 have
= strm
->avail_in
;
472 strm
->next_in
= Z_NULL
; /* so Z_BUF_ERROR means EOF */
476 if (NEXT() != (outd
.crc
& 0xff) ||
477 NEXT() != ((outd
.crc
>> 8) & 0xff) ||
478 NEXT() != ((outd
.crc
>> 16) & 0xff) ||
479 NEXT() != ((outd
.crc
>> 24) & 0xff)) {
482 strm
->msg
= (char *)"incorrect data check";
487 if (NEXT() != (outd
.total
& 0xff) ||
488 NEXT() != ((outd
.total
>> 8) & 0xff) ||
489 NEXT() != ((outd
.total
>> 16) & 0xff) ||
490 NEXT() != ((outd
.total
>> 24) & 0xff)) {
493 strm
->msg
= (char *)"incorrect length check";
499 /* go back and look for another gzip stream */
502 /* clean up and return */
506 /* Copy file attributes, from -> to, as best we can. This is best effort, so
507 no errors are reported. The mode bits, including suid, sgid, and the sticky
508 bit are copied (if allowed), the owner's user id and group id are copied
509 (again if allowed), and the access and modify times are copied. */
510 local
void copymeta(char *from
, char *to
)
515 /* get all of from's Unix meta data, return if not a regular file */
516 if (stat(from
, &was
) != 0 || (was
.st_mode
& S_IFMT
) != S_IFREG
)
519 /* set to's mode bits, ignore errors */
520 (void)chmod(to
, was
.st_mode
& 07777);
522 /* copy owner's user and group, ignore errors */
523 (void)chown(to
, was
.st_uid
, was
.st_gid
);
525 /* copy access and modify times, ignore errors */
526 when
.actime
= was
.st_atime
;
527 when
.modtime
= was
.st_mtime
;
528 (void)utime(to
, &when
);
531 /* Decompress the file inname to the file outname, or if test is true, just
532 decompress without writing and check the gzip trailer for integrity. If
533 inname is NULL or an empty string, read from stdin. If outname is NULL or
534 an empty string, write to stdout. strm is a pre-initialized inflateBack
535 structure. When appropriate, copy the file attributes from inname to
538 gunzip() returns 1 if there is an out-of-memory error or an unexpected
539 return code from gunpipe(). Otherwise it returns 0.
541 local
int gunzip(z_stream
*strm
, char *inname
, char *outname
, int test
)
547 if (inname
== NULL
|| *inname
== 0) {
549 infile
= 0; /* stdin */
552 infile
= open(inname
, O_RDONLY
, 0);
554 fprintf(stderr
, "gun cannot open %s\n", inname
);
560 else if (outname
== NULL
|| *outname
== 0) {
562 outfile
= 1; /* stdout */
565 outfile
= open(outname
, O_CREAT
| O_TRUNC
| O_WRONLY
, 0666);
568 fprintf(stderr
, "gun cannot create %s\n", outname
);
575 ret
= gunpipe(strm
, infile
, outfile
);
576 if (outfile
> 2) close(outfile
);
577 if (infile
> 2) close(infile
);
579 /* interpret result */
583 if (infile
> 2 && outfile
> 2) {
584 copymeta(inname
, outname
); /* copy attributes */
588 fprintf(stderr
, "gun warning: trailing garbage ignored in %s\n",
592 if (outfile
> 2) unlink(outname
);
593 fprintf(stderr
, "gun data error on %s: %s\n", inname
, strm
->msg
);
596 if (outfile
> 2) unlink(outname
);
597 fprintf(stderr
, "gun out of memory error--aborting\n");
600 if (outfile
> 2) unlink(outname
);
601 if (strm
->next_in
!= Z_NULL
) {
602 fprintf(stderr
, "gun write error on %s: %s\n",
603 outname
, strerror(errno
));
606 fprintf(stderr
, "gun read error on %s: %s\n",
607 inname
, strerror(errno
));
610 fprintf(stderr
, "gun unexpected end of file on %s\n",
615 if (outfile
> 2) unlink(outname
);
616 fprintf(stderr
, "gun internal error--aborting\n");
622 /* Process the gun command line arguments. See the command syntax near the
623 beginning of this source file. */
624 int main(int argc
, char **argv
)
628 unsigned char *window
;
631 /* initialize inflateBack state for repeated use */
632 window
= match
; /* reuse LZW match buffer */
633 strm
.zalloc
= Z_NULL
;
635 strm
.opaque
= Z_NULL
;
636 ret
= inflateBackInit(&strm
, 15, window
);
638 fprintf(stderr
, "gun out of memory error--aborting\n");
642 /* decompress each file to the same name with the suffix removed */
646 if (argc
&& strcmp(*argv
, "-h") == 0) {
647 fprintf(stderr
, "gun 1.3 (12 Jun 2005)\n");
648 fprintf(stderr
, "Copyright (c) 2005 Mark Adler\n");
649 fprintf(stderr
, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
652 if (argc
&& strcmp(*argv
, "-t") == 0) {
662 len
= (int)strlen(*argv
);
663 if (strcmp(*argv
+ len
- 3, ".gz") == 0 ||
664 strcmp(*argv
+ len
- 3, "-gz") == 0)
666 else if (strcmp(*argv
+ len
- 2, ".z") == 0 ||
667 strcmp(*argv
+ len
- 2, "-z") == 0 ||
668 strcmp(*argv
+ len
- 2, "_z") == 0 ||
669 strcmp(*argv
+ len
- 2, ".Z") == 0)
672 fprintf(stderr
, "gun error: no gz type on %s--skipping\n",
676 outname
= malloc(len
+ 1);
677 if (outname
== NULL
) {
678 fprintf(stderr
, "gun out of memory error--aborting\n");
682 memcpy(outname
, *argv
, len
);
685 ret
= gunzip(&strm
, *argv
, outname
, test
);
686 if (outname
!= NULL
) free(outname
);
688 } while (argv
++, --argc
);
690 ret
= gunzip(&strm
, NULL
, NULL
, test
);
693 inflateBackEnd(&strm
);