Sync usage with man page.
[netbsd-mini2440.git] / common / dist / zlib / examples / gun.c
blob5c27c54e3c82a18b6d523421e6cce2fd1448f557
1 /* $NetBSD$ */
3 /* gun.c -- simple gunzip to give an example of the use of inflateBack()
4 * Copyright (C) 2003, 2005 Mark Adler
5 * For conditions of distribution and use, see copyright notice in zlib.h
6 Version 1.3 12 June 2005 Mark Adler */
8 /* Version history:
9 1.0 16 Feb 2003 First version for testing of inflateBack()
10 1.1 21 Feb 2005 Decompress concatenated gzip streams
11 Remove use of "this" variable (C++ keyword)
12 Fix return value for in()
13 Improve allocation failure checking
14 Add typecasting for void * structures
15 Add -h option for command version and usage
16 Add a bunch of comments
17 1.2 20 Mar 2005 Add Unix compress (LZW) decompression
18 Copy file attributes from input file to output file
19 1.3 12 Jun 2005 Add casts for error messages [Oberhumer]
23 gun [ -t ] [ name ... ]
25 decompresses the data in the named gzip files. If no arguments are given,
26 gun will decompress from stdin to stdout. The names must end in .gz, -gz,
27 .z, -z, _z, or .Z. The uncompressed data will be written to a file name
28 with the suffix stripped. On success, the original file is deleted. On
29 failure, the output file is deleted. For most failures, the command will
30 continue to process the remaining names on the command line. A memory
31 allocation failure will abort the command. If -t is specified, then the
32 listed files or stdin will be tested as gzip files for integrity (without
33 checking for a proper suffix), no output will be written, and no files
34 will be deleted.
36 Like gzip, gun allows concatenated gzip streams and will decompress them,
37 writing all of the uncompressed data to the output. Unlike gzip, gun allows
38 an empty file on input, and will produce no error writing an empty output
39 file.
41 gun will also decompress files made by Unix compress, which uses LZW
42 compression. These files are automatically detected by virtue of their
43 magic header bytes. Since the end of a Unix compress stream is marked by the
44 end-of-file, they cannot be concatenated. If a Unix compress stream is
45 encountered in an input file, it is the last stream in that file.
47 Like gunzip and uncompress, the file attributes of the original compressed
48 file are maintained in the final uncompressed file, to the extent that the
49 user permissions allow it.
51 On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
52 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
53 LZW decompression provided by gun is about twice as fast as the standard
54 Unix uncompress command.
57 /* external functions and related types and constants */
58 #include <stdio.h> /* fprintf() */
59 #include <stdlib.h> /* malloc(), free() */
60 #include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
61 #include <errno.h> /* errno */
62 #include <fcntl.h> /* open() */
63 #include <unistd.h> /* read(), write(), close(), chown(), unlink() */
64 #include <sys/types.h>
65 #include <sys/stat.h> /* stat(), chmod() */
66 #include <utime.h> /* utime() */
67 #include "zlib.h" /* inflateBackInit(), inflateBack(), */
68 /* inflateBackEnd(), crc32() */
/* storage class used for every function private to this file */
#define local static

/* buffer constants */
#define SIZE 32768U         /* bytes in each of the input and output buffers */
#define PIECE 16384         /* largest single read()/write() request, so the
                               count still fits when int is 16 bits */
/* Input descriptor handed to inflateBack() and passed on to in(): the file
   to read from and the SIZE-byte buffer that in() refills. */
struct ind {
    int infile;                 /* file descriptor that in() reads */
    unsigned char *inbuf;       /* start of the buffer that in() fills */
};
84 /* Load input buffer, assumed to be empty, and return bytes loaded and a
85 pointer to them. read() is called until the buffer is full, or until it
86 returns end-of-file or error. Return 0 on error. */
87 local unsigned in(void *in_desc, unsigned char **buf)
89 int ret;
90 unsigned len;
91 unsigned char *next;
92 struct ind *me = (struct ind *)in_desc;
94 next = me->inbuf;
95 *buf = next;
96 len = 0;
97 do {
98 ret = PIECE;
99 if ((unsigned)ret > SIZE - len)
100 ret = (int)(SIZE - len);
101 ret = (int)read(me->infile, next, ret);
102 if (ret == -1) {
103 len = 0;
104 break;
106 next += ret;
107 len += ret;
108 } while (ret != 0 && len < SIZE);
109 return len;
/* Output descriptor handed to inflateBack() and passed on to out(): the file
   to write to, plus a running CRC-32 and byte count of everything written so
   that both can be compared with the gzip trailer.  (The trailer stores the
   length modulo 2^32, so a 32-bit long is enough even for output larger than
   4 GB.) */
struct outd {
    int outfile;                /* output file descriptor, or -1 for none */
    int check;                  /* true if crc and total are to be updated */
    unsigned long crc;          /* running CRC-32 of the output */
    unsigned long total;        /* running count of output bytes */
};
124 /* Write output buffer and update the CRC-32 and total bytes written. write()
125 is called until all of the output is written or an error is encountered.
126 On success out() returns 0. For a write failure, out() returns 1. If the
127 output file descriptor is -1, then nothing is written.
129 local int out(void *out_desc, unsigned char *buf, unsigned len)
131 int ret;
132 struct outd *me = (struct outd *)out_desc;
134 if (me->check) {
135 me->crc = crc32(me->crc, buf, len);
136 me->total += len;
138 if (me->outfile != -1)
139 do {
140 ret = PIECE;
141 if ((unsigned)ret > len)
142 ret = (int)len;
143 ret = (int)write(me->outfile, buf, ret);
144 if (ret == -1)
145 return 1;
146 buf += ret;
147 len -= ret;
148 } while (len != 0);
149 return 0;
/* Fetch the next input byte inside lunpipe() and gunpipe().  The macro uses
   the caller's locals have (bytes buffered), next (read position), indp
   (input descriptor) and last.  If the buffer is empty it is refilled with
   in() first.  The byte, or -1 if no input could be obtained, is stored in
   last and is also the value of the expression. */
#define NEXT() (have ? 0 : (have = in(indp, &next)), \
                last = have ? (have--, (int)(*next++)) : -1)
156 /* memory for gunpipe() and lunpipe() --
157 the first 256 entries of prefix[] and suffix[] are never used, could
158 have offset the index, but it's faster to waste the memory */
159 unsigned char inbuf[SIZE]; /* input buffer */
160 unsigned char outbuf[SIZE]; /* output buffer */
161 unsigned short prefix[65536]; /* index to LZW prefix string */
162 unsigned char suffix[65536]; /* one-character LZW suffix */
163 unsigned char match[65280 + 2]; /* buffer for reversed match or gzip
164 32K sliding window */
/* Discard whatever remains of the current chunk of codes: the buffered bits
   (left, rem) are dropped and the chunk bytes not yet read are skipped in the
   input, refilling the buffer through NEXT() once if the skip runs past what
   is buffered.  This is a vestige of the compressed data format, which came
   from an implementation built around a special VAX machine instruction.
   Uses the caller's locals left, rem, chunk, have and next. */
#define FLUSHCODE() \
    do { \
        left = 0; \
        rem = 0; \
        if (chunk > have) { \
            chunk -= have; \
            have = 0; \
            if (NEXT() == -1) \
                break; \
            chunk--; \
            if (chunk > have) { \
                chunk = have = 0; \
                break; \
            } \
        } \
        have -= chunk; \
        next += chunk; \
        chunk = 0; \
    } while (0)
189 /* Decompress a compress (LZW) file from indp to outfile. The compress magic
190 header (two bytes) has already been read and verified. There are have bytes
191 of buffered input at next. strm is used for passing error information back
192 to gunpipe().
194 lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
195 file, read error, or write error (a write error indicated by strm->next_in
196 not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
198 local int lunpipe(unsigned have, unsigned char *next, struct ind *indp,
199 int outfile, z_stream *strm)
201 int last; /* last byte read by NEXT(), or -1 if EOF */
202 int chunk; /* bytes left in current chunk */
203 int left; /* bits left in rem */
204 unsigned rem; /* unused bits from input */
205 int bits; /* current bits per code */
206 unsigned code; /* code, table traversal index */
207 unsigned mask; /* mask for current bits codes */
208 int max; /* maximum bits per code for this stream */
209 int flags; /* compress flags, then block compress flag */
210 unsigned end; /* last valid entry in prefix/suffix tables */
211 unsigned temp; /* current code */
212 unsigned prev; /* previous code */
213 unsigned final; /* last character written for previous code */
214 unsigned stack; /* next position for reversed string */
215 unsigned outcnt; /* bytes in output buffer */
216 struct outd outd; /* output structure */
218 /* set up output */
219 outd.outfile = outfile;
220 outd.check = 0;
222 /* process remainder of compress header -- a flags byte */
223 flags = NEXT();
224 if (last == -1)
225 return Z_BUF_ERROR;
226 if (flags & 0x60) {
227 strm->msg = (char *)"unknown lzw flags set";
228 return Z_DATA_ERROR;
230 max = flags & 0x1f;
231 if (max < 9 || max > 16) {
232 strm->msg = (char *)"lzw bits out of range";
233 return Z_DATA_ERROR;
235 if (max == 9) /* 9 doesn't really mean 9 */
236 max = 10;
237 flags &= 0x80; /* true if block compress */
239 /* clear table */
240 bits = 9;
241 mask = 0x1ff;
242 end = flags ? 256 : 255;
244 /* set up: get first 9-bit code, which is the first decompressed byte, but
245 don't create a table entry until the next code */
246 if (NEXT() == -1) /* no compressed data is ok */
247 return Z_OK;
248 final = prev = (unsigned)last; /* low 8 bits of code */
249 if (NEXT() == -1) /* missing a bit */
250 return Z_BUF_ERROR;
251 if (last & 1) { /* code must be < 256 */
252 strm->msg = (char *)"invalid lzw code";
253 return Z_DATA_ERROR;
255 rem = (unsigned)last >> 1; /* remaining 7 bits */
256 left = 7;
257 chunk = bits - 2; /* 7 bytes left in this chunk */
258 outbuf[0] = (unsigned char)final; /* write first decompressed byte */
259 outcnt = 1;
261 /* decode codes */
262 stack = 0;
263 for (;;) {
264 /* if the table will be full after this, increment the code size */
265 if (end >= mask && bits < max) {
266 FLUSHCODE();
267 bits++;
268 mask <<= 1;
269 mask++;
272 /* get a code of length bits */
273 if (chunk == 0) /* decrement chunk modulo bits */
274 chunk = bits;
275 code = rem; /* low bits of code */
276 if (NEXT() == -1) { /* EOF is end of compressed data */
277 /* write remaining buffered output */
278 if (outcnt && out(&outd, outbuf, outcnt)) {
279 strm->next_in = outbuf; /* signal write error */
280 return Z_BUF_ERROR;
282 return Z_OK;
284 code += (unsigned)last << left; /* middle (or high) bits of code */
285 left += 8;
286 chunk--;
287 if (bits > left) { /* need more bits */
288 if (NEXT() == -1) /* can't end in middle of code */
289 return Z_BUF_ERROR;
290 code += (unsigned)last << left; /* high bits of code */
291 left += 8;
292 chunk--;
294 code &= mask; /* mask to current code length */
295 left -= bits; /* number of unused bits */
296 rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
298 /* process clear code (256) */
299 if (code == 256 && flags) {
300 FLUSHCODE();
301 bits = 9; /* initialize bits and mask */
302 mask = 0x1ff;
303 end = 255; /* empty table */
304 continue; /* get next code */
307 /* special code to reuse last match */
308 temp = code; /* save the current code */
309 if (code > end) {
310 /* Be picky on the allowed code here, and make sure that the code
311 we drop through (prev) will be a valid index so that random
312 input does not cause an exception. The code != end + 1 check is
313 empirically derived, and not checked in the original uncompress
314 code. If this ever causes a problem, that check could be safely
315 removed. Leaving this check in greatly improves gun's ability
316 to detect random or corrupted input after a compress header.
317 In any case, the prev > end check must be retained. */
318 if (code != end + 1 || prev > end) {
319 strm->msg = (char *)"invalid lzw code";
320 return Z_DATA_ERROR;
322 match[stack++] = (unsigned char)final;
323 code = prev;
326 /* walk through linked list to generate output in reverse order */
327 while (code >= 256) {
328 match[stack++] = suffix[code];
329 code = prefix[code];
331 match[stack++] = (unsigned char)code;
332 final = code;
334 /* link new table entry */
335 if (end < mask) {
336 end++;
337 prefix[end] = (unsigned short)prev;
338 suffix[end] = (unsigned char)final;
341 /* set previous code for next iteration */
342 prev = temp;
344 /* write output in forward order */
345 while (stack > SIZE - outcnt) {
346 while (outcnt < SIZE)
347 outbuf[outcnt++] = match[--stack];
348 if (out(&outd, outbuf, outcnt)) {
349 strm->next_in = outbuf; /* signal write error */
350 return Z_BUF_ERROR;
352 outcnt = 0;
354 do {
355 outbuf[outcnt++] = match[--stack];
356 } while (stack);
358 /* loop for next code with final and prev as the last match, rem and
359 left provide the first 0..7 bits of the next code, end is the last
360 valid table entry */
364 /* Decompress a gzip file from infile to outfile. strm is assumed to have been
365 successfully initialized with inflateBackInit(). The input file may consist
366 of a series of gzip streams, in which case all of them will be decompressed
367 to the output file. If outfile is -1, then the gzip stream(s) integrity is
368 checked and nothing is written.
370 The return value is a zlib error code: Z_MEM_ERROR if out of memory,
371 Z_DATA_ERROR if the header or the compressed data is invalid, or if the
372 trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
373 prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip
374 stream) follows a valid gzip stream.
376 local int gunpipe(z_stream *strm, int infile, int outfile)
378 int ret, first, last;
379 unsigned have, flags, len;
380 unsigned char *next;
381 struct ind ind, *indp;
382 struct outd outd;
384 /* setup input buffer */
385 ind.infile = infile;
386 ind.inbuf = inbuf;
387 indp = &ind;
389 /* decompress concatenated gzip streams */
390 have = 0; /* no input data read in yet */
391 first = 1; /* looking for first gzip header */
392 strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
393 for (;;) {
394 /* look for the two magic header bytes for a gzip stream */
395 if (NEXT() == -1) {
396 ret = Z_OK;
397 break; /* empty gzip stream is ok */
399 if (last != 31 || (NEXT() != 139 && last != 157)) {
400 strm->msg = (char *)"incorrect header check";
401 ret = first ? Z_DATA_ERROR : Z_ERRNO;
402 break; /* not a gzip or compress header */
404 first = 0; /* next non-header is junk */
406 /* process a compress (LZW) file -- can't be concatenated after this */
407 if (last == 157) {
408 ret = lunpipe(have, next, indp, outfile, strm);
409 break;
412 /* process remainder of gzip header */
413 ret = Z_BUF_ERROR;
414 if (NEXT() != 8) { /* only deflate method allowed */
415 if (last == -1) break;
416 strm->msg = (char *)"unknown compression method";
417 ret = Z_DATA_ERROR;
418 break;
420 flags = NEXT(); /* header flags */
421 NEXT(); /* discard mod time, xflgs, os */
422 NEXT();
423 NEXT();
424 NEXT();
425 NEXT();
426 NEXT();
427 if (last == -1) break;
428 if (flags & 0xe0) {
429 strm->msg = (char *)"unknown header flags set";
430 ret = Z_DATA_ERROR;
431 break;
433 if (flags & 4) { /* extra field */
434 len = NEXT();
435 len += (unsigned)(NEXT()) << 8;
436 if (last == -1) break;
437 while (len > have) {
438 len -= have;
439 have = 0;
440 if (NEXT() == -1) break;
441 len--;
443 if (last == -1) break;
444 have -= len;
445 next += len;
447 if (flags & 8) /* file name */
448 while (NEXT() != 0 && last != -1)
450 if (flags & 16) /* comment */
451 while (NEXT() != 0 && last != -1)
453 if (flags & 2) { /* header crc */
454 NEXT();
455 NEXT();
457 if (last == -1) break;
459 /* set up output */
460 outd.outfile = outfile;
461 outd.check = 1;
462 outd.crc = crc32(0L, Z_NULL, 0);
463 outd.total = 0;
465 /* decompress data to output */
466 strm->next_in = next;
467 strm->avail_in = have;
468 ret = inflateBack(strm, in, indp, out, &outd);
469 if (ret != Z_STREAM_END) break;
470 next = strm->next_in;
471 have = strm->avail_in;
472 strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
474 /* check trailer */
475 ret = Z_BUF_ERROR;
476 if (NEXT() != (outd.crc & 0xff) ||
477 NEXT() != ((outd.crc >> 8) & 0xff) ||
478 NEXT() != ((outd.crc >> 16) & 0xff) ||
479 NEXT() != ((outd.crc >> 24) & 0xff)) {
480 /* crc error */
481 if (last != -1) {
482 strm->msg = (char *)"incorrect data check";
483 ret = Z_DATA_ERROR;
485 break;
487 if (NEXT() != (outd.total & 0xff) ||
488 NEXT() != ((outd.total >> 8) & 0xff) ||
489 NEXT() != ((outd.total >> 16) & 0xff) ||
490 NEXT() != ((outd.total >> 24) & 0xff)) {
491 /* length error */
492 if (last != -1) {
493 strm->msg = (char *)"incorrect length check";
494 ret = Z_DATA_ERROR;
496 break;
499 /* go back and look for another gzip stream */
502 /* clean up and return */
503 return ret;
/* Copy file attributes, from -> to, as best we can.  This is best effort, so
   no errors are reported.  Nothing is done unless from is a regular file.
   The owner's user id and group id are copied (if allowed), then the mode
   bits including the sticky bit, then the access and modify times.

   The ownership is changed before the mode because chown() may clear the
   set-user-ID and set-group-ID bits; setting the mode afterwards lets them
   survive.  Those two bits are copied only when the ownership change
   succeeded, so that a file which ends up owned by the invoking user never
   carries set-id bits taken from somebody else's file. */
static void copymeta(char *from, char *to)
{
    struct stat was;
    struct utimbuf when;
    mode_t mode;

    /* get all of from's Unix meta data, return if not a regular file */
    if (stat(from, &was) != 0 || !S_ISREG(was.st_mode))
        return;
    mode = was.st_mode & 07777;

    /* copy owner's user and group; without them, drop the set-id bits */
    if (chown(to, was.st_uid, was.st_gid) != 0)
        mode &= ~(mode_t)(S_ISUID | S_ISGID);

    /* set to's mode bits, ignore errors */
    (void)chmod(to, mode);

    /* copy access and modify times, ignore errors */
    when.actime = was.st_atime;
    when.modtime = was.st_mtime;
    (void)utime(to, &when);
}
531 /* Decompress the file inname to the file outnname, of if test is true, just
532 decompress without writing and check the gzip trailer for integrity. If
533 inname is NULL or an empty string, read from stdin. If outname is NULL or
534 an empty string, write to stdout. strm is a pre-initialized inflateBack
535 structure. When appropriate, copy the file attributes from inname to
536 outname.
538 gunzip() returns 1 if there is an out-of-memory error or an unexpected
539 return code from gunpipe(). Otherwise it returns 0.
541 local int gunzip(z_stream *strm, char *inname, char *outname, int test)
543 int ret;
544 int infile, outfile;
546 /* open files */
547 if (inname == NULL || *inname == 0) {
548 inname = "-";
549 infile = 0; /* stdin */
551 else {
552 infile = open(inname, O_RDONLY, 0);
553 if (infile == -1) {
554 fprintf(stderr, "gun cannot open %s\n", inname);
555 return 0;
558 if (test)
559 outfile = -1;
560 else if (outname == NULL || *outname == 0) {
561 outname = "-";
562 outfile = 1; /* stdout */
564 else {
565 outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
566 if (outfile == -1) {
567 close(infile);
568 fprintf(stderr, "gun cannot create %s\n", outname);
569 return 0;
572 errno = 0;
574 /* decompress */
575 ret = gunpipe(strm, infile, outfile);
576 if (outfile > 2) close(outfile);
577 if (infile > 2) close(infile);
579 /* interpret result */
580 switch (ret) {
581 case Z_OK:
582 case Z_ERRNO:
583 if (infile > 2 && outfile > 2) {
584 copymeta(inname, outname); /* copy attributes */
585 unlink(inname);
587 if (ret == Z_ERRNO)
588 fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
589 inname);
590 break;
591 case Z_DATA_ERROR:
592 if (outfile > 2) unlink(outname);
593 fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
594 break;
595 case Z_MEM_ERROR:
596 if (outfile > 2) unlink(outname);
597 fprintf(stderr, "gun out of memory error--aborting\n");
598 return 1;
599 case Z_BUF_ERROR:
600 if (outfile > 2) unlink(outname);
601 if (strm->next_in != Z_NULL) {
602 fprintf(stderr, "gun write error on %s: %s\n",
603 outname, strerror(errno));
605 else if (errno) {
606 fprintf(stderr, "gun read error on %s: %s\n",
607 inname, strerror(errno));
609 else {
610 fprintf(stderr, "gun unexpected end of file on %s\n",
611 inname);
613 break;
614 default:
615 if (outfile > 2) unlink(outname);
616 fprintf(stderr, "gun internal error--aborting\n");
617 return 1;
619 return 0;
622 /* Process the gun command line arguments. See the command syntax near the
623 beginning of this source file. */
624 int main(int argc, char **argv)
626 int ret, len, test;
627 char *outname;
628 unsigned char *window;
629 z_stream strm;
631 /* initialize inflateBack state for repeated use */
632 window = match; /* reuse LZW match buffer */
633 strm.zalloc = Z_NULL;
634 strm.zfree = Z_NULL;
635 strm.opaque = Z_NULL;
636 ret = inflateBackInit(&strm, 15, window);
637 if (ret != Z_OK) {
638 fprintf(stderr, "gun out of memory error--aborting\n");
639 return 1;
642 /* decompress each file to the same name with the suffix removed */
643 argc--;
644 argv++;
645 test = 0;
646 if (argc && strcmp(*argv, "-h") == 0) {
647 fprintf(stderr, "gun 1.3 (12 Jun 2005)\n");
648 fprintf(stderr, "Copyright (c) 2005 Mark Adler\n");
649 fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
650 return 0;
652 if (argc && strcmp(*argv, "-t") == 0) {
653 test = 1;
654 argc--;
655 argv++;
657 if (argc)
658 do {
659 if (test)
660 outname = NULL;
661 else {
662 len = (int)strlen(*argv);
663 if (strcmp(*argv + len - 3, ".gz") == 0 ||
664 strcmp(*argv + len - 3, "-gz") == 0)
665 len -= 3;
666 else if (strcmp(*argv + len - 2, ".z") == 0 ||
667 strcmp(*argv + len - 2, "-z") == 0 ||
668 strcmp(*argv + len - 2, "_z") == 0 ||
669 strcmp(*argv + len - 2, ".Z") == 0)
670 len -= 2;
671 else {
672 fprintf(stderr, "gun error: no gz type on %s--skipping\n",
673 *argv);
674 continue;
676 outname = malloc(len + 1);
677 if (outname == NULL) {
678 fprintf(stderr, "gun out of memory error--aborting\n");
679 ret = 1;
680 break;
682 memcpy(outname, *argv, len);
683 outname[len] = 0;
685 ret = gunzip(&strm, *argv, outname, test);
686 if (outname != NULL) free(outname);
687 if (ret) break;
688 } while (argv++, --argc);
689 else
690 ret = gunzip(&strm, NULL, NULL, test);
692 /* clean up */
693 inflateBackEnd(&strm);
694 return ret;