Hint added.
[AROS.git] / workbench / libs / z / examples / gun.c
blob89e484fee600190331dff67f6aa3d8bd906aaf9f
/* gun.c -- simple gunzip to give an example of the use of inflateBack()
 * Copyright (C) 2003, 2005, 2008, 2010, 2012 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
   Version 1.7  12 August 2012  Mark Adler */
/* Version history:
   1.0  16 Feb 2003  First version for testing of inflateBack()
   1.1  21 Feb 2005  Decompress concatenated gzip streams
                     Remove use of "this" variable (C++ keyword)
                     Fix return value for in()
                     Improve allocation failure checking
                     Add typecasting for void * structures
                     Add -h option for command version and usage
                     Add a bunch of comments
   1.2  20 Mar 2005  Add Unix compress (LZW) decompression
                     Copy file attributes from input file to output file
   1.3  12 Jun 2005  Add casts for error messages [Oberhumer]
   1.4   8 Dec 2006  LZW decompression speed improvements
   1.5   9 Feb 2008  Avoid warning in latest version of gcc
   1.6  17 Jan 2010  Avoid signed/unsigned comparison warnings
   1.7  12 Aug 2012  Update for z_const usage in zlib 1.2.8
 */
/*
   gun [ -t ] [ name ... ]

   decompresses the data in the named gzip files.  If no arguments are given,
   gun will decompress from stdin to stdout.  The names must end in .gz, -gz,
   .z, -z, _z, or .Z.  The uncompressed data will be written to a file name
   with the suffix stripped.  On success, the original file is deleted.  On
   failure, the output file is deleted.  For most failures, the command will
   continue to process the remaining names on the command line.  A memory
   allocation failure will abort the command.  If -t is specified, then the
   listed files or stdin will be tested as gzip files for integrity (without
   checking for a proper suffix), no output will be written, and no files
   will be deleted.

   Like gzip, gun allows concatenated gzip streams and will decompress them,
   writing all of the uncompressed data to the output.  Unlike gzip, gun allows
   an empty file on input, and will produce no error writing an empty output
   file.

   gun will also decompress files made by Unix compress, which uses LZW
   compression.  These files are automatically detected by virtue of their
   magic header bytes.  Since the end of a Unix compress stream is marked by
   the end-of-file, they cannot be concatenated.  If a Unix compress stream is
   encountered in an input file, it is the last stream in that file.

   Like gunzip and uncompress, the file attributes of the original compressed
   file are maintained in the final uncompressed file, to the extent that the
   user permissions allow it.

   On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
   1.2.4) is on the same file, when gun is linked with zlib 1.2.2.  Also the
   LZW decompression provided by gun is about twice as fast as the standard
   Unix uncompress command.
 */
59 /* external functions and related types and constants */
60 #include <stdio.h> /* fprintf() */
61 #include <stdlib.h> /* malloc(), free() */
62 #include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
63 #include <errno.h> /* errno */
64 #include <fcntl.h> /* open() */
65 #include <unistd.h> /* read(), write(), close(), chown(), unlink() */
66 #include <sys/types.h>
67 #include <sys/stat.h> /* stat(), chmod() */
68 #include <utime.h> /* utime() */
69 #include "zlib.h" /* inflateBackInit(), inflateBack(), */
70 /* inflateBackEnd(), crc32() */
/* function declaration -- every file-private function in this file is
   declared with "local", which expands to static */
#define local static

/* buffer constants */
#define SIZE 32768U         /* input and output buffer sizes (unsigned so that
                               SIZE - len is computed in unsigned arithmetic) */
#define PIECE 16384         /* limits i/o chunks for 16-bit int case */
/* structure for infback() to pass to input function in() -- it maintains the
   input file and a buffer of size SIZE */
struct ind {
    int infile;                 /* descriptor that in() hands to read() */
    unsigned char *inbuf;       /* start of the buffer that in() fills */
};
86 /* Load input buffer, assumed to be empty, and return bytes loaded and a
87 pointer to them. read() is called until the buffer is full, or until it
88 returns end-of-file or error. Return 0 on error. */
89 local unsigned in(void *in_desc, z_const unsigned char **buf)
91 int ret;
92 unsigned len;
93 unsigned char *next;
94 struct ind *me = (struct ind *)in_desc;
96 next = me->inbuf;
97 *buf = next;
98 len = 0;
99 do {
100 ret = PIECE;
101 if ((unsigned)ret > SIZE - len)
102 ret = (int)(SIZE - len);
103 ret = (int)read(me->infile, next, ret);
104 if (ret == -1) {
105 len = 0;
106 break;
108 next += ret;
109 len += ret;
110 } while (ret != 0 && len < SIZE);
111 return len;
/* structure for infback() to pass to output function out() -- it maintains the
   output file, a running CRC-32 check on the output and the total number of
   bytes output, both for checking against the gzip trailer.  (The length in
   the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
   the output is greater than 4 GB.) */
struct outd {
    int outfile;                /* descriptor for write(), or -1 to discard */
    int check;                  /* true if checking crc and total */
    unsigned long crc;          /* running CRC-32 of the data passed to out() */
    unsigned long total;        /* running count of bytes passed to out() */
};
126 /* Write output buffer and update the CRC-32 and total bytes written. write()
127 is called until all of the output is written or an error is encountered.
128 On success out() returns 0. For a write failure, out() returns 1. If the
129 output file descriptor is -1, then nothing is written.
131 local int out(void *out_desc, unsigned char *buf, unsigned len)
133 int ret;
134 struct outd *me = (struct outd *)out_desc;
136 if (me->check) {
137 me->crc = crc32(me->crc, buf, len);
138 me->total += len;
140 if (me->outfile != -1)
141 do {
142 ret = PIECE;
143 if ((unsigned)ret > len)
144 ret = (int)len;
145 ret = (int)write(me->outfile, buf, ret);
146 if (ret == -1)
147 return 1;
148 buf += ret;
149 len -= ret;
150 } while (len != 0);
151 return 0;
/* next input byte macro for use inside lunpipe() and gunpipe() -- it relies on
   these names being in scope at the point of use: have (bytes left at next),
   next (current read position), last (receives the result) and indp (the
   descriptor handed to in()).  When have is zero the buffer is first refilled
   with in().  The expression evaluates to the byte consumed, or to -1 if in()
   delivered nothing; the same value is left in last. */
#define NEXT() (have ? 0 : (have = in(indp, &next)), \
                last = have ? (have--, (int)(*next++)) : -1)
158 /* memory for gunpipe() and lunpipe() --
159 the first 256 entries of prefix[] and suffix[] are never used, could
160 have offset the index, but it's faster to waste the memory */
161 unsigned char inbuf[SIZE]; /* input buffer */
162 unsigned char outbuf[SIZE]; /* output buffer */
163 unsigned short prefix[65536]; /* index to LZW prefix string */
164 unsigned char suffix[65536]; /* one-character LZW suffix */
165 unsigned char match[65280 + 2]; /* buffer for reversed match or gzip
166 32K sliding window */
/* throw out what's left in the current bits byte buffer (this is a vestigial
   aspect of the compressed data format derived from an implementation that
   made use of a special VAX machine instruction!)

   Uses the caller's left, rem, chunk, have and next, and NEXT() to refill the
   input when the bytes to discard run past what is buffered.  The bit buffer
   (left, rem) is always emptied.  The break statements leave only the macro's
   own do { } while (0), not any loop at the point of use. */
#define FLUSHCODE() \
    do { \
        left = 0; \
        rem = 0; \
        if (chunk > have) { \
            chunk -= have; \
            have = 0; \
            if (NEXT() == -1) \
                break; \
            chunk--; \
            if (chunk > have) { \
                chunk = have = 0; \
                break; \
            } \
        } \
        have -= chunk; \
        next += chunk; \
        chunk = 0; \
    } while (0)
191 /* Decompress a compress (LZW) file from indp to outfile. The compress magic
192 header (two bytes) has already been read and verified. There are have bytes
193 of buffered input at next. strm is used for passing error information back
194 to gunpipe().
196 lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
197 file, read error, or write error (a write error indicated by strm->next_in
198 not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
200 local int lunpipe(unsigned have, z_const unsigned char *next, struct ind *indp,
201 int outfile, z_stream *strm)
203 int last; /* last byte read by NEXT(), or -1 if EOF */
204 unsigned chunk; /* bytes left in current chunk */
205 int left; /* bits left in rem */
206 unsigned rem; /* unused bits from input */
207 int bits; /* current bits per code */
208 unsigned code; /* code, table traversal index */
209 unsigned mask; /* mask for current bits codes */
210 int max; /* maximum bits per code for this stream */
211 unsigned flags; /* compress flags, then block compress flag */
212 unsigned end; /* last valid entry in prefix/suffix tables */
213 unsigned temp; /* current code */
214 unsigned prev; /* previous code */
215 unsigned final; /* last character written for previous code */
216 unsigned stack; /* next position for reversed string */
217 unsigned outcnt; /* bytes in output buffer */
218 struct outd outd; /* output structure */
219 unsigned char *p;
221 /* set up output */
222 outd.outfile = outfile;
223 outd.check = 0;
225 /* process remainder of compress header -- a flags byte */
226 flags = NEXT();
227 if (last == -1)
228 return Z_BUF_ERROR;
229 if (flags & 0x60) {
230 strm->msg = (char *)"unknown lzw flags set";
231 return Z_DATA_ERROR;
233 max = flags & 0x1f;
234 if (max < 9 || max > 16) {
235 strm->msg = (char *)"lzw bits out of range";
236 return Z_DATA_ERROR;
238 if (max == 9) /* 9 doesn't really mean 9 */
239 max = 10;
240 flags &= 0x80; /* true if block compress */
242 /* clear table */
243 bits = 9;
244 mask = 0x1ff;
245 end = flags ? 256 : 255;
247 /* set up: get first 9-bit code, which is the first decompressed byte, but
248 don't create a table entry until the next code */
249 if (NEXT() == -1) /* no compressed data is ok */
250 return Z_OK;
251 final = prev = (unsigned)last; /* low 8 bits of code */
252 if (NEXT() == -1) /* missing a bit */
253 return Z_BUF_ERROR;
254 if (last & 1) { /* code must be < 256 */
255 strm->msg = (char *)"invalid lzw code";
256 return Z_DATA_ERROR;
258 rem = (unsigned)last >> 1; /* remaining 7 bits */
259 left = 7;
260 chunk = bits - 2; /* 7 bytes left in this chunk */
261 outbuf[0] = (unsigned char)final; /* write first decompressed byte */
262 outcnt = 1;
264 /* decode codes */
265 stack = 0;
266 for (;;) {
267 /* if the table will be full after this, increment the code size */
268 if (end >= mask && bits < max) {
269 FLUSHCODE();
270 bits++;
271 mask <<= 1;
272 mask++;
275 /* get a code of length bits */
276 if (chunk == 0) /* decrement chunk modulo bits */
277 chunk = bits;
278 code = rem; /* low bits of code */
279 if (NEXT() == -1) { /* EOF is end of compressed data */
280 /* write remaining buffered output */
281 if (outcnt && out(&outd, outbuf, outcnt)) {
282 strm->next_in = outbuf; /* signal write error */
283 return Z_BUF_ERROR;
285 return Z_OK;
287 code += (unsigned)last << left; /* middle (or high) bits of code */
288 left += 8;
289 chunk--;
290 if (bits > left) { /* need more bits */
291 if (NEXT() == -1) /* can't end in middle of code */
292 return Z_BUF_ERROR;
293 code += (unsigned)last << left; /* high bits of code */
294 left += 8;
295 chunk--;
297 code &= mask; /* mask to current code length */
298 left -= bits; /* number of unused bits */
299 rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
301 /* process clear code (256) */
302 if (code == 256 && flags) {
303 FLUSHCODE();
304 bits = 9; /* initialize bits and mask */
305 mask = 0x1ff;
306 end = 255; /* empty table */
307 continue; /* get next code */
310 /* special code to reuse last match */
311 temp = code; /* save the current code */
312 if (code > end) {
313 /* Be picky on the allowed code here, and make sure that the code
314 we drop through (prev) will be a valid index so that random
315 input does not cause an exception. The code != end + 1 check is
316 empirically derived, and not checked in the original uncompress
317 code. If this ever causes a problem, that check could be safely
318 removed. Leaving this check in greatly improves gun's ability
319 to detect random or corrupted input after a compress header.
320 In any case, the prev > end check must be retained. */
321 if (code != end + 1 || prev > end) {
322 strm->msg = (char *)"invalid lzw code";
323 return Z_DATA_ERROR;
325 match[stack++] = (unsigned char)final;
326 code = prev;
329 /* walk through linked list to generate output in reverse order */
330 p = match + stack;
331 while (code >= 256) {
332 *p++ = suffix[code];
333 code = prefix[code];
335 stack = p - match;
336 match[stack++] = (unsigned char)code;
337 final = code;
339 /* link new table entry */
340 if (end < mask) {
341 end++;
342 prefix[end] = (unsigned short)prev;
343 suffix[end] = (unsigned char)final;
346 /* set previous code for next iteration */
347 prev = temp;
349 /* write output in forward order */
350 while (stack > SIZE - outcnt) {
351 while (outcnt < SIZE)
352 outbuf[outcnt++] = match[--stack];
353 if (out(&outd, outbuf, outcnt)) {
354 strm->next_in = outbuf; /* signal write error */
355 return Z_BUF_ERROR;
357 outcnt = 0;
359 p = match + stack;
360 do {
361 outbuf[outcnt++] = *--p;
362 } while (p > match);
363 stack = 0;
365 /* loop for next code with final and prev as the last match, rem and
366 left provide the first 0..7 bits of the next code, end is the last
367 valid table entry */
371 /* Decompress a gzip file from infile to outfile. strm is assumed to have been
372 successfully initialized with inflateBackInit(). The input file may consist
373 of a series of gzip streams, in which case all of them will be decompressed
374 to the output file. If outfile is -1, then the gzip stream(s) integrity is
375 checked and nothing is written.
377 The return value is a zlib error code: Z_MEM_ERROR if out of memory,
378 Z_DATA_ERROR if the header or the compressed data is invalid, or if the
379 trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
380 prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip
381 stream) follows a valid gzip stream.
383 local int gunpipe(z_stream *strm, int infile, int outfile)
385 int ret, first, last;
386 unsigned have, flags, len;
387 z_const unsigned char *next = NULL;
388 struct ind ind, *indp;
389 struct outd outd;
391 /* setup input buffer */
392 ind.infile = infile;
393 ind.inbuf = inbuf;
394 indp = &ind;
396 /* decompress concatenated gzip streams */
397 have = 0; /* no input data read in yet */
398 first = 1; /* looking for first gzip header */
399 strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
400 for (;;) {
401 /* look for the two magic header bytes for a gzip stream */
402 if (NEXT() == -1) {
403 ret = Z_OK;
404 break; /* empty gzip stream is ok */
406 if (last != 31 || (NEXT() != 139 && last != 157)) {
407 strm->msg = (char *)"incorrect header check";
408 ret = first ? Z_DATA_ERROR : Z_ERRNO;
409 break; /* not a gzip or compress header */
411 first = 0; /* next non-header is junk */
413 /* process a compress (LZW) file -- can't be concatenated after this */
414 if (last == 157) {
415 ret = lunpipe(have, next, indp, outfile, strm);
416 break;
419 /* process remainder of gzip header */
420 ret = Z_BUF_ERROR;
421 if (NEXT() != 8) { /* only deflate method allowed */
422 if (last == -1) break;
423 strm->msg = (char *)"unknown compression method";
424 ret = Z_DATA_ERROR;
425 break;
427 flags = NEXT(); /* header flags */
428 NEXT(); /* discard mod time, xflgs, os */
429 NEXT();
430 NEXT();
431 NEXT();
432 NEXT();
433 NEXT();
434 if (last == -1) break;
435 if (flags & 0xe0) {
436 strm->msg = (char *)"unknown header flags set";
437 ret = Z_DATA_ERROR;
438 break;
440 if (flags & 4) { /* extra field */
441 len = NEXT();
442 len += (unsigned)(NEXT()) << 8;
443 if (last == -1) break;
444 while (len > have) {
445 len -= have;
446 have = 0;
447 if (NEXT() == -1) break;
448 len--;
450 if (last == -1) break;
451 have -= len;
452 next += len;
454 if (flags & 8) /* file name */
455 while (NEXT() != 0 && last != -1)
457 if (flags & 16) /* comment */
458 while (NEXT() != 0 && last != -1)
460 if (flags & 2) { /* header crc */
461 NEXT();
462 NEXT();
464 if (last == -1) break;
466 /* set up output */
467 outd.outfile = outfile;
468 outd.check = 1;
469 outd.crc = crc32(0L, Z_NULL, 0);
470 outd.total = 0;
472 /* decompress data to output */
473 strm->next_in = next;
474 strm->avail_in = have;
475 ret = inflateBack(strm, in, indp, out, &outd);
476 if (ret != Z_STREAM_END) break;
477 next = strm->next_in;
478 have = strm->avail_in;
479 strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
481 /* check trailer */
482 ret = Z_BUF_ERROR;
483 if (NEXT() != (int)(outd.crc & 0xff) ||
484 NEXT() != (int)((outd.crc >> 8) & 0xff) ||
485 NEXT() != (int)((outd.crc >> 16) & 0xff) ||
486 NEXT() != (int)((outd.crc >> 24) & 0xff)) {
487 /* crc error */
488 if (last != -1) {
489 strm->msg = (char *)"incorrect data check";
490 ret = Z_DATA_ERROR;
492 break;
494 if (NEXT() != (int)(outd.total & 0xff) ||
495 NEXT() != (int)((outd.total >> 8) & 0xff) ||
496 NEXT() != (int)((outd.total >> 16) & 0xff) ||
497 NEXT() != (int)((outd.total >> 24) & 0xff)) {
498 /* length error */
499 if (last != -1) {
500 strm->msg = (char *)"incorrect length check";
501 ret = Z_DATA_ERROR;
503 break;
506 /* go back and look for another gzip stream */
509 /* clean up and return */
510 return ret;
513 /* Copy file attributes, from -> to, as best we can. This is best effort, so
514 no errors are reported. The mode bits, including suid, sgid, and the sticky
515 bit are copied (if allowed), the owner's user id and group id are copied
516 (again if allowed), and the access and modify times are copied. */
517 local void copymeta(char *from, char *to)
519 struct stat was;
520 struct utimbuf when;
522 /* get all of from's Unix meta data, return if not a regular file */
523 if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG)
524 return;
526 /* set to's mode bits, ignore errors */
527 (void)chmod(to, was.st_mode & 07777);
529 /* copy owner's user and group, ignore errors */
530 (void)chown(to, was.st_uid, was.st_gid);
532 /* copy access and modify times, ignore errors */
533 when.actime = was.st_atime;
534 when.modtime = was.st_mtime;
535 (void)utime(to, &when);
538 /* Decompress the file inname to the file outnname, of if test is true, just
539 decompress without writing and check the gzip trailer for integrity. If
540 inname is NULL or an empty string, read from stdin. If outname is NULL or
541 an empty string, write to stdout. strm is a pre-initialized inflateBack
542 structure. When appropriate, copy the file attributes from inname to
543 outname.
545 gunzip() returns 1 if there is an out-of-memory error or an unexpected
546 return code from gunpipe(). Otherwise it returns 0.
548 local int gunzip(z_stream *strm, char *inname, char *outname, int test)
550 int ret;
551 int infile, outfile;
553 /* open files */
554 if (inname == NULL || *inname == 0) {
555 inname = "-";
556 infile = 0; /* stdin */
558 else {
559 infile = open(inname, O_RDONLY, 0);
560 if (infile == -1) {
561 fprintf(stderr, "gun cannot open %s\n", inname);
562 return 0;
565 if (test)
566 outfile = -1;
567 else if (outname == NULL || *outname == 0) {
568 outname = "-";
569 outfile = 1; /* stdout */
571 else {
572 outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
573 if (outfile == -1) {
574 close(infile);
575 fprintf(stderr, "gun cannot create %s\n", outname);
576 return 0;
579 errno = 0;
581 /* decompress */
582 ret = gunpipe(strm, infile, outfile);
583 if (outfile > 2) close(outfile);
584 if (infile > 2) close(infile);
586 /* interpret result */
587 switch (ret) {
588 case Z_OK:
589 case Z_ERRNO:
590 if (infile > 2 && outfile > 2) {
591 copymeta(inname, outname); /* copy attributes */
592 unlink(inname);
594 if (ret == Z_ERRNO)
595 fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
596 inname);
597 break;
598 case Z_DATA_ERROR:
599 if (outfile > 2) unlink(outname);
600 fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
601 break;
602 case Z_MEM_ERROR:
603 if (outfile > 2) unlink(outname);
604 fprintf(stderr, "gun out of memory error--aborting\n");
605 return 1;
606 case Z_BUF_ERROR:
607 if (outfile > 2) unlink(outname);
608 if (strm->next_in != Z_NULL) {
609 fprintf(stderr, "gun write error on %s: %s\n",
610 outname, strerror(errno));
612 else if (errno) {
613 fprintf(stderr, "gun read error on %s: %s\n",
614 inname, strerror(errno));
616 else {
617 fprintf(stderr, "gun unexpected end of file on %s\n",
618 inname);
620 break;
621 default:
622 if (outfile > 2) unlink(outname);
623 fprintf(stderr, "gun internal error--aborting\n");
624 return 1;
626 return 0;
629 /* Process the gun command line arguments. See the command syntax near the
630 beginning of this source file. */
631 int main(int argc, char **argv)
633 int ret, len, test;
634 char *outname;
635 unsigned char *window;
636 z_stream strm;
638 /* initialize inflateBack state for repeated use */
639 window = match; /* reuse LZW match buffer */
640 strm.zalloc = Z_NULL;
641 strm.zfree = Z_NULL;
642 strm.opaque = Z_NULL;
643 ret = inflateBackInit(&strm, 15, window);
644 if (ret != Z_OK) {
645 fprintf(stderr, "gun out of memory error--aborting\n");
646 return 1;
649 /* decompress each file to the same name with the suffix removed */
650 argc--;
651 argv++;
652 test = 0;
653 if (argc && strcmp(*argv, "-h") == 0) {
654 fprintf(stderr, "gun 1.6 (17 Jan 2010)\n");
655 fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n");
656 fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
657 return 0;
659 if (argc && strcmp(*argv, "-t") == 0) {
660 test = 1;
661 argc--;
662 argv++;
664 if (argc)
665 do {
666 if (test)
667 outname = NULL;
668 else {
669 len = (int)strlen(*argv);
670 if (strcmp(*argv + len - 3, ".gz") == 0 ||
671 strcmp(*argv + len - 3, "-gz") == 0)
672 len -= 3;
673 else if (strcmp(*argv + len - 2, ".z") == 0 ||
674 strcmp(*argv + len - 2, "-z") == 0 ||
675 strcmp(*argv + len - 2, "_z") == 0 ||
676 strcmp(*argv + len - 2, ".Z") == 0)
677 len -= 2;
678 else {
679 fprintf(stderr, "gun error: no gz type on %s--skipping\n",
680 *argv);
681 continue;
683 outname = malloc(len + 1);
684 if (outname == NULL) {
685 fprintf(stderr, "gun out of memory error--aborting\n");
686 ret = 1;
687 break;
689 memcpy(outname, *argv, len);
690 outname[len] = 0;
692 ret = gunzip(&strm, *argv, outname, test);
693 if (outname != NULL) free(outname);
694 if (ret) break;
695 } while (argv++, --argc);
696 else
697 ret = gunzip(&strm, NULL, NULL, test);
699 /* clean up */
700 inflateBackEnd(&strm);
701 return ret;