add ext4,vfat and tar.bz2
[u-tools.git] / u-tools / apps / busybox / archival / libunarchive / decompress_unzip.c
blobe83cd4f45254e20f807899362b33a3d5fcedc4dd
1 /* vi: set sw=4 ts=4: */
2 /*
3 * gunzip implementation for busybox
5 * Based on GNU gzip v1.2.4 Copyright (C) 1992-1993 Jean-loup Gailly.
7 * Originally adjusted for busybox by Sven Rudolph <sr1@inf.tu-dresden.de>
8 * based on gzip sources
10 * Adjusted further by Erik Andersen <andersen@codepoet.org> to support
11 * files as well as stdin/stdout, and to generally behave itself wrt
12 * command line handling.
14 * General cleanup to better adhere to the style guide and make use of standard
15 * busybox functions by Glenn McGrath
17 * read_gz interface + associated hacking by Laurence Anderson
19 * Fixed huft_build() so decoding end-of-block code does not grab more bits
20 * than necessary (this is required by unzip applet), added inflate_cleanup()
21 * to free leaked bytebuffer memory (used in unzip.c), and some minor style
22 * guide cleanups by Ed Clark
24 * gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface
25 * Copyright (C) 1992-1993 Jean-loup Gailly
26 * The unzip code was written and put in the public domain by Mark Adler.
27 * Portions of the lzw code are derived from the public domain 'compress'
28 * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
29 * Ken Turkowski, Dave Mack and Peter Jannesen.
31 * See the file algorithm.doc for the compression algorithms and file formats.
33 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
36 #include <setjmp.h>
37 #include "libbb.h"
38 #include "unarchive.h"
/* One entry of a Huffman decoding table built by huft_build(). */
typedef struct huft_t {
	unsigned char e;	/* number of extra bits or operation */
	unsigned char b;	/* number of bits in this code or subcode */
	union {
		unsigned short n;	/* literal, length base, or distance base */
		struct huft_t *t;	/* pointer to next level of table */
	} v;
} huft_t;
enum {
	/* gunzip_window size--must be a power of two, and
	 * at least 32K for zip's deflate method */
	GUNZIP_WSIZE = 0x8000,
	/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
	BMAX = 16,	/* maximum bit length of any code (16 for explode) */
	N_MAX = 288,	/* maximum number of codes in any set */
};
/* This somewhat complex-looking arrangement allows the decompressor
 * state to be placed either in bss or in malloc'ed space simply by
 * changing the #defines below.
 * Sizes on i386:
 * text    data     bss     dec     hex
 * 5256       0     108    5364    14f4 - bss
 * 4915       0       0    4915    1333 - malloc
 */
#define STATE_IN_BSS 0
#define STATE_IN_MALLOC 1
71 typedef struct state_t {
72 off_t gunzip_bytes_out; /* number of output bytes */
73 uint32_t gunzip_crc;
75 int gunzip_src_fd;
76 unsigned gunzip_outbuf_count; /* bytes in output buffer */
78 unsigned char *gunzip_window;
80 uint32_t *gunzip_crc_table;
82 /* bitbuffer */
83 unsigned gunzip_bb; /* bit buffer */
84 unsigned char gunzip_bk; /* bits in bit buffer */
86 /* input (compressed) data */
87 unsigned char *bytebuffer; /* buffer itself */
88 off_t to_read; /* compressed bytes to read (unzip only, -1 for gunzip) */
89 // unsigned bytebuffer_max; /* buffer size */
90 unsigned bytebuffer_offset; /* buffer position */
91 unsigned bytebuffer_size; /* how much data is there (size <= max) */
93 /* private data of inflate_codes() */
94 unsigned inflate_codes_ml; /* masks for bl and bd bits */
95 unsigned inflate_codes_md; /* masks for bl and bd bits */
96 unsigned inflate_codes_bb; /* bit buffer */
97 unsigned inflate_codes_k; /* number of bits in bit buffer */
98 unsigned inflate_codes_w; /* current gunzip_window position */
99 huft_t *inflate_codes_tl;
100 huft_t *inflate_codes_td;
101 unsigned inflate_codes_bl;
102 unsigned inflate_codes_bd;
103 unsigned inflate_codes_nn; /* length and index for copy */
104 unsigned inflate_codes_dd;
106 smallint resume_copy;
108 /* private data of inflate_get_next_window() */
109 smallint method; /* method == -1 for stored, -2 for codes */
110 smallint need_another_block;
111 smallint end_reached;
113 /* private data of inflate_stored() */
114 unsigned inflate_stored_n;
115 unsigned inflate_stored_b;
116 unsigned inflate_stored_k;
117 unsigned inflate_stored_w;
119 const char *error_msg;
120 jmp_buf error_jmp;
121 } state_t;
/* Field accessors: each name expands to the matching state_t member via S(),
 * so the code below reads the same whether the state is in bss or malloc'ed. */
#define gunzip_bytes_out    (S()gunzip_bytes_out   )
#define gunzip_crc          (S()gunzip_crc         )
#define gunzip_src_fd       (S()gunzip_src_fd      )
#define gunzip_outbuf_count (S()gunzip_outbuf_count)
#define gunzip_window       (S()gunzip_window      )
#define gunzip_crc_table    (S()gunzip_crc_table   )
#define gunzip_bb           (S()gunzip_bb          )
#define gunzip_bk           (S()gunzip_bk          )
#define to_read             (S()to_read            )
// #define bytebuffer_max   (S()bytebuffer_max     )
// Both gunzip and unzip can use constant buffer size now (16k):
#define bytebuffer_max      0x4000
#define bytebuffer          (S()bytebuffer         )
#define bytebuffer_offset   (S()bytebuffer_offset  )
#define bytebuffer_size     (S()bytebuffer_size    )
#define inflate_codes_ml    (S()inflate_codes_ml   )
#define inflate_codes_md    (S()inflate_codes_md   )
#define inflate_codes_bb    (S()inflate_codes_bb   )
#define inflate_codes_k     (S()inflate_codes_k    )
#define inflate_codes_w     (S()inflate_codes_w    )
#define inflate_codes_tl    (S()inflate_codes_tl   )
#define inflate_codes_td    (S()inflate_codes_td   )
#define inflate_codes_bl    (S()inflate_codes_bl   )
#define inflate_codes_bd    (S()inflate_codes_bd   )
#define inflate_codes_nn    (S()inflate_codes_nn   )
#define inflate_codes_dd    (S()inflate_codes_dd   )
#define resume_copy         (S()resume_copy        )
#define method              (S()method             )
#define need_another_block  (S()need_another_block )
#define end_reached         (S()end_reached        )
#define inflate_stored_n    (S()inflate_stored_n   )
#define inflate_stored_b    (S()inflate_stored_b   )
#define inflate_stored_k    (S()inflate_stored_k   )
#define inflate_stored_w    (S()inflate_stored_w   )
#define error_msg           (S()error_msg          )
#define error_jmp           (S()error_jmp          )
/* This is a generic part */
#if STATE_IN_BSS /* Use global data segment */
#define DECLARE_STATE /*nothing*/
#define ALLOC_STATE /*nothing*/
#define DEALLOC_STATE ((void)0)
#define S() state.
#define PASS_STATE /*nothing*/
#define PASS_STATE_ONLY /*nothing*/
#define STATE_PARAM /*nothing*/
#define STATE_PARAM_ONLY void
static state_t state;
#endif

#if STATE_IN_MALLOC /* Use malloc space */
#define DECLARE_STATE state_t *state
#define ALLOC_STATE (state = xzalloc(sizeof(*state)))
#define DEALLOC_STATE free(state)
#define S() state->
#define PASS_STATE state,
#define PASS_STATE_ONLY state
#define STATE_PARAM state_t *state,
#define STATE_PARAM_ONLY state_t *state
#endif
184 static const uint16_t mask_bits[] ALIGN2 = {
185 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
186 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
189 /* Copy lengths for literal codes 257..285 */
190 static const uint16_t cplens[] ALIGN2 = {
191 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
192 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
195 /* note: see note #13 above about the 258 in this list. */
196 /* Extra bits for literal codes 257..285 */
197 static const uint8_t cplext[] ALIGN1 = {
198 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5,
199 5, 5, 5, 0, 99, 99
200 }; /* 99 == invalid */
202 /* Copy offsets for distance codes 0..29 */
203 static const uint16_t cpdist[] ALIGN2 = {
204 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
205 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
208 /* Extra bits for distance codes */
209 static const uint8_t cpdext[] ALIGN1 = {
210 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10,
211 11, 11, 12, 12, 13, 13
214 /* Tables for deflate from PKZIP's appnote.txt. */
215 /* Order of the bit length code lengths */
216 static const uint8_t border[] ALIGN1 = {
217 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
222 * Free the malloc'ed tables built by huft_build(), which makes a linked
223 * list of the tables it made, with the links in a dummy first entry of
224 * each table.
225 * t: table to free
227 static void huft_free(huft_t *p)
229 huft_t *q;
231 /* Go through linked list, freeing from the malloced (t[-1]) address. */
232 while (p) {
233 q = (--p)->v.t;
234 free(p);
235 p = q;
239 static void huft_free_all(STATE_PARAM_ONLY)
241 huft_free(inflate_codes_tl);
242 huft_free(inflate_codes_td);
243 inflate_codes_tl = NULL;
244 inflate_codes_td = NULL;
247 static void abort_unzip(STATE_PARAM_ONLY) NORETURN;
248 static void abort_unzip(STATE_PARAM_ONLY)
250 huft_free_all(PASS_STATE_ONLY);
251 longjmp(error_jmp, 1);
254 static unsigned fill_bitbuffer(STATE_PARAM unsigned bitbuffer, unsigned *current, const unsigned required)
256 while (*current < required) {
257 if (bytebuffer_offset >= bytebuffer_size) {
258 unsigned sz = bytebuffer_max - 4;
259 if (to_read >= 0 && to_read < sz) /* unzip only */
260 sz = to_read;
261 /* Leave the first 4 bytes empty so we can always unwind the bitbuffer
262 * to the front of the bytebuffer */
263 bytebuffer_size = safe_read(gunzip_src_fd, &bytebuffer[4], sz);
264 if ((int)bytebuffer_size < 1) {
265 error_msg = "unexpected end of file";
266 abort_unzip(PASS_STATE_ONLY);
268 if (to_read >= 0) /* unzip only */
269 to_read -= bytebuffer_size;
270 bytebuffer_size += 4;
271 bytebuffer_offset = 4;
273 bitbuffer |= ((unsigned) bytebuffer[bytebuffer_offset]) << *current;
274 bytebuffer_offset++;
275 *current += 8;
277 return bitbuffer;
281 /* Given a list of code lengths and a maximum table size, make a set of
282 * tables to decode that set of codes. Return zero on success, one if
283 * the given code set is incomplete (the tables are still built in this
284 * case), two if the input is invalid (all zero length codes or an
285 * oversubscribed set of lengths) - in this case stores NULL in *t.
287 * b: code lengths in bits (all assumed <= BMAX)
288 * n: number of codes (assumed <= N_MAX)
289 * s: number of simple-valued codes (0..s-1)
290 * d: list of base values for non-simple codes
291 * e: list of extra bits for non-simple codes
292 * t: result: starting table
293 * m: maximum lookup bits, returns actual
295 static int huft_build(const unsigned *b, const unsigned n,
296 const unsigned s, const unsigned short *d,
297 const unsigned char *e, huft_t **t, unsigned *m)
299 unsigned a; /* counter for codes of length k */
300 unsigned c[BMAX + 1]; /* bit length count table */
301 unsigned eob_len; /* length of end-of-block code (value 256) */
302 unsigned f; /* i repeats in table every f entries */
303 int g; /* maximum code length */
304 int htl; /* table level */
305 unsigned i; /* counter, current code */
306 unsigned j; /* counter */
307 int k; /* number of bits in current code */
308 unsigned *p; /* pointer into c[], b[], or v[] */
309 huft_t *q; /* points to current table */
310 huft_t r; /* table entry for structure assignment */
311 huft_t *u[BMAX]; /* table stack */
312 unsigned v[N_MAX]; /* values in order of bit length */
313 int ws[BMAX + 1]; /* bits decoded stack */
314 int w; /* bits decoded */
315 unsigned x[BMAX + 1]; /* bit offsets, then code stack */
316 unsigned *xp; /* pointer into x */
317 int y; /* number of dummy codes added */
318 unsigned z; /* number of entries in current table */
320 /* Length of EOB code, if any */
321 eob_len = n > 256 ? b[256] : BMAX;
323 *t = NULL;
325 /* Generate counts for each bit length */
326 memset(c, 0, sizeof(c));
327 p = (unsigned *) b; /* cast allows us to reuse p for pointing to b */
328 i = n;
329 do {
330 c[*p]++; /* assume all entries <= BMAX */
331 p++; /* can't combine with above line (Solaris bug) */
332 } while (--i);
333 if (c[0] == n) { /* null input - all zero length codes */
334 *m = 0;
335 return 2;
338 /* Find minimum and maximum length, bound *m by those */
339 for (j = 1; (c[j] == 0) && (j <= BMAX); j++)
340 continue;
341 k = j; /* minimum code length */
342 for (i = BMAX; (c[i] == 0) && i; i--)
343 continue;
344 g = i; /* maximum code length */
345 *m = (*m < j) ? j : ((*m > i) ? i : *m);
347 /* Adjust last length count to fill out codes, if needed */
348 for (y = 1 << j; j < i; j++, y <<= 1) {
349 y -= c[j];
350 if (y < 0)
351 return 2; /* bad input: more codes than bits */
353 y -= c[i];
354 if (y < 0)
355 return 2;
356 c[i] += y;
358 /* Generate starting offsets into the value table for each length */
359 x[1] = j = 0;
360 p = c + 1;
361 xp = x + 2;
362 while (--i) { /* note that i == g from above */
363 j += *p++;
364 *xp++ = j;
367 /* Make a table of values in order of bit lengths */
368 p = (unsigned *) b;
369 i = 0;
370 do {
371 j = *p++;
372 if (j != 0) {
373 v[x[j]++] = i;
375 } while (++i < n);
377 /* Generate the Huffman codes and for each, make the table entries */
378 x[0] = i = 0; /* first Huffman code is zero */
379 p = v; /* grab values in bit order */
380 htl = -1; /* no tables yet--level -1 */
381 w = ws[0] = 0; /* bits decoded */
382 u[0] = NULL; /* just to keep compilers happy */
383 q = NULL; /* ditto */
384 z = 0; /* ditto */
386 /* go through the bit lengths (k already is bits in shortest code) */
387 for (; k <= g; k++) {
388 a = c[k];
389 while (a--) {
390 /* here i is the Huffman code of length k bits for value *p */
391 /* make tables up to required level */
392 while (k > ws[htl + 1]) {
393 w = ws[++htl];
395 /* compute minimum size table less than or equal to *m bits */
396 z = g - w;
397 z = z > *m ? *m : z; /* upper limit on table size */
398 j = k - w;
399 f = 1 << j;
400 if (f > a + 1) { /* try a k-w bit table */
401 /* too few codes for k-w bit table */
402 f -= a + 1; /* deduct codes from patterns left */
403 xp = c + k;
404 while (++j < z) { /* try smaller tables up to z bits */
405 f <<= 1;
406 if (f <= *++xp) {
407 break; /* enough codes to use up j bits */
409 f -= *xp; /* else deduct codes from patterns */
412 j = (w + j > eob_len && w < eob_len) ? eob_len - w : j; /* make EOB code end at table */
413 z = 1 << j; /* table entries for j-bit table */
414 ws[htl+1] = w + j; /* set bits decoded in stack */
416 /* allocate and link in new table */
417 q = xzalloc((z + 1) * sizeof(huft_t));
418 *t = q + 1; /* link to list for huft_free() */
419 t = &(q->v.t);
420 u[htl] = ++q; /* table starts after link */
422 /* connect to last table, if there is one */
423 if (htl) {
424 x[htl] = i; /* save pattern for backing up */
425 r.b = (unsigned char) (w - ws[htl - 1]); /* bits to dump before this table */
426 r.e = (unsigned char) (16 + j); /* bits in this table */
427 r.v.t = q; /* pointer to this table */
428 j = (i & ((1 << w) - 1)) >> ws[htl - 1];
429 u[htl - 1][j] = r; /* connect to last table */
433 /* set up table entry in r */
434 r.b = (unsigned char) (k - w);
435 if (p >= v + n) {
436 r.e = 99; /* out of values--invalid code */
437 } else if (*p < s) {
438 r.e = (unsigned char) (*p < 256 ? 16 : 15); /* 256 is EOB code */
439 r.v.n = (unsigned short) (*p++); /* simple code is just the value */
440 } else {
441 r.e = (unsigned char) e[*p - s]; /* non-simple--look up in lists */
442 r.v.n = d[*p++ - s];
445 /* fill code-like entries with r */
446 f = 1 << (k - w);
447 for (j = i >> w; j < z; j += f) {
448 q[j] = r;
451 /* backwards increment the k-bit code i */
452 for (j = 1 << (k - 1); i & j; j >>= 1) {
453 i ^= j;
455 i ^= j;
457 /* backup over finished tables */
458 while ((i & ((1 << w) - 1)) != x[htl]) {
459 w = ws[--htl];
464 /* return actual size of base table */
465 *m = ws[1];
467 /* Return 1 if we were given an incomplete table */
468 return y != 0 && g != 1;
473 * inflate (decompress) the codes in a deflated (compressed) block.
474 * Return an error code or zero if it all goes ok.
476 * tl, td: literal/length and distance decoder tables
477 * bl, bd: number of bits decoded by tl[] and td[]
479 /* called once from inflate_block */
481 /* map formerly local static variables to globals */
482 #define ml inflate_codes_ml
483 #define md inflate_codes_md
484 #define bb inflate_codes_bb
485 #define k inflate_codes_k
486 #define w inflate_codes_w
487 #define tl inflate_codes_tl
488 #define td inflate_codes_td
489 #define bl inflate_codes_bl
490 #define bd inflate_codes_bd
491 #define nn inflate_codes_nn
492 #define dd inflate_codes_dd
493 static void inflate_codes_setup(STATE_PARAM unsigned my_bl, unsigned my_bd)
495 bl = my_bl;
496 bd = my_bd;
497 /* make local copies of globals */
498 bb = gunzip_bb; /* initialize bit buffer */
499 k = gunzip_bk;
500 w = gunzip_outbuf_count; /* initialize gunzip_window position */
501 /* inflate the coded data */
502 ml = mask_bits[bl]; /* precompute masks for speed */
503 md = mask_bits[bd];
505 /* called once from inflate_get_next_window */
506 static int inflate_codes(STATE_PARAM_ONLY)
508 unsigned e; /* table entry flag/number of extra bits */
509 huft_t *t; /* pointer to table entry */
511 if (resume_copy)
512 goto do_copy;
514 while (1) { /* do until end of block */
515 bb = fill_bitbuffer(PASS_STATE bb, &k, bl);
516 t = tl + ((unsigned) bb & ml);
517 e = t->e;
518 if (e > 16)
519 do {
520 if (e == 99)
521 abort_unzip(PASS_STATE_ONLY);;
522 bb >>= t->b;
523 k -= t->b;
524 e -= 16;
525 bb = fill_bitbuffer(PASS_STATE bb, &k, e);
526 t = t->v.t + ((unsigned) bb & mask_bits[e]);
527 e = t->e;
528 } while (e > 16);
529 bb >>= t->b;
530 k -= t->b;
531 if (e == 16) { /* then it's a literal */
532 gunzip_window[w++] = (unsigned char) t->v.n;
533 if (w == GUNZIP_WSIZE) {
534 gunzip_outbuf_count = w;
535 //flush_gunzip_window();
536 w = 0;
537 return 1; // We have a block to read
539 } else { /* it's an EOB or a length */
540 /* exit if end of block */
541 if (e == 15) {
542 break;
545 /* get length of block to copy */
546 bb = fill_bitbuffer(PASS_STATE bb, &k, e);
547 nn = t->v.n + ((unsigned) bb & mask_bits[e]);
548 bb >>= e;
549 k -= e;
551 /* decode distance of block to copy */
552 bb = fill_bitbuffer(PASS_STATE bb, &k, bd);
553 t = td + ((unsigned) bb & md);
554 e = t->e;
555 if (e > 16)
556 do {
557 if (e == 99)
558 abort_unzip(PASS_STATE_ONLY);
559 bb >>= t->b;
560 k -= t->b;
561 e -= 16;
562 bb = fill_bitbuffer(PASS_STATE bb, &k, e);
563 t = t->v.t + ((unsigned) bb & mask_bits[e]);
564 e = t->e;
565 } while (e > 16);
566 bb >>= t->b;
567 k -= t->b;
568 bb = fill_bitbuffer(PASS_STATE bb, &k, e);
569 dd = w - t->v.n - ((unsigned) bb & mask_bits[e]);
570 bb >>= e;
571 k -= e;
573 /* do the copy */
574 do_copy:
575 do {
576 /* Was: nn -= (e = (e = GUNZIP_WSIZE - ((dd &= GUNZIP_WSIZE - 1) > w ? dd : w)) > nn ? nn : e); */
577 /* Who wrote THAT?? rewritten as: */
578 dd &= GUNZIP_WSIZE - 1;
579 e = GUNZIP_WSIZE - (dd > w ? dd : w);
580 if (e > nn) e = nn;
581 nn -= e;
583 /* copy to new buffer to prevent possible overwrite */
584 if (w - dd >= e) { /* (this test assumes unsigned comparison) */
585 memcpy(gunzip_window + w, gunzip_window + dd, e);
586 w += e;
587 dd += e;
588 } else {
589 /* do it slow to avoid memcpy() overlap */
590 /* !NOMEMCPY */
591 do {
592 gunzip_window[w++] = gunzip_window[dd++];
593 } while (--e);
595 if (w == GUNZIP_WSIZE) {
596 gunzip_outbuf_count = w;
597 resume_copy = (nn != 0);
598 //flush_gunzip_window();
599 w = 0;
600 return 1;
602 } while (nn);
603 resume_copy = 0;
607 /* restore the globals from the locals */
608 gunzip_outbuf_count = w; /* restore global gunzip_window pointer */
609 gunzip_bb = bb; /* restore global bit buffer */
610 gunzip_bk = k;
612 /* normally just after call to inflate_codes, but save code by putting it here */
613 /* free the decoding tables (tl and td), return */
614 huft_free_all(PASS_STATE_ONLY);
616 /* done */
617 return 0;
619 #undef ml
620 #undef md
621 #undef bb
622 #undef k
623 #undef w
624 #undef tl
625 #undef td
626 #undef bl
627 #undef bd
628 #undef nn
629 #undef dd
632 /* called once from inflate_block */
633 static void inflate_stored_setup(STATE_PARAM int my_n, int my_b, int my_k)
635 inflate_stored_n = my_n;
636 inflate_stored_b = my_b;
637 inflate_stored_k = my_k;
638 /* initialize gunzip_window position */
639 inflate_stored_w = gunzip_outbuf_count;
641 /* called once from inflate_get_next_window */
642 static int inflate_stored(STATE_PARAM_ONLY)
644 /* read and output the compressed data */
645 while (inflate_stored_n--) {
646 inflate_stored_b = fill_bitbuffer(PASS_STATE inflate_stored_b, &inflate_stored_k, 8);
647 gunzip_window[inflate_stored_w++] = (unsigned char) inflate_stored_b;
648 if (inflate_stored_w == GUNZIP_WSIZE) {
649 gunzip_outbuf_count = inflate_stored_w;
650 //flush_gunzip_window();
651 inflate_stored_w = 0;
652 inflate_stored_b >>= 8;
653 inflate_stored_k -= 8;
654 return 1; /* We have a block */
656 inflate_stored_b >>= 8;
657 inflate_stored_k -= 8;
660 /* restore the globals from the locals */
661 gunzip_outbuf_count = inflate_stored_w; /* restore global gunzip_window pointer */
662 gunzip_bb = inflate_stored_b; /* restore global bit buffer */
663 gunzip_bk = inflate_stored_k;
664 return 0; /* Finished */
669 * decompress an inflated block
670 * e: last block flag
672 * GLOBAL VARIABLES: bb, kk,
674 /* Return values: -1 = inflate_stored, -2 = inflate_codes */
675 /* One callsite in inflate_get_next_window */
676 static int inflate_block(STATE_PARAM smallint *e)
678 unsigned ll[286 + 30]; /* literal/length and distance code lengths */
679 unsigned t; /* block type */
680 unsigned b; /* bit buffer */
681 unsigned k; /* number of bits in bit buffer */
683 /* make local bit buffer */
685 b = gunzip_bb;
686 k = gunzip_bk;
688 /* read in last block bit */
689 b = fill_bitbuffer(PASS_STATE b, &k, 1);
690 *e = b & 1;
691 b >>= 1;
692 k -= 1;
694 /* read in block type */
695 b = fill_bitbuffer(PASS_STATE b, &k, 2);
696 t = (unsigned) b & 3;
697 b >>= 2;
698 k -= 2;
700 /* restore the global bit buffer */
701 gunzip_bb = b;
702 gunzip_bk = k;
704 /* Do we see block type 1 often? Yes!
705 * TODO: fix performance problem (see below) */
706 //bb_error_msg("blktype %d", t);
708 /* inflate that block type */
709 switch (t) {
710 case 0: /* Inflate stored */
712 unsigned n; /* number of bytes in block */
713 unsigned b_stored; /* bit buffer */
714 unsigned k_stored; /* number of bits in bit buffer */
716 /* make local copies of globals */
717 b_stored = gunzip_bb; /* initialize bit buffer */
718 k_stored = gunzip_bk;
720 /* go to byte boundary */
721 n = k_stored & 7;
722 b_stored >>= n;
723 k_stored -= n;
725 /* get the length and its complement */
726 b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16);
727 n = ((unsigned) b_stored & 0xffff);
728 b_stored >>= 16;
729 k_stored -= 16;
731 b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16);
732 if (n != (unsigned) ((~b_stored) & 0xffff)) {
733 abort_unzip(PASS_STATE_ONLY); /* error in compressed data */
735 b_stored >>= 16;
736 k_stored -= 16;
738 inflate_stored_setup(PASS_STATE n, b_stored, k_stored);
740 return -1;
742 case 1:
743 /* Inflate fixed
744 * decompress an inflated type 1 (fixed Huffman codes) block. We should
745 * either replace this with a custom decoder, or at least precompute the
746 * Huffman tables. TODO */
748 int i; /* temporary variable */
749 unsigned bl; /* lookup bits for tl */
750 unsigned bd; /* lookup bits for td */
751 /* gcc 4.2.1 is too dumb to reuse stackspace. Moved up... */
752 //unsigned ll[288]; /* length list for huft_build */
754 /* set up literal table */
755 for (i = 0; i < 144; i++)
756 ll[i] = 8;
757 for (; i < 256; i++)
758 ll[i] = 9;
759 for (; i < 280; i++)
760 ll[i] = 7;
761 for (; i < 288; i++) /* make a complete, but wrong code set */
762 ll[i] = 8;
763 bl = 7;
764 huft_build(ll, 288, 257, cplens, cplext, &inflate_codes_tl, &bl);
765 /* huft_build() never return nonzero - we use known data */
767 /* set up distance table */
768 for (i = 0; i < 30; i++) /* make an incomplete code set */
769 ll[i] = 5;
770 bd = 5;
771 huft_build(ll, 30, 0, cpdist, cpdext, &inflate_codes_td, &bd);
773 /* set up data for inflate_codes() */
774 inflate_codes_setup(PASS_STATE bl, bd);
776 /* huft_free code moved into inflate_codes */
778 return -2;
780 case 2: /* Inflate dynamic */
782 enum { dbits = 6 }; /* bits in base distance lookup table */
783 enum { lbits = 9 }; /* bits in base literal/length lookup table */
785 huft_t *td; /* distance code table */
786 unsigned i; /* temporary variables */
787 unsigned j;
788 unsigned l; /* last length */
789 unsigned m; /* mask for bit lengths table */
790 unsigned n; /* number of lengths to get */
791 unsigned bl; /* lookup bits for tl */
792 unsigned bd; /* lookup bits for td */
793 unsigned nb; /* number of bit length codes */
794 unsigned nl; /* number of literal/length codes */
795 unsigned nd; /* number of distance codes */
797 //unsigned ll[286 + 30];/* literal/length and distance code lengths */
798 unsigned b_dynamic; /* bit buffer */
799 unsigned k_dynamic; /* number of bits in bit buffer */
801 /* make local bit buffer */
802 b_dynamic = gunzip_bb;
803 k_dynamic = gunzip_bk;
805 /* read in table lengths */
806 b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5);
807 nl = 257 + ((unsigned) b_dynamic & 0x1f); /* number of literal/length codes */
809 b_dynamic >>= 5;
810 k_dynamic -= 5;
811 b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5);
812 nd = 1 + ((unsigned) b_dynamic & 0x1f); /* number of distance codes */
814 b_dynamic >>= 5;
815 k_dynamic -= 5;
816 b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 4);
817 nb = 4 + ((unsigned) b_dynamic & 0xf); /* number of bit length codes */
819 b_dynamic >>= 4;
820 k_dynamic -= 4;
821 if (nl > 286 || nd > 30)
822 abort_unzip(PASS_STATE_ONLY); /* bad lengths */
824 /* read in bit-length-code lengths */
825 for (j = 0; j < nb; j++) {
826 b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3);
827 ll[border[j]] = (unsigned) b_dynamic & 7;
828 b_dynamic >>= 3;
829 k_dynamic -= 3;
831 for (; j < 19; j++)
832 ll[border[j]] = 0;
834 /* build decoding table for trees - single level, 7 bit lookup */
835 bl = 7;
836 i = huft_build(ll, 19, 19, NULL, NULL, &inflate_codes_tl, &bl);
837 if (i != 0) {
838 abort_unzip(PASS_STATE_ONLY); //return i; /* incomplete code set */
841 /* read in literal and distance code lengths */
842 n = nl + nd;
843 m = mask_bits[bl];
844 i = l = 0;
845 while ((unsigned) i < n) {
846 b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, (unsigned)bl);
847 td = inflate_codes_tl + ((unsigned) b_dynamic & m);
848 j = td->b;
849 b_dynamic >>= j;
850 k_dynamic -= j;
851 j = td->v.n;
852 if (j < 16) { /* length of code in bits (0..15) */
853 ll[i++] = l = j; /* save last length in l */
854 } else if (j == 16) { /* repeat last length 3 to 6 times */
855 b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 2);
856 j = 3 + ((unsigned) b_dynamic & 3);
857 b_dynamic >>= 2;
858 k_dynamic -= 2;
859 if ((unsigned) i + j > n) {
860 abort_unzip(PASS_STATE_ONLY); //return 1;
862 while (j--) {
863 ll[i++] = l;
865 } else if (j == 17) { /* 3 to 10 zero length codes */
866 b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3);
867 j = 3 + ((unsigned) b_dynamic & 7);
868 b_dynamic >>= 3;
869 k_dynamic -= 3;
870 if ((unsigned) i + j > n) {
871 abort_unzip(PASS_STATE_ONLY); //return 1;
873 while (j--) {
874 ll[i++] = 0;
876 l = 0;
877 } else { /* j == 18: 11 to 138 zero length codes */
878 b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 7);
879 j = 11 + ((unsigned) b_dynamic & 0x7f);
880 b_dynamic >>= 7;
881 k_dynamic -= 7;
882 if ((unsigned) i + j > n) {
883 abort_unzip(PASS_STATE_ONLY); //return 1;
885 while (j--) {
886 ll[i++] = 0;
888 l = 0;
892 /* free decoding table for trees */
893 huft_free(inflate_codes_tl);
895 /* restore the global bit buffer */
896 gunzip_bb = b_dynamic;
897 gunzip_bk = k_dynamic;
899 /* build the decoding tables for literal/length and distance codes */
900 bl = lbits;
902 i = huft_build(ll, nl, 257, cplens, cplext, &inflate_codes_tl, &bl);
903 if (i != 0)
904 abort_unzip(PASS_STATE_ONLY);
905 bd = dbits;
906 i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &inflate_codes_td, &bd);
907 if (i != 0)
908 abort_unzip(PASS_STATE_ONLY);
910 /* set up data for inflate_codes() */
911 inflate_codes_setup(PASS_STATE bl, bd);
913 /* huft_free code moved into inflate_codes */
915 return -2;
917 default:
918 abort_unzip(PASS_STATE_ONLY);
922 /* Two callsites, both in inflate_get_next_window */
923 static void calculate_gunzip_crc(STATE_PARAM_ONLY)
925 unsigned n;
926 for (n = 0; n < gunzip_outbuf_count; n++) {
927 gunzip_crc = gunzip_crc_table[((int) gunzip_crc ^ (gunzip_window[n])) & 0xff] ^ (gunzip_crc >> 8);
929 gunzip_bytes_out += gunzip_outbuf_count;
932 /* One callsite in inflate_unzip_internal */
933 static int inflate_get_next_window(STATE_PARAM_ONLY)
935 gunzip_outbuf_count = 0;
937 while (1) {
938 int ret;
940 if (need_another_block) {
941 if (end_reached) {
942 calculate_gunzip_crc(PASS_STATE_ONLY);
943 end_reached = 0;
944 /* NB: need_another_block is still set */
945 return 0; /* Last block */
947 method = inflate_block(PASS_STATE &end_reached);
948 need_another_block = 0;
951 switch (method) {
952 case -1:
953 ret = inflate_stored(PASS_STATE_ONLY);
954 break;
955 case -2:
956 ret = inflate_codes(PASS_STATE_ONLY);
957 break;
958 default: /* cannot happen */
959 abort_unzip(PASS_STATE_ONLY);
962 if (ret == 1) {
963 calculate_gunzip_crc(PASS_STATE_ONLY);
964 return 1; /* more data left */
966 need_another_block = 1; /* end of that block */
968 /* Doesnt get here */
972 /* Called from unpack_gz_stream() and inflate_unzip() */
973 static USE_DESKTOP(long long) int
974 inflate_unzip_internal(STATE_PARAM int in, int out)
976 USE_DESKTOP(long long) int n = 0;
977 ssize_t nwrote;
979 /* Allocate all global buffers (for DYN_ALLOC option) */
980 gunzip_window = xmalloc(GUNZIP_WSIZE);
981 gunzip_outbuf_count = 0;
982 gunzip_bytes_out = 0;
983 gunzip_src_fd = in;
985 /* (re) initialize state */
986 method = -1;
987 need_another_block = 1;
988 resume_copy = 0;
989 gunzip_bk = 0;
990 gunzip_bb = 0;
992 /* Create the crc table */
993 gunzip_crc_table = crc32_filltable(NULL, 0);
994 gunzip_crc = ~0;
996 error_msg = "corrupted data";
997 if (setjmp(error_jmp)) {
998 /* Error from deep inside zip machinery */
999 n = -1;
1000 goto ret;
1003 while (1) {
1004 int r = inflate_get_next_window(PASS_STATE_ONLY);
1005 nwrote = full_write(out, gunzip_window, gunzip_outbuf_count);
1006 if (nwrote != (ssize_t)gunzip_outbuf_count) {
1007 bb_perror_msg("write");
1008 n = -1;
1009 goto ret;
1011 USE_DESKTOP(n += nwrote;)
1012 if (r == 0) break;
1015 /* Store unused bytes in a global buffer so calling applets can access it */
1016 if (gunzip_bk >= 8) {
1017 /* Undo too much lookahead. The next read will be byte aligned
1018 * so we can discard unused bits in the last meaningful byte. */
1019 bytebuffer_offset--;
1020 bytebuffer[bytebuffer_offset] = gunzip_bb & 0xff;
1021 gunzip_bb >>= 8;
1022 gunzip_bk -= 8;
1024 ret:
1025 /* Cleanup */
1026 free(gunzip_window);
1027 free(gunzip_crc_table);
1028 return n;
1032 /* External entry points */
1034 /* For unzip */
1036 USE_DESKTOP(long long) int FAST_FUNC
1037 inflate_unzip(inflate_unzip_result *res, off_t compr_size, int in, int out)
1039 USE_DESKTOP(long long) int n;
1040 DECLARE_STATE;
1042 ALLOC_STATE;
1044 to_read = compr_size;
1045 // bytebuffer_max = 0x8000;
1046 bytebuffer_offset = 4;
1047 bytebuffer = xmalloc(bytebuffer_max);
1048 n = inflate_unzip_internal(PASS_STATE in, out);
1049 free(bytebuffer);
1051 res->crc = gunzip_crc;
1052 res->bytes_out = gunzip_bytes_out;
1053 DEALLOC_STATE;
1054 return n;
1058 /* For gunzip */
1060 /* helpers first */
1062 /* Top up the input buffer with at least n bytes. */
1063 static int top_up(STATE_PARAM unsigned n)
1065 int count = bytebuffer_size - bytebuffer_offset;
1067 if (count < (int)n) {
1068 memmove(bytebuffer, &bytebuffer[bytebuffer_offset], count);
1069 bytebuffer_offset = 0;
1070 bytebuffer_size = full_read(gunzip_src_fd, &bytebuffer[count], bytebuffer_max - count);
1071 if ((int)bytebuffer_size < 0) {
1072 bb_error_msg("read error");
1073 return 0;
1075 bytebuffer_size += count;
1076 if (bytebuffer_size < n)
1077 return 0;
1079 return 1;
1082 static uint16_t buffer_read_le_u16(STATE_PARAM_ONLY)
1084 uint16_t res;
1085 #if BB_LITTLE_ENDIAN
1086 /* gcc 4.2.1 is very clever */
1087 memcpy(&res, &bytebuffer[bytebuffer_offset], 2);
1088 #else
1089 res = bytebuffer[bytebuffer_offset];
1090 res |= bytebuffer[bytebuffer_offset + 1] << 8;
1091 #endif
1092 bytebuffer_offset += 2;
1093 return res;
1096 static uint32_t buffer_read_le_u32(STATE_PARAM_ONLY)
1098 uint32_t res;
1099 #if BB_LITTLE_ENDIAN
1100 memcpy(&res, &bytebuffer[bytebuffer_offset], 4);
1101 #else
1102 res = bytebuffer[bytebuffer_offset];
1103 res |= bytebuffer[bytebuffer_offset + 1] << 8;
1104 res |= bytebuffer[bytebuffer_offset + 2] << 16;
1105 res |= bytebuffer[bytebuffer_offset + 3] << 24;
1106 #endif
1107 bytebuffer_offset += 4;
1108 return res;
1111 static int check_header_gzip(STATE_PARAM unpack_info_t *info)
1113 union {
1114 unsigned char raw[8];
1115 struct {
1116 uint8_t gz_method;
1117 uint8_t flags;
1118 uint32_t mtime;
1119 uint8_t xtra_flags_UNUSED;
1120 uint8_t os_flags_UNUSED;
1121 } __attribute__((packed)) formatted;
1122 } header;
1123 struct BUG_header {
1124 char BUG_header[sizeof(header) == 8 ? 1 : -1];
1128 * Rewind bytebuffer. We use the beginning because the header has 8
1129 * bytes, leaving enough for unwinding afterwards.
1131 bytebuffer_size -= bytebuffer_offset;
1132 memmove(bytebuffer, &bytebuffer[bytebuffer_offset], bytebuffer_size);
1133 bytebuffer_offset = 0;
1135 if (!top_up(PASS_STATE 8))
1136 return 0;
1137 memcpy(header.raw, &bytebuffer[bytebuffer_offset], 8);
1138 bytebuffer_offset += 8;
1140 /* Check the compression method */
1141 if (header.formatted.gz_method != 8) {
1142 return 0;
1145 if (header.formatted.flags & 0x04) {
1146 /* bit 2 set: extra field present */
1147 unsigned extra_short;
1149 if (!top_up(PASS_STATE 2))
1150 return 0;
1151 extra_short = buffer_read_le_u16(PASS_STATE_ONLY);
1152 if (!top_up(PASS_STATE extra_short))
1153 return 0;
1154 /* Ignore extra field */
1155 bytebuffer_offset += extra_short;
1158 /* Discard original name and file comment if any */
1159 /* bit 3 set: original file name present */
1160 /* bit 4 set: file comment present */
1161 if (header.formatted.flags & 0x18) {
1162 while (1) {
1163 do {
1164 if (!top_up(PASS_STATE 1))
1165 return 0;
1166 } while (bytebuffer[bytebuffer_offset++] != 0);
1167 if ((header.formatted.flags & 0x18) != 0x18)
1168 break;
1169 header.formatted.flags &= ~0x18;
1173 if (info)
1174 info->mtime = SWAP_LE32(header.formatted.mtime);
1176 /* Read the header checksum */
1177 if (header.formatted.flags & 0x02) {
1178 if (!top_up(PASS_STATE 2))
1179 return 0;
1180 bytebuffer_offset += 2;
1182 return 1;
1185 USE_DESKTOP(long long) int FAST_FUNC
1186 unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
1188 uint32_t v32;
1189 USE_DESKTOP(long long) int n;
1190 DECLARE_STATE;
1192 n = 0;
1194 ALLOC_STATE;
1195 to_read = -1;
1196 // bytebuffer_max = 0x8000;
1197 bytebuffer = xmalloc(bytebuffer_max);
1198 gunzip_src_fd = in;
1200 again:
1201 if (!check_header_gzip(PASS_STATE info)) {
1202 bb_error_msg("corrupted data");
1203 n = -1;
1204 goto ret;
1206 n += inflate_unzip_internal(PASS_STATE in, out);
1207 if (n < 0)
1208 goto ret;
1210 if (!top_up(PASS_STATE 8)) {
1211 bb_error_msg("corrupted data");
1212 n = -1;
1213 goto ret;
1216 /* Validate decompression - crc */
1217 v32 = buffer_read_le_u32(PASS_STATE_ONLY);
1218 if ((~gunzip_crc) != v32) {
1219 bb_error_msg("crc error");
1220 n = -1;
1221 goto ret;
1224 /* Validate decompression - size */
1225 v32 = buffer_read_le_u32(PASS_STATE_ONLY);
1226 if ((uint32_t)gunzip_bytes_out != v32) {
1227 bb_error_msg("incorrect length");
1228 n = -1;
1231 if (!top_up(PASS_STATE 2))
1232 goto ret; /* EOF */
1234 if (bytebuffer[bytebuffer_offset] == 0x1f
1235 && bytebuffer[bytebuffer_offset + 1] == 0x8b
1237 bytebuffer_offset += 2;
1238 goto again;
1240 /* GNU gzip says: */
1241 /*bb_error_msg("decompression OK, trailing garbage ignored");*/
1243 ret:
1244 free(bytebuffer);
1245 DEALLOC_STATE;
1246 return n;
1249 USE_DESKTOP(long long) int FAST_FUNC
1250 unpack_gz_stream(int in, int out)
1252 return unpack_gz_stream_with_info(in, out, NULL);