2 /*-------------------------------------------------------------*/
3 /*--- Decompression machinery ---*/
4 /*--- decompress.c ---*/
5 /*-------------------------------------------------------------*/
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 This program is released under the terms of the license contained
19 ------------------------------------------------------------------ */
22 #include "bzlib_private.h"
25 /*---------------------------------------------------*/
27 void makeMaps_d ( DState
* s
)
31 for (i
= 0; i
< 256; i
++)
33 s
->seqToUnseq
[s
->nInUse
] = i
;
39 /*---------------------------------------------------*/
41 { retVal = rrr; goto save_state_and_return; }
43 #define GET_BITS(lll,vvv,nnn) \
44 case lll: s->state = lll; \
46 if (s->bsLive >= nnn) { \
49 (s->bsLive-nnn)) & ((1 << nnn)-1); \
54 if (s->strm->avail_in == 0) RETURN(BZ_OK); \
56 = (s->bsBuff << 8) | \
58 (*((UChar*)(s->strm->next_in)))); \
61 s->strm->avail_in--; \
62 s->strm->total_in_lo32++; \
63 if (s->strm->total_in_lo32 == 0) \
64 s->strm->total_in_hi32++; \
67 #define GET_UCHAR(lll,uuu) \
70 #define GET_BIT(lll,uuu) \
73 /*---------------------------------------------------*/
74 #define GET_MTF_VAL(label1,label2,lval) \
76 if (groupPos == 0) { \
78 if (groupNo >= nSelectors) \
79 RETURN(BZ_DATA_ERROR); \
80 groupPos = BZ_G_SIZE; \
81 gSel = s->selector[groupNo]; \
82 gMinlen = s->minLens[gSel]; \
83 gLimit = &(s->limit[gSel][0]); \
84 gPerm = &(s->perm[gSel][0]); \
85 gBase = &(s->base[gSel][0]); \
89 GET_BITS(label1, zvec, zn); \
91 if (zn > 20 /* the longest code */) \
92 RETURN(BZ_DATA_ERROR); \
93 if (zvec <= gLimit[zn]) break; \
95 GET_BIT(label2, zj); \
96 zvec = (zvec << 1) | zj; \
98 if (zvec - gBase[zn] < 0 \
99 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
100 RETURN(BZ_DATA_ERROR); \
101 lval = gPerm[zvec - gBase[zn]]; \
105 /*---------------------------------------------------*/
106 Int32
BZ2_decompress ( DState
* s
)
110 Int32 minLen
, maxLen
;
111 bz_stream
* strm
= s
->strm
;
113 /* stuff that needs to be saved/restored */
139 if (s
->state
== BZ_X_MAGIC_1
) {
140 /*initialise the save area*/
144 s
->save_alphaSize
= 0;
146 s
->save_nSelectors
= 0;
149 s
->save_groupPos
= 0;
151 s
->save_nblockMAX
= 0;
162 s
->save_gLimit
= NULL
;
163 s
->save_gBase
= NULL
;
164 s
->save_gPerm
= NULL
;
167 /*restore from the save area*/
171 alphaSize
= s
->save_alphaSize
;
172 nGroups
= s
->save_nGroups
;
173 nSelectors
= s
->save_nSelectors
;
175 groupNo
= s
->save_groupNo
;
176 groupPos
= s
->save_groupPos
;
177 nextSym
= s
->save_nextSym
;
178 nblockMAX
= s
->save_nblockMAX
;
179 nblock
= s
->save_nblock
;
188 gMinlen
= s
->save_gMinlen
;
189 gLimit
= s
->save_gLimit
;
190 gBase
= s
->save_gBase
;
191 gPerm
= s
->save_gPerm
;
197 GET_UCHAR(BZ_X_MAGIC_1
, uc
);
198 if (uc
!= BZ_HDR_B
) RETURN(BZ_DATA_ERROR_MAGIC
);
200 GET_UCHAR(BZ_X_MAGIC_2
, uc
);
201 if (uc
!= BZ_HDR_Z
) RETURN(BZ_DATA_ERROR_MAGIC
);
203 GET_UCHAR(BZ_X_MAGIC_3
, uc
)
204 if (uc
!= BZ_HDR_h
) RETURN(BZ_DATA_ERROR_MAGIC
);
206 GET_BITS(BZ_X_MAGIC_4
, s
->blockSize100k
, 8)
207 if (s
->blockSize100k
< (BZ_HDR_0
+ 1) ||
208 s
->blockSize100k
> (BZ_HDR_0
+ 9)) RETURN(BZ_DATA_ERROR_MAGIC
);
209 s
->blockSize100k
-= BZ_HDR_0
;
211 if (s
->smallDecompress
) {
212 s
->ll16
= BZALLOC( s
->blockSize100k
* 100000 * sizeof(UInt16
) );
214 ((1 + s
->blockSize100k
* 100000) >> 1) * sizeof(UChar
)
216 if (s
->ll16
== NULL
|| s
->ll4
== NULL
) RETURN(BZ_MEM_ERROR
);
218 s
->tt
= BZALLOC( s
->blockSize100k
* 100000 * sizeof(Int32
) );
219 if (s
->tt
== NULL
) RETURN(BZ_MEM_ERROR
);
222 GET_UCHAR(BZ_X_BLKHDR_1
, uc
);
224 if (uc
== 0x17) goto endhdr_2
;
225 if (uc
!= 0x31) RETURN(BZ_DATA_ERROR
);
226 GET_UCHAR(BZ_X_BLKHDR_2
, uc
);
227 if (uc
!= 0x41) RETURN(BZ_DATA_ERROR
);
228 GET_UCHAR(BZ_X_BLKHDR_3
, uc
);
229 if (uc
!= 0x59) RETURN(BZ_DATA_ERROR
);
230 GET_UCHAR(BZ_X_BLKHDR_4
, uc
);
231 if (uc
!= 0x26) RETURN(BZ_DATA_ERROR
);
232 GET_UCHAR(BZ_X_BLKHDR_5
, uc
);
233 if (uc
!= 0x53) RETURN(BZ_DATA_ERROR
);
234 GET_UCHAR(BZ_X_BLKHDR_6
, uc
);
235 if (uc
!= 0x59) RETURN(BZ_DATA_ERROR
);
238 if (s
->verbosity
>= 2)
239 VPrintf1 ( "\n [%d: huff+mtf ", s
->currBlockNo
);
241 s
->storedBlockCRC
= 0;
242 GET_UCHAR(BZ_X_BCRC_1
, uc
);
243 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
244 GET_UCHAR(BZ_X_BCRC_2
, uc
);
245 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
246 GET_UCHAR(BZ_X_BCRC_3
, uc
);
247 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
248 GET_UCHAR(BZ_X_BCRC_4
, uc
);
249 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
251 GET_BITS(BZ_X_RANDBIT
, s
->blockRandomised
, 1);
254 GET_UCHAR(BZ_X_ORIGPTR_1
, uc
);
255 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
256 GET_UCHAR(BZ_X_ORIGPTR_2
, uc
);
257 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
258 GET_UCHAR(BZ_X_ORIGPTR_3
, uc
);
259 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
262 RETURN(BZ_DATA_ERROR
);
263 if (s
->origPtr
> 10 + 100000*s
->blockSize100k
)
264 RETURN(BZ_DATA_ERROR
);
266 /*--- Receive the mapping table ---*/
267 for (i
= 0; i
< 16; i
++) {
268 GET_BIT(BZ_X_MAPPING_1
, uc
);
270 s
->inUse16
[i
] = True
; else
271 s
->inUse16
[i
] = False
;
274 for (i
= 0; i
< 256; i
++) s
->inUse
[i
] = False
;
276 for (i
= 0; i
< 16; i
++)
278 for (j
= 0; j
< 16; j
++) {
279 GET_BIT(BZ_X_MAPPING_2
, uc
);
280 if (uc
== 1) s
->inUse
[i
* 16 + j
] = True
;
283 if (s
->nInUse
== 0) RETURN(BZ_DATA_ERROR
);
284 alphaSize
= s
->nInUse
+2;
286 /*--- Now the selectors ---*/
287 GET_BITS(BZ_X_SELECTOR_1
, nGroups
, 3);
288 if (nGroups
< 2 || nGroups
> 6) RETURN(BZ_DATA_ERROR
);
289 GET_BITS(BZ_X_SELECTOR_2
, nSelectors
, 15);
290 if (nSelectors
< 1) RETURN(BZ_DATA_ERROR
);
291 for (i
= 0; i
< nSelectors
; i
++) {
294 GET_BIT(BZ_X_SELECTOR_3
, uc
);
297 if (j
>= nGroups
) RETURN(BZ_DATA_ERROR
);
299 s
->selectorMtf
[i
] = j
;
302 /*--- Undo the MTF values for the selectors. ---*/
304 UChar pos
[BZ_N_GROUPS
], tmp
, v
;
305 for (v
= 0; v
< nGroups
; v
++) pos
[v
] = v
;
307 for (i
= 0; i
< nSelectors
; i
++) {
308 v
= s
->selectorMtf
[i
];
310 while (v
> 0) { pos
[v
] = pos
[v
-1]; v
--; }
312 s
->selector
[i
] = tmp
;
316 /*--- Now the coding tables ---*/
317 for (t
= 0; t
< nGroups
; t
++) {
318 GET_BITS(BZ_X_CODING_1
, curr
, 5);
319 for (i
= 0; i
< alphaSize
; i
++) {
321 if (curr
< 1 || curr
> 20) RETURN(BZ_DATA_ERROR
);
322 GET_BIT(BZ_X_CODING_2
, uc
);
324 GET_BIT(BZ_X_CODING_3
, uc
);
325 if (uc
== 0) curr
++; else curr
--;
331 /*--- Create the Huffman decoding tables ---*/
332 for (t
= 0; t
< nGroups
; t
++) {
335 for (i
= 0; i
< alphaSize
; i
++) {
336 if (s
->len
[t
][i
] > maxLen
) maxLen
= s
->len
[t
][i
];
337 if (s
->len
[t
][i
] < minLen
) minLen
= s
->len
[t
][i
];
339 BZ2_hbCreateDecodeTables (
344 minLen
, maxLen
, alphaSize
346 s
->minLens
[t
] = minLen
;
349 /*--- Now the MTF values ---*/
352 nblockMAX
= 100000 * s
->blockSize100k
;
356 for (i
= 0; i
<= 255; i
++) s
->unzftab
[i
] = 0;
362 for (ii
= 256 / MTFL_SIZE
- 1; ii
>= 0; ii
--) {
363 for (jj
= MTFL_SIZE
-1; jj
>= 0; jj
--) {
364 s
->mtfa
[kk
] = (UChar
)(ii
* MTFL_SIZE
+ jj
);
367 s
->mtfbase
[ii
] = kk
+ 1;
370 /*-- end MTF init --*/
373 GET_MTF_VAL(BZ_X_MTF_1
, BZ_X_MTF_2
, nextSym
);
377 if (nextSym
== EOB
) break;
379 if (nextSym
== BZ_RUNA
|| nextSym
== BZ_RUNB
) {
384 /* Check that N doesn't get too big, so that es doesn't
385 go negative. The maximum value that can be
386 RUNA/RUNB encoded is equal to the block size (post
387 the initial RLE), viz, 900k, so bounding N at 2
388 million should guard against overflow without
389 rejecting any legitimate inputs. */
390 if (N
>= 2*1024*1024) RETURN(BZ_DATA_ERROR
);
391 if (nextSym
== BZ_RUNA
) es
= es
+ (0+1) * N
; else
392 if (nextSym
== BZ_RUNB
) es
= es
+ (1+1) * N
;
394 GET_MTF_VAL(BZ_X_MTF_3
, BZ_X_MTF_4
, nextSym
);
396 while (nextSym
== BZ_RUNA
|| nextSym
== BZ_RUNB
);
399 uc
= s
->seqToUnseq
[ s
->mtfa
[s
->mtfbase
[0]] ];
400 s
->unzftab
[uc
] += es
;
402 if (s
->smallDecompress
)
404 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
405 s
->ll16
[nblock
] = (UInt16
)uc
;
411 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
412 s
->tt
[nblock
] = (UInt32
)uc
;
421 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
423 /*-- uc = MTF ( nextSym-1 ) --*/
425 Int32 ii
, jj
, kk
, pp
, lno
, off
;
427 nn
= (UInt32
)(nextSym
- 1);
429 if (nn
< MTFL_SIZE
) {
430 /* avoid general-case expense */
435 s
->mtfa
[(z
) ] = s
->mtfa
[(z
)-1];
436 s
->mtfa
[(z
)-1] = s
->mtfa
[(z
)-2];
437 s
->mtfa
[(z
)-2] = s
->mtfa
[(z
)-3];
438 s
->mtfa
[(z
)-3] = s
->mtfa
[(z
)-4];
442 s
->mtfa
[(pp
+nn
)] = s
->mtfa
[(pp
+nn
)-1]; nn
--;
447 lno
= nn
/ MTFL_SIZE
;
448 off
= nn
% MTFL_SIZE
;
449 pp
= s
->mtfbase
[lno
] + off
;
451 while (pp
> s
->mtfbase
[lno
]) {
452 s
->mtfa
[pp
] = s
->mtfa
[pp
-1]; pp
--;
457 s
->mtfa
[s
->mtfbase
[lno
]]
458 = s
->mtfa
[s
->mtfbase
[lno
-1] + MTFL_SIZE
- 1];
462 s
->mtfa
[s
->mtfbase
[0]] = uc
;
463 if (s
->mtfbase
[0] == 0) {
465 for (ii
= 256 / MTFL_SIZE
-1; ii
>= 0; ii
--) {
466 for (jj
= MTFL_SIZE
-1; jj
>= 0; jj
--) {
467 s
->mtfa
[kk
] = s
->mtfa
[s
->mtfbase
[ii
] + jj
];
470 s
->mtfbase
[ii
] = kk
+ 1;
475 /*-- end uc = MTF ( nextSym-1 ) --*/
477 s
->unzftab
[s
->seqToUnseq
[uc
]]++;
478 if (s
->smallDecompress
)
479 s
->ll16
[nblock
] = (UInt16
)(s
->seqToUnseq
[uc
]); else
480 s
->tt
[nblock
] = (UInt32
)(s
->seqToUnseq
[uc
]);
483 GET_MTF_VAL(BZ_X_MTF_5
, BZ_X_MTF_6
, nextSym
);
488 /* Now we know what nblock is, we can do a better sanity
491 if (s
->origPtr
< 0 || s
->origPtr
>= nblock
)
492 RETURN(BZ_DATA_ERROR
);
494 /*-- Set up cftab to facilitate generation of T^(-1) --*/
495 /* Check: unzftab entries in range. */
496 for (i
= 0; i
<= 255; i
++) {
497 if (s
->unzftab
[i
] < 0 || s
->unzftab
[i
] > nblock
)
498 RETURN(BZ_DATA_ERROR
);
500 /* Actually generate cftab. */
502 for (i
= 1; i
<= 256; i
++) s
->cftab
[i
] = s
->unzftab
[i
-1];
503 for (i
= 1; i
<= 256; i
++) s
->cftab
[i
] += s
->cftab
[i
-1];
504 /* Check: cftab entries in range. */
505 for (i
= 0; i
<= 256; i
++) {
506 if (s
->cftab
[i
] < 0 || s
->cftab
[i
] > nblock
) {
507 /* s->cftab[i] can legitimately be == nblock */
508 RETURN(BZ_DATA_ERROR
)
511 /* Check: cftab entries non-descending. */
512 for (i
= 1; i
<= 256; i
++) {
513 if (s
->cftab
[i
-1] > s
->cftab
[i
]) {
514 RETURN(BZ_DATA_ERROR
)
518 s
->state_out_len
= 0;
520 BZ_INITIALISE_CRC ( s
->calculatedBlockCRC
);
521 s
->state
= BZ_X_OUTPUT
;
522 if (s
->verbosity
>= 2) VPrintf0 ( "rt+rld" );
524 if (s
->smallDecompress
) {
526 /*-- Make a copy of cftab, used in generation of T --*/
527 for (i
= 0; i
<= 256; i
++) s
->cftabCopy
[i
] = s
->cftab
[i
];
529 /*-- compute the T vector --*/
530 for (i
= 0; i
< nblock
; i
++) {
531 uc
= (UChar
)(s
->ll16
[i
]);
532 SET_LL(i
, s
->cftabCopy
[uc
]);
536 /*-- Compute T^(-1) by pointer reversal on T --*/
540 Int32 tmp
= GET_LL(j
);
545 while (i
!= s
->origPtr
);
547 s
->tPos
= s
->origPtr
;
549 if (s
->blockRandomised
) {
551 BZ_GET_SMALL(s
->k0
); s
->nblock_used
++;
552 BZ_RAND_UPD_MASK
; s
->k0
^= BZ_RAND_MASK
;
554 BZ_GET_SMALL(s
->k0
); s
->nblock_used
++;
559 /*-- compute the T^(-1) vector --*/
560 for (i
= 0; i
< nblock
; i
++) {
561 uc
= (UChar
)(s
->tt
[i
] & 0xff);
562 s
->tt
[s
->cftab
[uc
]] |= (i
<< 8);
566 s
->tPos
= s
->tt
[s
->origPtr
] >> 8;
568 if (s
->blockRandomised
) {
570 BZ_GET_FAST(s
->k0
); s
->nblock_used
++;
571 BZ_RAND_UPD_MASK
; s
->k0
^= BZ_RAND_MASK
;
573 BZ_GET_FAST(s
->k0
); s
->nblock_used
++;
584 GET_UCHAR(BZ_X_ENDHDR_2
, uc
);
585 if (uc
!= 0x72) RETURN(BZ_DATA_ERROR
);
586 GET_UCHAR(BZ_X_ENDHDR_3
, uc
);
587 if (uc
!= 0x45) RETURN(BZ_DATA_ERROR
);
588 GET_UCHAR(BZ_X_ENDHDR_4
, uc
);
589 if (uc
!= 0x38) RETURN(BZ_DATA_ERROR
);
590 GET_UCHAR(BZ_X_ENDHDR_5
, uc
);
591 if (uc
!= 0x50) RETURN(BZ_DATA_ERROR
);
592 GET_UCHAR(BZ_X_ENDHDR_6
, uc
);
593 if (uc
!= 0x90) RETURN(BZ_DATA_ERROR
);
595 s
->storedCombinedCRC
= 0;
596 GET_UCHAR(BZ_X_CCRC_1
, uc
);
597 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
598 GET_UCHAR(BZ_X_CCRC_2
, uc
);
599 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
600 GET_UCHAR(BZ_X_CCRC_3
, uc
);
601 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
602 GET_UCHAR(BZ_X_CCRC_4
, uc
);
603 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
605 s
->state
= BZ_X_IDLE
;
606 RETURN(BZ_STREAM_END
)
608 default: AssertH ( False
, 4001 );
611 AssertH ( False
, 4002 );
613 save_state_and_return
:
618 s
->save_alphaSize
= alphaSize
;
619 s
->save_nGroups
= nGroups
;
620 s
->save_nSelectors
= nSelectors
;
622 s
->save_groupNo
= groupNo
;
623 s
->save_groupPos
= groupPos
;
624 s
->save_nextSym
= nextSym
;
625 s
->save_nblockMAX
= nblockMAX
;
626 s
->save_nblock
= nblock
;
635 s
->save_gMinlen
= gMinlen
;
636 s
->save_gLimit
= gLimit
;
637 s
->save_gBase
= gBase
;
638 s
->save_gPerm
= gPerm
;
644 /*-------------------------------------------------------------*/
645 /*--- end decompress.c ---*/
646 /*-------------------------------------------------------------*/