drd/drd_pthread_intercepts: Add a workaround for what is probably a compiler bug
[valgrind.git] / VEX / switchback / test_bzip2.c
blob19fc8223a009ff015da4978347cd6482ce963edd
2 #define BZ_NO_STDIO
5 /*-------------------------------------------------------------*/
6 /*--- Private header file for the library. ---*/
7 /*--- bzlib_private.h ---*/
8 /*-------------------------------------------------------------*/
10 /*--
11 This file is a part of bzip2 and/or libbzip2, a program and
12 library for lossless, block-sorting data compression.
14 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
18 are met:
20 1. Redistributions of source code must retain the above copyright
21 notice, this list of conditions and the following disclaimer.
23 2. The origin of this software must not be misrepresented; you must
24 not claim that you wrote the original software. If you use this
25 software in a product, an acknowledgment in the product
26 documentation would be appreciated but is not required.
28 3. Altered source versions must be plainly marked as such, and must
29 not be misrepresented as being the original software.
31 4. The name of the author may not be used to endorse or promote
32 products derived from this software without specific prior written
33 permission.
35 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
36 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
37 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
39 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
41 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
42 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
43 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47 Julian Seward, Cambridge, UK.
48 jseward@bzip.org
49 bzip2/libbzip2 version 1.0 of 21 March 2000
51 This program is based on (at least) the work of:
52 Mike Burrows
53 David Wheeler
54 Peter Fenwick
55 Alistair Moffat
56 Radford Neal
57 Ian H. Witten
58 Robert Sedgewick
59 Jon L. Bentley
61 For more information on these sources, see the manual.
62 --*/
65 #ifndef _BZLIB_PRIVATE_H
66 #define _BZLIB_PRIVATE_H
68 #include <stdlib.h>
70 #ifndef BZ_NO_STDIO
71 #include <stdio.h>
72 #include <ctype.h>
73 #include <string.h>
74 #endif
77 /*-------------------------------------------------------------*/
78 /*--- Public header file for the library. ---*/
79 /*--- bzlib.h ---*/
80 /*-------------------------------------------------------------*/
82 /*--
83 This file is a part of bzip2 and/or libbzip2, a program and
84 library for lossless, block-sorting data compression.
86 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
88 Redistribution and use in source and binary forms, with or without
89 modification, are permitted provided that the following conditions
90 are met:
92 1. Redistributions of source code must retain the above copyright
93 notice, this list of conditions and the following disclaimer.
95 2. The origin of this software must not be misrepresented; you must
96 not claim that you wrote the original software. If you use this
97 software in a product, an acknowledgment in the product
98 documentation would be appreciated but is not required.
100 3. Altered source versions must be plainly marked as such, and must
101 not be misrepresented as being the original software.
103 4. The name of the author may not be used to endorse or promote
104 products derived from this software without specific prior written
105 permission.
107 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
108 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
109 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
110 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
111 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
112 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
113 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
114 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
115 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
116 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
117 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
119 Julian Seward, Cambridge, UK.
120 jseward@bzip.org
121 bzip2/libbzip2 version 1.0 of 21 March 2000
123 This program is based on (at least) the work of:
124 Mike Burrows
125 David Wheeler
126 Peter Fenwick
127 Alistair Moffat
128 Radford Neal
129 Ian H. Witten
130 Robert Sedgewick
131 Jon L. Bentley
133 For more information on these sources, see the manual.
134 --*/
137 #ifndef _BZLIB_H
138 #define _BZLIB_H
140 #ifdef __cplusplus
141 extern "C" {
142 #endif
144 #define BZ_RUN 0
145 #define BZ_FLUSH 1
146 #define BZ_FINISH 2
148 #define BZ_OK 0
149 #define BZ_RUN_OK 1
150 #define BZ_FLUSH_OK 2
151 #define BZ_FINISH_OK 3
152 #define BZ_STREAM_END 4
153 #define BZ_SEQUENCE_ERROR (-1)
154 #define BZ_PARAM_ERROR (-2)
155 #define BZ_MEM_ERROR (-3)
156 #define BZ_DATA_ERROR (-4)
157 #define BZ_DATA_ERROR_MAGIC (-5)
158 #define BZ_IO_ERROR (-6)
159 #define BZ_UNEXPECTED_EOF (-7)
160 #define BZ_OUTBUFF_FULL (-8)
161 #define BZ_CONFIG_ERROR (-9)
163 typedef
164 struct {
165 char *next_in;
166 unsigned int avail_in;
167 unsigned int total_in_lo32;
168 unsigned int total_in_hi32;
170 char *next_out;
171 unsigned int avail_out;
172 unsigned int total_out_lo32;
173 unsigned int total_out_hi32;
175 void *state;
177 void *(*bzalloc)(void *,int,int);
178 void (*bzfree)(void *,void *);
179 void *opaque;
181 bz_stream;
184 #ifndef BZ_IMPORT
185 #define BZ_EXPORT
186 #endif
188 #ifndef BZ_NO_STDIO
189 /* Need a definitition for FILE */
190 #include <stdio.h>
191 #endif
193 #ifdef _WIN32
194 # include <windows.h>
195 # ifdef small
196 /* windows.h define small to char */
197 # undef small
198 # endif
199 # ifdef BZ_EXPORT
200 # define BZ_API(func) WINAPI func
201 # define BZ_EXTERN extern
202 # else
203 /* import windows dll dynamically */
204 # define BZ_API(func) (WINAPI * func)
205 # define BZ_EXTERN
206 # endif
207 #else
208 # define BZ_API(func) func
209 # define BZ_EXTERN extern
210 #endif
213 /*-- Core (low-level) library functions --*/
215 BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
216 bz_stream* strm,
217 int blockSize100k,
218 int verbosity,
219 int workFactor
222 BZ_EXTERN int BZ_API(BZ2_bzCompress) (
223 bz_stream* strm,
224 int action
227 BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
228 bz_stream* strm
231 BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
232 bz_stream *strm,
233 int verbosity,
234 int small
237 BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
238 bz_stream* strm
241 BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
242 bz_stream *strm
247 /*-- High(er) level library functions --*/
249 #ifndef BZ_NO_STDIO
250 #define BZ_MAX_UNUSED 5000
252 typedef void BZFILE;
254 BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
255 int* bzerror,
256 FILE* f,
257 int verbosity,
258 int small,
259 void* unused,
260 int nUnused
263 BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
264 int* bzerror,
265 BZFILE* b
268 BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
269 int* bzerror,
270 BZFILE* b,
271 void** unused,
272 int* nUnused
275 BZ_EXTERN int BZ_API(BZ2_bzRead) (
276 int* bzerror,
277 BZFILE* b,
278 void* buf,
279 int len
282 BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
283 int* bzerror,
284 FILE* f,
285 int blockSize100k,
286 int verbosity,
287 int workFactor
290 BZ_EXTERN void BZ_API(BZ2_bzWrite) (
291 int* bzerror,
292 BZFILE* b,
293 void* buf,
294 int len
297 BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
298 int* bzerror,
299 BZFILE* b,
300 int abandon,
301 unsigned int* nbytes_in,
302 unsigned int* nbytes_out
305 BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
306 int* bzerror,
307 BZFILE* b,
308 int abandon,
309 unsigned int* nbytes_in_lo32,
310 unsigned int* nbytes_in_hi32,
311 unsigned int* nbytes_out_lo32,
312 unsigned int* nbytes_out_hi32
314 #endif
317 /*-- Utility functions --*/
319 BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
320 char* dest,
321 unsigned int* destLen,
322 char* source,
323 unsigned int sourceLen,
324 int blockSize100k,
325 int verbosity,
326 int workFactor
329 BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
330 char* dest,
331 unsigned int* destLen,
332 char* source,
333 unsigned int sourceLen,
334 int small,
335 int verbosity
339 /*--
340 Code contributed by Yoshioka Tsuneo
341 (QWF00133@niftyserve.or.jp/tsuneo-y@is.aist-nara.ac.jp),
342 to support better zlib compatibility.
343 This code is not _officially_ part of libbzip2 (yet);
344 I haven't tested it, documented it, or considered the
345 threading-safeness of it.
346 If this code breaks, please contact both Yoshioka and me.
347 --*/
349 BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
350 void
353 #ifndef BZ_NO_STDIO
354 BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
355 const char *path,
356 const char *mode
359 BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
360 int fd,
361 const char *mode
364 BZ_EXTERN int BZ_API(BZ2_bzread) (
365 BZFILE* b,
366 void* buf,
367 int len
370 BZ_EXTERN int BZ_API(BZ2_bzwrite) (
371 BZFILE* b,
372 void* buf,
373 int len
376 BZ_EXTERN int BZ_API(BZ2_bzflush) (
377 BZFILE* b
380 BZ_EXTERN void BZ_API(BZ2_bzclose) (
381 BZFILE* b
384 BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
385 BZFILE *b,
386 int *errnum
388 #endif
390 #ifdef __cplusplus
392 #endif
394 #endif
396 /*-------------------------------------------------------------*/
397 /*--- end bzlib.h ---*/
398 /*-------------------------------------------------------------*/
403 /*-- General stuff. --*/
405 #define BZ_VERSION "1.0.3, 17-Oct-2004"
407 typedef char Char;
408 typedef unsigned char Bool;
409 typedef unsigned char UChar;
410 typedef int Int32;
411 typedef unsigned int UInt32;
412 typedef short Int16;
413 typedef unsigned short UInt16;
415 #define True ((Bool)1)
416 #define False ((Bool)0)
418 #ifndef __GNUC__
419 #define __inline__ /* */
420 #endif
422 #ifndef BZ_NO_STDIO
423 extern void BZ2_bz__AssertH__fail ( int errcode );
424 #define AssertH(cond,errcode) \
425 { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
426 #if BZ_DEBUG
427 #define AssertD(cond,msg) \
428 { if (!(cond)) { \
429 fprintf ( stderr, \
430 "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\
431 exit(1); \
433 #else
434 #define AssertD(cond,msg) /* */
435 #endif
436 #define VPrintf0(zf) \
437 fprintf(stderr,zf)
438 #define VPrintf1(zf,za1) \
439 fprintf(stderr,zf,za1)
440 #define VPrintf2(zf,za1,za2) \
441 fprintf(stderr,zf,za1,za2)
442 #define VPrintf3(zf,za1,za2,za3) \
443 fprintf(stderr,zf,za1,za2,za3)
444 #define VPrintf4(zf,za1,za2,za3,za4) \
445 fprintf(stderr,zf,za1,za2,za3,za4)
446 #define VPrintf5(zf,za1,za2,za3,za4,za5) \
447 fprintf(stderr,zf,za1,za2,za3,za4,za5)
448 #else
449 extern void bz_internal_error ( int errcode );
450 #define AssertH(cond,errcode) \
451 { if (!(cond)) bz_internal_error ( errcode ); }
452 #define AssertD(cond,msg) /* */
453 #define VPrintf0(zf) \
454 vexxx_printf(zf)
455 #define VPrintf1(zf,za1) \
456 vexxx_printf(zf,za1)
457 #define VPrintf2(zf,za1,za2) \
458 vexxx_printf(zf,za1,za2)
459 #define VPrintf3(zf,za1,za2,za3) \
460 vexxx_printf(zf,za1,za2,za3)
461 #define VPrintf4(zf,za1,za2,za3,za4) \
462 vexxx_printf(zf,za1,za2,za3,za4)
463 #define VPrintf5(zf,za1,za2,za3,za4,za5) \
464 vexxx_printf(zf,za1,za2,za3,za4,za5)
465 #endif
468 #define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1)
469 #define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp))
472 /*-- Header bytes. --*/
474 #define BZ_HDR_B 0x42 /* 'B' */
475 #define BZ_HDR_Z 0x5a /* 'Z' */
476 #define BZ_HDR_h 0x68 /* 'h' */
477 #define BZ_HDR_0 0x30 /* '0' */
479 /*-- Constants for the back end. --*/
481 #define BZ_MAX_ALPHA_SIZE 258
482 #define BZ_MAX_CODE_LEN 23
484 #define BZ_RUNA 0
485 #define BZ_RUNB 1
487 #define BZ_N_GROUPS 6
488 #define BZ_G_SIZE 50
489 #define BZ_N_ITERS 4
491 #define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
495 /*-- Stuff for randomising repetitive blocks. --*/
497 extern Int32 BZ2_rNums[512];
499 #define BZ_RAND_DECLS \
500 Int32 rNToGo; \
501 Int32 rTPos \
503 #define BZ_RAND_INIT_MASK \
504 s->rNToGo = 0; \
505 s->rTPos = 0 \
507 #define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0)
509 #define BZ_RAND_UPD_MASK \
510 if (s->rNToGo == 0) { \
511 s->rNToGo = BZ2_rNums[s->rTPos]; \
512 s->rTPos++; \
513 if (s->rTPos == 512) s->rTPos = 0; \
515 s->rNToGo--;
519 /*-- Stuff for doing CRCs. --*/
521 extern UInt32 BZ2_crc32Table[256];
523 #define BZ_INITIALISE_CRC(crcVar) \
525 crcVar = 0xffffffffL; \
528 #define BZ_FINALISE_CRC(crcVar) \
530 crcVar = ~(crcVar); \
533 #define BZ_UPDATE_CRC(crcVar,cha) \
535 crcVar = (crcVar << 8) ^ \
536 BZ2_crc32Table[(crcVar >> 24) ^ \
537 ((UChar)cha)]; \
542 /*-- States and modes for compression. --*/
544 #define BZ_M_IDLE 1
545 #define BZ_M_RUNNING 2
546 #define BZ_M_FLUSHING 3
547 #define BZ_M_FINISHING 4
549 #define BZ_S_OUTPUT 1
550 #define BZ_S_INPUT 2
552 #define BZ_N_RADIX 2
553 #define BZ_N_QSORT 12
554 #define BZ_N_SHELL 18
555 #define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
560 /*-- Structure holding all the compression-side stuff. --*/
562 typedef
563 struct {
564 /* pointer back to the struct bz_stream */
565 bz_stream* strm;
567 /* mode this stream is in, and whether inputting */
568 /* or outputting data */
569 Int32 mode;
570 Int32 state;
572 /* remembers avail_in when flush/finish requested */
573 UInt32 avail_in_expect;
575 /* for doing the block sorting */
576 UInt32* arr1;
577 UInt32* arr2;
578 UInt32* ftab;
579 Int32 origPtr;
581 /* aliases for arr1 and arr2 */
582 UInt32* ptr;
583 UChar* block;
584 UInt16* mtfv;
585 UChar* zbits;
587 /* for deciding when to use the fallback sorting algorithm */
588 Int32 workFactor;
590 /* run-length-encoding of the input */
591 UInt32 state_in_ch;
592 Int32 state_in_len;
593 BZ_RAND_DECLS;
595 /* input and output limits and current posns */
596 Int32 nblock;
597 Int32 nblockMAX;
598 Int32 numZ;
599 Int32 state_out_pos;
601 /* map of bytes used in block */
602 Int32 nInUse;
603 Bool inUse[256];
604 UChar unseqToSeq[256];
606 /* the buffer for bit stream creation */
607 UInt32 bsBuff;
608 Int32 bsLive;
610 /* block and combined CRCs */
611 UInt32 blockCRC;
612 UInt32 combinedCRC;
614 /* misc administratium */
615 Int32 verbosity;
616 Int32 blockNo;
617 Int32 blockSize100k;
619 /* stuff for coding the MTF values */
620 Int32 nMTF;
621 Int32 mtfFreq [BZ_MAX_ALPHA_SIZE];
622 UChar selector [BZ_MAX_SELECTORS];
623 UChar selectorMtf[BZ_MAX_SELECTORS];
625 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
626 Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
627 Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
628 /* second dimension: only 3 needed; 4 makes index calculations faster */
629 UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4];
632 EState;
636 /*-- externs for compression. --*/
638 extern void
639 BZ2_blockSort ( EState* );
641 extern void
642 BZ2_compressBlock ( EState*, Bool );
644 extern void
645 BZ2_bsInitWrite ( EState* );
647 extern void
648 BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
650 extern void
651 BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
655 /*-- states for decompression. --*/
657 #define BZ_X_IDLE 1
658 #define BZ_X_OUTPUT 2
660 #define BZ_X_MAGIC_1 10
661 #define BZ_X_MAGIC_2 11
662 #define BZ_X_MAGIC_3 12
663 #define BZ_X_MAGIC_4 13
664 #define BZ_X_BLKHDR_1 14
665 #define BZ_X_BLKHDR_2 15
666 #define BZ_X_BLKHDR_3 16
667 #define BZ_X_BLKHDR_4 17
668 #define BZ_X_BLKHDR_5 18
669 #define BZ_X_BLKHDR_6 19
670 #define BZ_X_BCRC_1 20
671 #define BZ_X_BCRC_2 21
672 #define BZ_X_BCRC_3 22
673 #define BZ_X_BCRC_4 23
674 #define BZ_X_RANDBIT 24
675 #define BZ_X_ORIGPTR_1 25
676 #define BZ_X_ORIGPTR_2 26
677 #define BZ_X_ORIGPTR_3 27
678 #define BZ_X_MAPPING_1 28
679 #define BZ_X_MAPPING_2 29
680 #define BZ_X_SELECTOR_1 30
681 #define BZ_X_SELECTOR_2 31
682 #define BZ_X_SELECTOR_3 32
683 #define BZ_X_CODING_1 33
684 #define BZ_X_CODING_2 34
685 #define BZ_X_CODING_3 35
686 #define BZ_X_MTF_1 36
687 #define BZ_X_MTF_2 37
688 #define BZ_X_MTF_3 38
689 #define BZ_X_MTF_4 39
690 #define BZ_X_MTF_5 40
691 #define BZ_X_MTF_6 41
692 #define BZ_X_ENDHDR_2 42
693 #define BZ_X_ENDHDR_3 43
694 #define BZ_X_ENDHDR_4 44
695 #define BZ_X_ENDHDR_5 45
696 #define BZ_X_ENDHDR_6 46
697 #define BZ_X_CCRC_1 47
698 #define BZ_X_CCRC_2 48
699 #define BZ_X_CCRC_3 49
700 #define BZ_X_CCRC_4 50
704 /*-- Constants for the fast MTF decoder. --*/
706 #define MTFA_SIZE 4096
707 #define MTFL_SIZE 16
711 /*-- Structure holding all the decompression-side stuff. --*/
713 typedef
714 struct {
715 /* pointer back to the struct bz_stream */
716 bz_stream* strm;
718 /* state indicator for this stream */
719 Int32 state;
721 /* for doing the final run-length decoding */
722 UChar state_out_ch;
723 Int32 state_out_len;
724 Bool blockRandomised;
725 BZ_RAND_DECLS;
727 /* the buffer for bit stream reading */
728 UInt32 bsBuff;
729 Int32 bsLive;
731 /* misc administratium */
732 Int32 blockSize100k;
733 Bool smallDecompress;
734 Int32 currBlockNo;
735 Int32 verbosity;
737 /* for undoing the Burrows-Wheeler transform */
738 Int32 origPtr;
739 UInt32 tPos;
740 Int32 k0;
741 Int32 unzftab[256];
742 Int32 nblock_used;
743 Int32 cftab[257];
744 Int32 cftabCopy[257];
746 /* for undoing the Burrows-Wheeler transform (FAST) */
747 UInt32 *tt;
749 /* for undoing the Burrows-Wheeler transform (SMALL) */
750 UInt16 *ll16;
751 UChar *ll4;
753 /* stored and calculated CRCs */
754 UInt32 storedBlockCRC;
755 UInt32 storedCombinedCRC;
756 UInt32 calculatedBlockCRC;
757 UInt32 calculatedCombinedCRC;
759 /* map of bytes used in block */
760 Int32 nInUse;
761 Bool inUse[256];
762 Bool inUse16[16];
763 UChar seqToUnseq[256];
765 /* for decoding the MTF values */
766 UChar mtfa [MTFA_SIZE];
767 Int32 mtfbase[256 / MTFL_SIZE];
768 UChar selector [BZ_MAX_SELECTORS];
769 UChar selectorMtf[BZ_MAX_SELECTORS];
770 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
772 Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
773 Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
774 Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
775 Int32 minLens[BZ_N_GROUPS];
777 /* save area for scalars in the main decompress code */
778 Int32 save_i;
779 Int32 save_j;
780 Int32 save_t;
781 Int32 save_alphaSize;
782 Int32 save_nGroups;
783 Int32 save_nSelectors;
784 Int32 save_EOB;
785 Int32 save_groupNo;
786 Int32 save_groupPos;
787 Int32 save_nextSym;
788 Int32 save_nblockMAX;
789 Int32 save_nblock;
790 Int32 save_es;
791 Int32 save_N;
792 Int32 save_curr;
793 Int32 save_zt;
794 Int32 save_zn;
795 Int32 save_zvec;
796 Int32 save_zj;
797 Int32 save_gSel;
798 Int32 save_gMinlen;
799 Int32* save_gLimit;
800 Int32* save_gBase;
801 Int32* save_gPerm;
804 DState;
808 /*-- Macros for decompression. --*/
810 #define BZ_GET_FAST(cccc) \
811 s->tPos = s->tt[s->tPos]; \
812 cccc = (UChar)(s->tPos & 0xff); \
813 s->tPos >>= 8;
815 #define BZ_GET_FAST_C(cccc) \
816 c_tPos = c_tt[c_tPos]; \
817 cccc = (UChar)(c_tPos & 0xff); \
818 c_tPos >>= 8;
820 #define SET_LL4(i,n) \
821 { if (((i) & 0x1) == 0) \
822 s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \
823 s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \
826 #define GET_LL4(i) \
827 ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)
829 #define SET_LL(i,n) \
830 { s->ll16[i] = (UInt16)(n & 0x0000ffff); \
831 SET_LL4(i, n >> 16); \
834 #define GET_LL(i) \
835 (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
837 #define BZ_GET_SMALL(cccc) \
838 cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \
839 s->tPos = GET_LL(s->tPos);
842 /*-- externs for decompression. --*/
844 extern Int32
845 BZ2_indexIntoF ( Int32, Int32* );
847 extern Int32
848 BZ2_decompress ( DState* );
850 extern void
851 BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
852 Int32, Int32, Int32 );
855 #endif
858 /*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/
860 #ifdef BZ_NO_STDIO
861 #ifndef NULL
862 #define NULL 0
863 #endif
864 #endif
867 /*-------------------------------------------------------------*/
868 /*--- end bzlib_private.h ---*/
869 /*-------------------------------------------------------------*/
872 /* Something which has the same size as void* on the host. That is,
873 it is 32 bits on a 32-bit host and 64 bits on a 64-bit host, and so
874 it can safely be coerced to and from a pointer type on the host
875 machine. */
876 typedef unsigned long HWord;
877 typedef char HChar;
878 typedef signed int Int;
879 typedef unsigned int UInt;
881 typedef signed long long int Long;
882 typedef unsigned long long int ULong;
885 /////////////////////////////////////////////////////////////////////
886 /////////////////////////////////////////////////////////////////////
888 //#include "/home/sewardj/VEX/trunk/pub/libvex_basictypes.h"
890 static HWord (*serviceFn)(HWord,HWord) = 0;
893 static char* my_strcpy ( char* dest, const char* src )
895 char* dest_orig = dest;
896 while (*src) *dest++ = *src++;
897 *dest = 0;
898 return dest_orig;
901 static void* my_memcpy ( void *dest, const void *src, int sz )
903 const char *s = (const char *)src;
904 char *d = (char *)dest;
906 while (sz--)
907 *d++ = *s++;
909 return dest;
912 static void* my_memmove( void *dst, const void *src, unsigned int len )
914 register char *d;
915 register char *s;
916 if ( dst > src ) {
917 d = (char *)dst + len - 1;
918 s = (char *)src + len - 1;
919 while ( len >= 4 ) {
920 *d-- = *s--;
921 *d-- = *s--;
922 *d-- = *s--;
923 *d-- = *s--;
924 len -= 4;
926 while ( len-- ) {
927 *d-- = *s--;
929 } else if ( dst < src ) {
930 d = (char *)dst;
931 s = (char *)src;
932 while ( len >= 4 ) {
933 *d++ = *s++;
934 *d++ = *s++;
935 *d++ = *s++;
936 *d++ = *s++;
937 len -= 4;
939 while ( len-- ) {
940 *d++ = *s++;
943 return dst;
946 char* my_strcat ( char* dest, const char* src )
948 char* dest_orig = dest;
949 while (*dest) dest++;
950 while (*src) *dest++ = *src++;
951 *dest = 0;
952 return dest_orig;
956 /////////////////////////////////////////////////////////////////////
958 static void vexxx_log_bytes ( char* p, int n )
960 int i;
961 for (i = 0; i < n; i++)
962 (*serviceFn)( 1, (int)p[i] );
965 /*---------------------------------------------------------*/
966 /*--- vexxx_printf ---*/
967 /*---------------------------------------------------------*/
969 /* This should be the only <...> include in the entire VEX library.
970 New code for vexxx_util.c should go above this point. */
971 #include <stdarg.h>
973 static HChar vexxx_toupper ( HChar c )
975 if (c >= 'a' && c <= 'z')
976 return c + ('A' - 'a');
977 else
978 return c;
981 static Int vexxx_strlen ( const HChar* str )
983 Int i = 0;
984 while (str[i] != 0) i++;
985 return i;
988 Bool vexxx_streq ( const HChar* s1, const HChar* s2 )
990 while (True) {
991 if (*s1 == 0 && *s2 == 0)
992 return True;
993 if (*s1 != *s2)
994 return False;
995 s1++;
996 s2++;
1000 /* Some flags. */
1001 #define VG_MSG_SIGNED 1 /* The value is signed. */
1002 #define VG_MSG_ZJUSTIFY 2 /* Must justify with '0'. */
1003 #define VG_MSG_LJUSTIFY 4 /* Must justify on the left. */
1004 #define VG_MSG_PAREN 8 /* Parenthesize if present (for %y) */
1005 #define VG_MSG_COMMA 16 /* Add commas to numbers (for %d, %u) */
1007 /* Copy a string into the buffer. */
1008 static UInt
1009 myvprintf_str ( void(*send)(HChar), Int flags, Int width, HChar* str,
1010 Bool capitalise )
1012 # define MAYBE_TOUPPER(ch) (capitalise ? vexxx_toupper(ch) : (ch))
1013 UInt ret = 0;
1014 Int i, extra;
1015 Int len = vexxx_strlen(str);
1017 if (width == 0) {
1018 ret += len;
1019 for (i = 0; i < len; i++)
1020 send(MAYBE_TOUPPER(str[i]));
1021 return ret;
1024 if (len > width) {
1025 ret += width;
1026 for (i = 0; i < width; i++)
1027 send(MAYBE_TOUPPER(str[i]));
1028 return ret;
1031 extra = width - len;
1032 if (flags & VG_MSG_LJUSTIFY) {
1033 ret += extra;
1034 for (i = 0; i < extra; i++)
1035 send(' ');
1037 ret += len;
1038 for (i = 0; i < len; i++)
1039 send(MAYBE_TOUPPER(str[i]));
1040 if (!(flags & VG_MSG_LJUSTIFY)) {
1041 ret += extra;
1042 for (i = 0; i < extra; i++)
1043 send(' ');
1046 # undef MAYBE_TOUPPER
1048 return ret;
1051 /* Write P into the buffer according to these args:
1052 * If SIGN is true, p is a signed.
1053 * BASE is the base.
1054 * If WITH_ZERO is true, '0' must be added.
1055 * WIDTH is the width of the field.
1057 static UInt
1058 myvprintf_int64 ( void(*send)(HChar), Int flags, Int base, Int width, ULong pL)
1060 HChar buf[40];
1061 Int ind = 0;
1062 Int i, nc = 0;
1063 Bool neg = False;
1064 HChar *digits = "0123456789ABCDEF";
1065 UInt ret = 0;
1066 UInt p = (UInt)pL;
1068 if (base < 2 || base > 16)
1069 return ret;
1071 if ((flags & VG_MSG_SIGNED) && (Int)p < 0) {
1072 p = - (Int)p;
1073 neg = True;
1076 if (p == 0)
1077 buf[ind++] = '0';
1078 else {
1079 while (p > 0) {
1080 if ((flags & VG_MSG_COMMA) && 10 == base &&
1081 0 == (ind-nc) % 3 && 0 != ind)
1083 buf[ind++] = ',';
1084 nc++;
1086 buf[ind++] = digits[p % base];
1087 p /= base;
1091 if (neg)
1092 buf[ind++] = '-';
1094 if (width > 0 && !(flags & VG_MSG_LJUSTIFY)) {
1095 for(; ind < width; ind++) {
1096 //vassert(ind < 39);
1097 buf[ind] = ((flags & VG_MSG_ZJUSTIFY) ? '0': ' ');
1101 /* Reverse copy to buffer. */
1102 ret += ind;
1103 for (i = ind -1; i >= 0; i--) {
1104 send(buf[i]);
1106 if (width > 0 && (flags & VG_MSG_LJUSTIFY)) {
1107 for(; ind < width; ind++) {
1108 ret++;
1109 send(' '); // Never pad with zeroes on RHS -- changes the value!
1112 return ret;
1116 /* A simple vprintf(). */
1117 static
1118 UInt vprintf_wrk ( void(*send)(HChar), const HChar *format, va_list vargs )
1120 UInt ret = 0;
1121 int i;
1122 int flags;
1123 int width;
1124 Bool is_long;
1126 /* We assume that vargs has already been initialised by the
1127 caller, using va_start, and that the caller will similarly
1128 clean up with va_end.
1131 for (i = 0; format[i] != 0; i++) {
1132 if (format[i] != '%') {
1133 send(format[i]);
1134 ret++;
1135 continue;
1137 i++;
1138 /* A '%' has been found. Ignore a trailing %. */
1139 if (format[i] == 0)
1140 break;
1141 if (format[i] == '%') {
1142 /* `%%' is replaced by `%'. */
1143 send('%');
1144 ret++;
1145 continue;
1147 flags = 0;
1148 is_long = False;
1149 width = 0; /* length of the field. */
1150 if (format[i] == '(') {
1151 flags |= VG_MSG_PAREN;
1152 i++;
1154 /* If ',' follows '%', commas will be inserted. */
1155 if (format[i] == ',') {
1156 flags |= VG_MSG_COMMA;
1157 i++;
1159 /* If '-' follows '%', justify on the left. */
1160 if (format[i] == '-') {
1161 flags |= VG_MSG_LJUSTIFY;
1162 i++;
1164 /* If '0' follows '%', pads will be inserted. */
1165 if (format[i] == '0') {
1166 flags |= VG_MSG_ZJUSTIFY;
1167 i++;
1169 /* Compute the field length. */
1170 while (format[i] >= '0' && format[i] <= '9') {
1171 width *= 10;
1172 width += format[i++] - '0';
1174 while (format[i] == 'l') {
1175 i++;
1176 is_long = True;
1179 switch (format[i]) {
1180 case 'd': /* %d */
1181 flags |= VG_MSG_SIGNED;
1182 if (is_long)
1183 ret += myvprintf_int64(send, flags, 10, width,
1184 (ULong)(va_arg (vargs, Long)));
1185 else
1186 ret += myvprintf_int64(send, flags, 10, width,
1187 (ULong)(va_arg (vargs, Int)));
1188 break;
1189 case 'u': /* %u */
1190 if (is_long)
1191 ret += myvprintf_int64(send, flags, 10, width,
1192 (ULong)(va_arg (vargs, ULong)));
1193 else
1194 ret += myvprintf_int64(send, flags, 10, width,
1195 (ULong)(va_arg (vargs, UInt)));
1196 break;
1197 case 'p': /* %p */
1198 ret += 2;
1199 send('0');
1200 send('x');
1201 ret += myvprintf_int64(send, flags, 16, width,
1202 (ULong)((HWord)va_arg (vargs, void *)));
1203 break;
1204 case 'x': /* %x */
1205 if (is_long)
1206 ret += myvprintf_int64(send, flags, 16, width,
1207 (ULong)(va_arg (vargs, ULong)));
1208 else
1209 ret += myvprintf_int64(send, flags, 16, width,
1210 (ULong)(va_arg (vargs, UInt)));
1211 break;
1212 case 'c': /* %c */
1213 ret++;
1214 send((va_arg (vargs, int)));
1215 break;
1216 case 's': case 'S': { /* %s */
1217 char *str = va_arg (vargs, char *);
1218 if (str == (char*) 0) str = "(null)";
1219 ret += myvprintf_str(send, flags, width, str,
1220 (format[i]=='S'));
1221 break;
1223 # if 0
1224 case 'y': { /* %y - print symbol */
1225 Addr a = va_arg(vargs, Addr);
1227 HChar *name;
1228 if (VG_(get_fnname_w_offset)(a, &name)) {
1229 HChar buf[1 + VG_strlen(name) + 1 + 1];
1230 if (flags & VG_MSG_PAREN) {
1231 VG_(sprintf)(str, "(%s)", name):
1232 } else {
1233 VG_(sprintf)(str, "%s", name):
1235 ret += myvprintf_str(send, flags, width, buf, 0);
1237 break;
1239 # endif
1240 default:
1241 break;
1244 return ret;
1248 /* A general replacement for printf(). Note that only low-level
1249 debugging info should be sent via here. The official route is to
1250 to use vg_message(). This interface is deprecated.
1252 static HChar myprintf_buf[1000];
1253 static Int n_myprintf_buf;
1255 static void add_to_myprintf_buf ( HChar c )
1257 if (c == '\n' || n_myprintf_buf >= 1000-10 /*paranoia*/ ) {
1258 (*vexxx_log_bytes)( myprintf_buf, vexxx_strlen(myprintf_buf) );
1259 n_myprintf_buf = 0;
1260 myprintf_buf[n_myprintf_buf] = 0;
1262 myprintf_buf[n_myprintf_buf++] = c;
1263 myprintf_buf[n_myprintf_buf] = 0;
1266 static UInt vexxx_printf ( const char *format, ... )
1268 UInt ret;
1269 va_list vargs;
1270 va_start(vargs,format);
1272 n_myprintf_buf = 0;
1273 myprintf_buf[n_myprintf_buf] = 0;
1274 ret = vprintf_wrk ( add_to_myprintf_buf, format, vargs );
1276 if (n_myprintf_buf > 0) {
1277 (*vexxx_log_bytes)( myprintf_buf, n_myprintf_buf );
1280 va_end(vargs);
1282 return ret;
1285 /*---------------------------------------------------------------*/
1286 /*--- end vexxx_util.c ---*/
1287 /*---------------------------------------------------------------*/
1290 /////////////////////////////////////////////////////////////////////
1291 /////////////////////////////////////////////////////////////////////
1292 /////////////////////////////////////////////////////////////////////
1293 /////////////////////////////////////////////////////////////////////
1296 /*-------------------------------------------------------------*/
1297 /*--- Decompression machinery ---*/
1298 /*--- decompress.c ---*/
1299 /*-------------------------------------------------------------*/
1301 /*--
1302 This file is a part of bzip2 and/or libbzip2, a program and
1303 library for lossless, block-sorting data compression.
1305 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
1307 Redistribution and use in source and binary forms, with or without
1308 modification, are permitted provided that the following conditions
1309 are met:
1311 1. Redistributions of source code must retain the above copyright
1312 notice, this list of conditions and the following disclaimer.
1314 2. The origin of this software must not be misrepresented; you must
1315 not claim that you wrote the original software. If you use this
1316 software in a product, an acknowledgment in the product
1317 documentation would be appreciated but is not required.
1319 3. Altered source versions must be plainly marked as such, and must
1320 not be misrepresented as being the original software.
1322 4. The name of the author may not be used to endorse or promote
1323 products derived from this software without specific prior written
1324 permission.
1326 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
1327 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1328 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1329 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
1330 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1331 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
1332 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1333 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1334 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
1335 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1336 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1338 Julian Seward, Cambridge, UK.
1339 jseward@bzip.org
1340 bzip2/libbzip2 version 1.0 of 21 March 2000
1342 This program is based on (at least) the work of:
1343 Mike Burrows
1344 David Wheeler
1345 Peter Fenwick
1346 Alistair Moffat
1347 Radford Neal
1348 Ian H. Witten
1349 Robert Sedgewick
1350 Jon L. Bentley
1352 For more information on these sources, see the manual.
1353 --*/
1358 /*---------------------------------------------------*/
1359 static
1360 void makeMaps_d ( DState* s )
1362 Int32 i;
1363 s->nInUse = 0;
1364 for (i = 0; i < 256; i++)
1365 if (s->inUse[i]) {
1366 s->seqToUnseq[s->nInUse] = i;
1367 s->nInUse++;
1372 /*---------------------------------------------------*/
1373 #define RETURN(rrr) \
1374 { retVal = rrr; goto save_state_and_return; };
1376 #define GET_BITS(lll,vvv,nnn) \
1377 case lll: s->state = lll; \
1378 while (True) { \
1379 if (s->bsLive >= nnn) { \
1380 UInt32 v; \
1381 v = (s->bsBuff >> \
1382 (s->bsLive-nnn)) & ((1 << nnn)-1); \
1383 s->bsLive -= nnn; \
1384 vvv = v; \
1385 break; \
1387 if (s->strm->avail_in == 0) RETURN(BZ_OK); \
1388 s->bsBuff \
1389 = (s->bsBuff << 8) | \
1390 ((UInt32) \
1391 (*((UChar*)(s->strm->next_in)))); \
1392 s->bsLive += 8; \
1393 s->strm->next_in++; \
1394 s->strm->avail_in--; \
1395 s->strm->total_in_lo32++; \
1396 if (s->strm->total_in_lo32 == 0) \
1397 s->strm->total_in_hi32++; \
1400 #define GET_UCHAR(lll,uuu) \
1401 GET_BITS(lll,uuu,8)
1403 #define GET_BIT(lll,uuu) \
1404 GET_BITS(lll,uuu,1)
1406 /*---------------------------------------------------*/
1407 #define GET_MTF_VAL(label1,label2,lval) \
1409 if (groupPos == 0) { \
1410 groupNo++; \
1411 if (groupNo >= nSelectors) \
1412 RETURN(BZ_DATA_ERROR); \
1413 groupPos = BZ_G_SIZE; \
1414 gSel = s->selector[groupNo]; \
1415 gMinlen = s->minLens[gSel]; \
1416 gLimit = &(s->limit[gSel][0]); \
1417 gPerm = &(s->perm[gSel][0]); \
1418 gBase = &(s->base[gSel][0]); \
1420 groupPos--; \
1421 zn = gMinlen; \
1422 GET_BITS(label1, zvec, zn); \
1423 while (1) { \
1424 if (zn > 20 /* the longest code */) \
1425 RETURN(BZ_DATA_ERROR); \
1426 if (zvec <= gLimit[zn]) break; \
1427 zn++; \
1428 GET_BIT(label2, zj); \
1429 zvec = (zvec << 1) | zj; \
1430 }; \
1431 if (zvec - gBase[zn] < 0 \
1432 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
1433 RETURN(BZ_DATA_ERROR); \
1434 lval = gPerm[zvec - gBase[zn]]; \
1439 /*---------------------------------------------------*/
1440 __inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
1442 Int32 nb, na, mid;
1443 nb = 0;
1444 na = 256;
1445 do {
1446 mid = (nb + na) >> 1;
1447 if (indx >= cftab[mid]) nb = mid; else na = mid;
1449 while (na - nb != 1);
1450 return nb;
1453 /*---------------------------------------------------*/
1454 Int32 BZ2_decompress ( DState* s )
1456 UChar uc;
1457 Int32 retVal;
1458 Int32 minLen, maxLen;
1459 bz_stream* strm = s->strm;
1461 /* stuff that needs to be saved/restored */
1462 Int32 i;
1463 Int32 j;
1464 Int32 t;
1465 Int32 alphaSize;
1466 Int32 nGroups;
1467 Int32 nSelectors;
1468 Int32 EOB;
1469 Int32 groupNo;
1470 Int32 groupPos;
1471 Int32 nextSym;
1472 Int32 nblockMAX;
1473 Int32 nblock;
1474 Int32 es;
1475 Int32 N;
1476 Int32 curr;
1477 Int32 zt;
1478 Int32 zn;
1479 Int32 zvec;
1480 Int32 zj;
1481 Int32 gSel;
1482 Int32 gMinlen;
1483 Int32* gLimit;
1484 Int32* gBase;
1485 Int32* gPerm;
1487 if (s->state == BZ_X_MAGIC_1) {
1488 /*initialise the save area*/
1489 s->save_i = 0;
1490 s->save_j = 0;
1491 s->save_t = 0;
1492 s->save_alphaSize = 0;
1493 s->save_nGroups = 0;
1494 s->save_nSelectors = 0;
1495 s->save_EOB = 0;
1496 s->save_groupNo = 0;
1497 s->save_groupPos = 0;
1498 s->save_nextSym = 0;
1499 s->save_nblockMAX = 0;
1500 s->save_nblock = 0;
1501 s->save_es = 0;
1502 s->save_N = 0;
1503 s->save_curr = 0;
1504 s->save_zt = 0;
1505 s->save_zn = 0;
1506 s->save_zvec = 0;
1507 s->save_zj = 0;
1508 s->save_gSel = 0;
1509 s->save_gMinlen = 0;
1510 s->save_gLimit = NULL;
1511 s->save_gBase = NULL;
1512 s->save_gPerm = NULL;
1515 /*restore from the save area*/
1516 i = s->save_i;
1517 j = s->save_j;
1518 t = s->save_t;
1519 alphaSize = s->save_alphaSize;
1520 nGroups = s->save_nGroups;
1521 nSelectors = s->save_nSelectors;
1522 EOB = s->save_EOB;
1523 groupNo = s->save_groupNo;
1524 groupPos = s->save_groupPos;
1525 nextSym = s->save_nextSym;
1526 nblockMAX = s->save_nblockMAX;
1527 nblock = s->save_nblock;
1528 es = s->save_es;
1529 N = s->save_N;
1530 curr = s->save_curr;
1531 zt = s->save_zt;
1532 zn = s->save_zn;
1533 zvec = s->save_zvec;
1534 zj = s->save_zj;
1535 gSel = s->save_gSel;
1536 gMinlen = s->save_gMinlen;
1537 gLimit = s->save_gLimit;
1538 gBase = s->save_gBase;
1539 gPerm = s->save_gPerm;
1541 retVal = BZ_OK;
1543 switch (s->state) {
1545 GET_UCHAR(BZ_X_MAGIC_1, uc);
1546 if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
1548 GET_UCHAR(BZ_X_MAGIC_2, uc);
1549 if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
1551 GET_UCHAR(BZ_X_MAGIC_3, uc)
1552 if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
1554 GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
1555 if (s->blockSize100k < (BZ_HDR_0 + 1) ||
1556 s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
1557 s->blockSize100k -= BZ_HDR_0;
1559 if (s->smallDecompress) {
1560 s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
1561 s->ll4 = BZALLOC(
1562 ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
1564 if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
1565 } else {
1566 s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
1567 if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
1570 GET_UCHAR(BZ_X_BLKHDR_1, uc);
1572 if (uc == 0x17) goto endhdr_2;
1573 if (uc != 0x31) RETURN(BZ_DATA_ERROR);
1574 GET_UCHAR(BZ_X_BLKHDR_2, uc);
1575 if (uc != 0x41) RETURN(BZ_DATA_ERROR);
1576 GET_UCHAR(BZ_X_BLKHDR_3, uc);
1577 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
1578 GET_UCHAR(BZ_X_BLKHDR_4, uc);
1579 if (uc != 0x26) RETURN(BZ_DATA_ERROR);
1580 GET_UCHAR(BZ_X_BLKHDR_5, uc);
1581 if (uc != 0x53) RETURN(BZ_DATA_ERROR);
1582 GET_UCHAR(BZ_X_BLKHDR_6, uc);
1583 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
1585 s->currBlockNo++;
1586 if (s->verbosity >= 2)
1587 VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
1589 s->storedBlockCRC = 0;
1590 GET_UCHAR(BZ_X_BCRC_1, uc);
1591 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
1592 GET_UCHAR(BZ_X_BCRC_2, uc);
1593 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
1594 GET_UCHAR(BZ_X_BCRC_3, uc);
1595 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
1596 GET_UCHAR(BZ_X_BCRC_4, uc);
1597 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
1599 GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
1601 s->origPtr = 0;
1602 GET_UCHAR(BZ_X_ORIGPTR_1, uc);
1603 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
1604 GET_UCHAR(BZ_X_ORIGPTR_2, uc);
1605 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
1606 GET_UCHAR(BZ_X_ORIGPTR_3, uc);
1607 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
1609 if (s->origPtr < 0)
1610 RETURN(BZ_DATA_ERROR);
1611 if (s->origPtr > 10 + 100000*s->blockSize100k)
1612 RETURN(BZ_DATA_ERROR);
1614 /*--- Receive the mapping table ---*/
1615 for (i = 0; i < 16; i++) {
1616 GET_BIT(BZ_X_MAPPING_1, uc);
1617 if (uc == 1)
1618 s->inUse16[i] = True; else
1619 s->inUse16[i] = False;
1622 for (i = 0; i < 256; i++) s->inUse[i] = False;
1624 for (i = 0; i < 16; i++)
1625 if (s->inUse16[i])
1626 for (j = 0; j < 16; j++) {
1627 GET_BIT(BZ_X_MAPPING_2, uc);
1628 if (uc == 1) s->inUse[i * 16 + j] = True;
1630 makeMaps_d ( s );
1631 if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
1632 alphaSize = s->nInUse+2;
1634 /*--- Now the selectors ---*/
1635 GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
1636 if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
1637 GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
1638 if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
1639 for (i = 0; i < nSelectors; i++) {
1640 j = 0;
1641 while (True) {
1642 GET_BIT(BZ_X_SELECTOR_3, uc);
1643 if (uc == 0) break;
1644 j++;
1645 if (j >= nGroups) RETURN(BZ_DATA_ERROR);
1647 s->selectorMtf[i] = j;
1650 /*--- Undo the MTF values for the selectors. ---*/
1652 UChar pos[BZ_N_GROUPS], tmp, v;
1653 for (v = 0; v < nGroups; v++) pos[v] = v;
1655 for (i = 0; i < nSelectors; i++) {
1656 v = s->selectorMtf[i];
1657 tmp = pos[v];
1658 while (v > 0) { pos[v] = pos[v-1]; v--; }
1659 pos[0] = tmp;
1660 s->selector[i] = tmp;
1664 /*--- Now the coding tables ---*/
1665 for (t = 0; t < nGroups; t++) {
1666 GET_BITS(BZ_X_CODING_1, curr, 5);
1667 for (i = 0; i < alphaSize; i++) {
1668 while (True) {
1669 if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
1670 GET_BIT(BZ_X_CODING_2, uc);
1671 if (uc == 0) break;
1672 GET_BIT(BZ_X_CODING_3, uc);
1673 if (uc == 0) curr++; else curr--;
1675 s->len[t][i] = curr;
1679 /*--- Create the Huffman decoding tables ---*/
1680 for (t = 0; t < nGroups; t++) {
1681 minLen = 32;
1682 maxLen = 0;
1683 for (i = 0; i < alphaSize; i++) {
1684 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
1685 if (s->len[t][i] < minLen) minLen = s->len[t][i];
1687 BZ2_hbCreateDecodeTables (
1688 &(s->limit[t][0]),
1689 &(s->base[t][0]),
1690 &(s->perm[t][0]),
1691 &(s->len[t][0]),
1692 minLen, maxLen, alphaSize
1694 s->minLens[t] = minLen;
1697 /*--- Now the MTF values ---*/
1699 EOB = s->nInUse+1;
1700 nblockMAX = 100000 * s->blockSize100k;
1701 groupNo = -1;
1702 groupPos = 0;
1704 for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
1706 /*-- MTF init --*/
1708 Int32 ii, jj, kk;
1709 kk = MTFA_SIZE-1;
1710 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
1711 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
1712 s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
1713 kk--;
1715 s->mtfbase[ii] = kk + 1;
1718 /*-- end MTF init --*/
1720 nblock = 0;
1721 GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
1723 while (True) {
1725 if (nextSym == EOB) break;
1727 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
1729 es = -1;
1730 N = 1;
1731 do {
1732 if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
1733 if (nextSym == BZ_RUNB) es = es + (1+1) * N;
1734 N = N * 2;
1735 GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
1737 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
1739 es++;
1740 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
1741 s->unzftab[uc] += es;
1743 if (s->smallDecompress)
1744 while (es > 0) {
1745 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
1746 s->ll16[nblock] = (UInt16)uc;
1747 nblock++;
1748 es--;
1750 else
1751 while (es > 0) {
1752 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
1753 s->tt[nblock] = (UInt32)uc;
1754 nblock++;
1755 es--;
1758 continue;
1760 } else {
1762 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
1764 /*-- uc = MTF ( nextSym-1 ) --*/
1766 Int32 ii, jj, kk, pp, lno, off;
1767 UInt32 nn;
1768 nn = (UInt32)(nextSym - 1);
1770 if (nn < MTFL_SIZE) {
1771 /* avoid general-case expense */
1772 pp = s->mtfbase[0];
1773 uc = s->mtfa[pp+nn];
1774 while (nn > 3) {
1775 Int32 z = pp+nn;
1776 s->mtfa[(z) ] = s->mtfa[(z)-1];
1777 s->mtfa[(z)-1] = s->mtfa[(z)-2];
1778 s->mtfa[(z)-2] = s->mtfa[(z)-3];
1779 s->mtfa[(z)-3] = s->mtfa[(z)-4];
1780 nn -= 4;
1782 while (nn > 0) {
1783 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
1785 s->mtfa[pp] = uc;
1786 } else {
1787 /* general case */
1788 lno = nn / MTFL_SIZE;
1789 off = nn % MTFL_SIZE;
1790 pp = s->mtfbase[lno] + off;
1791 uc = s->mtfa[pp];
1792 while (pp > s->mtfbase[lno]) {
1793 s->mtfa[pp] = s->mtfa[pp-1]; pp--;
1795 s->mtfbase[lno]++;
1796 while (lno > 0) {
1797 s->mtfbase[lno]--;
1798 s->mtfa[s->mtfbase[lno]]
1799 = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
1800 lno--;
1802 s->mtfbase[0]--;
1803 s->mtfa[s->mtfbase[0]] = uc;
1804 if (s->mtfbase[0] == 0) {
1805 kk = MTFA_SIZE-1;
1806 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
1807 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
1808 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
1809 kk--;
1811 s->mtfbase[ii] = kk + 1;
1816 /*-- end uc = MTF ( nextSym-1 ) --*/
1818 s->unzftab[s->seqToUnseq[uc]]++;
1819 if (s->smallDecompress)
1820 s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
1821 s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
1822 nblock++;
1824 GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
1825 continue;
1829 /* Now we know what nblock is, we can do a better sanity
1830 check on s->origPtr.
1832 if (s->origPtr < 0 || s->origPtr >= nblock)
1833 RETURN(BZ_DATA_ERROR);
1835 /*-- Set up cftab to facilitate generation of T^(-1) --*/
1836 s->cftab[0] = 0;
1837 for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
1838 for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
1839 for (i = 0; i <= 256; i++) {
1840 if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
1841 /* s->cftab[i] can legitimately be == nblock */
1842 RETURN(BZ_DATA_ERROR);
1846 s->state_out_len = 0;
1847 s->state_out_ch = 0;
1848 BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
1849 s->state = BZ_X_OUTPUT;
1850 if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
1852 if (s->smallDecompress) {
1854 /*-- Make a copy of cftab, used in generation of T --*/
1855 for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
1857 /*-- compute the T vector --*/
1858 for (i = 0; i < nblock; i++) {
1859 uc = (UChar)(s->ll16[i]);
1860 SET_LL(i, s->cftabCopy[uc]);
1861 s->cftabCopy[uc]++;
1864 /*-- Compute T^(-1) by pointer reversal on T --*/
1865 i = s->origPtr;
1866 j = GET_LL(i);
1867 do {
1868 Int32 tmp = GET_LL(j);
1869 SET_LL(j, i);
1870 i = j;
1871 j = tmp;
1873 while (i != s->origPtr);
1875 s->tPos = s->origPtr;
1876 s->nblock_used = 0;
1877 if (s->blockRandomised) {
1878 BZ_RAND_INIT_MASK;
1879 BZ_GET_SMALL(s->k0); s->nblock_used++;
1880 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
1881 } else {
1882 BZ_GET_SMALL(s->k0); s->nblock_used++;
1885 } else {
1887 /*-- compute the T^(-1) vector --*/
1888 for (i = 0; i < nblock; i++) {
1889 uc = (UChar)(s->tt[i] & 0xff);
1890 s->tt[s->cftab[uc]] |= (i << 8);
1891 s->cftab[uc]++;
1894 s->tPos = s->tt[s->origPtr] >> 8;
1895 s->nblock_used = 0;
1896 if (s->blockRandomised) {
1897 BZ_RAND_INIT_MASK;
1898 BZ_GET_FAST(s->k0); s->nblock_used++;
1899 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
1900 } else {
1901 BZ_GET_FAST(s->k0); s->nblock_used++;
1906 RETURN(BZ_OK);
1910 endhdr_2:
1912 GET_UCHAR(BZ_X_ENDHDR_2, uc);
1913 if (uc != 0x72) RETURN(BZ_DATA_ERROR);
1914 GET_UCHAR(BZ_X_ENDHDR_3, uc);
1915 if (uc != 0x45) RETURN(BZ_DATA_ERROR);
1916 GET_UCHAR(BZ_X_ENDHDR_4, uc);
1917 if (uc != 0x38) RETURN(BZ_DATA_ERROR);
1918 GET_UCHAR(BZ_X_ENDHDR_5, uc);
1919 if (uc != 0x50) RETURN(BZ_DATA_ERROR);
1920 GET_UCHAR(BZ_X_ENDHDR_6, uc);
1921 if (uc != 0x90) RETURN(BZ_DATA_ERROR);
1923 s->storedCombinedCRC = 0;
1924 GET_UCHAR(BZ_X_CCRC_1, uc);
1925 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
1926 GET_UCHAR(BZ_X_CCRC_2, uc);
1927 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
1928 GET_UCHAR(BZ_X_CCRC_3, uc);
1929 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
1930 GET_UCHAR(BZ_X_CCRC_4, uc);
1931 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
1933 s->state = BZ_X_IDLE;
1934 RETURN(BZ_STREAM_END);
1936 default: AssertH ( False, 4001 );
1939 AssertH ( False, 4002 );
1941 save_state_and_return:
1943 s->save_i = i;
1944 s->save_j = j;
1945 s->save_t = t;
1946 s->save_alphaSize = alphaSize;
1947 s->save_nGroups = nGroups;
1948 s->save_nSelectors = nSelectors;
1949 s->save_EOB = EOB;
1950 s->save_groupNo = groupNo;
1951 s->save_groupPos = groupPos;
1952 s->save_nextSym = nextSym;
1953 s->save_nblockMAX = nblockMAX;
1954 s->save_nblock = nblock;
1955 s->save_es = es;
1956 s->save_N = N;
1957 s->save_curr = curr;
1958 s->save_zt = zt;
1959 s->save_zn = zn;
1960 s->save_zvec = zvec;
1961 s->save_zj = zj;
1962 s->save_gSel = gSel;
1963 s->save_gMinlen = gMinlen;
1964 s->save_gLimit = gLimit;
1965 s->save_gBase = gBase;
1966 s->save_gPerm = gPerm;
1968 return retVal;
1972 /*-------------------------------------------------------------*/
1973 /*--- end decompress.c ---*/
1974 /*-------------------------------------------------------------*/
1976 /*-------------------------------------------------------------*/
1977 /*--- Block sorting machinery ---*/
1978 /*--- blocksort.c ---*/
1979 /*-------------------------------------------------------------*/
1981 /*--
1982 This file is a part of bzip2 and/or libbzip2, a program and
1983 library for lossless, block-sorting data compression.
1985 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
1987 Redistribution and use in source and binary forms, with or without
1988 modification, are permitted provided that the following conditions
1989 are met:
1991 1. Redistributions of source code must retain the above copyright
1992 notice, this list of conditions and the following disclaimer.
1994 2. The origin of this software must not be misrepresented; you must
1995 not claim that you wrote the original software. If you use this
1996 software in a product, an acknowledgment in the product
1997 documentation would be appreciated but is not required.
1999 3. Altered source versions must be plainly marked as such, and must
2000 not be misrepresented as being the original software.
2002 4. The name of the author may not be used to endorse or promote
2003 products derived from this software without specific prior written
2004 permission.
2006 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
2007 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
2008 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2009 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
2010 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2011 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
2012 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2013 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
2014 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
2015 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
2016 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2018 Julian Seward, Cambridge, UK.
2019 jseward@bzip.org
2020 bzip2/libbzip2 version 1.0 of 21 March 2000
2022 This program is based on (at least) the work of:
2023 Mike Burrows
2024 David Wheeler
2025 Peter Fenwick
2026 Alistair Moffat
2027 Radford Neal
2028 Ian H. Witten
2029 Robert Sedgewick
2030 Jon L. Bentley
2032 For more information on these sources, see the manual.
2034 To get some idea how the block sorting algorithms in this file
2035 work, read my paper
2036 On the Performance of BWT Sorting Algorithms
2037 in Proceedings of the IEEE Data Compression Conference 2000,
2038 Snowbird, Utah, USA, 27-30 March 2000. The main sort in this
2039 file implements the algorithm called cache in the paper.
2040 --*/
2044 /*---------------------------------------------*/
2045 /*--- Fallback O(N log(N)^2) sorting ---*/
2046 /*--- algorithm, for repetitive blocks ---*/
2047 /*---------------------------------------------*/
2049 /*---------------------------------------------*/
2050 static
2051 __inline__
2052 void fallbackSimpleSort ( UInt32* fmap,
2053 UInt32* eclass,
2054 Int32 lo,
2055 Int32 hi )
2057 Int32 i, j, tmp;
2058 UInt32 ec_tmp;
2060 if (lo == hi) return;
2062 if (hi - lo > 3) {
2063 for ( i = hi-4; i >= lo; i-- ) {
2064 tmp = fmap[i];
2065 ec_tmp = eclass[tmp];
2066 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
2067 fmap[j-4] = fmap[j];
2068 fmap[j-4] = tmp;
2072 for ( i = hi-1; i >= lo; i-- ) {
2073 tmp = fmap[i];
2074 ec_tmp = eclass[tmp];
2075 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
2076 fmap[j-1] = fmap[j];
2077 fmap[j-1] = tmp;
2082 /*---------------------------------------------*/
2083 #define fswap(zz1, zz2) \
2084 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
2086 #define fvswap(zzp1, zzp2, zzn) \
2088 Int32 yyp1 = (zzp1); \
2089 Int32 yyp2 = (zzp2); \
2090 Int32 yyn = (zzn); \
2091 while (yyn > 0) { \
2092 fswap(fmap[yyp1], fmap[yyp2]); \
2093 yyp1++; yyp2++; yyn--; \
2098 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
2100 #define fpush(lz,hz) { stackLo[sp] = lz; \
2101 stackHi[sp] = hz; \
2102 sp++; }
2104 #define fpop(lz,hz) { sp--; \
2105 lz = stackLo[sp]; \
2106 hz = stackHi[sp]; }
2108 #define FALLBACK_QSORT_SMALL_THRESH 10
2109 #define FALLBACK_QSORT_STACK_SIZE 100
2112 static
2113 void fallbackQSort3 ( UInt32* fmap,
2114 UInt32* eclass,
2115 Int32 loSt,
2116 Int32 hiSt )
2118 Int32 unLo, unHi, ltLo, gtHi, n, m;
2119 Int32 sp, lo, hi;
2120 UInt32 med, r, r3;
2121 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
2122 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
2124 r = 0;
2126 sp = 0;
2127 fpush ( loSt, hiSt );
2129 while (sp > 0) {
2131 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
2133 fpop ( lo, hi );
2134 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
2135 fallbackSimpleSort ( fmap, eclass, lo, hi );
2136 continue;
2139 /* Random partitioning. Median of 3 sometimes fails to
2140 avoid bad cases. Median of 9 seems to help but
2141 looks rather expensive. This too seems to work but
2142 is cheaper. Guidance for the magic constants
2143 7621 and 32768 is taken from Sedgewick's algorithms
2144 book, chapter 35.
2146 r = ((r * 7621) + 1) % 32768;
2147 r3 = r % 3;
2148 if (r3 == 0) med = eclass[fmap[lo]]; else
2149 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
2150 med = eclass[fmap[hi]];
2152 unLo = ltLo = lo;
2153 unHi = gtHi = hi;
2155 while (1) {
2156 while (1) {
2157 if (unLo > unHi) break;
2158 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
2159 if (n == 0) {
2160 fswap(fmap[unLo], fmap[ltLo]);
2161 ltLo++; unLo++;
2162 continue;
2164 if (n > 0) break;
2165 unLo++;
2167 while (1) {
2168 if (unLo > unHi) break;
2169 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
2170 if (n == 0) {
2171 fswap(fmap[unHi], fmap[gtHi]);
2172 gtHi--; unHi--;
2173 continue;
2175 if (n < 0) break;
2176 unHi--;
2178 if (unLo > unHi) break;
2179 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
2182 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
2184 if (gtHi < ltLo) continue;
2186 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
2187 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
2189 n = lo + unLo - ltLo - 1;
2190 m = hi - (gtHi - unHi) + 1;
2192 if (n - lo > hi - m) {
2193 fpush ( lo, n );
2194 fpush ( m, hi );
2195 } else {
2196 fpush ( m, hi );
2197 fpush ( lo, n );
2202 #undef fmin
2203 #undef fpush
2204 #undef fpop
2205 #undef fswap
2206 #undef fvswap
2207 #undef FALLBACK_QSORT_SMALL_THRESH
2208 #undef FALLBACK_QSORT_STACK_SIZE
2211 /*---------------------------------------------*/
2212 /* Pre:
2213 nblock > 0
2214 eclass exists for [0 .. nblock-1]
2215 ((UChar*)eclass) [0 .. nblock-1] holds block
2216 ptr exists for [0 .. nblock-1]
2218 Post:
2219 ((UChar*)eclass) [0 .. nblock-1] holds block
2220 All other areas of eclass destroyed
2221 fmap [0 .. nblock-1] holds sorted order
2222 bhtab [ 0 .. 2+(nblock/32) ] destroyed
2225 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
2226 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
2227 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
2228 #define WORD_BH(zz) bhtab[(zz) >> 5]
2229 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
2231 static
2232 void fallbackSort ( UInt32* fmap,
2233 UInt32* eclass,
2234 UInt32* bhtab,
2235 Int32 nblock,
2236 Int32 verb )
2238 Int32 ftab[257];
2239 Int32 ftabCopy[256];
2240 Int32 H, i, j, k, l, r, cc, cc1;
2241 Int32 nNotDone;
2242 Int32 nBhtab;
2243 UChar* eclass8 = (UChar*)eclass;
2245 /*--
2246 Initial 1-char radix sort to generate
2247 initial fmap and initial BH bits.
2248 --*/
2249 if (verb >= 4)
2250 VPrintf0 ( " bucket sorting ...\n" );
2251 for (i = 0; i < 257; i++) ftab[i] = 0;
2252 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
2253 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
2254 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
2256 for (i = 0; i < nblock; i++) {
2257 j = eclass8[i];
2258 k = ftab[j] - 1;
2259 ftab[j] = k;
2260 fmap[k] = i;
2263 nBhtab = 2 + (nblock / 32);
2264 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
2265 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
2267 /*--
2268 Inductively refine the buckets. Kind-of an
2269 "exponential radix sort" (!), inspired by the
2270 Manber-Myers suffix array construction algorithm.
2271 --*/
2273 /*-- set sentinel bits for block-end detection --*/
2274 for (i = 0; i < 32; i++) {
2275 SET_BH(nblock + 2*i);
2276 CLEAR_BH(nblock + 2*i + 1);
2279 /*-- the log(N) loop --*/
2280 H = 1;
2281 while (1) {
2283 if (verb >= 4)
2284 VPrintf1 ( " depth %6d has ", H );
2286 j = 0;
2287 for (i = 0; i < nblock; i++) {
2288 if (ISSET_BH(i)) j = i;
2289 k = fmap[i] - H; if (k < 0) k += nblock;
2290 eclass[k] = j;
2293 nNotDone = 0;
2294 r = -1;
2295 while (1) {
2297 /*-- find the next non-singleton bucket --*/
2298 k = r + 1;
2299 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
2300 if (ISSET_BH(k)) {
2301 while (WORD_BH(k) == 0xffffffff) k += 32;
2302 while (ISSET_BH(k)) k++;
2304 l = k - 1;
2305 if (l >= nblock) break;
2306 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
2307 if (!ISSET_BH(k)) {
2308 while (WORD_BH(k) == 0x00000000) k += 32;
2309 while (!ISSET_BH(k)) k++;
2311 r = k - 1;
2312 if (r >= nblock) break;
2314 /*-- now [l, r] bracket current bucket --*/
2315 if (r > l) {
2316 nNotDone += (r - l + 1);
2317 fallbackQSort3 ( fmap, eclass, l, r );
2319 /*-- scan bucket and generate header bits-- */
2320 cc = -1;
2321 for (i = l; i <= r; i++) {
2322 cc1 = eclass[fmap[i]];
2323 if (cc != cc1) { SET_BH(i); cc = cc1; };
2328 if (verb >= 4)
2329 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
2331 H *= 2;
2332 if (H > nblock || nNotDone == 0) break;
2335 /*--
2336 Reconstruct the original block in
2337 eclass8 [0 .. nblock-1], since the
2338 previous phase destroyed it.
2339 --*/
2340 if (verb >= 4)
2341 VPrintf0 ( " reconstructing block ...\n" );
2342 j = 0;
2343 for (i = 0; i < nblock; i++) {
2344 while (ftabCopy[j] == 0) j++;
2345 ftabCopy[j]--;
2346 eclass8[fmap[i]] = (UChar)j;
2348 AssertH ( j < 256, 1005 );
2351 #undef SET_BH
2352 #undef CLEAR_BH
2353 #undef ISSET_BH
2354 #undef WORD_BH
2355 #undef UNALIGNED_BH
2358 /*---------------------------------------------*/
2359 /*--- The main, O(N^2 log(N)) sorting ---*/
2360 /*--- algorithm. Faster for "normal" ---*/
2361 /*--- non-repetitive blocks. ---*/
2362 /*---------------------------------------------*/
2364 /*---------------------------------------------*/
2365 static
2366 __inline__
2367 Bool mainGtU ( UInt32 i1,
2368 UInt32 i2,
2369 UChar* block,
2370 UInt16* quadrant,
2371 UInt32 nblock,
2372 Int32* budget )
2374 Int32 k;
2375 UChar c1, c2;
2376 UInt16 s1, s2;
2378 AssertD ( i1 != i2, "mainGtU" );
2379 /* 1 */
2380 c1 = block[i1]; c2 = block[i2];
2381 if (c1 != c2) return (c1 > c2);
2382 i1++; i2++;
2383 /* 2 */
2384 c1 = block[i1]; c2 = block[i2];
2385 if (c1 != c2) return (c1 > c2);
2386 i1++; i2++;
2387 /* 3 */
2388 c1 = block[i1]; c2 = block[i2];
2389 if (c1 != c2) return (c1 > c2);
2390 i1++; i2++;
2391 /* 4 */
2392 c1 = block[i1]; c2 = block[i2];
2393 if (c1 != c2) return (c1 > c2);
2394 i1++; i2++;
2395 /* 5 */
2396 c1 = block[i1]; c2 = block[i2];
2397 if (c1 != c2) return (c1 > c2);
2398 i1++; i2++;
2399 /* 6 */
2400 c1 = block[i1]; c2 = block[i2];
2401 if (c1 != c2) return (c1 > c2);
2402 i1++; i2++;
2403 /* 7 */
2404 c1 = block[i1]; c2 = block[i2];
2405 if (c1 != c2) return (c1 > c2);
2406 i1++; i2++;
2407 /* 8 */
2408 c1 = block[i1]; c2 = block[i2];
2409 if (c1 != c2) return (c1 > c2);
2410 i1++; i2++;
2411 /* 9 */
2412 c1 = block[i1]; c2 = block[i2];
2413 if (c1 != c2) return (c1 > c2);
2414 i1++; i2++;
2415 /* 10 */
2416 c1 = block[i1]; c2 = block[i2];
2417 if (c1 != c2) return (c1 > c2);
2418 i1++; i2++;
2419 /* 11 */
2420 c1 = block[i1]; c2 = block[i2];
2421 if (c1 != c2) return (c1 > c2);
2422 i1++; i2++;
2423 /* 12 */
2424 c1 = block[i1]; c2 = block[i2];
2425 if (c1 != c2) return (c1 > c2);
2426 i1++; i2++;
2428 k = nblock + 8;
2430 do {
2431 /* 1 */
2432 c1 = block[i1]; c2 = block[i2];
2433 if (c1 != c2) return (c1 > c2);
2434 s1 = quadrant[i1]; s2 = quadrant[i2];
2435 if (s1 != s2) return (s1 > s2);
2436 i1++; i2++;
2437 /* 2 */
2438 c1 = block[i1]; c2 = block[i2];
2439 if (c1 != c2) return (c1 > c2);
2440 s1 = quadrant[i1]; s2 = quadrant[i2];
2441 if (s1 != s2) return (s1 > s2);
2442 i1++; i2++;
2443 /* 3 */
2444 c1 = block[i1]; c2 = block[i2];
2445 if (c1 != c2) return (c1 > c2);
2446 s1 = quadrant[i1]; s2 = quadrant[i2];
2447 if (s1 != s2) return (s1 > s2);
2448 i1++; i2++;
2449 /* 4 */
2450 c1 = block[i1]; c2 = block[i2];
2451 if (c1 != c2) return (c1 > c2);
2452 s1 = quadrant[i1]; s2 = quadrant[i2];
2453 if (s1 != s2) return (s1 > s2);
2454 i1++; i2++;
2455 /* 5 */
2456 c1 = block[i1]; c2 = block[i2];
2457 if (c1 != c2) return (c1 > c2);
2458 s1 = quadrant[i1]; s2 = quadrant[i2];
2459 if (s1 != s2) return (s1 > s2);
2460 i1++; i2++;
2461 /* 6 */
2462 c1 = block[i1]; c2 = block[i2];
2463 if (c1 != c2) return (c1 > c2);
2464 s1 = quadrant[i1]; s2 = quadrant[i2];
2465 if (s1 != s2) return (s1 > s2);
2466 i1++; i2++;
2467 /* 7 */
2468 c1 = block[i1]; c2 = block[i2];
2469 if (c1 != c2) return (c1 > c2);
2470 s1 = quadrant[i1]; s2 = quadrant[i2];
2471 if (s1 != s2) return (s1 > s2);
2472 i1++; i2++;
2473 /* 8 */
2474 c1 = block[i1]; c2 = block[i2];
2475 if (c1 != c2) return (c1 > c2);
2476 s1 = quadrant[i1]; s2 = quadrant[i2];
2477 if (s1 != s2) return (s1 > s2);
2478 i1++; i2++;
2480 if (i1 >= nblock) i1 -= nblock;
2481 if (i2 >= nblock) i2 -= nblock;
2483 k -= 8;
2484 (*budget)--;
2486 while (k >= 0);
2488 return False;
2492 /*---------------------------------------------*/
2493 /*--
2494 Knuth's increments seem to work better
2495 than Incerpi-Sedgewick here. Possibly
2496 because the number of elems to sort is
2497 usually small, typically <= 20.
2498 --*/
2499 static
2500 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
2501 9841, 29524, 88573, 265720,
2502 797161, 2391484 };
2504 static
2505 void mainSimpleSort ( UInt32* ptr,
2506 UChar* block,
2507 UInt16* quadrant,
2508 Int32 nblock,
2509 Int32 lo,
2510 Int32 hi,
2511 Int32 d,
2512 Int32* budget )
2514 Int32 i, j, h, bigN, hp;
2515 UInt32 v;
2517 bigN = hi - lo + 1;
2518 if (bigN < 2) return;
2520 hp = 0;
2521 while (incs[hp] < bigN) hp++;
2522 hp--;
2524 for (; hp >= 0; hp--) {
2525 h = incs[hp];
2527 i = lo + h;
2528 while (True) {
2530 /*-- copy 1 --*/
2531 if (i > hi) break;
2532 v = ptr[i];
2533 j = i;
2534 while ( mainGtU (
2535 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
2536 ) ) {
2537 ptr[j] = ptr[j-h];
2538 j = j - h;
2539 if (j <= (lo + h - 1)) break;
2541 ptr[j] = v;
2542 i++;
2544 /*-- copy 2 --*/
2545 if (i > hi) break;
2546 v = ptr[i];
2547 j = i;
2548 while ( mainGtU (
2549 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
2550 ) ) {
2551 ptr[j] = ptr[j-h];
2552 j = j - h;
2553 if (j <= (lo + h - 1)) break;
2555 ptr[j] = v;
2556 i++;
2558 /*-- copy 3 --*/
2559 if (i > hi) break;
2560 v = ptr[i];
2561 j = i;
2562 while ( mainGtU (
2563 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
2564 ) ) {
2565 ptr[j] = ptr[j-h];
2566 j = j - h;
2567 if (j <= (lo + h - 1)) break;
2569 ptr[j] = v;
2570 i++;
2572 if (*budget < 0) return;
2578 /*---------------------------------------------*/
2579 /*--
2580 The following is an implementation of
2581 an elegant 3-way quicksort for strings,
2582 described in a paper "Fast Algorithms for
2583 Sorting and Searching Strings", by Robert
2584 Sedgewick and Jon L. Bentley.
2585 --*/
2587 #define mswap(zz1, zz2) \
2588 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
2590 #define mvswap(zzp1, zzp2, zzn) \
2592 Int32 yyp1 = (zzp1); \
2593 Int32 yyp2 = (zzp2); \
2594 Int32 yyn = (zzn); \
2595 while (yyn > 0) { \
2596 mswap(ptr[yyp1], ptr[yyp2]); \
2597 yyp1++; yyp2++; yyn--; \
2601 static
2602 __inline__
2603 UChar mmed3 ( UChar a, UChar b, UChar c )
2605 UChar t;
2606 if (a > b) { t = a; a = b; b = t; };
2607 if (b > c) {
2608 b = c;
2609 if (a > b) b = a;
2611 return b;
2614 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
2616 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
2617 stackHi[sp] = hz; \
2618 stackD [sp] = dz; \
2619 sp++; }
2621 #define mpop(lz,hz,dz) { sp--; \
2622 lz = stackLo[sp]; \
2623 hz = stackHi[sp]; \
2624 dz = stackD [sp]; }
2627 #define mnextsize(az) (nextHi[az]-nextLo[az])
2629 #define mnextswap(az,bz) \
2630 { Int32 tz; \
2631 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
2632 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
2633 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
2636 #define MAIN_QSORT_SMALL_THRESH 20
2637 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
2638 #define MAIN_QSORT_STACK_SIZE 100
2640 static
2641 void mainQSort3 ( UInt32* ptr,
2642 UChar* block,
2643 UInt16* quadrant,
2644 Int32 nblock,
2645 Int32 loSt,
2646 Int32 hiSt,
2647 Int32 dSt,
2648 Int32* budget )
2650 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
2651 Int32 sp, lo, hi, d;
2653 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
2654 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
2655 Int32 stackD [MAIN_QSORT_STACK_SIZE];
2657 Int32 nextLo[3];
2658 Int32 nextHi[3];
2659 Int32 nextD [3];
2661 sp = 0;
2662 mpush ( loSt, hiSt, dSt );
2664 while (sp > 0) {
2666 AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
2668 mpop ( lo, hi, d );
2669 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
2670 d > MAIN_QSORT_DEPTH_THRESH) {
2671 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
2672 if (*budget < 0) return;
2673 continue;
2676 med = (Int32)
2677 mmed3 ( block[ptr[ lo ]+d],
2678 block[ptr[ hi ]+d],
2679 block[ptr[ (lo+hi)>>1 ]+d] );
2681 unLo = ltLo = lo;
2682 unHi = gtHi = hi;
2684 while (True) {
2685 while (True) {
2686 if (unLo > unHi) break;
2687 n = ((Int32)block[ptr[unLo]+d]) - med;
2688 if (n == 0) {
2689 mswap(ptr[unLo], ptr[ltLo]);
2690 ltLo++; unLo++; continue;
2692 if (n > 0) break;
2693 unLo++;
2695 while (True) {
2696 if (unLo > unHi) break;
2697 n = ((Int32)block[ptr[unHi]+d]) - med;
2698 if (n == 0) {
2699 mswap(ptr[unHi], ptr[gtHi]);
2700 gtHi--; unHi--; continue;
2702 if (n < 0) break;
2703 unHi--;
2705 if (unLo > unHi) break;
2706 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
2709 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
2711 if (gtHi < ltLo) {
2712 mpush(lo, hi, d+1 );
2713 continue;
2716 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
2717 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
2719 n = lo + unLo - ltLo - 1;
2720 m = hi - (gtHi - unHi) + 1;
2722 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
2723 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
2724 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
2726 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
2727 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
2728 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
2730 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
2731 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
2733 mpush (nextLo[0], nextHi[0], nextD[0]);
2734 mpush (nextLo[1], nextHi[1], nextD[1]);
2735 mpush (nextLo[2], nextHi[2], nextD[2]);
2739 #undef mswap
2740 #undef mvswap
2741 #undef mpush
2742 #undef mpop
2743 #undef mmin
2744 #undef mnextsize
2745 #undef mnextswap
2746 #undef MAIN_QSORT_SMALL_THRESH
2747 #undef MAIN_QSORT_DEPTH_THRESH
2748 #undef MAIN_QSORT_STACK_SIZE
2751 /*---------------------------------------------*/
2752 /* Pre:
2753 nblock > N_OVERSHOOT
2754 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
2755 ((UChar*)block32) [0 .. nblock-1] holds block
2756 ptr exists for [0 .. nblock-1]
2758 Post:
2759 ((UChar*)block32) [0 .. nblock-1] holds block
2760 All other areas of block32 destroyed
2761 ftab [0 .. 65536 ] destroyed
2762 ptr [0 .. nblock-1] holds sorted order
2763 if (*budget < 0), sorting was abandoned
2766 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
2767 #define SETMASK (1 << 21)
2768 #define CLEARMASK (~(SETMASK))
2770 static
2771 void mainSort ( UInt32* ptr,
2772 UChar* block,
2773 UInt16* quadrant,
2774 UInt32* ftab,
2775 Int32 nblock,
2776 Int32 verb,
2777 Int32* budget )
2779 Int32 i, j, k, ss, sb;
2780 Int32 runningOrder[256];
2781 Bool bigDone[256];
2782 Int32 copyStart[256];
2783 Int32 copyEnd [256];
2784 UChar c1;
2785 Int32 numQSorted;
2786 UInt16 s;
2787 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
2789 /*-- set up the 2-byte frequency table --*/
2790 for (i = 65536; i >= 0; i--) ftab[i] = 0;
2792 j = block[0] << 8;
2793 i = nblock-1;
2794 for (; i >= 3; i -= 4) {
2795 quadrant[i] = 0;
2796 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
2797 ftab[j]++;
2798 quadrant[i-1] = 0;
2799 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
2800 ftab[j]++;
2801 quadrant[i-2] = 0;
2802 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
2803 ftab[j]++;
2804 quadrant[i-3] = 0;
2805 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
2806 ftab[j]++;
2808 for (; i >= 0; i--) {
2809 quadrant[i] = 0;
2810 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
2811 ftab[j]++;
2814 /*-- (emphasises close relationship of block & quadrant) --*/
2815 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
2816 block [nblock+i] = block[i];
2817 quadrant[nblock+i] = 0;
2820 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
2822 /*-- Complete the initial radix sort --*/
2823 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
2825 s = block[0] << 8;
2826 i = nblock-1;
2827 for (; i >= 3; i -= 4) {
2828 s = (s >> 8) | (block[i] << 8);
2829 j = ftab[s] -1;
2830 ftab[s] = j;
2831 ptr[j] = i;
2832 s = (s >> 8) | (block[i-1] << 8);
2833 j = ftab[s] -1;
2834 ftab[s] = j;
2835 ptr[j] = i-1;
2836 s = (s >> 8) | (block[i-2] << 8);
2837 j = ftab[s] -1;
2838 ftab[s] = j;
2839 ptr[j] = i-2;
2840 s = (s >> 8) | (block[i-3] << 8);
2841 j = ftab[s] -1;
2842 ftab[s] = j;
2843 ptr[j] = i-3;
2845 for (; i >= 0; i--) {
2846 s = (s >> 8) | (block[i] << 8);
2847 j = ftab[s] -1;
2848 ftab[s] = j;
2849 ptr[j] = i;
2852 /*--
2853 Now ftab contains the first loc of every small bucket.
2854 Calculate the running order, from smallest to largest
2855 big bucket.
2856 --*/
2857 for (i = 0; i <= 255; i++) {
2858 bigDone [i] = False;
2859 runningOrder[i] = i;
2863 Int32 vv;
2864 Int32 h = 1;
2865 do h = 3 * h + 1; while (h <= 256);
2866 do {
2867 h = h / 3;
2868 for (i = h; i <= 255; i++) {
2869 vv = runningOrder[i];
2870 j = i;
2871 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
2872 runningOrder[j] = runningOrder[j-h];
2873 j = j - h;
2874 if (j <= (h - 1)) goto zero;
2876 zero:
2877 runningOrder[j] = vv;
2879 } while (h != 1);
2882 /*--
2883 The main sorting loop.
2884 --*/
2886 numQSorted = 0;
2888 for (i = 0; i <= 255; i++) {
2890 /*--
2891 Process big buckets, starting with the least full.
2892 Basically this is a 3-step process in which we call
2893 mainQSort3 to sort the small buckets [ss, j], but
2894 also make a big effort to avoid the calls if we can.
2895 --*/
2896 ss = runningOrder[i];
2898 /*--
2899 Step 1:
2900 Complete the big bucket [ss] by quicksorting
2901 any unsorted small buckets [ss, j], for j != ss.
2902 Hopefully previous pointer-scanning phases have already
2903 completed many of the small buckets [ss, j], so
2904 we don't have to sort them at all.
2905 --*/
2906 for (j = 0; j <= 255; j++) {
2907 if (j != ss) {
2908 sb = (ss << 8) + j;
2909 if ( ! (ftab[sb] & SETMASK) ) {
2910 Int32 lo = ftab[sb] & CLEARMASK;
2911 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
2912 if (hi > lo) {
2913 if (verb >= 4)
2914 VPrintf4 ( " qsort [0x%x, 0x%x] "
2915 "done %d this %d\n",
2916 ss, j, numQSorted, hi - lo + 1 );
2917 mainQSort3 (
2918 ptr, block, quadrant, nblock,
2919 lo, hi, BZ_N_RADIX, budget
2921 numQSorted += (hi - lo + 1);
2922 if (*budget < 0) return;
2925 ftab[sb] |= SETMASK;
2929 AssertH ( !bigDone[ss], 1006 );
2931 /*--
2932 Step 2:
2933 Now scan this big bucket [ss] so as to synthesise the
2934 sorted order for small buckets [t, ss] for all t,
2935 including, magically, the bucket [ss,ss] too.
2936 This will avoid doing Real Work in subsequent Step 1's.
2937 --*/
2939 for (j = 0; j <= 255; j++) {
2940 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
2941 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
2943 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
2944 k = ptr[j]-1; if (k < 0) k += nblock;
2945 c1 = block[k];
2946 if (!bigDone[c1])
2947 ptr[ copyStart[c1]++ ] = k;
2949 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
2950 k = ptr[j]-1; if (k < 0) k += nblock;
2951 c1 = block[k];
2952 if (!bigDone[c1])
2953 ptr[ copyEnd[c1]-- ] = k;
2957 AssertH ( (copyStart[ss]-1 == copyEnd[ss])
2959 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
2960 Necessity for this case is demonstrated by compressing
2961 a sequence of approximately 48.5 million of character
2962 251; 1.0.0/1.0.1 will then die here. */
2963 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
2964 1007 )
2966 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
2968 /*--
2969 Step 3:
2970 The [ss] big bucket is now done. Record this fact,
2971 and update the quadrant descriptors. Remember to
2972 update quadrants in the overshoot area too, if
2973 necessary. The "if (i < 255)" test merely skips
2974 this updating for the last bucket processed, since
2975 updating for the last bucket is pointless.
2977 The quadrant array provides a way to incrementally
2978 cache sort orderings, as they appear, so as to
2979 make subsequent comparisons in fullGtU() complete
2980 faster. For repetitive blocks this makes a big
2981 difference (but not big enough to be able to avoid
2982 the fallback sorting mechanism, exponential radix sort).
2984 The precise meaning is: at all times:
2986 for 0 <= i < nblock and 0 <= j <= nblock
2988 if block[i] != block[j],
2990 then the relative values of quadrant[i] and
2991 quadrant[j] are meaningless.
2993 else {
2994 if quadrant[i] < quadrant[j]
2995 then the string starting at i lexicographically
2996 precedes the string starting at j
2998 else if quadrant[i] > quadrant[j]
2999 then the string starting at j lexicographically
3000 precedes the string starting at i
3002 else
3003 the relative ordering of the strings starting
3004 at i and j has not yet been determined.
3006 --*/
3007 bigDone[ss] = True;
3009 if (i < 255) {
3010 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
3011 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
3012 Int32 shifts = 0;
3014 while ((bbSize >> shifts) > 65534) shifts++;
3016 for (j = bbSize-1; j >= 0; j--) {
3017 Int32 a2update = ptr[bbStart + j];
3018 UInt16 qVal = (UInt16)(j >> shifts);
3019 quadrant[a2update] = qVal;
3020 if (a2update < BZ_N_OVERSHOOT)
3021 quadrant[a2update + nblock] = qVal;
3023 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
3028 if (verb >= 4)
3029 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
3030 nblock, numQSorted, nblock - numQSorted );
3033 #undef BIGFREQ
3034 #undef SETMASK
3035 #undef CLEARMASK
3038 /*---------------------------------------------*/
3039 /* Pre:
3040 nblock > 0
3041 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
3042 ((UChar*)arr2) [0 .. nblock-1] holds block
3043 arr1 exists for [0 .. nblock-1]
3045 Post:
3046 ((UChar*)arr2) [0 .. nblock-1] holds block
3047 All other areas of block destroyed
3048 ftab [ 0 .. 65536 ] destroyed
3049 arr1 [0 .. nblock-1] holds sorted order
3051 void BZ2_blockSort ( EState* s )
3053 UInt32* ptr = s->ptr;
3054 UChar* block = s->block;
3055 UInt32* ftab = s->ftab;
3056 Int32 nblock = s->nblock;
3057 Int32 verb = s->verbosity;
3058 Int32 wfact = s->workFactor;
3059 UInt16* quadrant;
3060 Int32 budget;
3061 Int32 budgetInit;
3062 Int32 i;
3064 if (nblock < /* 10000 */1000 ) {
3065 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
3066 } else {
3067 /* Calculate the location for quadrant, remembering to get
3068 the alignment right. Assumes that &(block[0]) is at least
3069 2-byte aligned -- this should be ok since block is really
3070 the first section of arr2.
3072 i = nblock+BZ_N_OVERSHOOT;
3073 if (i & 1) i++;
3074 quadrant = (UInt16*)(&(block[i]));
3076 /* (wfact-1) / 3 puts the default-factor-30
3077 transition point at very roughly the same place as
3078 with v0.1 and v0.9.0.
3079 Not that it particularly matters any more, since the
3080 resulting compressed stream is now the same regardless
3081 of whether or not we use the main sort or fallback sort.
3083 if (wfact < 1 ) wfact = 1;
3084 if (wfact > 100) wfact = 100;
3085 budgetInit = nblock * ((wfact-1) / 3);
3086 budget = budgetInit;
3088 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
3089 if (0 && verb >= 3)
3090 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
3091 budgetInit - budget,
3092 nblock,
3093 (float)(budgetInit - budget) /
3094 (float)(nblock==0 ? 1 : nblock) );
3095 if (budget < 0) {
3096 if (verb >= 2)
3097 VPrintf0 ( " too repetitive; using fallback"
3098 " sorting algorithm\n" );
3099 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
3103 s->origPtr = -1;
3104 for (i = 0; i < s->nblock; i++)
3105 if (ptr[i] == 0)
3106 { s->origPtr = i; break; };
3108 AssertH( s->origPtr != -1, 1003 );
3112 /*-------------------------------------------------------------*/
3113 /*--- end blocksort.c ---*/
3114 /*-------------------------------------------------------------*/
3116 /*-------------------------------------------------------------*/
3117 /*--- Huffman coding low-level stuff ---*/
3118 /*--- huffman.c ---*/
3119 /*-------------------------------------------------------------*/
3121 /*--
3122 This file is a part of bzip2 and/or libbzip2, a program and
3123 library for lossless, block-sorting data compression.
3125 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
3127 Redistribution and use in source and binary forms, with or without
3128 modification, are permitted provided that the following conditions
3129 are met:
3131 1. Redistributions of source code must retain the above copyright
3132 notice, this list of conditions and the following disclaimer.
3134 2. The origin of this software must not be misrepresented; you must
3135 not claim that you wrote the original software. If you use this
3136 software in a product, an acknowledgment in the product
3137 documentation would be appreciated but is not required.
3139 3. Altered source versions must be plainly marked as such, and must
3140 not be misrepresented as being the original software.
3142 4. The name of the author may not be used to endorse or promote
3143 products derived from this software without specific prior written
3144 permission.
3146 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
3147 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
3148 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3149 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
3150 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3151 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
3152 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
3153 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
3154 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
3155 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
3156 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3158 Julian Seward, Cambridge, UK.
3159 jseward@bzip.org
3160 bzip2/libbzip2 version 1.0 of 21 March 2000
3162 This program is based on (at least) the work of:
3163 Mike Burrows
3164 David Wheeler
3165 Peter Fenwick
3166 Alistair Moffat
3167 Radford Neal
3168 Ian H. Witten
3169 Robert Sedgewick
3170 Jon L. Bentley
3172 For more information on these sources, see the manual.
3173 --*/
3177 /*---------------------------------------------------*/
3178 #define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
3179 #define DEPTHOF(zz1) ((zz1) & 0x000000ff)
3180 #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
3182 #define ADDWEIGHTS(zw1,zw2) \
3183 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
3184 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
3186 #define UPHEAP(z) \
3188 Int32 zz, tmp; \
3189 zz = z; tmp = heap[zz]; \
3190 while (weight[tmp] < weight[heap[zz >> 1]]) { \
3191 heap[zz] = heap[zz >> 1]; \
3192 zz >>= 1; \
3194 heap[zz] = tmp; \
3197 #define DOWNHEAP(z) \
3199 Int32 zz, yy, tmp; \
3200 zz = z; tmp = heap[zz]; \
3201 while (True) { \
3202 yy = zz << 1; \
3203 if (yy > nHeap) break; \
3204 if (yy < nHeap && \
3205 weight[heap[yy+1]] < weight[heap[yy]]) \
3206 yy++; \
3207 if (weight[tmp] < weight[heap[yy]]) break; \
3208 heap[zz] = heap[yy]; \
3209 zz = yy; \
3211 heap[zz] = tmp; \
3215 /*---------------------------------------------------*/
3216 void BZ2_hbMakeCodeLengths ( UChar *len,
3217 Int32 *freq,
3218 Int32 alphaSize,
3219 Int32 maxLen )
3221 /*--
3222 Nodes and heap entries run from 1. Entry 0
3223 for both the heap and nodes is a sentinel.
3224 --*/
3225 Int32 nNodes, nHeap, n1, n2, i, j, k;
3226 Bool tooLong;
3228 Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
3229 Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
3230 Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
3232 for (i = 0; i < alphaSize; i++)
3233 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
3235 while (True) {
3237 nNodes = alphaSize;
3238 nHeap = 0;
3240 heap[0] = 0;
3241 weight[0] = 0;
3242 parent[0] = -2;
3244 for (i = 1; i <= alphaSize; i++) {
3245 parent[i] = -1;
3246 nHeap++;
3247 heap[nHeap] = i;
3248 UPHEAP(nHeap);
3251 AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
3253 while (nHeap > 1) {
3254 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
3255 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
3256 nNodes++;
3257 parent[n1] = parent[n2] = nNodes;
3258 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
3259 parent[nNodes] = -1;
3260 nHeap++;
3261 heap[nHeap] = nNodes;
3262 UPHEAP(nHeap);
3265 AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
3267 tooLong = False;
3268 for (i = 1; i <= alphaSize; i++) {
3269 j = 0;
3270 k = i;
3271 while (parent[k] >= 0) { k = parent[k]; j++; }
3272 len[i-1] = j;
3273 if (j > maxLen) tooLong = True;
3276 if (! tooLong) break;
3278 /* 17 Oct 04: keep-going condition for the following loop used
3279 to be 'i < alphaSize', which missed the last element,
3280 theoretically leading to the possibility of the compressor
3281 looping. However, this count-scaling step is only needed if
3282 one of the generated Huffman code words is longer than
3283 maxLen, which up to and including version 1.0.2 was 20 bits,
3284 which is extremely unlikely. In version 1.0.3 maxLen was
3285 changed to 17 bits, which has minimal effect on compression
3286 ratio, but does mean this scaling step is used from time to
3287 time, enough to verify that it works.
3289 This means that bzip2-1.0.3 and later will only produce
3290 Huffman codes with a maximum length of 17 bits. However, in
3291 order to preserve backwards compatibility with bitstreams
3292 produced by versions pre-1.0.3, the decompressor must still
3293 handle lengths of up to 20. */
3295 for (i = 1; i <= alphaSize; i++) {
3296 j = weight[i] >> 8;
3297 j = 1 + (j / 2);
3298 weight[i] = j << 8;
3304 /*---------------------------------------------------*/
3305 void BZ2_hbAssignCodes ( Int32 *code,
3306 UChar *length,
3307 Int32 minLen,
3308 Int32 maxLen,
3309 Int32 alphaSize )
3311 Int32 n, vec, i;
3313 vec = 0;
3314 for (n = minLen; n <= maxLen; n++) {
3315 for (i = 0; i < alphaSize; i++)
3316 if (length[i] == n) { code[i] = vec; vec++; };
3317 vec <<= 1;
3322 /*---------------------------------------------------*/
3323 void BZ2_hbCreateDecodeTables ( Int32 *limit,
3324 Int32 *base,
3325 Int32 *perm,
3326 UChar *length,
3327 Int32 minLen,
3328 Int32 maxLen,
3329 Int32 alphaSize )
3331 Int32 pp, i, j, vec;
3333 pp = 0;
3334 for (i = minLen; i <= maxLen; i++)
3335 for (j = 0; j < alphaSize; j++)
3336 if (length[j] == i) { perm[pp] = j; pp++; };
3338 for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
3339 for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
3341 for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
3343 for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
3344 vec = 0;
3346 for (i = minLen; i <= maxLen; i++) {
3347 vec += (base[i+1] - base[i]);
3348 limit[i] = vec-1;
3349 vec <<= 1;
3351 for (i = minLen + 1; i <= maxLen; i++)
3352 base[i] = ((limit[i-1] + 1) << 1) - base[i];
3356 /*-------------------------------------------------------------*/
3357 /*--- end huffman.c ---*/
3358 /*-------------------------------------------------------------*/
3360 /*-------------------------------------------------------------*/
3361 /*--- Compression machinery (not incl block sorting) ---*/
3362 /*--- compress.c ---*/
3363 /*-------------------------------------------------------------*/
3365 /*--
3366 This file is a part of bzip2 and/or libbzip2, a program and
3367 library for lossless, block-sorting data compression.
3369 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
3371 Redistribution and use in source and binary forms, with or without
3372 modification, are permitted provided that the following conditions
3373 are met:
3375 1. Redistributions of source code must retain the above copyright
3376 notice, this list of conditions and the following disclaimer.
3378 2. The origin of this software must not be misrepresented; you must
3379 not claim that you wrote the original software. If you use this
3380 software in a product, an acknowledgment in the product
3381 documentation would be appreciated but is not required.
3383 3. Altered source versions must be plainly marked as such, and must
3384 not be misrepresented as being the original software.
3386 4. The name of the author may not be used to endorse or promote
3387 products derived from this software without specific prior written
3388 permission.
3390 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
3391 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
3392 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3393 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
3394 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3395 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
3396 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
3397 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
3398 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
3399 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
3400 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3402 Julian Seward, Cambridge, UK.
3403 jseward@bzip.org
3404 bzip2/libbzip2 version 1.0 of 21 March 2000
3406 This program is based on (at least) the work of:
3407 Mike Burrows
3408 David Wheeler
3409 Peter Fenwick
3410 Alistair Moffat
3411 Radford Neal
3412 Ian H. Witten
3413 Robert Sedgewick
3414 Jon L. Bentley
3416 For more information on these sources, see the manual.
3417 --*/
3419 /*--
3420 CHANGES
3421 ~~~~~~~
3422 0.9.0 -- original version.
3424 0.9.0a/b -- no changes in this file.
3426 0.9.0c
3427 * changed setting of nGroups in sendMTFValues() so as to
3428 do a bit better on small files
3429 --*/
3433 /*---------------------------------------------------*/
3434 /*--- Bit stream I/O ---*/
3435 /*---------------------------------------------------*/
3437 /*---------------------------------------------------*/
3438 void BZ2_bsInitWrite ( EState* s )
3440 s->bsLive = 0;
3441 s->bsBuff = 0;
3445 /*---------------------------------------------------*/
3446 static
3447 void bsFinishWrite ( EState* s )
3449 while (s->bsLive > 0) {
3450 s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
3451 s->numZ++;
3452 s->bsBuff <<= 8;
3453 s->bsLive -= 8;
3458 /*---------------------------------------------------*/
3459 #define bsNEEDW(nz) \
3461 while (s->bsLive >= 8) { \
3462 s->zbits[s->numZ] \
3463 = (UChar)(s->bsBuff >> 24); \
3464 s->numZ++; \
3465 s->bsBuff <<= 8; \
3466 s->bsLive -= 8; \
3471 /*---------------------------------------------------*/
3472 static
3473 __inline__
3474 void bsW ( EState* s, Int32 n, UInt32 v )
3476 bsNEEDW ( n );
3477 s->bsBuff |= (v << (32 - s->bsLive - n));
3478 s->bsLive += n;
3482 /*---------------------------------------------------*/
3483 static
3484 void bsPutUInt32 ( EState* s, UInt32 u )
3486 bsW ( s, 8, (u >> 24) & 0xffL );
3487 bsW ( s, 8, (u >> 16) & 0xffL );
3488 bsW ( s, 8, (u >> 8) & 0xffL );
3489 bsW ( s, 8, u & 0xffL );
3493 /*---------------------------------------------------*/
3494 static
3495 void bsPutUChar ( EState* s, UChar c )
3497 bsW( s, 8, (UInt32)c );
3501 /*---------------------------------------------------*/
3502 /*--- The back end proper ---*/
3503 /*---------------------------------------------------*/
3505 /*---------------------------------------------------*/
3506 static
3507 void makeMaps_e ( EState* s )
3509 Int32 i;
3510 s->nInUse = 0;
3511 for (i = 0; i < 256; i++)
3512 if (s->inUse[i]) {
3513 s->unseqToSeq[i] = s->nInUse;
3514 s->nInUse++;
3519 /*---------------------------------------------------*/
3520 static
3521 void generateMTFValues ( EState* s )
3523 UChar yy[256];
3524 Int32 i, j;
3525 Int32 zPend;
3526 Int32 wr;
3527 Int32 EOB;
3530 After sorting (eg, here),
3531 s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
3533 ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
3534 holds the original block data.
3536 The first thing to do is generate the MTF values,
3537 and put them in
3538 ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
3539 Because there are strictly fewer or equal MTF values
3540 than block values, ptr values in this area are overwritten
3541 with MTF values only when they are no longer needed.
3543 The final compressed bitstream is generated into the
3544 area starting at
3545 (UChar*) (&((UChar*)s->arr2)[s->nblock])
3547 These storage aliases are set up in bzCompressInit(),
3548 except for the last one, which is arranged in
3549 compressBlock().
3551 UInt32* ptr = s->ptr;
3552 UChar* block = s->block;
3553 UInt16* mtfv = s->mtfv;
3555 makeMaps_e ( s );
3556 EOB = s->nInUse+1;
3558 for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
3560 wr = 0;
3561 zPend = 0;
3562 for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
3564 for (i = 0; i < s->nblock; i++) {
3565 UChar ll_i;
3566 AssertD ( wr <= i, "generateMTFValues(1)" );
3567 j = ptr[i]-1; if (j < 0) j += s->nblock;
3568 ll_i = s->unseqToSeq[block[j]];
3569 AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
3571 if (yy[0] == ll_i) {
3572 zPend++;
3573 } else {
3575 if (zPend > 0) {
3576 zPend--;
3577 while (True) {
3578 if (zPend & 1) {
3579 mtfv[wr] = BZ_RUNB; wr++;
3580 s->mtfFreq[BZ_RUNB]++;
3581 } else {
3582 mtfv[wr] = BZ_RUNA; wr++;
3583 s->mtfFreq[BZ_RUNA]++;
3585 if (zPend < 2) break;
3586 zPend = (zPend - 2) / 2;
3588 zPend = 0;
3591 register UChar rtmp;
3592 register UChar* ryy_j;
3593 register UChar rll_i;
3594 rtmp = yy[1];
3595 yy[1] = yy[0];
3596 ryy_j = &(yy[1]);
3597 rll_i = ll_i;
3598 while ( rll_i != rtmp ) {
3599 register UChar rtmp2;
3600 ryy_j++;
3601 rtmp2 = rtmp;
3602 rtmp = *ryy_j;
3603 *ryy_j = rtmp2;
3605 yy[0] = rtmp;
3606 j = ryy_j - &(yy[0]);
3607 mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
3613 if (zPend > 0) {
3614 zPend--;
3615 while (True) {
3616 if (zPend & 1) {
3617 mtfv[wr] = BZ_RUNB; wr++;
3618 s->mtfFreq[BZ_RUNB]++;
3619 } else {
3620 mtfv[wr] = BZ_RUNA; wr++;
3621 s->mtfFreq[BZ_RUNA]++;
3623 if (zPend < 2) break;
3624 zPend = (zPend - 2) / 2;
3626 zPend = 0;
3629 mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
3631 s->nMTF = wr;
3635 /*---------------------------------------------------*/
3636 #define BZ_LESSER_ICOST 0
3637 #define BZ_GREATER_ICOST 15
3639 static
3640 void sendMTFValues ( EState* s )
3642 Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
3643 Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
3644 Int32 nGroups, nBytes;
3646 /*--
3647 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
3648 is a global since the decoder also needs it.
3650 Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
3651 Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
3652 are also globals only used in this proc.
3653 Made global to keep stack frame size small.
3654 --*/
3657 UInt16 cost[BZ_N_GROUPS];
3658 Int32 fave[BZ_N_GROUPS];
3660 UInt16* mtfv = s->mtfv;
3662 if (s->verbosity >= 3)
3663 VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
3664 "%d+2 syms in use\n",
3665 s->nblock, s->nMTF, s->nInUse );
3667 alphaSize = s->nInUse+2;
3668 for (t = 0; t < BZ_N_GROUPS; t++)
3669 for (v = 0; v < alphaSize; v++)
3670 s->len[t][v] = BZ_GREATER_ICOST;
3672 /*--- Decide how many coding tables to use ---*/
3673 AssertH ( s->nMTF > 0, 3001 );
3674 if (s->nMTF < 200) nGroups = 2; else
3675 if (s->nMTF < 600) nGroups = 3; else
3676 if (s->nMTF < 1200) nGroups = 4; else
3677 if (s->nMTF < 2400) nGroups = 5; else
3678 nGroups = 6;
3680 /*--- Generate an initial set of coding tables ---*/
3682 Int32 nPart, remF, tFreq, aFreq;
3684 nPart = nGroups;
3685 remF = s->nMTF;
3686 gs = 0;
3687 while (nPart > 0) {
3688 tFreq = remF / nPart;
3689 ge = gs-1;
3690 aFreq = 0;
3691 while (aFreq < tFreq && ge < alphaSize-1) {
3692 ge++;
3693 aFreq += s->mtfFreq[ge];
3696 if (ge > gs
3697 && nPart != nGroups && nPart != 1
3698 && ((nGroups-nPart) % 2 == 1)) {
3699 aFreq -= s->mtfFreq[ge];
3700 ge--;
3703 if (0 && s->verbosity >= 3)
3704 VPrintf5( " initial group %d, [%d .. %d], "
3705 "has %d syms (%4.1f%%)\n",
3706 nPart, gs, ge, aFreq,
3707 (100.0 * (float)aFreq) / (float)(s->nMTF) );
3709 for (v = 0; v < alphaSize; v++)
3710 if (v >= gs && v <= ge)
3711 s->len[nPart-1][v] = BZ_LESSER_ICOST; else
3712 s->len[nPart-1][v] = BZ_GREATER_ICOST;
3714 nPart--;
3715 gs = ge+1;
3716 remF -= aFreq;
3720 /*---
3721 Iterate up to BZ_N_ITERS times to improve the tables.
3722 ---*/
3723 for (iter = 0; iter < BZ_N_ITERS; iter++) {
3725 for (t = 0; t < nGroups; t++) fave[t] = 0;
3727 for (t = 0; t < nGroups; t++)
3728 for (v = 0; v < alphaSize; v++)
3729 s->rfreq[t][v] = 0;
3731 /*---
3732 Set up an auxiliary length table which is used to fast-track
3733 the common case (nGroups == 6).
3734 ---*/
3735 if (nGroups == 6) {
3736 for (v = 0; v < alphaSize; v++) {
3737 s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
3738 s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
3739 s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
3743 nSelectors = 0;
3744 totc = 0;
3745 gs = 0;
3746 while (True) {
3748 /*--- Set group start & end marks. --*/
3749 if (gs >= s->nMTF) break;
3750 ge = gs + BZ_G_SIZE - 1;
3751 if (ge >= s->nMTF) ge = s->nMTF-1;
3753 /*--
3754 Calculate the cost of this group as coded
3755 by each of the coding tables.
3756 --*/
3757 for (t = 0; t < nGroups; t++) cost[t] = 0;
3759 if (nGroups == 6 && 50 == ge-gs+1) {
3760 /*--- fast track the common case ---*/
3761 register UInt32 cost01, cost23, cost45;
3762 register UInt16 icv;
3763 cost01 = cost23 = cost45 = 0;
3765 # define BZ_ITER(nn) \
3766 icv = mtfv[gs+(nn)]; \
3767 cost01 += s->len_pack[icv][0]; \
3768 cost23 += s->len_pack[icv][1]; \
3769 cost45 += s->len_pack[icv][2]; \
3771 BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
3772 BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
3773 BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
3774 BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
3775 BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
3776 BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
3777 BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
3778 BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
3779 BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
3780 BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
3782 # undef BZ_ITER
3784 cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
3785 cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
3786 cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
3788 } else {
3789 /*--- slow version which correctly handles all situations ---*/
3790 for (i = gs; i <= ge; i++) {
3791 UInt16 icv = mtfv[i];
3792 for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
3796 /*--
3797 Find the coding table which is best for this group,
3798 and record its identity in the selector table.
3799 --*/
3800 bc = 999999999; bt = -1;
3801 for (t = 0; t < nGroups; t++)
3802 if (cost[t] < bc) { bc = cost[t]; bt = t; };
3803 totc += bc;
3804 fave[bt]++;
3805 s->selector[nSelectors] = bt;
3806 nSelectors++;
3808 /*--
3809 Increment the symbol frequencies for the selected table.
3810 --*/
3811 if (nGroups == 6 && 50 == ge-gs+1) {
3812 /*--- fast track the common case ---*/
3814 # define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
3816 BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
3817 BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
3818 BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
3819 BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
3820 BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
3821 BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
3822 BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
3823 BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
3824 BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
3825 BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
3827 # undef BZ_ITUR
3829 } else {
3830 /*--- slow version which correctly handles all situations ---*/
3831 for (i = gs; i <= ge; i++)
3832 s->rfreq[bt][ mtfv[i] ]++;
3835 gs = ge+1;
3837 if (s->verbosity >= 3) {
3838 VPrintf2 ( " pass %d: size is %d, grp uses are ",
3839 iter+1, totc/8 );
3840 for (t = 0; t < nGroups; t++)
3841 VPrintf1 ( "%d ", fave[t] );
3842 VPrintf0 ( "\n" );
3845 /*--
3846 Recompute the tables based on the accumulated frequencies.
3847 --*/
3848 /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
3849 comment in huffman.c for details. */
3850 for (t = 0; t < nGroups; t++)
3851 BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
3852 alphaSize, 17 /*20*/ );
3856 AssertH( nGroups < 8, 3002 );
3857 AssertH( nSelectors < 32768 &&
3858 nSelectors <= (2 + (900000 / BZ_G_SIZE)),
3859 3003 );
3862 /*--- Compute MTF values for the selectors. ---*/
3864 UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
3865 for (i = 0; i < nGroups; i++) pos[i] = i;
3866 for (i = 0; i < nSelectors; i++) {
3867 ll_i = s->selector[i];
3868 j = 0;
3869 tmp = pos[j];
3870 while ( ll_i != tmp ) {
3871 j++;
3872 tmp2 = tmp;
3873 tmp = pos[j];
3874 pos[j] = tmp2;
3876 pos[0] = tmp;
3877 s->selectorMtf[i] = j;
3881 /*--- Assign actual codes for the tables. --*/
3882 for (t = 0; t < nGroups; t++) {
3883 minLen = 32;
3884 maxLen = 0;
3885 for (i = 0; i < alphaSize; i++) {
3886 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
3887 if (s->len[t][i] < minLen) minLen = s->len[t][i];
3889 AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
3890 AssertH ( !(minLen < 1), 3005 );
3891 BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
3892 minLen, maxLen, alphaSize );
3895 /*--- Transmit the mapping table. ---*/
3897 Bool inUse16[16];
3898 for (i = 0; i < 16; i++) {
3899 inUse16[i] = False;
3900 for (j = 0; j < 16; j++)
3901 if (s->inUse[i * 16 + j]) inUse16[i] = True;
3904 nBytes = s->numZ;
3905 for (i = 0; i < 16; i++)
3906 if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
3908 for (i = 0; i < 16; i++)
3909 if (inUse16[i])
3910 for (j = 0; j < 16; j++) {
3911 if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
3914 if (s->verbosity >= 3)
3915 VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
3918 /*--- Now the selectors. ---*/
3919 nBytes = s->numZ;
3920 bsW ( s, 3, nGroups );
3921 bsW ( s, 15, nSelectors );
3922 for (i = 0; i < nSelectors; i++) {
3923 for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
3924 bsW(s,1,0);
3926 if (s->verbosity >= 3)
3927 VPrintf1( "selectors %d, ", s->numZ-nBytes );
3929 /*--- Now the coding tables. ---*/
3930 nBytes = s->numZ;
3932 for (t = 0; t < nGroups; t++) {
3933 Int32 curr = s->len[t][0];
3934 bsW ( s, 5, curr );
3935 for (i = 0; i < alphaSize; i++) {
3936 while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
3937 while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
3938 bsW ( s, 1, 0 );
3942 if (s->verbosity >= 3)
3943 VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
3945 /*--- And finally, the block data proper ---*/
3946 nBytes = s->numZ;
3947 selCtr = 0;
3948 gs = 0;
3949 while (True) {
3950 if (gs >= s->nMTF) break;
3951 ge = gs + BZ_G_SIZE - 1;
3952 if (ge >= s->nMTF) ge = s->nMTF-1;
3953 AssertH ( s->selector[selCtr] < nGroups, 3006 );
3955 if (nGroups == 6 && 50 == ge-gs+1) {
3956 /*--- fast track the common case ---*/
3957 UInt16 mtfv_i;
3958 UChar* s_len_sel_selCtr
3959 = &(s->len[s->selector[selCtr]][0]);
3960 Int32* s_code_sel_selCtr
3961 = &(s->code[s->selector[selCtr]][0]);
3963 # define BZ_ITAH(nn) \
3964 mtfv_i = mtfv[gs+(nn)]; \
3965 bsW ( s, \
3966 s_len_sel_selCtr[mtfv_i], \
3967 s_code_sel_selCtr[mtfv_i] )
3969 BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
3970 BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
3971 BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
3972 BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
3973 BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
3974 BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
3975 BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
3976 BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
3977 BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
3978 BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
3980 # undef BZ_ITAH
3982 } else {
3983 /*--- slow version which correctly handles all situations ---*/
3984 for (i = gs; i <= ge; i++) {
3985 bsW ( s,
3986 s->len [s->selector[selCtr]] [mtfv[i]],
3987 s->code [s->selector[selCtr]] [mtfv[i]] );
3992 gs = ge+1;
3993 selCtr++;
3995 AssertH( selCtr == nSelectors, 3007 );
3997 if (s->verbosity >= 3)
3998 VPrintf1( "codes %d\n", s->numZ-nBytes );
4002 /*---------------------------------------------------*/
4003 void BZ2_compressBlock ( EState* s, Bool is_last_block )
4005 if (s->nblock > 0) {
4007 BZ_FINALISE_CRC ( s->blockCRC );
4008 s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
4009 s->combinedCRC ^= s->blockCRC;
4010 if (s->blockNo > 1) s->numZ = 0;
4012 if (s->verbosity >= 2)
4013 VPrintf4( " block %d: crc = 0x%08x, "
4014 "combined CRC = 0x%08x, size = %d\n",
4015 s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
4017 BZ2_blockSort ( s );
4020 s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
4022 /*-- If this is the first block, create the stream header. --*/
4023 if (s->blockNo == 1) {
4024 BZ2_bsInitWrite ( s );
4025 bsPutUChar ( s, BZ_HDR_B );
4026 bsPutUChar ( s, BZ_HDR_Z );
4027 bsPutUChar ( s, BZ_HDR_h );
4028 bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
4031 if (s->nblock > 0) {
4033 bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
4034 bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
4035 bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
4037 /*-- Now the block's CRC, so it is in a known place. --*/
4038 bsPutUInt32 ( s, s->blockCRC );
4040 /*--
4041 Now a single bit indicating (non-)randomisation.
4042 As of version 0.9.5, we use a better sorting algorithm
4043 which makes randomisation unnecessary. So always set
4044 the randomised bit to 'no'. Of course, the decoder
4045 still needs to be able to handle randomised blocks
4046 so as to maintain backwards compatibility with
4047 older versions of bzip2.
4048 --*/
4049 bsW(s,1,0);
4051 bsW ( s, 24, s->origPtr );
4052 generateMTFValues ( s );
4053 sendMTFValues ( s );
4057 /*-- If this is the last block, add the stream trailer. --*/
4058 if (is_last_block) {
4060 bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
4061 bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
4062 bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
4063 bsPutUInt32 ( s, s->combinedCRC );
4064 if (s->verbosity >= 2)
4065 VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC );
4066 bsFinishWrite ( s );
4071 /*-------------------------------------------------------------*/
4072 /*--- end compress.c ---*/
4073 /*-------------------------------------------------------------*/
4076 /*-------------------------------------------------------------*/
4077 /*--- Table for randomising repetitive blocks ---*/
4078 /*--- randtable.c ---*/
4079 /*-------------------------------------------------------------*/
4081 /*--
4082 This file is a part of bzip2 and/or libbzip2, a program and
4083 library for lossless, block-sorting data compression.
4085 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
4087 Redistribution and use in source and binary forms, with or without
4088 modification, are permitted provided that the following conditions
4089 are met:
4091 1. Redistributions of source code must retain the above copyright
4092 notice, this list of conditions and the following disclaimer.
4094 2. The origin of this software must not be misrepresented; you must
4095 not claim that you wrote the original software. If you use this
4096 software in a product, an acknowledgment in the product
4097 documentation would be appreciated but is not required.
4099 3. Altered source versions must be plainly marked as such, and must
4100 not be misrepresented as being the original software.
4102 4. The name of the author may not be used to endorse or promote
4103 products derived from this software without specific prior written
4104 permission.
4106 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
4107 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
4108 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4109 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
4110 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4111 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
4112 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
4113 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
4114 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
4115 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
4116 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4118 Julian Seward, Cambridge, UK.
4119 jseward@bzip.org
4120 bzip2/libbzip2 version 1.0 of 21 March 2000
4122 This program is based on (at least) the work of:
4123 Mike Burrows
4124 David Wheeler
4125 Peter Fenwick
4126 Alistair Moffat
4127 Radford Neal
4128 Ian H. Witten
4129 Robert Sedgewick
4130 Jon L. Bentley
4132 For more information on these sources, see the manual.
4133 --*/
4138 /*---------------------------------------------*/
4139 Int32 BZ2_rNums[512] = {
4140 619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
4141 985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
4142 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
4143 419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
4144 878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
4145 862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
4146 150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
4147 170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
4148 73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
4149 909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
4150 641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
4151 161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
4152 382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
4153 98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
4154 227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
4155 469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
4156 184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
4157 715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
4158 951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
4159 652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
4160 645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
4161 609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
4162 653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
4163 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
4164 170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
4165 857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
4166 669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
4167 944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
4168 344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
4169 897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
4170 433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
4171 686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
4172 946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
4173 978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
4174 680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
4175 707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
4176 297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
4177 134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
4178 343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
4179 140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
4180 170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
4181 369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
4182 804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
4183 896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
4184 661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
4185 768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
4186 61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
4187 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
4188 780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
4189 920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
4190 645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
4191 936, 638
4195 /*-------------------------------------------------------------*/
4196 /*--- end randtable.c ---*/
4197 /*-------------------------------------------------------------*/
4199 /*-------------------------------------------------------------*/
4200 /*--- Table for doing CRCs ---*/
4201 /*--- crctable.c ---*/
4202 /*-------------------------------------------------------------*/
4204 /*--
4205 This file is a part of bzip2 and/or libbzip2, a program and
4206 library for lossless, block-sorting data compression.
4208 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
4210 Redistribution and use in source and binary forms, with or without
4211 modification, are permitted provided that the following conditions
4212 are met:
4214 1. Redistributions of source code must retain the above copyright
4215 notice, this list of conditions and the following disclaimer.
4217 2. The origin of this software must not be misrepresented; you must
4218 not claim that you wrote the original software. If you use this
4219 software in a product, an acknowledgment in the product
4220 documentation would be appreciated but is not required.
4222 3. Altered source versions must be plainly marked as such, and must
4223 not be misrepresented as being the original software.
4225 4. The name of the author may not be used to endorse or promote
4226 products derived from this software without specific prior written
4227 permission.
4229 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
4230 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
4231 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4232 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
4233 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4234 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
4235 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
4236 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
4237 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
4238 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
4239 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4241 Julian Seward, Cambridge, UK.
4242 jseward@bzip.org
4243 bzip2/libbzip2 version 1.0 of 21 March 2000
4245 This program is based on (at least) the work of:
4246 Mike Burrows
4247 David Wheeler
4248 Peter Fenwick
4249 Alistair Moffat
4250 Radford Neal
4251 Ian H. Witten
4252 Robert Sedgewick
4253 Jon L. Bentley
4255 For more information on these sources, see the manual.
4256 --*/
4262 /*--
4263 I think this is an implementation of the AUTODIN-II,
4264 Ethernet & FDDI 32-bit CRC standard. Vaguely derived
4265 from code by Rob Warnock, in Section 51 of the
4266 comp.compression FAQ.
4267 --*/
4269 UInt32 BZ2_crc32Table[256] = {
4271 /*-- Ugly, innit? --*/
4273 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
4274 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
4275 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
4276 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
4277 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
4278 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
4279 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
4280 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
4281 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
4282 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
4283 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
4284 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
4285 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
4286 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
4287 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
4288 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
4289 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
4290 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
4291 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
4292 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
4293 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
4294 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
4295 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
4296 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
4297 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
4298 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
4299 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
4300 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
4301 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
4302 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
4303 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
4304 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
4305 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
4306 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
4307 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
4308 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
4309 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
4310 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
4311 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
4312 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
4313 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
4314 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
4315 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
4316 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
4317 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
4318 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
4319 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
4320 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
4321 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
4322 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
4323 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
4324 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
4325 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
4326 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
4327 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
4328 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
4329 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
4330 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
4331 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
4332 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
4333 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
4334 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
4335 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
4336 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
4340 /*-------------------------------------------------------------*/
4341 /*--- end crctable.c ---*/
4342 /*-------------------------------------------------------------*/
4344 /*-------------------------------------------------------------*/
4345 /*--- Library top-level functions. ---*/
4346 /*--- bzlib.c ---*/
4347 /*-------------------------------------------------------------*/
4349 /*--
4350 This file is a part of bzip2 and/or libbzip2, a program and
4351 library for lossless, block-sorting data compression.
4353 Copyright (C) 1996-2004 Julian R Seward. All rights reserved.
4355 Redistribution and use in source and binary forms, with or without
4356 modification, are permitted provided that the following conditions
4357 are met:
4359 1. Redistributions of source code must retain the above copyright
4360 notice, this list of conditions and the following disclaimer.
4362 2. The origin of this software must not be misrepresented; you must
4363 not claim that you wrote the original software. If you use this
4364 software in a product, an acknowledgment in the product
4365 documentation would be appreciated but is not required.
4367 3. Altered source versions must be plainly marked as such, and must
4368 not be misrepresented as being the original software.
4370 4. The name of the author may not be used to endorse or promote
4371 products derived from this software without specific prior written
4372 permission.
4374 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
4375 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
4376 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4377 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
4378 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4379 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
4380 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
4381 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
4382 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
4383 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
4384 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4386 Julian Seward, Cambridge, UK.
4387 jseward@bzip.org
4388 bzip2/libbzip2 version 1.0 of 21 March 2000
4390 This program is based on (at least) the work of:
4391 Mike Burrows
4392 David Wheeler
4393 Peter Fenwick
4394 Alistair Moffat
4395 Radford Neal
4396 Ian H. Witten
4397 Robert Sedgewick
4398 Jon L. Bentley
4400 For more information on these sources, see the manual.
4401 --*/
4403 /*--
4404 CHANGES
4405 ~~~~~~~
4406 0.9.0 -- original version.
4408 0.9.0a/b -- no changes in this file.
4410 0.9.0c
4411 * made zero-length BZ_FLUSH work correctly in bzCompress().
4412 * fixed bzWrite/bzRead to ignore zero-length requests.
4413 * fixed bzread to correctly handle read requests after EOF.
4414 * wrong parameter order in call to bzDecompressInit in
4415 bzBuffToBuffDecompress. Fixed.
4416 --*/
4420 /*---------------------------------------------------*/
4421 /*--- Compression stuff ---*/
4422 /*---------------------------------------------------*/
4425 /*---------------------------------------------------*/
4426 void BZ2_bz__AssertH__fail ( int errcode )
4428 vexxx_printf("BZ2_bz__AssertH__fail(%d) called, exiting\n", errcode);
4429 (*serviceFn)(0,0);
4432 void bz_internal_error ( int errcode )
4434 vexxx_printf("bz_internal_error called, exiting\n", errcode);
4435 (*serviceFn)(0,0);
4438 /*---------------------------------------------------*/
4439 static
4440 int bz_config_ok ( void )
4442 if (sizeof(int) != 4) return 0;
4443 if (sizeof(short) != 2) return 0;
4444 if (sizeof(char) != 1) return 0;
4445 return 1;
4449 /*---------------------------------------------------*/
4450 static
4451 void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
4453 void* v = (void*) (*serviceFn)(2, items * size );
4454 return v;
4457 static
4458 void default_bzfree ( void* opaque, void* addr )
4460 if (addr != NULL) (*serviceFn)( 3, (HWord)addr );
4464 /*---------------------------------------------------*/
4465 static
4466 void prepare_new_block ( EState* s )
4468 Int32 i;
4469 s->nblock = 0;
4470 s->numZ = 0;
4471 s->state_out_pos = 0;
4472 BZ_INITIALISE_CRC ( s->blockCRC );
4473 for (i = 0; i < 256; i++) s->inUse[i] = False;
4474 s->blockNo++;
4478 /*---------------------------------------------------*/
4479 static
4480 void init_RL ( EState* s )
4482 s->state_in_ch = 256;
4483 s->state_in_len = 0;
4487 static
4488 Bool isempty_RL ( EState* s )
4490 if (s->state_in_ch < 256 && s->state_in_len > 0)
4491 return False; else
4492 return True;
4496 /*---------------------------------------------------*/
4497 int BZ_API(BZ2_bzCompressInit)
4498 ( bz_stream* strm,
4499 int blockSize100k,
4500 int verbosity,
4501 int workFactor )
4503 Int32 n;
4504 EState* s;
4506 if (!bz_config_ok()) return BZ_CONFIG_ERROR;
4508 if (strm == NULL ||
4509 blockSize100k < 1 || blockSize100k > 9 ||
4510 workFactor < 0 || workFactor > 250)
4511 return BZ_PARAM_ERROR;
4513 if (workFactor == 0) workFactor = 30;
4514 if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
4515 if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
4517 s = BZALLOC( sizeof(EState) );
4518 if (s == NULL) return BZ_MEM_ERROR;
4519 s->strm = strm;
4521 s->arr1 = NULL;
4522 s->arr2 = NULL;
4523 s->ftab = NULL;
4525 n = 100000 * blockSize100k;
4526 s->arr1 = BZALLOC( n * sizeof(UInt32) );
4527 s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) );
4528 s->ftab = BZALLOC( 65537 * sizeof(UInt32) );
4530 if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) {
4531 if (s->arr1 != NULL) BZFREE(s->arr1);
4532 if (s->arr2 != NULL) BZFREE(s->arr2);
4533 if (s->ftab != NULL) BZFREE(s->ftab);
4534 if (s != NULL) BZFREE(s);
4535 return BZ_MEM_ERROR;
4538 s->blockNo = 0;
4539 s->state = BZ_S_INPUT;
4540 s->mode = BZ_M_RUNNING;
4541 s->combinedCRC = 0;
4542 s->blockSize100k = blockSize100k;
4543 s->nblockMAX = 100000 * blockSize100k - 19;
4544 s->verbosity = verbosity;
4545 s->workFactor = workFactor;
4547 s->block = (UChar*)s->arr2;
4548 s->mtfv = (UInt16*)s->arr1;
4549 s->zbits = NULL;
4550 s->ptr = (UInt32*)s->arr1;
4552 strm->state = s;
4553 strm->total_in_lo32 = 0;
4554 strm->total_in_hi32 = 0;
4555 strm->total_out_lo32 = 0;
4556 strm->total_out_hi32 = 0;
4557 init_RL ( s );
4558 prepare_new_block ( s );
4559 return BZ_OK;
4563 /*---------------------------------------------------*/
4564 static
4565 void add_pair_to_block ( EState* s )
4567 Int32 i;
4568 UChar ch = (UChar)(s->state_in_ch);
4569 for (i = 0; i < s->state_in_len; i++) {
4570 BZ_UPDATE_CRC( s->blockCRC, ch );
4572 s->inUse[s->state_in_ch] = True;
4573 switch (s->state_in_len) {
4574 case 1:
4575 s->block[s->nblock] = (UChar)ch; s->nblock++;
4576 break;
4577 case 2:
4578 s->block[s->nblock] = (UChar)ch; s->nblock++;
4579 s->block[s->nblock] = (UChar)ch; s->nblock++;
4580 break;
4581 case 3:
4582 s->block[s->nblock] = (UChar)ch; s->nblock++;
4583 s->block[s->nblock] = (UChar)ch; s->nblock++;
4584 s->block[s->nblock] = (UChar)ch; s->nblock++;
4585 break;
4586 default:
4587 s->inUse[s->state_in_len-4] = True;
4588 s->block[s->nblock] = (UChar)ch; s->nblock++;
4589 s->block[s->nblock] = (UChar)ch; s->nblock++;
4590 s->block[s->nblock] = (UChar)ch; s->nblock++;
4591 s->block[s->nblock] = (UChar)ch; s->nblock++;
4592 s->block[s->nblock] = ((UChar)(s->state_in_len-4));
4593 s->nblock++;
4594 break;
4599 /*---------------------------------------------------*/
4600 static
4601 void flush_RL ( EState* s )
4603 if (s->state_in_ch < 256) add_pair_to_block ( s );
4604 init_RL ( s );
4608 /*---------------------------------------------------*/
4609 #define ADD_CHAR_TO_BLOCK(zs,zchh0) \
4611 UInt32 zchh = (UInt32)(zchh0); \
4612 /*-- fast track the common case --*/ \
4613 if (zchh != zs->state_in_ch && \
4614 zs->state_in_len == 1) { \
4615 UChar ch = (UChar)(zs->state_in_ch); \
4616 BZ_UPDATE_CRC( zs->blockCRC, ch ); \
4617 zs->inUse[zs->state_in_ch] = True; \
4618 zs->block[zs->nblock] = (UChar)ch; \
4619 zs->nblock++; \
4620 zs->state_in_ch = zchh; \
4622 else \
4623 /*-- general, uncommon cases --*/ \
4624 if (zchh != zs->state_in_ch || \
4625 zs->state_in_len == 255) { \
4626 if (zs->state_in_ch < 256) \
4627 add_pair_to_block ( zs ); \
4628 zs->state_in_ch = zchh; \
4629 zs->state_in_len = 1; \
4630 } else { \
4631 zs->state_in_len++; \
4636 /*---------------------------------------------------*/
4637 static
4638 Bool copy_input_until_stop ( EState* s )
4640 Bool progress_in = False;
4642 if (s->mode == BZ_M_RUNNING) {
4644 /*-- fast track the common case --*/
4645 while (True) {
4646 /*-- block full? --*/
4647 if (s->nblock >= s->nblockMAX) break;
4648 /*-- no input? --*/
4649 if (s->strm->avail_in == 0) break;
4650 progress_in = True;
4651 ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
4652 s->strm->next_in++;
4653 s->strm->avail_in--;
4654 s->strm->total_in_lo32++;
4655 if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
4658 } else {
4660 /*-- general, uncommon case --*/
4661 while (True) {
4662 /*-- block full? --*/
4663 if (s->nblock >= s->nblockMAX) break;
4664 /*-- no input? --*/
4665 if (s->strm->avail_in == 0) break;
4666 /*-- flush/finish end? --*/
4667 if (s->avail_in_expect == 0) break;
4668 progress_in = True;
4669 ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
4670 s->strm->next_in++;
4671 s->strm->avail_in--;
4672 s->strm->total_in_lo32++;
4673 if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
4674 s->avail_in_expect--;
4677 return progress_in;
4681 /*---------------------------------------------------*/
4682 static
4683 Bool copy_output_until_stop ( EState* s )
4685 Bool progress_out = False;
4687 while (True) {
4689 /*-- no output space? --*/
4690 if (s->strm->avail_out == 0) break;
4692 /*-- block done? --*/
4693 if (s->state_out_pos >= s->numZ) break;
4695 progress_out = True;
4696 *(s->strm->next_out) = s->zbits[s->state_out_pos];
4697 s->state_out_pos++;
4698 s->strm->avail_out--;
4699 s->strm->next_out++;
4700 s->strm->total_out_lo32++;
4701 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
4704 return progress_out;
4708 /*---------------------------------------------------*/
4709 static
4710 Bool handle_compress ( bz_stream* strm )
4712 Bool progress_in = False;
4713 Bool progress_out = False;
4714 EState* s = strm->state;
4716 while (True) {
4718 if (s->state == BZ_S_OUTPUT) {
4719 progress_out |= copy_output_until_stop ( s );
4720 if (s->state_out_pos < s->numZ) break;
4721 if (s->mode == BZ_M_FINISHING &&
4722 s->avail_in_expect == 0 &&
4723 isempty_RL(s)) break;
4724 prepare_new_block ( s );
4725 s->state = BZ_S_INPUT;
4726 if (s->mode == BZ_M_FLUSHING &&
4727 s->avail_in_expect == 0 &&
4728 isempty_RL(s)) break;
4731 if (s->state == BZ_S_INPUT) {
4732 progress_in |= copy_input_until_stop ( s );
4733 if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
4734 flush_RL ( s );
4735 BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
4736 s->state = BZ_S_OUTPUT;
4738 else
4739 if (s->nblock >= s->nblockMAX) {
4740 BZ2_compressBlock ( s, False );
4741 s->state = BZ_S_OUTPUT;
4743 else
4744 if (s->strm->avail_in == 0) {
4745 break;
4751 return progress_in || progress_out;
4755 /*---------------------------------------------------*/
4756 int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
4758 Bool progress;
4759 EState* s;
4760 if (strm == NULL) return BZ_PARAM_ERROR;
4761 s = strm->state;
4762 if (s == NULL) return BZ_PARAM_ERROR;
4763 if (s->strm != strm) return BZ_PARAM_ERROR;
4765 preswitch:
4766 switch (s->mode) {
4768 case BZ_M_IDLE:
4769 return BZ_SEQUENCE_ERROR;
4771 case BZ_M_RUNNING:
4772 if (action == BZ_RUN) {
4773 progress = handle_compress ( strm );
4774 return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;
4776 else
4777 if (action == BZ_FLUSH) {
4778 s->avail_in_expect = strm->avail_in;
4779 s->mode = BZ_M_FLUSHING;
4780 goto preswitch;
4782 else
4783 if (action == BZ_FINISH) {
4784 s->avail_in_expect = strm->avail_in;
4785 s->mode = BZ_M_FINISHING;
4786 goto preswitch;
4788 else
4789 return BZ_PARAM_ERROR;
4791 case BZ_M_FLUSHING:
4792 if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
4793 if (s->avail_in_expect != s->strm->avail_in)
4794 return BZ_SEQUENCE_ERROR;
4795 progress = handle_compress ( strm );
4796 if (s->avail_in_expect > 0 || !isempty_RL(s) ||
4797 s->state_out_pos < s->numZ) return BZ_FLUSH_OK;
4798 s->mode = BZ_M_RUNNING;
4799 return BZ_RUN_OK;
4801 case BZ_M_FINISHING:
4802 if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
4803 if (s->avail_in_expect != s->strm->avail_in)
4804 return BZ_SEQUENCE_ERROR;
4805 progress = handle_compress ( strm );
4806 if (!progress) return BZ_SEQUENCE_ERROR;
4807 if (s->avail_in_expect > 0 || !isempty_RL(s) ||
4808 s->state_out_pos < s->numZ) return BZ_FINISH_OK;
4809 s->mode = BZ_M_IDLE;
4810 return BZ_STREAM_END;
4812 return BZ_OK; /*--not reached--*/
4816 /*---------------------------------------------------*/
4817 int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
4819 EState* s;
4820 if (strm == NULL) return BZ_PARAM_ERROR;
4821 s = strm->state;
4822 if (s == NULL) return BZ_PARAM_ERROR;
4823 if (s->strm != strm) return BZ_PARAM_ERROR;
4825 if (s->arr1 != NULL) BZFREE(s->arr1);
4826 if (s->arr2 != NULL) BZFREE(s->arr2);
4827 if (s->ftab != NULL) BZFREE(s->ftab);
4828 BZFREE(strm->state);
4830 strm->state = NULL;
4832 return BZ_OK;
4836 /*---------------------------------------------------*/
4837 /*--- Decompression stuff ---*/
4838 /*---------------------------------------------------*/
4840 /*---------------------------------------------------*/
4841 int BZ_API(BZ2_bzDecompressInit)
4842 ( bz_stream* strm,
4843 int verbosity,
4844 int small )
4846 DState* s;
4848 if (!bz_config_ok()) return BZ_CONFIG_ERROR;
4850 if (strm == NULL) return BZ_PARAM_ERROR;
4851 if (small != 0 && small != 1) return BZ_PARAM_ERROR;
4852 if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;
4854 if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
4855 if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
4857 s = BZALLOC( sizeof(DState) );
4858 if (s == NULL) return BZ_MEM_ERROR;
4859 s->strm = strm;
4860 strm->state = s;
4861 s->state = BZ_X_MAGIC_1;
4862 s->bsLive = 0;
4863 s->bsBuff = 0;
4864 s->calculatedCombinedCRC = 0;
4865 strm->total_in_lo32 = 0;
4866 strm->total_in_hi32 = 0;
4867 strm->total_out_lo32 = 0;
4868 strm->total_out_hi32 = 0;
4869 s->smallDecompress = (Bool)small;
4870 s->ll4 = NULL;
4871 s->ll16 = NULL;
4872 s->tt = NULL;
4873 s->currBlockNo = 0;
4874 s->verbosity = verbosity;
4876 return BZ_OK;
4880 /*---------------------------------------------------*/
4881 /* Return True iff data corruption is discovered.
4882 Returns False if there is no problem.
4884 static
4885 Bool unRLE_obuf_to_output_FAST ( DState* s )
4887 UChar k1;
4889 if (s->blockRandomised) {
4891 while (True) {
4892 /* try to finish existing run */
4893 while (True) {
4894 if (s->strm->avail_out == 0) return False;
4895 if (s->state_out_len == 0) break;
4896 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
4897 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
4898 s->state_out_len--;
4899 s->strm->next_out++;
4900 s->strm->avail_out--;
4901 s->strm->total_out_lo32++;
4902 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
4905 /* can a new run be started? */
4906 if (s->nblock_used == s->save_nblock+1) return False;
4908 /* Only caused by corrupt data stream? */
4909 if (s->nblock_used > s->save_nblock+1)
4910 return True;
4912 s->state_out_len = 1;
4913 s->state_out_ch = s->k0;
4914 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
4915 k1 ^= BZ_RAND_MASK; s->nblock_used++;
4916 if (s->nblock_used == s->save_nblock+1) continue;
4917 if (k1 != s->k0) { s->k0 = k1; continue; };
4919 s->state_out_len = 2;
4920 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
4921 k1 ^= BZ_RAND_MASK; s->nblock_used++;
4922 if (s->nblock_used == s->save_nblock+1) continue;
4923 if (k1 != s->k0) { s->k0 = k1; continue; };
4925 s->state_out_len = 3;
4926 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
4927 k1 ^= BZ_RAND_MASK; s->nblock_used++;
4928 if (s->nblock_used == s->save_nblock+1) continue;
4929 if (k1 != s->k0) { s->k0 = k1; continue; };
4931 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
4932 k1 ^= BZ_RAND_MASK; s->nblock_used++;
4933 s->state_out_len = ((Int32)k1) + 4;
4934 BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK;
4935 s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
4938 } else {
4940 /* restore */
4941 UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC;
4942 UChar c_state_out_ch = s->state_out_ch;
4943 Int32 c_state_out_len = s->state_out_len;
4944 Int32 c_nblock_used = s->nblock_used;
4945 Int32 c_k0 = s->k0;
4946 UInt32* c_tt = s->tt;
4947 UInt32 c_tPos = s->tPos;
4948 char* cs_next_out = s->strm->next_out;
4949 unsigned int cs_avail_out = s->strm->avail_out;
4950 /* end restore */
4952 UInt32 avail_out_INIT = cs_avail_out;
4953 Int32 s_save_nblockPP = s->save_nblock+1;
4954 unsigned int total_out_lo32_old;
4956 while (True) {
4958 /* try to finish existing run */
4959 if (c_state_out_len > 0) {
4960 while (True) {
4961 if (cs_avail_out == 0) goto return_notr;
4962 if (c_state_out_len == 1) break;
4963 *( (UChar*)(cs_next_out) ) = c_state_out_ch;
4964 BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
4965 c_state_out_len--;
4966 cs_next_out++;
4967 cs_avail_out--;
4969 s_state_out_len_eq_one:
4971 if (cs_avail_out == 0) {
4972 c_state_out_len = 1; goto return_notr;
4974 *( (UChar*)(cs_next_out) ) = c_state_out_ch;
4975 BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
4976 cs_next_out++;
4977 cs_avail_out--;
4980 /* Only caused by corrupt data stream? */
4981 if (c_nblock_used > s_save_nblockPP)
4982 return True;
4984 /* can a new run be started? */
4985 if (c_nblock_used == s_save_nblockPP) {
4986 c_state_out_len = 0; goto return_notr;
4988 c_state_out_ch = c_k0;
4989 BZ_GET_FAST_C(k1); c_nblock_used++;
4990 if (k1 != c_k0) {
4991 c_k0 = k1; goto s_state_out_len_eq_one;
4993 if (c_nblock_used == s_save_nblockPP)
4994 goto s_state_out_len_eq_one;
4996 c_state_out_len = 2;
4997 BZ_GET_FAST_C(k1); c_nblock_used++;
4998 if (c_nblock_used == s_save_nblockPP) continue;
4999 if (k1 != c_k0) { c_k0 = k1; continue; };
5001 c_state_out_len = 3;
5002 BZ_GET_FAST_C(k1); c_nblock_used++;
5003 if (c_nblock_used == s_save_nblockPP) continue;
5004 if (k1 != c_k0) { c_k0 = k1; continue; };
5006 BZ_GET_FAST_C(k1); c_nblock_used++;
5007 c_state_out_len = ((Int32)k1) + 4;
5008 BZ_GET_FAST_C(c_k0); c_nblock_used++;
5011 return_notr:
5012 total_out_lo32_old = s->strm->total_out_lo32;
5013 s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
5014 if (s->strm->total_out_lo32 < total_out_lo32_old)
5015 s->strm->total_out_hi32++;
5017 /* save */
5018 s->calculatedBlockCRC = c_calculatedBlockCRC;
5019 s->state_out_ch = c_state_out_ch;
5020 s->state_out_len = c_state_out_len;
5021 s->nblock_used = c_nblock_used;
5022 s->k0 = c_k0;
5023 s->tt = c_tt;
5024 s->tPos = c_tPos;
5025 s->strm->next_out = cs_next_out;
5026 s->strm->avail_out = cs_avail_out;
5027 /* end save */
5029 return False;
5034 /*---------------------------------------------------*/
5035 /* Return True iff data corruption is discovered.
5036 Returns False if there is no problem.
5038 static
5039 Bool unRLE_obuf_to_output_SMALL ( DState* s )
5041 UChar k1;
5043 if (s->blockRandomised) {
5045 while (True) {
5046 /* try to finish existing run */
5047 while (True) {
5048 if (s->strm->avail_out == 0) return False;
5049 if (s->state_out_len == 0) break;
5050 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
5051 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
5052 s->state_out_len--;
5053 s->strm->next_out++;
5054 s->strm->avail_out--;
5055 s->strm->total_out_lo32++;
5056 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
5059 /* can a new run be started? */
5060 if (s->nblock_used == s->save_nblock+1) return False;
5062 /* Only caused by corrupt data stream? */
5063 if (s->nblock_used > s->save_nblock+1)
5064 return True;
5066 s->state_out_len = 1;
5067 s->state_out_ch = s->k0;
5068 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
5069 k1 ^= BZ_RAND_MASK; s->nblock_used++;
5070 if (s->nblock_used == s->save_nblock+1) continue;
5071 if (k1 != s->k0) { s->k0 = k1; continue; };
5073 s->state_out_len = 2;
5074 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
5075 k1 ^= BZ_RAND_MASK; s->nblock_used++;
5076 if (s->nblock_used == s->save_nblock+1) continue;
5077 if (k1 != s->k0) { s->k0 = k1; continue; };
5079 s->state_out_len = 3;
5080 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
5081 k1 ^= BZ_RAND_MASK; s->nblock_used++;
5082 if (s->nblock_used == s->save_nblock+1) continue;
5083 if (k1 != s->k0) { s->k0 = k1; continue; };
5085 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
5086 k1 ^= BZ_RAND_MASK; s->nblock_used++;
5087 s->state_out_len = ((Int32)k1) + 4;
5088 BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK;
5089 s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
5092 } else {
5094 while (True) {
5095 /* try to finish existing run */
5096 while (True) {
5097 if (s->strm->avail_out == 0) return False;
5098 if (s->state_out_len == 0) break;
5099 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
5100 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
5101 s->state_out_len--;
5102 s->strm->next_out++;
5103 s->strm->avail_out--;
5104 s->strm->total_out_lo32++;
5105 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
5108 /* can a new run be started? */
5109 if (s->nblock_used == s->save_nblock+1) return False;
5111 /* Only caused by corrupt data stream? */
5112 if (s->nblock_used > s->save_nblock+1)
5113 return True;
5115 s->state_out_len = 1;
5116 s->state_out_ch = s->k0;
5117 BZ_GET_SMALL(k1); s->nblock_used++;
5118 if (s->nblock_used == s->save_nblock+1) continue;
5119 if (k1 != s->k0) { s->k0 = k1; continue; };
5121 s->state_out_len = 2;
5122 BZ_GET_SMALL(k1); s->nblock_used++;
5123 if (s->nblock_used == s->save_nblock+1) continue;
5124 if (k1 != s->k0) { s->k0 = k1; continue; };
5126 s->state_out_len = 3;
5127 BZ_GET_SMALL(k1); s->nblock_used++;
5128 if (s->nblock_used == s->save_nblock+1) continue;
5129 if (k1 != s->k0) { s->k0 = k1; continue; };
5131 BZ_GET_SMALL(k1); s->nblock_used++;
5132 s->state_out_len = ((Int32)k1) + 4;
5133 BZ_GET_SMALL(s->k0); s->nblock_used++;
5140 /*---------------------------------------------------*/
5141 int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
5143 Bool corrupt;
5144 DState* s;
5145 if (strm == NULL) return BZ_PARAM_ERROR;
5146 s = strm->state;
5147 if (s == NULL) return BZ_PARAM_ERROR;
5148 if (s->strm != strm) return BZ_PARAM_ERROR;
5150 while (True) {
5151 if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR;
5152 if (s->state == BZ_X_OUTPUT) {
5153 if (s->smallDecompress)
5154 corrupt = unRLE_obuf_to_output_SMALL ( s ); else
5155 corrupt = unRLE_obuf_to_output_FAST ( s );
5156 if (corrupt) return BZ_DATA_ERROR;
5157 if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {
5158 BZ_FINALISE_CRC ( s->calculatedBlockCRC );
5159 if (s->verbosity >= 3)
5160 VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC,
5161 s->calculatedBlockCRC );
5162 if (s->verbosity >= 2) VPrintf0 ( "]" );
5163 if (s->calculatedBlockCRC != s->storedBlockCRC)
5164 return BZ_DATA_ERROR;
5165 s->calculatedCombinedCRC
5166 = (s->calculatedCombinedCRC << 1) |
5167 (s->calculatedCombinedCRC >> 31);
5168 s->calculatedCombinedCRC ^= s->calculatedBlockCRC;
5169 s->state = BZ_X_BLKHDR_1;
5170 } else {
5171 return BZ_OK;
5174 if (s->state >= BZ_X_MAGIC_1) {
5175 Int32 r = BZ2_decompress ( s );
5176 if (r == BZ_STREAM_END) {
5177 if (s->verbosity >= 3)
5178 VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x",
5179 s->storedCombinedCRC, s->calculatedCombinedCRC );
5180 if (s->calculatedCombinedCRC != s->storedCombinedCRC)
5181 return BZ_DATA_ERROR;
5182 return r;
5184 if (s->state != BZ_X_OUTPUT) return r;
5188 AssertH ( 0, 6001 );
5190 return 0; /*NOTREACHED*/
5194 /*---------------------------------------------------*/
5195 int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm )
5197 DState* s;
5198 if (strm == NULL) return BZ_PARAM_ERROR;
5199 s = strm->state;
5200 if (s == NULL) return BZ_PARAM_ERROR;
5201 if (s->strm != strm) return BZ_PARAM_ERROR;
5203 if (s->tt != NULL) BZFREE(s->tt);
5204 if (s->ll16 != NULL) BZFREE(s->ll16);
5205 if (s->ll4 != NULL) BZFREE(s->ll4);
5207 BZFREE(strm->state);
5208 strm->state = NULL;
5210 return BZ_OK;
5214 #ifndef BZ_NO_STDIO
5215 /*---------------------------------------------------*/
5216 /*--- File I/O stuff ---*/
5217 /*---------------------------------------------------*/
5219 #define BZ_SETERR(eee) \
5221 if (bzerror != NULL) *bzerror = eee; \
5222 if (bzf != NULL) bzf->lastErr = eee; \
5225 typedef
5226 struct {
5227 FILE* handle;
5228 Char buf[BZ_MAX_UNUSED];
5229 Int32 bufN;
5230 Bool writing;
5231 bz_stream strm;
5232 Int32 lastErr;
5233 Bool initialisedOk;
5235 bzFile;
5238 /*---------------------------------------------*/
5239 static Bool myfeof ( FILE* f )
5241 Int32 c = fgetc ( f );
5242 if (c == EOF) return True;
5243 ungetc ( c, f );
5244 return False;
5248 /*---------------------------------------------------*/
5249 BZFILE* BZ_API(BZ2_bzWriteOpen)
5250 ( int* bzerror,
5251 FILE* f,
5252 int blockSize100k,
5253 int verbosity,
5254 int workFactor )
5256 Int32 ret;
5257 bzFile* bzf = NULL;
5259 BZ_SETERR(BZ_OK);
5261 if (f == NULL ||
5262 (blockSize100k < 1 || blockSize100k > 9) ||
5263 (workFactor < 0 || workFactor > 250) ||
5264 (verbosity < 0 || verbosity > 4))
5265 { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
5267 if (ferror(f))
5268 { BZ_SETERR(BZ_IO_ERROR); return NULL; };
5270 bzf = malloc ( sizeof(bzFile) );
5271 if (bzf == NULL)
5272 { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
5274 BZ_SETERR(BZ_OK);
5275 bzf->initialisedOk = False;
5276 bzf->bufN = 0;
5277 bzf->handle = f;
5278 bzf->writing = True;
5279 bzf->strm.bzalloc = NULL;
5280 bzf->strm.bzfree = NULL;
5281 bzf->strm.opaque = NULL;
5283 if (workFactor == 0) workFactor = 30;
5284 ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k,
5285 verbosity, workFactor );
5286 if (ret != BZ_OK)
5287 { BZ_SETERR(ret); free(bzf); return NULL; };
5289 bzf->strm.avail_in = 0;
5290 bzf->initialisedOk = True;
5291 return bzf;
5296 /*---------------------------------------------------*/
5297 void BZ_API(BZ2_bzWrite)
5298 ( int* bzerror,
5299 BZFILE* b,
5300 void* buf,
5301 int len )
5303 Int32 n, n2, ret;
5304 bzFile* bzf = (bzFile*)b;
5306 BZ_SETERR(BZ_OK);
5307 if (bzf == NULL || buf == NULL || len < 0)
5308 { BZ_SETERR(BZ_PARAM_ERROR); return; };
5309 if (!(bzf->writing))
5310 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
5311 if (ferror(bzf->handle))
5312 { BZ_SETERR(BZ_IO_ERROR); return; };
5314 if (len == 0)
5315 { BZ_SETERR(BZ_OK); return; };
5317 bzf->strm.avail_in = len;
5318 bzf->strm.next_in = buf;
5320 while (True) {
5321 bzf->strm.avail_out = BZ_MAX_UNUSED;
5322 bzf->strm.next_out = bzf->buf;
5323 ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
5324 if (ret != BZ_RUN_OK)
5325 { BZ_SETERR(ret); return; };
5327 if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
5328 n = BZ_MAX_UNUSED - bzf->strm.avail_out;
5329 n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar),
5330 n, bzf->handle );
5331 if (n != n2 || ferror(bzf->handle))
5332 { BZ_SETERR(BZ_IO_ERROR); return; };
5335 if (bzf->strm.avail_in == 0)
5336 { BZ_SETERR(BZ_OK); return; };
5341 /*---------------------------------------------------*/
5342 void BZ_API(BZ2_bzWriteClose)
5343 ( int* bzerror,
5344 BZFILE* b,
5345 int abandon,
5346 unsigned int* nbytes_in,
5347 unsigned int* nbytes_out )
5349 BZ2_bzWriteClose64 ( bzerror, b, abandon,
5350 nbytes_in, NULL, nbytes_out, NULL );
5354 void BZ_API(BZ2_bzWriteClose64)
5355 ( int* bzerror,
5356 BZFILE* b,
5357 int abandon,
5358 unsigned int* nbytes_in_lo32,
5359 unsigned int* nbytes_in_hi32,
5360 unsigned int* nbytes_out_lo32,
5361 unsigned int* nbytes_out_hi32 )
5363 Int32 n, n2, ret;
5364 bzFile* bzf = (bzFile*)b;
5366 if (bzf == NULL)
5367 { BZ_SETERR(BZ_OK); return; };
5368 if (!(bzf->writing))
5369 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
5370 if (ferror(bzf->handle))
5371 { BZ_SETERR(BZ_IO_ERROR); return; };
5373 if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
5374 if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
5375 if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
5376 if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
5378 if ((!abandon) && bzf->lastErr == BZ_OK) {
5379 while (True) {
5380 bzf->strm.avail_out = BZ_MAX_UNUSED;
5381 bzf->strm.next_out = bzf->buf;
5382 ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
5383 if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
5384 { BZ_SETERR(ret); return; };
5386 if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
5387 n = BZ_MAX_UNUSED - bzf->strm.avail_out;
5388 n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar),
5389 n, bzf->handle );
5390 if (n != n2 || ferror(bzf->handle))
5391 { BZ_SETERR(BZ_IO_ERROR); return; };
5394 if (ret == BZ_STREAM_END) break;
5398 if ( !abandon && !ferror ( bzf->handle ) ) {
5399 fflush ( bzf->handle );
5400 if (ferror(bzf->handle))
5401 { BZ_SETERR(BZ_IO_ERROR); return; };
5404 if (nbytes_in_lo32 != NULL)
5405 *nbytes_in_lo32 = bzf->strm.total_in_lo32;
5406 if (nbytes_in_hi32 != NULL)
5407 *nbytes_in_hi32 = bzf->strm.total_in_hi32;
5408 if (nbytes_out_lo32 != NULL)
5409 *nbytes_out_lo32 = bzf->strm.total_out_lo32;
5410 if (nbytes_out_hi32 != NULL)
5411 *nbytes_out_hi32 = bzf->strm.total_out_hi32;
5413 BZ_SETERR(BZ_OK);
5414 BZ2_bzCompressEnd ( &(bzf->strm) );
5415 free ( bzf );
5419 /*---------------------------------------------------*/
5420 BZFILE* BZ_API(BZ2_bzReadOpen)
5421 ( int* bzerror,
5422 FILE* f,
5423 int verbosity,
5424 int small,
5425 void* unused,
5426 int nUnused )
5428 bzFile* bzf = NULL;
5429 int ret;
5431 BZ_SETERR(BZ_OK);
5433 if (f == NULL ||
5434 (small != 0 && small != 1) ||
5435 (verbosity < 0 || verbosity > 4) ||
5436 (unused == NULL && nUnused != 0) ||
5437 (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED)))
5438 { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
5440 if (ferror(f))
5441 { BZ_SETERR(BZ_IO_ERROR); return NULL; };
5443 bzf = malloc ( sizeof(bzFile) );
5444 if (bzf == NULL)
5445 { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
5447 BZ_SETERR(BZ_OK);
5449 bzf->initialisedOk = False;
5450 bzf->handle = f;
5451 bzf->bufN = 0;
5452 bzf->writing = False;
5453 bzf->strm.bzalloc = NULL;
5454 bzf->strm.bzfree = NULL;
5455 bzf->strm.opaque = NULL;
5457 while (nUnused > 0) {
5458 bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++;
5459 unused = ((void*)( 1 + ((UChar*)(unused)) ));
5460 nUnused--;
5463 ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
5464 if (ret != BZ_OK)
5465 { BZ_SETERR(ret); free(bzf); return NULL; };
5467 bzf->strm.avail_in = bzf->bufN;
5468 bzf->strm.next_in = bzf->buf;
5470 bzf->initialisedOk = True;
5471 return bzf;
5475 /*---------------------------------------------------*/
5476 void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
5478 bzFile* bzf = (bzFile*)b;
5480 BZ_SETERR(BZ_OK);
5481 if (bzf == NULL)
5482 { BZ_SETERR(BZ_OK); return; };
5484 if (bzf->writing)
5485 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
5487 if (bzf->initialisedOk)
5488 (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
5489 free ( bzf );
5493 /*---------------------------------------------------*/
5494 int BZ_API(BZ2_bzRead)
5495 ( int* bzerror,
5496 BZFILE* b,
5497 void* buf,
5498 int len )
5500 Int32 n, ret;
5501 bzFile* bzf = (bzFile*)b;
5503 BZ_SETERR(BZ_OK);
5505 if (bzf == NULL || buf == NULL || len < 0)
5506 { BZ_SETERR(BZ_PARAM_ERROR); return 0; };
5508 if (bzf->writing)
5509 { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; };
5511 if (len == 0)
5512 { BZ_SETERR(BZ_OK); return 0; };
5514 bzf->strm.avail_out = len;
5515 bzf->strm.next_out = buf;
5517 while (True) {
5519 if (ferror(bzf->handle))
5520 { BZ_SETERR(BZ_IO_ERROR); return 0; };
5522 if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) {
5523 n = fread ( bzf->buf, sizeof(UChar),
5524 BZ_MAX_UNUSED, bzf->handle );
5525 if (ferror(bzf->handle))
5526 { BZ_SETERR(BZ_IO_ERROR); return 0; };
5527 bzf->bufN = n;
5528 bzf->strm.avail_in = bzf->bufN;
5529 bzf->strm.next_in = bzf->buf;
5532 ret = BZ2_bzDecompress ( &(bzf->strm) );
5534 if (ret != BZ_OK && ret != BZ_STREAM_END)
5535 { BZ_SETERR(ret); return 0; };
5537 if (ret == BZ_OK && myfeof(bzf->handle) &&
5538 bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0)
5539 { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; };
5541 if (ret == BZ_STREAM_END)
5542 { BZ_SETERR(BZ_STREAM_END);
5543 return len - bzf->strm.avail_out; };
5544 if (bzf->strm.avail_out == 0)
5545 { BZ_SETERR(BZ_OK); return len; };
5549 return 0; /*not reached*/
5553 /*---------------------------------------------------*/
5554 void BZ_API(BZ2_bzReadGetUnused)
5555 ( int* bzerror,
5556 BZFILE* b,
5557 void** unused,
5558 int* nUnused )
5560 bzFile* bzf = (bzFile*)b;
5561 if (bzf == NULL)
5562 { BZ_SETERR(BZ_PARAM_ERROR); return; };
5563 if (bzf->lastErr != BZ_STREAM_END)
5564 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
5565 if (unused == NULL || nUnused == NULL)
5566 { BZ_SETERR(BZ_PARAM_ERROR); return; };
5568 BZ_SETERR(BZ_OK);
5569 *nUnused = bzf->strm.avail_in;
5570 *unused = bzf->strm.next_in;
5572 #endif
5575 /*---------------------------------------------------*/
5576 /*--- Misc convenience stuff ---*/
5577 /*---------------------------------------------------*/
5579 /*---------------------------------------------------*/
5580 int BZ_API(BZ2_bzBuffToBuffCompress)
5581 ( char* dest,
5582 unsigned int* destLen,
5583 char* source,
5584 unsigned int sourceLen,
5585 int blockSize100k,
5586 int verbosity,
5587 int workFactor )
5589 bz_stream strm;
5590 int ret;
5592 if (dest == NULL || destLen == NULL ||
5593 source == NULL ||
5594 blockSize100k < 1 || blockSize100k > 9 ||
5595 verbosity < 0 || verbosity > 4 ||
5596 workFactor < 0 || workFactor > 250)
5597 return BZ_PARAM_ERROR;
5599 if (workFactor == 0) workFactor = 30;
5600 strm.bzalloc = NULL;
5601 strm.bzfree = NULL;
5602 strm.opaque = NULL;
5603 ret = BZ2_bzCompressInit ( &strm, blockSize100k,
5604 verbosity, workFactor );
5605 if (ret != BZ_OK) return ret;
5607 strm.next_in = source;
5608 strm.next_out = dest;
5609 strm.avail_in = sourceLen;
5610 strm.avail_out = *destLen;
5612 ret = BZ2_bzCompress ( &strm, BZ_FINISH );
5613 if (ret == BZ_FINISH_OK) goto output_overflow;
5614 if (ret != BZ_STREAM_END) goto errhandler;
5616 /* normal termination */
5617 *destLen -= strm.avail_out;
5618 BZ2_bzCompressEnd ( &strm );
5619 return BZ_OK;
5621 output_overflow:
5622 BZ2_bzCompressEnd ( &strm );
5623 return BZ_OUTBUFF_FULL;
5625 errhandler:
5626 BZ2_bzCompressEnd ( &strm );
5627 return ret;
5631 /*---------------------------------------------------*/
5632 int BZ_API(BZ2_bzBuffToBuffDecompress)
5633 ( char* dest,
5634 unsigned int* destLen,
5635 char* source,
5636 unsigned int sourceLen,
5637 int small,
5638 int verbosity )
5640 bz_stream strm;
5641 int ret;
5643 if (dest == NULL || destLen == NULL ||
5644 source == NULL ||
5645 (small != 0 && small != 1) ||
5646 verbosity < 0 || verbosity > 4)
5647 return BZ_PARAM_ERROR;
5649 strm.bzalloc = NULL;
5650 strm.bzfree = NULL;
5651 strm.opaque = NULL;
5652 ret = BZ2_bzDecompressInit ( &strm, verbosity, small );
5653 if (ret != BZ_OK) return ret;
5655 strm.next_in = source;
5656 strm.next_out = dest;
5657 strm.avail_in = sourceLen;
5658 strm.avail_out = *destLen;
5660 ret = BZ2_bzDecompress ( &strm );
5661 if (ret == BZ_OK) goto output_overflow_or_eof;
5662 if (ret != BZ_STREAM_END) goto errhandler;
5664 /* normal termination */
5665 *destLen -= strm.avail_out;
5666 BZ2_bzDecompressEnd ( &strm );
5667 return BZ_OK;
5669 output_overflow_or_eof:
5670 if (strm.avail_out > 0) {
5671 BZ2_bzDecompressEnd ( &strm );
5672 return BZ_UNEXPECTED_EOF;
5673 } else {
5674 BZ2_bzDecompressEnd ( &strm );
5675 return BZ_OUTBUFF_FULL;
5678 errhandler:
5679 BZ2_bzDecompressEnd ( &strm );
5680 return ret;
5684 /*---------------------------------------------------*/
5685 /*--
5686 Code contributed by Yoshioka Tsuneo
5687 (QWF00133@niftyserve.or.jp/tsuneo-y@is.aist-nara.ac.jp),
5688 to support better zlib compatibility.
5689 This code is not _officially_ part of libbzip2 (yet);
5690 I haven't tested it, documented it, or considered the
5691 threading-safeness of it.
5692 If this code breaks, please contact both Yoshioka and me.
5693 --*/
5694 /*---------------------------------------------------*/
5696 /*---------------------------------------------------*/
5697 /*--
5698 return version like "0.9.0c".
5699 --*/
5700 const char * BZ_API(BZ2_bzlibVersion)(void)
5702 return BZ_VERSION;
5706 #ifndef BZ_NO_STDIO
5707 /*---------------------------------------------------*/
5709 #if defined(_WIN32) || defined(OS2) || defined(MSDOS)
5710 # include <fcntl.h>
5711 # include <io.h>
5712 # define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY)
5713 #else
5714 # define SET_BINARY_MODE(file)
5715 #endif
5716 static
5717 BZFILE * bzopen_or_bzdopen
5718 ( const char *path, /* no use when bzdopen */
5719 int fd, /* no use when bzdopen */
5720 const char *mode,
5721 int open_mode) /* bzopen: 0, bzdopen:1 */
5723 int bzerr;
5724 char unused[BZ_MAX_UNUSED];
5725 int blockSize100k = 9;
5726 int writing = 0;
5727 char mode2[10] = "";
5728 FILE *fp = NULL;
5729 BZFILE *bzfp = NULL;
5730 int verbosity = 0;
5731 int workFactor = 30;
5732 int smallMode = 0;
5733 int nUnused = 0;
5735 if (mode == NULL) return NULL;
5736 while (*mode) {
5737 switch (*mode) {
5738 case 'r':
5739 writing = 0; break;
5740 case 'w':
5741 writing = 1; break;
5742 case 's':
5743 smallMode = 1; break;
5744 default:
5745 if (isdigit((int)(*mode))) {
5746 blockSize100k = *mode-BZ_HDR_0;
5749 mode++;
5751 strcat(mode2, writing ? "w" : "r" );
5752 strcat(mode2,"b"); /* binary mode */
5754 if (open_mode==0) {
5755 if (path==NULL || strcmp(path,"")==0) {
5756 fp = (writing ? stdout : stdin);
5757 SET_BINARY_MODE(fp);
5758 } else {
5759 fp = fopen(path,mode2);
5761 } else {
5762 #ifdef BZ_STRICT_ANSI
5763 fp = NULL;
5764 #else
5765 fp = fdopen(fd,mode2);
5766 #endif
5768 if (fp == NULL) return NULL;
5770 if (writing) {
5771 /* Guard against total chaos and anarchy -- JRS */
5772 if (blockSize100k < 1) blockSize100k = 1;
5773 if (blockSize100k > 9) blockSize100k = 9;
5774 bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
5775 verbosity,workFactor);
5776 } else {
5777 bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
5778 unused,nUnused);
5780 if (bzfp == NULL) {
5781 if (fp != stdin && fp != stdout) fclose(fp);
5782 return NULL;
5784 return bzfp;
5788 /*---------------------------------------------------*/
5789 /*--
5790 open file for read or write.
5791 ex) bzopen("file","w9")
5792 case path="" or NULL => use stdin or stdout.
5793 --*/
5794 BZFILE * BZ_API(BZ2_bzopen)
5795 ( const char *path,
5796 const char *mode )
5798 return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0);
5802 /*---------------------------------------------------*/
5803 BZFILE * BZ_API(BZ2_bzdopen)
5804 ( int fd,
5805 const char *mode )
5807 return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1);
5811 /*---------------------------------------------------*/
5812 int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
5814 int bzerr, nread;
5815 if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
5816 nread = BZ2_bzRead(&bzerr,b,buf,len);
5817 if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
5818 return nread;
5819 } else {
5820 return -1;
5825 /*---------------------------------------------------*/
5826 int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
5828 int bzerr;
5830 BZ2_bzWrite(&bzerr,b,buf,len);
5831 if(bzerr == BZ_OK){
5832 return len;
5833 }else{
5834 return -1;
5839 /*---------------------------------------------------*/
5840 int BZ_API(BZ2_bzflush) (BZFILE *b)
5842 /* do nothing now... */
5843 return 0;
5847 /*---------------------------------------------------*/
5848 void BZ_API(BZ2_bzclose) (BZFILE* b)
5850 int bzerr;
5851 FILE *fp = ((bzFile *)b)->handle;
5853 if (b==NULL) {return;}
5854 if(((bzFile*)b)->writing){
5855 BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL);
5856 if(bzerr != BZ_OK){
5857 BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
5859 }else{
5860 BZ2_bzReadClose(&bzerr,b);
5862 if(fp!=stdin && fp!=stdout){
5863 fclose(fp);
5868 /*---------------------------------------------------*/
5869 /*--
5870 return last error code
5871 --*/
5872 static char *bzerrorstrings[] = {
5873 "OK"
5874 ,"SEQUENCE_ERROR"
5875 ,"PARAM_ERROR"
5876 ,"MEM_ERROR"
5877 ,"DATA_ERROR"
5878 ,"DATA_ERROR_MAGIC"
5879 ,"IO_ERROR"
5880 ,"UNEXPECTED_EOF"
5881 ,"OUTBUFF_FULL"
5882 ,"CONFIG_ERROR"
5883 ,"???" /* for future */
5884 ,"???" /* for future */
5885 ,"???" /* for future */
5886 ,"???" /* for future */
5887 ,"???" /* for future */
5888 ,"???" /* for future */
5892 const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
5894 int err = ((bzFile *)b)->lastErr;
5896 if(err>0) err = 0;
5897 *errnum = err;
5898 return bzerrorstrings[err*-1];
5900 #endif
5903 /*-------------------------------------------------------------*/
5904 /*--- end bzlib.c ---*/
5905 /*-------------------------------------------------------------*/
5908 /////////////////////////////////////////////////////////////////////
5909 /////////////////////////////////////////////////////////////////////
5912 /* A test program written to test robustness to decompression of
5913 corrupted data. Usage is
5914 unzcrash filename
5915 and the program will read the specified file, compress it (in memory),
5916 and then repeatedly decompress it, each time with a different bit of
5917 the compressed data inverted, so as to test all possible one-bit errors.
5918 This should not cause any invalid memory accesses. If it does,
5919 I want to know about it!
5921 p.s. As you can see from the above description, the process is
5922 incredibly slow. A file of size eg 5KB will cause it to run for
5923 many hours.
5926 //#include <stdio.h>
5927 //#include <assert.h>
5928 //#include "bzlib.h"
5930 #define M_BLOCK 1000000
5932 typedef unsigned char uchar;
5934 #define M_BLOCK_OUT (M_BLOCK + 1000000)
5935 uchar inbuf[M_BLOCK];
5936 uchar outbuf[M_BLOCK_OUT];
5937 uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)];
5939 int nIn, nOut, nZ;
5941 static char *bzerrorstrings[] = {
5942 "OK"
5943 ,"SEQUENCE_ERROR"
5944 ,"PARAM_ERROR"
5945 ,"MEM_ERROR"
5946 ,"DATA_ERROR"
5947 ,"DATA_ERROR_MAGIC"
5948 ,"IO_ERROR"
5949 ,"UNEXPECTED_EOF"
5950 ,"OUTBUFF_FULL"
5951 ,"???" /* for future */
5952 ,"???" /* for future */
5953 ,"???" /* for future */
5954 ,"???" /* for future */
5955 ,"???" /* for future */
5956 ,"???" /* for future */
5959 void flip_bit ( int bit )
5961 int byteno = bit / 8;
5962 int bitno = bit % 8;
5963 uchar mask = 1 << bitno;
5964 //fprintf ( stderr, "(byte %d bit %d mask %d)",
5965 // byteno, bitno, (int)mask );
5966 zbuf[byteno] ^= mask;
5969 void set_inbuf ( void )
5971 inbuf[0] = 0;
5972 my_strcat(inbuf, "At her sixtieth birthday party, Margaret Thatcher ");
5973 my_strcat(inbuf, "blew on the cake to light the candles.\n");
5974 my_strcat(inbuf, "This program, bzip2, the associated library libbzip2, and all\n");
5975 my_strcat(inbuf, "documentation, are copyright (C) 1996-2004 Julian R Seward. All\n");
5976 my_strcat(inbuf, "rights reserved.\n");
5977 my_strcat(inbuf, "\n");
5978 my_strcat(inbuf, "Redistribution and use in source and binary forms, with or without\n");
5979 my_strcat(inbuf, "modification, are permitted provided that the following conditions\n");
5980 my_strcat(inbuf, "are met:\n");
5981 my_strcat(inbuf, "\n");
5982 my_strcat(inbuf, "1. Redistributions of source code must retain the above copyright\n");
5983 my_strcat(inbuf, " notice, this list of conditions and the following disclaimer.\n");
5984 my_strcat(inbuf, "\n");
5985 my_strcat(inbuf, "2. The origin of this software must not be misrepresented; you must\n");
5986 my_strcat(inbuf, " not claim that you wrote the original software. If you use this\n");
5987 my_strcat(inbuf, " software in a product, an acknowledgment in the product\n");
5988 my_strcat(inbuf, " documentation would be appreciated but is not required.\n");
5989 my_strcat(inbuf, "\n");
5990 my_strcat(inbuf, "3. Altered source versions must be plainly marked as such, and must\n");
5991 my_strcat(inbuf, " not be misrepresented as being the original software.\n");
5992 my_strcat(inbuf, "\n");
5993 my_strcat(inbuf, "4. The name of the author may not be used to endorse or promote\n");
5994 my_strcat(inbuf, " products derived from this software without specific prior written\n");
5995 my_strcat(inbuf, " permission.\n");
5996 my_strcat(inbuf, "\n");
5997 my_strcat(inbuf, "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS\n");
5998 my_strcat(inbuf, "OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n");
5999 my_strcat(inbuf, "WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n");
6000 my_strcat(inbuf, "ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY\n");
6001 my_strcat(inbuf, "DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n");
6002 my_strcat(inbuf, "DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE\n");
6003 my_strcat(inbuf, "GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n");
6004 my_strcat(inbuf, "INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n");
6005 my_strcat(inbuf, "WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n");
6006 my_strcat(inbuf, "NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n");
6007 my_strcat(inbuf, "SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n");
6008 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6009 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6010 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6011 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6012 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6013 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6014 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6015 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6016 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6017 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6018 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6019 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6020 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6021 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6022 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6023 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6024 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6025 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6026 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6027 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6028 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6029 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6030 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6031 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6032 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6033 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6034 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6035 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6036 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6037 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6038 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6039 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6040 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6041 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6042 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6043 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6044 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6045 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6046 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6047 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6048 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6049 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
6050 my_strcat(inbuf, "\n");
6054 void entry ( HWord(*service)(HWord,HWord) )
6056 int r;
6057 int bit;
6058 int i;
6060 serviceFn = service;
6062 set_inbuf();
6063 nIn = vexxx_strlen(inbuf)+1;
6064 vexxx_printf( "%d bytes read\n", nIn );
6066 nZ = M_BLOCK;
6067 r = BZ2_bzBuffToBuffCompress (
6068 zbuf, &nZ, inbuf, nIn, 9, 4/*verb*/, 30 );
6070 if (r != BZ_OK) {
6071 vexxx_printf("initial compress failed!\n");
6072 (*serviceFn)(0,0);
6074 vexxx_printf( "%d after compression\n", nZ );
6076 for (bit = 0; bit < nZ*8; bit += (bit < 35 ? 1 : 377)) {
6077 vexxx_printf( "bit %d ", bit );
6078 flip_bit ( bit );
6079 nOut = M_BLOCK_OUT;
6080 r = BZ2_bzBuffToBuffDecompress (
6081 outbuf, &nOut, zbuf, nZ, 1/*small*/, 0 );
6082 vexxx_printf( " %d %s ", r, bzerrorstrings[-r] );
6084 if (r != BZ_OK) {
6085 vexxx_printf( "\n" );
6086 } else {
6087 if (nOut != nIn) {
6088 vexxx_printf( "nIn/nOut mismatch %d %d\n", nIn, nOut );
6089 (*serviceFn)(0,0);
6090 } else {
6091 for (i = 0; i < nOut; i++)
6092 if (inbuf[i] != outbuf[i]) {
6093 vexxx_printf( "mismatch at %d\n", i );
6094 (*serviceFn)(0,0);
6096 if (i == nOut) vexxx_printf( "really ok!\n" );
6100 flip_bit ( bit );
6103 #if 0
6104 assert (nOut == nIn);
6105 for (i = 0; i < nOut; i++) {
6106 if (inbuf[i] != outbuf[i]) {
6107 vexxx_printf( "difference at %d !\n", i );
6108 return 1;
6111 #endif
6113 vexxx_printf( "all ok\n" );
6114 (*serviceFn)(0,0);