2 /*-------------------------------------------------------------*/
3 /*--- Compression machinery (not incl block sorting) ---*/
5 /*-------------------------------------------------------------*/
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 This program is released under the terms of the license contained
19 ------------------------------------------------------------------ */
23 0.9.0 -- original version.
24 0.9.0a/b -- no changes in this file.
25 0.9.0c -- changed setting of nGroups in sendMTFValues()
26 so as to do a bit better on small files
29 #include <sys/ccompile.h>
30 #include "bzlib_private.h"
33 /*---------------------------------------------------*/
34 /*--- Bit stream I/O ---*/
35 /*---------------------------------------------------*/
37 /*---------------------------------------------------*/
38 void BZ2_bsInitWrite ( EState
* s
)
45 /*---------------------------------------------------*/
47 void bsFinishWrite ( EState
* s
)
49 while (s
->bsLive
> 0) {
50 s
->zbits
[s
->numZ
] = (UChar
)(s
->bsBuff
>> 24);
58 /*---------------------------------------------------*/
61 while (s->bsLive >= 8) { \
63 = (UChar)(s->bsBuff >> 24); \
71 /*---------------------------------------------------*/
74 void bsW ( EState
* s
, Int32 n
, UInt32 v
)
77 s
->bsBuff
|= (v
<< (32 - s
->bsLive
- n
));
82 /*---------------------------------------------------*/
84 void bsPutUInt32 ( EState
* s
, UInt32 u
)
86 bsW ( s
, 8, (u
>> 24) & 0xffL
);
87 bsW ( s
, 8, (u
>> 16) & 0xffL
);
88 bsW ( s
, 8, (u
>> 8) & 0xffL
);
89 bsW ( s
, 8, u
& 0xffL
);
93 /*---------------------------------------------------*/
95 void bsPutUChar ( EState
* s
, UChar c
)
97 bsW( s
, 8, (UInt32
)c
);
101 /*---------------------------------------------------*/
102 /*--- The back end proper ---*/
103 /*---------------------------------------------------*/
105 /*---------------------------------------------------*/
107 void makeMaps_e ( EState
* s
)
111 for (i
= 0; i
< 256; i
++)
113 s
->unseqToSeq
[i
] = s
->nInUse
;
119 /*---------------------------------------------------*/
121 void generateMTFValues ( EState
* s
)
130 After sorting (eg, here),
131 s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
133 ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
134 holds the original block data.
136 The first thing to do is generate the MTF values,
138 ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
139 Because there are strictly fewer or equal MTF values
140 than block values, ptr values in this area are overwritten
141 with MTF values only when they are no longer needed.
143 The final compressed bitstream is generated into the
145 (UChar*) (&((UChar*)s->arr2)[s->nblock])
147 These storage aliases are set up in bzCompressInit(),
148 except for the last one, which is arranged in
151 UInt32
* ptr
= s
->ptr
;
152 UChar
* block
= s
->block
;
153 UInt16
* mtfv
= s
->mtfv
;
158 for (i
= 0; i
<= EOB
; i
++) s
->mtfFreq
[i
] = 0;
162 for (i
= 0; i
< s
->nInUse
; i
++) yy
[i
] = (UChar
) i
;
164 for (i
= 0; i
< s
->nblock
; i
++) {
166 AssertD ( wr
<= i
, "generateMTFValues(1)" );
167 j
= ptr
[i
]-1; if (j
< 0) j
+= s
->nblock
;
168 ll_i
= s
->unseqToSeq
[block
[j
]];
169 AssertD ( ll_i
< s
->nInUse
, "generateMTFValues(2a)" );
179 mtfv
[wr
] = BZ_RUNB
; wr
++;
180 s
->mtfFreq
[BZ_RUNB
]++;
182 mtfv
[wr
] = BZ_RUNA
; wr
++;
183 s
->mtfFreq
[BZ_RUNA
]++;
185 if (zPend
< 2) break;
186 zPend
= (zPend
- 2) / 2;
192 register UChar
* ryy_j
;
193 register UChar rll_i
;
198 while ( rll_i
!= rtmp
) {
199 register UChar rtmp2
;
206 j
= ryy_j
- &(yy
[0]);
207 mtfv
[wr
] = j
+1; wr
++; s
->mtfFreq
[j
+1]++;
217 mtfv
[wr
] = BZ_RUNB
; wr
++;
218 s
->mtfFreq
[BZ_RUNB
]++;
220 mtfv
[wr
] = BZ_RUNA
; wr
++;
221 s
->mtfFreq
[BZ_RUNA
]++;
223 if (zPend
< 2) break;
224 zPend
= (zPend
- 2) / 2;
229 mtfv
[wr
] = EOB
; wr
++; s
->mtfFreq
[EOB
]++;
235 /*---------------------------------------------------*/
236 #define BZ_LESSER_ICOST 0
237 #define BZ_GREATER_ICOST 15
240 void sendMTFValues ( EState
* s
)
242 Int32 v
, t
, i
, j
, gs
, ge
, totc
, bt
, bc
, iter
;
243 Int32 nSelectors
, alphaSize
, minLen
, maxLen
, selCtr
;
244 Int32 nGroups
, __GNU_UNUSED nBytes
;
247 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
248 is a global since the decoder also needs it.
250 Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
251 Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
252 are also globals only used in this proc.
253 Made global to keep stack frame size small.
257 UInt16 cost
[BZ_N_GROUPS
];
258 Int32 fave
[BZ_N_GROUPS
];
260 UInt16
* mtfv
= s
->mtfv
;
262 if (s
->verbosity
>= 3)
263 VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
264 "%d+2 syms in use\n",
265 s
->nblock
, s
->nMTF
, s
->nInUse
);
267 alphaSize
= s
->nInUse
+2;
268 for (t
= 0; t
< BZ_N_GROUPS
; t
++)
269 for (v
= 0; v
< alphaSize
; v
++)
270 s
->len
[t
][v
] = BZ_GREATER_ICOST
;
272 /*--- Decide how many coding tables to use ---*/
273 AssertH ( s
->nMTF
> 0, 3001 );
274 if (s
->nMTF
< 200) nGroups
= 2; else
275 if (s
->nMTF
< 600) nGroups
= 3; else
276 if (s
->nMTF
< 1200) nGroups
= 4; else
277 if (s
->nMTF
< 2400) nGroups
= 5; else
280 /*--- Generate an initial set of coding tables ---*/
282 Int32 nPart
, remF
, tFreq
, aFreq
;
288 tFreq
= remF
/ nPart
;
291 while (aFreq
< tFreq
&& ge
< alphaSize
-1) {
293 aFreq
+= s
->mtfFreq
[ge
];
297 && nPart
!= nGroups
&& nPart
!= 1
298 && ((nGroups
-nPart
) % 2 == 1)) {
299 aFreq
-= s
->mtfFreq
[ge
];
303 if (s
->verbosity
>= 3)
304 VPrintf5( " initial group %d, [%d .. %d], "
305 "has %d syms (%4.1f%%)\n",
306 nPart
, gs
, ge
, aFreq
,
307 (100.0 * (float)aFreq
) / (float)(s
->nMTF
) );
309 for (v
= 0; v
< alphaSize
; v
++)
310 if (v
>= gs
&& v
<= ge
)
311 s
->len
[nPart
-1][v
] = BZ_LESSER_ICOST
; else
312 s
->len
[nPart
-1][v
] = BZ_GREATER_ICOST
;
321 Iterate up to BZ_N_ITERS times to improve the tables.
323 for (iter
= 0; iter
< BZ_N_ITERS
; iter
++) {
325 for (t
= 0; t
< nGroups
; t
++) fave
[t
] = 0;
327 for (t
= 0; t
< nGroups
; t
++)
328 for (v
= 0; v
< alphaSize
; v
++)
332 Set up an auxiliary length table which is used to fast-track
333 the common case (nGroups == 6).
336 for (v
= 0; v
< alphaSize
; v
++) {
337 s
->len_pack
[v
][0] = (s
->len
[1][v
] << 16) | s
->len
[0][v
];
338 s
->len_pack
[v
][1] = (s
->len
[3][v
] << 16) | s
->len
[2][v
];
339 s
->len_pack
[v
][2] = (s
->len
[5][v
] << 16) | s
->len
[4][v
];
348 /*--- Set group start & end marks. --*/
349 if (gs
>= s
->nMTF
) break;
350 ge
= gs
+ BZ_G_SIZE
- 1;
351 if (ge
>= s
->nMTF
) ge
= s
->nMTF
-1;
354 Calculate the cost of this group as coded
355 by each of the coding tables.
357 for (t
= 0; t
< nGroups
; t
++) cost
[t
] = 0;
359 if (nGroups
== 6 && 50 == ge
-gs
+1) {
360 /*--- fast track the common case ---*/
361 register UInt32 cost01
, cost23
, cost45
;
363 cost01
= cost23
= cost45
= 0;
365 # define BZ_ITER(nn) \
366 icv = mtfv[gs+(nn)]; \
367 cost01 += s->len_pack[icv][0]; \
368 cost23 += s->len_pack[icv][1]; \
369 cost45 += s->len_pack[icv][2]; \
371 BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
372 BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
373 BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
374 BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
375 BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
376 BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
377 BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
378 BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
379 BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
380 BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
384 cost
[0] = cost01
& 0xffff; cost
[1] = cost01
>> 16;
385 cost
[2] = cost23
& 0xffff; cost
[3] = cost23
>> 16;
386 cost
[4] = cost45
& 0xffff; cost
[5] = cost45
>> 16;
389 /*--- slow version which correctly handles all situations ---*/
390 for (i
= gs
; i
<= ge
; i
++) {
391 UInt16 icv
= mtfv
[i
];
392 for (t
= 0; t
< nGroups
; t
++) cost
[t
] += s
->len
[t
][icv
];
397 Find the coding table which is best for this group,
398 and record its identity in the selector table.
400 bc
= 999999999; bt
= -1;
401 for (t
= 0; t
< nGroups
; t
++)
402 if (cost
[t
] < bc
) { bc
= cost
[t
]; bt
= t
; };
405 s
->selector
[nSelectors
] = bt
;
409 Increment the symbol frequencies for the selected table.
411 if (nGroups
== 6 && 50 == ge
-gs
+1) {
412 /*--- fast track the common case ---*/
414 # define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
416 BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
417 BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
418 BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
419 BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
420 BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
421 BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
422 BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
423 BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
424 BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
425 BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
430 /*--- slow version which correctly handles all situations ---*/
431 for (i
= gs
; i
<= ge
; i
++)
432 s
->rfreq
[bt
][ mtfv
[i
] ]++;
437 if (s
->verbosity
>= 3) {
438 VPrintf2 ( " pass %d: size is %d, grp uses are ",
440 for (t
= 0; t
< nGroups
; t
++)
441 VPrintf1 ( "%d ", fave
[t
] );
446 Recompute the tables based on the accumulated frequencies.
448 /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
449 comment in huffman.c for details. */
450 for (t
= 0; t
< nGroups
; t
++)
451 BZ2_hbMakeCodeLengths ( &(s
->len
[t
][0]), &(s
->rfreq
[t
][0]),
452 alphaSize
, 17 /*20*/ );
456 AssertH( nGroups
< 8, 3002 );
457 AssertH( nSelectors
< 32768 &&
458 nSelectors
<= (2 + (900000 / BZ_G_SIZE
)),
462 /*--- Compute MTF values for the selectors. ---*/
464 UChar pos
[BZ_N_GROUPS
], ll_i
, tmp2
, tmp
;
465 for (i
= 0; i
< nGroups
; i
++) pos
[i
] = i
;
466 for (i
= 0; i
< nSelectors
; i
++) {
467 ll_i
= s
->selector
[i
];
470 while ( ll_i
!= tmp
) {
477 s
->selectorMtf
[i
] = j
;
481 /*--- Assign actual codes for the tables. --*/
482 for (t
= 0; t
< nGroups
; t
++) {
485 for (i
= 0; i
< alphaSize
; i
++) {
486 if (s
->len
[t
][i
] > maxLen
) maxLen
= s
->len
[t
][i
];
487 if (s
->len
[t
][i
] < minLen
) minLen
= s
->len
[t
][i
];
489 AssertH ( !(maxLen
> 17 /*20*/ ), 3004 );
490 AssertH ( !(minLen
< 1), 3005 );
491 BZ2_hbAssignCodes ( &(s
->code
[t
][0]), &(s
->len
[t
][0]),
492 minLen
, maxLen
, alphaSize
);
495 /*--- Transmit the mapping table. ---*/
498 for (i
= 0; i
< 16; i
++) {
500 for (j
= 0; j
< 16; j
++)
501 if (s
->inUse
[i
* 16 + j
]) inUse16
[i
] = True
;
505 for (i
= 0; i
< 16; i
++)
506 if (inUse16
[i
]) bsW(s
,1,1); else bsW(s
,1,0);
508 for (i
= 0; i
< 16; i
++)
510 for (j
= 0; j
< 16; j
++) {
511 if (s
->inUse
[i
* 16 + j
]) bsW(s
,1,1); else bsW(s
,1,0);
514 if (s
->verbosity
>= 3)
515 VPrintf1( " bytes: mapping %d, ", s
->numZ
-nBytes
);
518 /*--- Now the selectors. ---*/
520 bsW ( s
, 3, nGroups
);
521 bsW ( s
, 15, nSelectors
);
522 for (i
= 0; i
< nSelectors
; i
++) {
523 for (j
= 0; j
< s
->selectorMtf
[i
]; j
++) bsW(s
,1,1);
526 if (s
->verbosity
>= 3)
527 VPrintf1( "selectors %d, ", s
->numZ
-nBytes
);
529 /*--- Now the coding tables. ---*/
532 for (t
= 0; t
< nGroups
; t
++) {
533 Int32 curr
= s
->len
[t
][0];
535 for (i
= 0; i
< alphaSize
; i
++) {
536 while (curr
< s
->len
[t
][i
]) { bsW(s
,2,2); curr
++; /* 10 */ };
537 while (curr
> s
->len
[t
][i
]) { bsW(s
,2,3); curr
--; /* 11 */ };
542 if (s
->verbosity
>= 3)
543 VPrintf1 ( "code lengths %d, ", s
->numZ
-nBytes
);
545 /*--- And finally, the block data proper ---*/
550 if (gs
>= s
->nMTF
) break;
551 ge
= gs
+ BZ_G_SIZE
- 1;
552 if (ge
>= s
->nMTF
) ge
= s
->nMTF
-1;
553 AssertH ( s
->selector
[selCtr
] < nGroups
, 3006 );
555 if (nGroups
== 6 && 50 == ge
-gs
+1) {
556 /*--- fast track the common case ---*/
558 UChar
* s_len_sel_selCtr
559 = &(s
->len
[s
->selector
[selCtr
]][0]);
560 Int32
* s_code_sel_selCtr
561 = &(s
->code
[s
->selector
[selCtr
]][0]);
563 # define BZ_ITAH(nn) \
564 mtfv_i = mtfv[gs+(nn)]; \
566 s_len_sel_selCtr[mtfv_i], \
567 s_code_sel_selCtr[mtfv_i] )
569 BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
570 BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
571 BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
572 BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
573 BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
574 BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
575 BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
576 BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
577 BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
578 BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
583 /*--- slow version which correctly handles all situations ---*/
584 for (i
= gs
; i
<= ge
; i
++) {
586 s
->len
[s
->selector
[selCtr
]] [mtfv
[i
]],
587 s
->code
[s
->selector
[selCtr
]] [mtfv
[i
]] );
595 AssertH( selCtr
== nSelectors
, 3007 );
597 if (s
->verbosity
>= 3)
598 VPrintf1( "codes %d\n", s
->numZ
-nBytes
);
602 /*---------------------------------------------------*/
603 void BZ2_compressBlock ( EState
* s
, Bool is_last_block
)
607 BZ_FINALISE_CRC ( s
->blockCRC
);
608 s
->combinedCRC
= (s
->combinedCRC
<< 1) | (s
->combinedCRC
>> 31);
609 s
->combinedCRC
^= s
->blockCRC
;
610 if (s
->blockNo
> 1) s
->numZ
= 0;
612 if (s
->verbosity
>= 2)
613 VPrintf4( " block %d: crc = 0x%08x, "
614 "combined CRC = 0x%08x, size = %d\n",
615 s
->blockNo
, s
->blockCRC
, s
->combinedCRC
, s
->nblock
);
620 s
->zbits
= (UChar
*) (&((UChar
*)s
->arr2
)[s
->nblock
]);
622 /*-- If this is the first block, create the stream header. --*/
623 if (s
->blockNo
== 1) {
624 BZ2_bsInitWrite ( s
);
625 bsPutUChar ( s
, BZ_HDR_B
);
626 bsPutUChar ( s
, BZ_HDR_Z
);
627 bsPutUChar ( s
, BZ_HDR_h
);
628 bsPutUChar ( s
, (UChar
)(BZ_HDR_0
+ s
->blockSize100k
) );
633 bsPutUChar ( s
, 0x31 ); bsPutUChar ( s
, 0x41 );
634 bsPutUChar ( s
, 0x59 ); bsPutUChar ( s
, 0x26 );
635 bsPutUChar ( s
, 0x53 ); bsPutUChar ( s
, 0x59 );
637 /*-- Now the block's CRC, so it is in a known place. --*/
638 bsPutUInt32 ( s
, s
->blockCRC
);
641 Now a single bit indicating (non-)randomisation.
642 As of version 0.9.5, we use a better sorting algorithm
643 which makes randomisation unnecessary. So always set
644 the randomised bit to 'no'. Of course, the decoder
645 still needs to be able to handle randomised blocks
646 so as to maintain backwards compatibility with
647 older versions of bzip2.
651 bsW ( s
, 24, s
->origPtr
);
652 generateMTFValues ( s
);
657 /*-- If this is the last block, add the stream trailer. --*/
660 bsPutUChar ( s
, 0x17 ); bsPutUChar ( s
, 0x72 );
661 bsPutUChar ( s
, 0x45 ); bsPutUChar ( s
, 0x38 );
662 bsPutUChar ( s
, 0x50 ); bsPutUChar ( s
, 0x90 );
663 bsPutUInt32 ( s
, s
->combinedCRC
);
664 if (s
->verbosity
>= 2)
665 VPrintf1( " final combined CRC = 0x%08x\n ", s
->combinedCRC
);
671 /*-------------------------------------------------------------*/
672 /*--- end compress.c ---*/
673 /*-------------------------------------------------------------*/