1 /* $NetBSD: compress.c,v 1.1.1.2 2012/05/07 00:41:46 wiz Exp $ */
4 /*-------------------------------------------------------------*/
5 /*--- Compression machinery (not incl block sorting) ---*/
7 /*-------------------------------------------------------------*/
9 /* ------------------------------------------------------------------
10 This file is part of bzip2/libbzip2, a program and library for
11 lossless, block-sorting data compression.
13 bzip2/libbzip2 version 1.0.6 of 6 September 2010
14 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
16 Please read the WARNING, DISCLAIMER and PATENTS sections in the
19 This program is released under the terms of the license contained
21 ------------------------------------------------------------------ */
25 0.9.0 -- original version.
26 0.9.0a/b -- no changes in this file.
27 0.9.0c -- changed setting of nGroups in sendMTFValues()
28 so as to do a bit better on small files
31 #include "bzlib_private.h"
34 /*---------------------------------------------------*/
35 /*--- Bit stream I/O ---*/
36 /*---------------------------------------------------*/
38 /*---------------------------------------------------*/
39 void BZ2_bsInitWrite ( EState
* s
)
46 /*---------------------------------------------------*/
48 void bsFinishWrite ( EState
* s
)
50 while (s
->bsLive
> 0) {
51 s
->zbits
[s
->numZ
] = (UChar
)(s
->bsBuff
>> 24);
59 /*---------------------------------------------------*/
/*---------------------------------------------------*/
/*--
   Make room in the bit buffer: while at least 8 whole bits are
   pending, emit the top byte of bsBuff into zbits.  Expects an
   EState* named 's' in the invoking scope; 'nz' is unused but
   kept for documentation of how many bits the caller will add.
--*/
#define bsNEEDW(nz)                           \
{                                             \
   while (s->bsLive >= 8) {                   \
      s->zbits[s->numZ]                       \
         = (UChar)(s->bsBuff >> 24);          \
      s->numZ++;                              \
      s->bsBuff <<= 8;                        \
      s->bsLive -= 8;                         \
   }                                          \
}
72 /*---------------------------------------------------*/
75 void bsW ( EState
* s
, Int32 n
, UInt32 v
)
78 s
->bsBuff
|= (v
<< (32 - s
->bsLive
- n
));
83 /*---------------------------------------------------*/
85 void bsPutUInt32 ( EState
* s
, UInt32 u
)
87 bsW ( s
, 8, (u
>> 24) & 0xffL
);
88 bsW ( s
, 8, (u
>> 16) & 0xffL
);
89 bsW ( s
, 8, (u
>> 8) & 0xffL
);
90 bsW ( s
, 8, u
& 0xffL
);
94 /*---------------------------------------------------*/
96 void bsPutUChar ( EState
* s
, UChar c
)
98 bsW( s
, 8, (UInt32
)c
);
102 /*---------------------------------------------------*/
103 /*--- The back end proper ---*/
104 /*---------------------------------------------------*/
106 /*---------------------------------------------------*/
108 void makeMaps_e ( EState
* s
)
112 for (i
= 0; i
< 256; i
++)
114 s
->unseqToSeq
[i
] = s
->nInUse
;
120 /*---------------------------------------------------*/
122 void generateMTFValues ( EState
* s
)
131 After sorting (eg, here),
132 s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
134 ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
135 holds the original block data.
137 The first thing to do is generate the MTF values,
139 ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
140 Because there are strictly fewer or equal MTF values
141 than block values, ptr values in this area are overwritten
142 with MTF values only when they are no longer needed.
144 The final compressed bitstream is generated into the
146 (UChar*) (&((UChar*)s->arr2)[s->nblock])
148 These storage aliases are set up in bzCompressInit(),
149 except for the last one, which is arranged in
152 UInt32
* ptr
= s
->ptr
;
153 UChar
* block
= s
->block
;
154 UInt16
* mtfv
= s
->mtfv
;
159 for (i
= 0; i
<= EOB
; i
++) s
->mtfFreq
[i
] = 0;
163 for (i
= 0; i
< s
->nInUse
; i
++) yy
[i
] = (UChar
) i
;
165 for (i
= 0; i
< s
->nblock
; i
++) {
167 AssertD ( wr
<= i
, "generateMTFValues(1)" );
168 j
= ptr
[i
]-1; if (j
< 0) j
+= s
->nblock
;
169 ll_i
= s
->unseqToSeq
[block
[j
]];
170 AssertD ( ll_i
< s
->nInUse
, "generateMTFValues(2a)" );
180 mtfv
[wr
] = BZ_RUNB
; wr
++;
181 s
->mtfFreq
[BZ_RUNB
]++;
183 mtfv
[wr
] = BZ_RUNA
; wr
++;
184 s
->mtfFreq
[BZ_RUNA
]++;
186 if (zPend
< 2) break;
187 zPend
= (zPend
- 2) / 2;
193 register UChar
* ryy_j
;
194 register UChar rll_i
;
199 while ( rll_i
!= rtmp
) {
200 register UChar rtmp2
;
207 j
= ryy_j
- &(yy
[0]);
208 mtfv
[wr
] = j
+1; wr
++; s
->mtfFreq
[j
+1]++;
218 mtfv
[wr
] = BZ_RUNB
; wr
++;
219 s
->mtfFreq
[BZ_RUNB
]++;
221 mtfv
[wr
] = BZ_RUNA
; wr
++;
222 s
->mtfFreq
[BZ_RUNA
]++;
224 if (zPend
< 2) break;
225 zPend
= (zPend
- 2) / 2;
230 mtfv
[wr
] = EOB
; wr
++; s
->mtfFreq
[EOB
]++;
236 /*---------------------------------------------------*/
237 #define BZ_LESSER_ICOST 0
238 #define BZ_GREATER_ICOST 15
241 void sendMTFValues ( EState
* s
)
243 Int32 v
, t
, i
, j
, gs
, ge
, totc
, bt
, bc
, iter
;
244 Int32 nSelectors
, alphaSize
, minLen
, maxLen
, selCtr
;
245 Int32 nGroups
, nBytes
;
248 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
249 is a global since the decoder also needs it.
251 Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
252 Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
253 are also globals only used in this proc.
254 Made global to keep stack frame size small.
258 UInt16 cost
[BZ_N_GROUPS
];
259 Int32 fave
[BZ_N_GROUPS
];
262 /* LSC: Fix -Os compilation: -Werror=maybe-uninitialized */
263 memset(cost
, 0, sizeof(cost
));
264 #endif /* defined(__minix) */
265 UInt16
* mtfv
= s
->mtfv
;
267 if (s
->verbosity
>= 3)
268 VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
269 "%d+2 syms in use\n",
270 s
->nblock
, s
->nMTF
, s
->nInUse
);
272 alphaSize
= s
->nInUse
+2;
273 for (t
= 0; t
< BZ_N_GROUPS
; t
++)
274 for (v
= 0; v
< alphaSize
; v
++)
275 s
->len
[t
][v
] = BZ_GREATER_ICOST
;
277 /*--- Decide how many coding tables to use ---*/
278 AssertH ( s
->nMTF
> 0, 3001 );
279 if (s
->nMTF
< 200) nGroups
= 2; else
280 if (s
->nMTF
< 600) nGroups
= 3; else
281 if (s
->nMTF
< 1200) nGroups
= 4; else
282 if (s
->nMTF
< 2400) nGroups
= 5; else
285 /*--- Generate an initial set of coding tables ---*/
287 Int32 nPart
, remF
, tFreq
, aFreq
;
293 tFreq
= remF
/ nPart
;
296 while (aFreq
< tFreq
&& ge
< alphaSize
-1) {
298 aFreq
+= s
->mtfFreq
[ge
];
302 && nPart
!= nGroups
&& nPart
!= 1
303 && ((nGroups
-nPart
) % 2 == 1)) {
304 aFreq
-= s
->mtfFreq
[ge
];
308 if (s
->verbosity
>= 3)
309 VPrintf5( " initial group %d, [%d .. %d], "
310 "has %d syms (%4.1f%%)\n",
311 nPart
, gs
, ge
, aFreq
,
312 (100.0 * (float)aFreq
) / (float)(s
->nMTF
) );
314 for (v
= 0; v
< alphaSize
; v
++)
315 if (v
>= gs
&& v
<= ge
)
316 s
->len
[nPart
-1][v
] = BZ_LESSER_ICOST
; else
317 s
->len
[nPart
-1][v
] = BZ_GREATER_ICOST
;
326 Iterate up to BZ_N_ITERS times to improve the tables.
328 for (iter
= 0; iter
< BZ_N_ITERS
; iter
++) {
330 for (t
= 0; t
< nGroups
; t
++) fave
[t
] = 0;
332 for (t
= 0; t
< nGroups
; t
++)
333 for (v
= 0; v
< alphaSize
; v
++)
337 Set up an auxiliary length table which is used to fast-track
338 the common case (nGroups == 6).
341 for (v
= 0; v
< alphaSize
; v
++) {
342 s
->len_pack
[v
][0] = (s
->len
[1][v
] << 16) | s
->len
[0][v
];
343 s
->len_pack
[v
][1] = (s
->len
[3][v
] << 16) | s
->len
[2][v
];
344 s
->len_pack
[v
][2] = (s
->len
[5][v
] << 16) | s
->len
[4][v
];
353 /*--- Set group start & end marks. --*/
354 if (gs
>= s
->nMTF
) break;
355 ge
= gs
+ BZ_G_SIZE
- 1;
356 if (ge
>= s
->nMTF
) ge
= s
->nMTF
-1;
359 Calculate the cost of this group as coded
360 by each of the coding tables.
362 for (t
= 0; t
< nGroups
; t
++) cost
[t
] = 0;
364 if (nGroups
== 6 && 50 == ge
-gs
+1) {
365 /*--- fast track the common case ---*/
366 register UInt32 cost01
, cost23
, cost45
;
368 cost01
= cost23
= cost45
= 0;
370 # define BZ_ITER(nn) \
371 icv = mtfv[gs+(nn)]; \
372 cost01 += s->len_pack[icv][0]; \
373 cost23 += s->len_pack[icv][1]; \
374 cost45 += s->len_pack[icv][2]; \
376 BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
377 BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
378 BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
379 BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
380 BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
381 BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
382 BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
383 BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
384 BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
385 BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
389 cost
[0] = cost01
& 0xffff; cost
[1] = cost01
>> 16;
390 cost
[2] = cost23
& 0xffff; cost
[3] = cost23
>> 16;
391 cost
[4] = cost45
& 0xffff; cost
[5] = cost45
>> 16;
394 /*--- slow version which correctly handles all situations ---*/
395 for (i
= gs
; i
<= ge
; i
++) {
396 UInt16 icv
= mtfv
[i
];
397 for (t
= 0; t
< nGroups
; t
++) cost
[t
] += s
->len
[t
][icv
];
402 Find the coding table which is best for this group,
403 and record its identity in the selector table.
405 bc
= 999999999; bt
= -1;
406 for (t
= 0; t
< nGroups
; t
++)
407 if (cost
[t
] < bc
) { bc
= cost
[t
]; bt
= t
; };
410 s
->selector
[nSelectors
] = bt
;
414 Increment the symbol frequencies for the selected table.
416 if (nGroups
== 6 && 50 == ge
-gs
+1) {
417 /*--- fast track the common case ---*/
419 # define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
421 BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
422 BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
423 BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
424 BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
425 BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
426 BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
427 BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
428 BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
429 BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
430 BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
435 /*--- slow version which correctly handles all situations ---*/
436 for (i
= gs
; i
<= ge
; i
++)
437 s
->rfreq
[bt
][ mtfv
[i
] ]++;
442 if (s
->verbosity
>= 3) {
443 VPrintf2 ( " pass %d: size is %d, grp uses are ",
445 for (t
= 0; t
< nGroups
; t
++)
446 VPrintf1 ( "%d ", fave
[t
] );
451 Recompute the tables based on the accumulated frequencies.
453 /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
454 comment in huffman.c for details. */
455 for (t
= 0; t
< nGroups
; t
++)
456 BZ2_hbMakeCodeLengths ( &(s
->len
[t
][0]), &(s
->rfreq
[t
][0]),
457 alphaSize
, 17 /*20*/ );
461 AssertH( nGroups
< 8, 3002 );
462 AssertH( nSelectors
< 32768 &&
463 nSelectors
<= (2 + (900000 / BZ_G_SIZE
)),
467 /*--- Compute MTF values for the selectors. ---*/
469 UChar pos
[BZ_N_GROUPS
], ll_i
, tmp2
, tmp
;
470 for (i
= 0; i
< nGroups
; i
++) pos
[i
] = i
;
471 for (i
= 0; i
< nSelectors
; i
++) {
472 ll_i
= s
->selector
[i
];
475 while ( ll_i
!= tmp
) {
482 s
->selectorMtf
[i
] = j
;
486 /*--- Assign actual codes for the tables. --*/
487 for (t
= 0; t
< nGroups
; t
++) {
490 for (i
= 0; i
< alphaSize
; i
++) {
491 if (s
->len
[t
][i
] > maxLen
) maxLen
= s
->len
[t
][i
];
492 if (s
->len
[t
][i
] < minLen
) minLen
= s
->len
[t
][i
];
494 AssertH ( !(maxLen
> 17 /*20*/ ), 3004 );
495 AssertH ( !(minLen
< 1), 3005 );
496 BZ2_hbAssignCodes ( &(s
->code
[t
][0]), &(s
->len
[t
][0]),
497 minLen
, maxLen
, alphaSize
);
500 /*--- Transmit the mapping table. ---*/
503 for (i
= 0; i
< 16; i
++) {
505 for (j
= 0; j
< 16; j
++)
506 if (s
->inUse
[i
* 16 + j
]) inUse16
[i
] = True
;
510 for (i
= 0; i
< 16; i
++)
511 if (inUse16
[i
]) bsW(s
,1,1); else bsW(s
,1,0);
513 for (i
= 0; i
< 16; i
++)
515 for (j
= 0; j
< 16; j
++) {
516 if (s
->inUse
[i
* 16 + j
]) bsW(s
,1,1); else bsW(s
,1,0);
519 if (s
->verbosity
>= 3)
520 VPrintf1( " bytes: mapping %d, ", s
->numZ
-nBytes
);
523 /*--- Now the selectors. ---*/
525 bsW ( s
, 3, nGroups
);
526 bsW ( s
, 15, nSelectors
);
527 for (i
= 0; i
< nSelectors
; i
++) {
528 for (j
= 0; j
< s
->selectorMtf
[i
]; j
++) bsW(s
,1,1);
531 if (s
->verbosity
>= 3)
532 VPrintf1( "selectors %d, ", s
->numZ
-nBytes
);
534 /*--- Now the coding tables. ---*/
537 for (t
= 0; t
< nGroups
; t
++) {
538 Int32 curr
= s
->len
[t
][0];
540 for (i
= 0; i
< alphaSize
; i
++) {
541 while (curr
< s
->len
[t
][i
]) { bsW(s
,2,2); curr
++; /* 10 */ };
542 while (curr
> s
->len
[t
][i
]) { bsW(s
,2,3); curr
--; /* 11 */ };
547 if (s
->verbosity
>= 3)
548 VPrintf1 ( "code lengths %d, ", s
->numZ
-nBytes
);
550 /*--- And finally, the block data proper ---*/
555 if (gs
>= s
->nMTF
) break;
556 ge
= gs
+ BZ_G_SIZE
- 1;
557 if (ge
>= s
->nMTF
) ge
= s
->nMTF
-1;
558 AssertH ( s
->selector
[selCtr
] < nGroups
, 3006 );
560 if (nGroups
== 6 && 50 == ge
-gs
+1) {
561 /*--- fast track the common case ---*/
563 UChar
* s_len_sel_selCtr
564 = &(s
->len
[s
->selector
[selCtr
]][0]);
565 Int32
* s_code_sel_selCtr
566 = &(s
->code
[s
->selector
[selCtr
]][0]);
568 # define BZ_ITAH(nn) \
569 mtfv_i = mtfv[gs+(nn)]; \
571 s_len_sel_selCtr[mtfv_i], \
572 s_code_sel_selCtr[mtfv_i] )
574 BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
575 BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
576 BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
577 BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
578 BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
579 BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
580 BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
581 BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
582 BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
583 BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
588 /*--- slow version which correctly handles all situations ---*/
589 for (i
= gs
; i
<= ge
; i
++) {
591 s
->len
[s
->selector
[selCtr
]] [mtfv
[i
]],
592 s
->code
[s
->selector
[selCtr
]] [mtfv
[i
]] );
600 AssertH( selCtr
== nSelectors
, 3007 );
602 if (s
->verbosity
>= 3)
603 VPrintf1( "codes %d\n", s
->numZ
-nBytes
);
607 /*---------------------------------------------------*/
608 void BZ2_compressBlock ( EState
* s
, Bool is_last_block
)
612 BZ_FINALISE_CRC ( s
->blockCRC
);
613 s
->combinedCRC
= (s
->combinedCRC
<< 1) | (s
->combinedCRC
>> 31);
614 s
->combinedCRC
^= s
->blockCRC
;
615 if (s
->blockNo
> 1) s
->numZ
= 0;
617 if (s
->verbosity
>= 2)
618 VPrintf4( " block %d: crc = 0x%08x, "
619 "combined CRC = 0x%08x, size = %d\n",
620 s
->blockNo
, s
->blockCRC
, s
->combinedCRC
, s
->nblock
);
625 s
->zbits
= (UChar
*) (&((UChar
*)s
->arr2
)[s
->nblock
]);
627 /*-- If this is the first block, create the stream header. --*/
628 if (s
->blockNo
== 1) {
629 BZ2_bsInitWrite ( s
);
630 bsPutUChar ( s
, BZ_HDR_B
);
631 bsPutUChar ( s
, BZ_HDR_Z
);
632 bsPutUChar ( s
, BZ_HDR_h
);
633 bsPutUChar ( s
, (UChar
)(BZ_HDR_0
+ s
->blockSize100k
) );
638 bsPutUChar ( s
, 0x31 ); bsPutUChar ( s
, 0x41 );
639 bsPutUChar ( s
, 0x59 ); bsPutUChar ( s
, 0x26 );
640 bsPutUChar ( s
, 0x53 ); bsPutUChar ( s
, 0x59 );
642 /*-- Now the block's CRC, so it is in a known place. --*/
643 bsPutUInt32 ( s
, s
->blockCRC
);
646 Now a single bit indicating (non-)randomisation.
647 As of version 0.9.5, we use a better sorting algorithm
648 which makes randomisation unnecessary. So always set
649 the randomised bit to 'no'. Of course, the decoder
650 still needs to be able to handle randomised blocks
651 so as to maintain backwards compatibility with
652 older versions of bzip2.
656 bsW ( s
, 24, s
->origPtr
);
657 generateMTFValues ( s
);
662 /*-- If this is the last block, add the stream trailer. --*/
665 bsPutUChar ( s
, 0x17 ); bsPutUChar ( s
, 0x72 );
666 bsPutUChar ( s
, 0x45 ); bsPutUChar ( s
, 0x38 );
667 bsPutUChar ( s
, 0x50 ); bsPutUChar ( s
, 0x90 );
668 bsPutUInt32 ( s
, s
->combinedCRC
);
669 if (s
->verbosity
>= 2)
670 VPrintf1( " final combined CRC = 0x%08x\n ", s
->combinedCRC
);
676 /*-------------------------------------------------------------*/
677 /*--- end compress.c ---*/
678 /*-------------------------------------------------------------*/