Drop main() prototype. Syncs with NetBSD-8
[minix.git] / external / bsd / bzip2 / dist / blocksort.c
blobe739bacee214557b9610ea5f9ba93ef96e7bad7d
1 /* $NetBSD: blocksort.c,v 1.1.1.2 2012/05/07 00:41:45 wiz Exp $ */
4 /*-------------------------------------------------------------*/
5 /*--- Block sorting machinery ---*/
6 /*--- blocksort.c ---*/
7 /*-------------------------------------------------------------*/
9 /* ------------------------------------------------------------------
10 This file is part of bzip2/libbzip2, a program and library for
11 lossless, block-sorting data compression.
13 bzip2/libbzip2 version 1.0.6 of 6 September 2010
14 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
16 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 README file.
19 This program is released under the terms of the license contained
20 in the file LICENSE.
21 ------------------------------------------------------------------ */
24 #include "bzlib_private.h"
26 /*---------------------------------------------*/
27 /*--- Fallback O(N log(N)^2) sorting ---*/
28 /*--- algorithm, for repetitive blocks ---*/
29 /*---------------------------------------------*/
31 /*---------------------------------------------*/
32 static
33 __inline__
34 void fallbackSimpleSort ( UInt32* fmap,
35 UInt32* eclass,
36 Int32 lo,
37 Int32 hi )
39 Int32 i, j, tmp;
40 UInt32 ec_tmp;
42 if (lo == hi) return;
44 if (hi - lo > 3) {
45 for ( i = hi-4; i >= lo; i-- ) {
46 tmp = fmap[i];
47 ec_tmp = eclass[tmp];
48 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
49 fmap[j-4] = fmap[j];
50 fmap[j-4] = tmp;
54 for ( i = hi-1; i >= lo; i-- ) {
55 tmp = fmap[i];
56 ec_tmp = eclass[tmp];
57 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
58 fmap[j-1] = fmap[j];
59 fmap[j-1] = tmp;
64 /*---------------------------------------------*/
65 #define fswap(zz1, zz2) \
66 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
68 #define fvswap(zzp1, zzp2, zzn) \
69 { \
70 Int32 yyp1 = (zzp1); \
71 Int32 yyp2 = (zzp2); \
72 Int32 yyn = (zzn); \
73 while (yyn > 0) { \
74 fswap(fmap[yyp1], fmap[yyp2]); \
75 yyp1++; yyp2++; yyn--; \
76 } \
80 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
82 #define fpush(lz,hz) { stackLo[sp] = lz; \
83 stackHi[sp] = hz; \
84 sp++; }
86 #define fpop(lz,hz) { sp--; \
87 lz = stackLo[sp]; \
88 hz = stackHi[sp]; }
90 #define FALLBACK_QSORT_SMALL_THRESH 10
91 #define FALLBACK_QSORT_STACK_SIZE 100
94 static
95 void fallbackQSort3 ( UInt32* fmap,
96 UInt32* eclass,
97 Int32 loSt,
98 Int32 hiSt )
100 Int32 unLo, unHi, ltLo, gtHi, n, m;
101 Int32 sp, lo, hi;
102 UInt32 med, r, r3;
103 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
104 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
106 r = 0;
108 sp = 0;
109 fpush ( loSt, hiSt );
111 while (sp > 0) {
113 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
115 fpop ( lo, hi );
116 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
117 fallbackSimpleSort ( fmap, eclass, lo, hi );
118 continue;
121 /* Random partitioning. Median of 3 sometimes fails to
122 avoid bad cases. Median of 9 seems to help but
123 looks rather expensive. This too seems to work but
124 is cheaper. Guidance for the magic constants
125 7621 and 32768 is taken from Sedgewick's algorithms
126 book, chapter 35.
128 r = ((r * 7621) + 1) % 32768;
129 r3 = r % 3;
130 if (r3 == 0) med = eclass[fmap[lo]]; else
131 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
132 med = eclass[fmap[hi]];
134 unLo = ltLo = lo;
135 unHi = gtHi = hi;
137 while (1) {
138 while (1) {
139 if (unLo > unHi) break;
140 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
141 if (n == 0) {
142 fswap(fmap[unLo], fmap[ltLo]);
143 ltLo++; unLo++;
144 continue;
146 if (n > 0) break;
147 unLo++;
149 while (1) {
150 if (unLo > unHi) break;
151 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
152 if (n == 0) {
153 fswap(fmap[unHi], fmap[gtHi]);
154 gtHi--; unHi--;
155 continue;
157 if (n < 0) break;
158 unHi--;
160 if (unLo > unHi) break;
161 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
164 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
166 if (gtHi < ltLo) continue;
168 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
169 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
171 n = lo + unLo - ltLo - 1;
172 m = hi - (gtHi - unHi) + 1;
174 if (n - lo > hi - m) {
175 fpush ( lo, n );
176 fpush ( m, hi );
177 } else {
178 fpush ( m, hi );
179 fpush ( lo, n );
184 #undef fmin
185 #undef fpush
186 #undef fpop
187 #undef fswap
188 #undef fvswap
189 #undef FALLBACK_QSORT_SMALL_THRESH
190 #undef FALLBACK_QSORT_STACK_SIZE
193 /*---------------------------------------------*/
194 /* Pre:
195 nblock > 0
196 eclass exists for [0 .. nblock-1]
197 ((UChar*)eclass) [0 .. nblock-1] holds block
198 ptr exists for [0 .. nblock-1]
200 Post:
201 ((UChar*)eclass) [0 .. nblock-1] holds block
202 All other areas of eclass destroyed
203 fmap [0 .. nblock-1] holds sorted order
204 bhtab [ 0 .. 2+(nblock/32) ] destroyed
207 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
208 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
209 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
210 #define WORD_BH(zz) bhtab[(zz) >> 5]
211 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
213 static
214 void fallbackSort ( UInt32* fmap,
215 UInt32* eclass,
216 UInt32* bhtab,
217 Int32 nblock,
218 Int32 verb )
220 Int32 ftab[257];
221 Int32 ftabCopy[256];
222 Int32 H, i, j, k, l, r, cc, cc1;
223 Int32 nNotDone;
224 Int32 nBhtab;
225 UChar* eclass8 = (UChar*)eclass;
227 /*--
228 Initial 1-char radix sort to generate
229 initial fmap and initial BH bits.
230 --*/
231 if (verb >= 4)
232 VPrintf0 ( " bucket sorting ...\n" );
233 for (i = 0; i < 257; i++) ftab[i] = 0;
234 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
235 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
236 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
238 for (i = 0; i < nblock; i++) {
239 j = eclass8[i];
240 k = ftab[j] - 1;
241 ftab[j] = k;
242 fmap[k] = i;
245 nBhtab = 2 + (nblock / 32);
246 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
247 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
249 /*--
250 Inductively refine the buckets. Kind-of an
251 "exponential radix sort" (!), inspired by the
252 Manber-Myers suffix array construction algorithm.
253 --*/
255 /*-- set sentinel bits for block-end detection --*/
256 for (i = 0; i < 32; i++) {
257 SET_BH(nblock + 2*i);
258 CLEAR_BH(nblock + 2*i + 1);
261 /*-- the log(N) loop --*/
262 H = 1;
263 while (1) {
265 if (verb >= 4)
266 VPrintf1 ( " depth %6d has ", H );
268 j = 0;
269 for (i = 0; i < nblock; i++) {
270 if (ISSET_BH(i)) j = i;
271 k = fmap[i] - H; if (k < 0) k += nblock;
272 eclass[k] = j;
275 nNotDone = 0;
276 r = -1;
277 while (1) {
279 /*-- find the next non-singleton bucket --*/
280 k = r + 1;
281 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
282 if (ISSET_BH(k)) {
283 while (WORD_BH(k) == 0xffffffff) k += 32;
284 while (ISSET_BH(k)) k++;
286 l = k - 1;
287 if (l >= nblock) break;
288 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
289 if (!ISSET_BH(k)) {
290 while (WORD_BH(k) == 0x00000000) k += 32;
291 while (!ISSET_BH(k)) k++;
293 r = k - 1;
294 if (r >= nblock) break;
296 /*-- now [l, r] bracket current bucket --*/
297 if (r > l) {
298 nNotDone += (r - l + 1);
299 fallbackQSort3 ( fmap, eclass, l, r );
301 /*-- scan bucket and generate header bits-- */
302 cc = -1;
303 for (i = l; i <= r; i++) {
304 cc1 = eclass[fmap[i]];
305 if (cc != cc1) { SET_BH(i); cc = cc1; };
310 if (verb >= 4)
311 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
313 H *= 2;
314 if (H > nblock || nNotDone == 0) break;
317 /*--
318 Reconstruct the original block in
319 eclass8 [0 .. nblock-1], since the
320 previous phase destroyed it.
321 --*/
322 if (verb >= 4)
323 VPrintf0 ( " reconstructing block ...\n" );
324 j = 0;
325 for (i = 0; i < nblock; i++) {
326 while (ftabCopy[j] == 0) j++;
327 ftabCopy[j]--;
328 eclass8[fmap[i]] = (UChar)j;
330 AssertH ( j < 256, 1005 );
333 #undef SET_BH
334 #undef CLEAR_BH
335 #undef ISSET_BH
336 #undef WORD_BH
337 #undef UNALIGNED_BH
340 /*---------------------------------------------*/
341 /*--- The main, O(N^2 log(N)) sorting ---*/
342 /*--- algorithm. Faster for "normal" ---*/
343 /*--- non-repetitive blocks. ---*/
344 /*---------------------------------------------*/
346 /*---------------------------------------------*/
347 static
348 __inline__
349 Bool mainGtU ( UInt32 i1,
350 UInt32 i2,
351 UChar* block,
352 UInt16* quadrant,
353 UInt32 nblock,
354 Int32* budget )
356 Int32 k;
357 UChar c1, c2;
358 UInt16 s1, s2;
360 AssertD ( i1 != i2, "mainGtU" );
361 /* 1 */
362 c1 = block[i1]; c2 = block[i2];
363 if (c1 != c2) return (c1 > c2);
364 i1++; i2++;
365 /* 2 */
366 c1 = block[i1]; c2 = block[i2];
367 if (c1 != c2) return (c1 > c2);
368 i1++; i2++;
369 /* 3 */
370 c1 = block[i1]; c2 = block[i2];
371 if (c1 != c2) return (c1 > c2);
372 i1++; i2++;
373 /* 4 */
374 c1 = block[i1]; c2 = block[i2];
375 if (c1 != c2) return (c1 > c2);
376 i1++; i2++;
377 /* 5 */
378 c1 = block[i1]; c2 = block[i2];
379 if (c1 != c2) return (c1 > c2);
380 i1++; i2++;
381 /* 6 */
382 c1 = block[i1]; c2 = block[i2];
383 if (c1 != c2) return (c1 > c2);
384 i1++; i2++;
385 /* 7 */
386 c1 = block[i1]; c2 = block[i2];
387 if (c1 != c2) return (c1 > c2);
388 i1++; i2++;
389 /* 8 */
390 c1 = block[i1]; c2 = block[i2];
391 if (c1 != c2) return (c1 > c2);
392 i1++; i2++;
393 /* 9 */
394 c1 = block[i1]; c2 = block[i2];
395 if (c1 != c2) return (c1 > c2);
396 i1++; i2++;
397 /* 10 */
398 c1 = block[i1]; c2 = block[i2];
399 if (c1 != c2) return (c1 > c2);
400 i1++; i2++;
401 /* 11 */
402 c1 = block[i1]; c2 = block[i2];
403 if (c1 != c2) return (c1 > c2);
404 i1++; i2++;
405 /* 12 */
406 c1 = block[i1]; c2 = block[i2];
407 if (c1 != c2) return (c1 > c2);
408 i1++; i2++;
410 k = nblock + 8;
412 do {
413 /* 1 */
414 c1 = block[i1]; c2 = block[i2];
415 if (c1 != c2) return (c1 > c2);
416 s1 = quadrant[i1]; s2 = quadrant[i2];
417 if (s1 != s2) return (s1 > s2);
418 i1++; i2++;
419 /* 2 */
420 c1 = block[i1]; c2 = block[i2];
421 if (c1 != c2) return (c1 > c2);
422 s1 = quadrant[i1]; s2 = quadrant[i2];
423 if (s1 != s2) return (s1 > s2);
424 i1++; i2++;
425 /* 3 */
426 c1 = block[i1]; c2 = block[i2];
427 if (c1 != c2) return (c1 > c2);
428 s1 = quadrant[i1]; s2 = quadrant[i2];
429 if (s1 != s2) return (s1 > s2);
430 i1++; i2++;
431 /* 4 */
432 c1 = block[i1]; c2 = block[i2];
433 if (c1 != c2) return (c1 > c2);
434 s1 = quadrant[i1]; s2 = quadrant[i2];
435 if (s1 != s2) return (s1 > s2);
436 i1++; i2++;
437 /* 5 */
438 c1 = block[i1]; c2 = block[i2];
439 if (c1 != c2) return (c1 > c2);
440 s1 = quadrant[i1]; s2 = quadrant[i2];
441 if (s1 != s2) return (s1 > s2);
442 i1++; i2++;
443 /* 6 */
444 c1 = block[i1]; c2 = block[i2];
445 if (c1 != c2) return (c1 > c2);
446 s1 = quadrant[i1]; s2 = quadrant[i2];
447 if (s1 != s2) return (s1 > s2);
448 i1++; i2++;
449 /* 7 */
450 c1 = block[i1]; c2 = block[i2];
451 if (c1 != c2) return (c1 > c2);
452 s1 = quadrant[i1]; s2 = quadrant[i2];
453 if (s1 != s2) return (s1 > s2);
454 i1++; i2++;
455 /* 8 */
456 c1 = block[i1]; c2 = block[i2];
457 if (c1 != c2) return (c1 > c2);
458 s1 = quadrant[i1]; s2 = quadrant[i2];
459 if (s1 != s2) return (s1 > s2);
460 i1++; i2++;
462 if (i1 >= nblock) i1 -= nblock;
463 if (i2 >= nblock) i2 -= nblock;
465 k -= 8;
466 (*budget)--;
468 while (k >= 0);
470 return False;
474 /*---------------------------------------------*/
475 /*--
476 Knuth's increments seem to work better
477 than Incerpi-Sedgewick here. Possibly
478 because the number of elems to sort is
479 usually small, typically <= 20.
480 --*/
481 static
482 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
483 9841, 29524, 88573, 265720,
484 797161, 2391484 };
486 static
487 void mainSimpleSort ( UInt32* ptr,
488 UChar* block,
489 UInt16* quadrant,
490 Int32 nblock,
491 Int32 lo,
492 Int32 hi,
493 Int32 d,
494 Int32* budget )
496 Int32 i, j, h, bigN, hp;
497 UInt32 v;
499 bigN = hi - lo + 1;
500 if (bigN < 2) return;
502 hp = 0;
503 while (incs[hp] < bigN) hp++;
504 hp--;
506 for (; hp >= 0; hp--) {
507 h = incs[hp];
509 i = lo + h;
510 while (True) {
512 /*-- copy 1 --*/
513 if (i > hi) break;
514 v = ptr[i];
515 j = i;
516 while ( mainGtU (
517 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
518 ) ) {
519 ptr[j] = ptr[j-h];
520 j = j - h;
521 if (j <= (lo + h - 1)) break;
523 ptr[j] = v;
524 i++;
526 /*-- copy 2 --*/
527 if (i > hi) break;
528 v = ptr[i];
529 j = i;
530 while ( mainGtU (
531 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
532 ) ) {
533 ptr[j] = ptr[j-h];
534 j = j - h;
535 if (j <= (lo + h - 1)) break;
537 ptr[j] = v;
538 i++;
540 /*-- copy 3 --*/
541 if (i > hi) break;
542 v = ptr[i];
543 j = i;
544 while ( mainGtU (
545 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
546 ) ) {
547 ptr[j] = ptr[j-h];
548 j = j - h;
549 if (j <= (lo + h - 1)) break;
551 ptr[j] = v;
552 i++;
554 if (*budget < 0) return;
560 /*---------------------------------------------*/
561 /*--
562 The following is an implementation of
563 an elegant 3-way quicksort for strings,
564 described in a paper "Fast Algorithms for
565 Sorting and Searching Strings", by Robert
566 Sedgewick and Jon L. Bentley.
567 --*/
569 #define mswap(zz1, zz2) \
570 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
572 #define mvswap(zzp1, zzp2, zzn) \
574 Int32 yyp1 = (zzp1); \
575 Int32 yyp2 = (zzp2); \
576 Int32 yyn = (zzn); \
577 while (yyn > 0) { \
578 mswap(ptr[yyp1], ptr[yyp2]); \
579 yyp1++; yyp2++; yyn--; \
583 static
584 __inline__
585 UChar mmed3 ( UChar a, UChar b, UChar c )
587 UChar t;
588 if (a > b) { t = a; a = b; b = t; };
589 if (b > c) {
590 b = c;
591 if (a > b) b = a;
593 return b;
596 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
598 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
599 stackHi[sp] = hz; \
600 stackD [sp] = dz; \
601 sp++; }
603 #define mpop(lz,hz,dz) { sp--; \
604 lz = stackLo[sp]; \
605 hz = stackHi[sp]; \
606 dz = stackD [sp]; }
609 #define mnextsize(az) (nextHi[az]-nextLo[az])
611 #define mnextswap(az,bz) \
612 { Int32 tz; \
613 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
614 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
615 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
618 #define MAIN_QSORT_SMALL_THRESH 20
619 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
620 #define MAIN_QSORT_STACK_SIZE 100
622 static
623 void mainQSort3 ( UInt32* ptr,
624 UChar* block,
625 UInt16* quadrant,
626 Int32 nblock,
627 Int32 loSt,
628 Int32 hiSt,
629 Int32 dSt,
630 Int32* budget )
632 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
633 Int32 sp, lo, hi, d;
635 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
636 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
637 Int32 stackD [MAIN_QSORT_STACK_SIZE];
639 Int32 nextLo[3];
640 Int32 nextHi[3];
641 Int32 nextD [3];
643 sp = 0;
644 mpush ( loSt, hiSt, dSt );
646 while (sp > 0) {
648 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
650 mpop ( lo, hi, d );
651 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
652 d > MAIN_QSORT_DEPTH_THRESH) {
653 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
654 if (*budget < 0) return;
655 continue;
658 med = (Int32)
659 mmed3 ( block[ptr[ lo ]+d],
660 block[ptr[ hi ]+d],
661 block[ptr[ (lo+hi)>>1 ]+d] );
663 unLo = ltLo = lo;
664 unHi = gtHi = hi;
666 while (True) {
667 while (True) {
668 if (unLo > unHi) break;
669 n = ((Int32)block[ptr[unLo]+d]) - med;
670 if (n == 0) {
671 mswap(ptr[unLo], ptr[ltLo]);
672 ltLo++; unLo++; continue;
674 if (n > 0) break;
675 unLo++;
677 while (True) {
678 if (unLo > unHi) break;
679 n = ((Int32)block[ptr[unHi]+d]) - med;
680 if (n == 0) {
681 mswap(ptr[unHi], ptr[gtHi]);
682 gtHi--; unHi--; continue;
684 if (n < 0) break;
685 unHi--;
687 if (unLo > unHi) break;
688 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
691 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
693 if (gtHi < ltLo) {
694 mpush(lo, hi, d+1 );
695 continue;
698 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
699 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
701 n = lo + unLo - ltLo - 1;
702 m = hi - (gtHi - unHi) + 1;
704 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
705 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
706 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
710 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
712 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
713 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
715 mpush (nextLo[0], nextHi[0], nextD[0]);
716 mpush (nextLo[1], nextHi[1], nextD[1]);
717 mpush (nextLo[2], nextHi[2], nextD[2]);
721 #undef mswap
722 #undef mvswap
723 #undef mpush
724 #undef mpop
725 #undef mmin
726 #undef mnextsize
727 #undef mnextswap
728 #undef MAIN_QSORT_SMALL_THRESH
729 #undef MAIN_QSORT_DEPTH_THRESH
730 #undef MAIN_QSORT_STACK_SIZE
733 /*---------------------------------------------*/
734 /* Pre:
735 nblock > N_OVERSHOOT
736 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
737 ((UChar*)block32) [0 .. nblock-1] holds block
738 ptr exists for [0 .. nblock-1]
740 Post:
741 ((UChar*)block32) [0 .. nblock-1] holds block
742 All other areas of block32 destroyed
743 ftab [0 .. 65536 ] destroyed
744 ptr [0 .. nblock-1] holds sorted order
745 if (*budget < 0), sorting was abandoned
748 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
749 #define SETMASK (1 << 21)
750 #define CLEARMASK (~(SETMASK))
752 static
753 void mainSort ( UInt32* ptr,
754 UChar* block,
755 UInt16* quadrant,
756 UInt32* ftab,
757 Int32 nblock,
758 Int32 verb,
759 Int32* budget )
761 Int32 i, j, k, ss, sb;
762 Int32 runningOrder[256];
763 Bool bigDone[256];
764 Int32 copyStart[256];
765 Int32 copyEnd [256];
766 UChar c1;
767 Int32 numQSorted;
768 UInt16 s;
769 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
771 /*-- set up the 2-byte frequency table --*/
772 for (i = 65536; i >= 0; i--) ftab[i] = 0;
774 j = block[0] << 8;
775 i = nblock-1;
776 for (; i >= 3; i -= 4) {
777 quadrant[i] = 0;
778 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
779 ftab[j]++;
780 quadrant[i-1] = 0;
781 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
782 ftab[j]++;
783 quadrant[i-2] = 0;
784 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
785 ftab[j]++;
786 quadrant[i-3] = 0;
787 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
788 ftab[j]++;
790 for (; i >= 0; i--) {
791 quadrant[i] = 0;
792 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
793 ftab[j]++;
796 /*-- (emphasises close relationship of block & quadrant) --*/
797 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
798 block [nblock+i] = block[i];
799 quadrant[nblock+i] = 0;
802 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
804 /*-- Complete the initial radix sort --*/
805 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
807 s = block[0] << 8;
808 i = nblock-1;
809 for (; i >= 3; i -= 4) {
810 s = (s >> 8) | (block[i] << 8);
811 j = ftab[s] -1;
812 ftab[s] = j;
813 ptr[j] = i;
814 s = (s >> 8) | (block[i-1] << 8);
815 j = ftab[s] -1;
816 ftab[s] = j;
817 ptr[j] = i-1;
818 s = (s >> 8) | (block[i-2] << 8);
819 j = ftab[s] -1;
820 ftab[s] = j;
821 ptr[j] = i-2;
822 s = (s >> 8) | (block[i-3] << 8);
823 j = ftab[s] -1;
824 ftab[s] = j;
825 ptr[j] = i-3;
827 for (; i >= 0; i--) {
828 s = (s >> 8) | (block[i] << 8);
829 j = ftab[s] -1;
830 ftab[s] = j;
831 ptr[j] = i;
834 /*--
835 Now ftab contains the first loc of every small bucket.
836 Calculate the running order, from smallest to largest
837 big bucket.
838 --*/
839 for (i = 0; i <= 255; i++) {
840 bigDone [i] = False;
841 runningOrder[i] = i;
845 Int32 vv;
846 Int32 h = 1;
847 do h = 3 * h + 1; while (h <= 256);
848 do {
849 h = h / 3;
850 for (i = h; i <= 255; i++) {
851 vv = runningOrder[i];
852 j = i;
853 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
854 runningOrder[j] = runningOrder[j-h];
855 j = j - h;
856 if (j <= (h - 1)) goto zero;
858 zero:
859 runningOrder[j] = vv;
861 } while (h != 1);
864 /*--
865 The main sorting loop.
866 --*/
868 numQSorted = 0;
870 for (i = 0; i <= 255; i++) {
872 /*--
873 Process big buckets, starting with the least full.
874 Basically this is a 3-step process in which we call
875 mainQSort3 to sort the small buckets [ss, j], but
876 also make a big effort to avoid the calls if we can.
877 --*/
878 ss = runningOrder[i];
880 /*--
881 Step 1:
882 Complete the big bucket [ss] by quicksorting
883 any unsorted small buckets [ss, j], for j != ss.
884 Hopefully previous pointer-scanning phases have already
885 completed many of the small buckets [ss, j], so
886 we don't have to sort them at all.
887 --*/
888 for (j = 0; j <= 255; j++) {
889 if (j != ss) {
890 sb = (ss << 8) + j;
891 if ( ! (ftab[sb] & SETMASK) ) {
892 Int32 lo = ftab[sb] & CLEARMASK;
893 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
894 if (hi > lo) {
895 if (verb >= 4)
896 VPrintf4 ( " qsort [0x%x, 0x%x] "
897 "done %d this %d\n",
898 ss, j, numQSorted, hi - lo + 1 );
899 mainQSort3 (
900 ptr, block, quadrant, nblock,
901 lo, hi, BZ_N_RADIX, budget
903 numQSorted += (hi - lo + 1);
904 if (*budget < 0) return;
907 ftab[sb] |= SETMASK;
911 AssertH ( !bigDone[ss], 1006 );
913 /*--
914 Step 2:
915 Now scan this big bucket [ss] so as to synthesise the
916 sorted order for small buckets [t, ss] for all t,
917 including, magically, the bucket [ss,ss] too.
918 This will avoid doing Real Work in subsequent Step 1's.
919 --*/
921 for (j = 0; j <= 255; j++) {
922 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
923 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
925 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
926 k = ptr[j]-1; if (k < 0) k += nblock;
927 c1 = block[k];
928 if (!bigDone[c1])
929 ptr[ copyStart[c1]++ ] = k;
931 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
932 k = ptr[j]-1; if (k < 0) k += nblock;
933 c1 = block[k];
934 if (!bigDone[c1])
935 ptr[ copyEnd[c1]-- ] = k;
939 AssertH ( (copyStart[ss]-1 == copyEnd[ss])
941 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
942 Necessity for this case is demonstrated by compressing
943 a sequence of approximately 48.5 million of character
944 251; 1.0.0/1.0.1 will then die here. */
945 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
946 1007 )
948 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
950 /*--
951 Step 3:
952 The [ss] big bucket is now done. Record this fact,
953 and update the quadrant descriptors. Remember to
954 update quadrants in the overshoot area too, if
955 necessary. The "if (i < 255)" test merely skips
956 this updating for the last bucket processed, since
957 updating for the last bucket is pointless.
959 The quadrant array provides a way to incrementally
960 cache sort orderings, as they appear, so as to
961 make subsequent comparisons in fullGtU() complete
962 faster. For repetitive blocks this makes a big
963 difference (but not big enough to be able to avoid
964 the fallback sorting mechanism, exponential radix sort).
966 The precise meaning is: at all times:
968 for 0 <= i < nblock and 0 <= j <= nblock
970 if block[i] != block[j],
972 then the relative values of quadrant[i] and
973 quadrant[j] are meaningless.
975 else {
976 if quadrant[i] < quadrant[j]
977 then the string starting at i lexicographically
978 precedes the string starting at j
980 else if quadrant[i] > quadrant[j]
981 then the string starting at j lexicographically
982 precedes the string starting at i
984 else
985 the relative ordering of the strings starting
986 at i and j has not yet been determined.
988 --*/
989 bigDone[ss] = True;
991 if (i < 255) {
992 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
993 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
994 Int32 shifts = 0;
996 while ((bbSize >> shifts) > 65534) shifts++;
998 for (j = bbSize-1; j >= 0; j--) {
999 Int32 a2update = ptr[bbStart + j];
1000 UInt16 qVal = (UInt16)(j >> shifts);
1001 quadrant[a2update] = qVal;
1002 if (a2update < BZ_N_OVERSHOOT)
1003 quadrant[a2update + nblock] = qVal;
1005 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1010 if (verb >= 4)
1011 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1012 nblock, numQSorted, nblock - numQSorted );
1015 #undef BIGFREQ
1016 #undef SETMASK
1017 #undef CLEARMASK
1020 /*---------------------------------------------*/
1021 /* Pre:
1022 nblock > 0
1023 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1024 ((UChar*)arr2) [0 .. nblock-1] holds block
1025 arr1 exists for [0 .. nblock-1]
1027 Post:
1028 ((UChar*)arr2) [0 .. nblock-1] holds block
1029 All other areas of block destroyed
1030 ftab [ 0 .. 65536 ] destroyed
1031 arr1 [0 .. nblock-1] holds sorted order
1033 void BZ2_blockSort ( EState* s )
1035 UInt32* ptr = s->ptr;
1036 UChar* block = s->block;
1037 UInt32* ftab = s->ftab;
1038 Int32 nblock = s->nblock;
1039 Int32 verb = s->verbosity;
1040 Int32 wfact = s->workFactor;
1041 UInt16* quadrant;
1042 Int32 budget;
1043 Int32 budgetInit;
1044 Int32 i;
1046 if (nblock < 10000) {
1047 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1048 } else {
1049 /* Calculate the location for quadrant, remembering to get
1050 the alignment right. Assumes that &(block[0]) is at least
1051 2-byte aligned -- this should be ok since block is really
1052 the first section of arr2.
1054 i = nblock+BZ_N_OVERSHOOT;
1055 if (i & 1) i++;
1056 quadrant = (UInt16*)(&(block[i]));
1058 /* (wfact-1) / 3 puts the default-factor-30
1059 transition point at very roughly the same place as
1060 with v0.1 and v0.9.0.
1061 Not that it particularly matters any more, since the
1062 resulting compressed stream is now the same regardless
1063 of whether or not we use the main sort or fallback sort.
1065 if (wfact < 1 ) wfact = 1;
1066 if (wfact > 100) wfact = 100;
1067 budgetInit = nblock * ((wfact-1) / 3);
1068 budget = budgetInit;
1070 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1071 if (verb >= 3)
1072 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1073 budgetInit - budget,
1074 nblock,
1075 (float)(budgetInit - budget) /
1076 (float)(nblock==0 ? 1 : nblock) );
1077 if (budget < 0) {
1078 if (verb >= 2)
1079 VPrintf0 ( " too repetitive; using fallback"
1080 " sorting algorithm\n" );
1081 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1085 s->origPtr = -1;
1086 for (i = 0; i < s->nblock; i++)
1087 if (ptr[i] == 0)
1088 { s->origPtr = i; break; };
1090 AssertH( s->origPtr != -1, 1003 );
1094 /*-------------------------------------------------------------*/
1095 /*--- end blocksort.c ---*/
1096 /*-------------------------------------------------------------*/