add ext4,vfat and tar.bz2
[u-tools.git] / u-tools / apps / busybox / archival / bz / blocksort.c
blob0e73ffeba847032d2d33cbd6ec962c6238311285
1 /*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
7 /*-------------------------------------------------------------*/
8 /*--- Block sorting machinery ---*/
9 /*--- blocksort.c ---*/
10 /*-------------------------------------------------------------*/
12 /* ------------------------------------------------------------------
13 This file is part of bzip2/libbzip2, a program and library for
14 lossless, block-sorting data compression.
16 bzip2/libbzip2 version 1.0.4 of 20 December 2006
17 Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
19 Please read the WARNING, DISCLAIMER and PATENTS sections in the
20 README file.
22 This program is released under the terms of the license contained
23 in the file LICENSE.
24 ------------------------------------------------------------------ */
26 /* #include "bzlib_private.h" */
28 #define mswap(zz1, zz2) \
29 { \
30 int32_t zztmp = zz1; \
31 zz1 = zz2; \
32 zz2 = zztmp; \
35 static
36 /* No measurable speed gain with inlining */
37 /* ALWAYS_INLINE */
38 void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
40 while (zzn > 0) {
41 mswap(ptr[zzp1], ptr[zzp2]);
42 zzp1++;
43 zzp2++;
44 zzn--;
48 static
49 ALWAYS_INLINE
50 int32_t mmin(int32_t a, int32_t b)
52 return (a < b) ? a : b;
56 /*---------------------------------------------*/
57 /*--- Fallback O(N log(N)^2) sorting ---*/
58 /*--- algorithm, for repetitive blocks ---*/
59 /*---------------------------------------------*/
61 /*---------------------------------------------*/
62 static
63 inline
64 void fallbackSimpleSort(uint32_t* fmap,
65 uint32_t* eclass,
66 int32_t lo,
67 int32_t hi)
69 int32_t i, j, tmp;
70 uint32_t ec_tmp;
72 if (lo == hi) return;
74 if (hi - lo > 3) {
75 for (i = hi-4; i >= lo; i--) {
76 tmp = fmap[i];
77 ec_tmp = eclass[tmp];
78 for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
79 fmap[j-4] = fmap[j];
80 fmap[j-4] = tmp;
84 for (i = hi-1; i >= lo; i--) {
85 tmp = fmap[i];
86 ec_tmp = eclass[tmp];
87 for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
88 fmap[j-1] = fmap[j];
89 fmap[j-1] = tmp;
94 /*---------------------------------------------*/
95 #define fpush(lz,hz) { \
96 stackLo[sp] = lz; \
97 stackHi[sp] = hz; \
98 sp++; \
101 #define fpop(lz,hz) { \
102 sp--; \
103 lz = stackLo[sp]; \
104 hz = stackHi[sp]; \
107 #define FALLBACK_QSORT_SMALL_THRESH 10
108 #define FALLBACK_QSORT_STACK_SIZE 100
110 static
111 void fallbackQSort3(uint32_t* fmap,
112 uint32_t* eclass,
113 int32_t loSt,
114 int32_t hiSt)
116 int32_t unLo, unHi, ltLo, gtHi, n, m;
117 int32_t sp, lo, hi;
118 uint32_t med, r, r3;
119 int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
120 int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];
122 r = 0;
124 sp = 0;
125 fpush(loSt, hiSt);
127 while (sp > 0) {
128 AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
130 fpop(lo, hi);
131 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
132 fallbackSimpleSort(fmap, eclass, lo, hi);
133 continue;
136 /* Random partitioning. Median of 3 sometimes fails to
137 * avoid bad cases. Median of 9 seems to help but
138 * looks rather expensive. This too seems to work but
139 * is cheaper. Guidance for the magic constants
140 * 7621 and 32768 is taken from Sedgewick's algorithms
141 * book, chapter 35.
143 r = ((r * 7621) + 1) % 32768;
144 r3 = r % 3;
145 if (r3 == 0)
146 med = eclass[fmap[lo]];
147 else if (r3 == 1)
148 med = eclass[fmap[(lo+hi)>>1]];
149 else
150 med = eclass[fmap[hi]];
152 unLo = ltLo = lo;
153 unHi = gtHi = hi;
155 while (1) {
156 while (1) {
157 if (unLo > unHi) break;
158 n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
159 if (n == 0) {
160 mswap(fmap[unLo], fmap[ltLo]);
161 ltLo++;
162 unLo++;
163 continue;
165 if (n > 0) break;
166 unLo++;
168 while (1) {
169 if (unLo > unHi) break;
170 n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
171 if (n == 0) {
172 mswap(fmap[unHi], fmap[gtHi]);
173 gtHi--; unHi--;
174 continue;
176 if (n < 0) break;
177 unHi--;
179 if (unLo > unHi) break;
180 mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
183 AssertD(unHi == unLo-1, "fallbackQSort3(2)");
185 if (gtHi < ltLo) continue;
187 n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n);
188 m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m);
190 n = lo + unLo - ltLo - 1;
191 m = hi - (gtHi - unHi) + 1;
193 if (n - lo > hi - m) {
194 fpush(lo, n);
195 fpush(m, hi);
196 } else {
197 fpush(m, hi);
198 fpush(lo, n);
203 #undef fpush
204 #undef fpop
205 #undef FALLBACK_QSORT_SMALL_THRESH
206 #undef FALLBACK_QSORT_STACK_SIZE
209 /*---------------------------------------------*/
210 /* Pre:
211 * nblock > 0
212 * eclass exists for [0 .. nblock-1]
213 * ((uint8_t*)eclass) [0 .. nblock-1] holds block
214 * ptr exists for [0 .. nblock-1]
216 * Post:
217 * ((uint8_t*)eclass) [0 .. nblock-1] holds block
218 * All other areas of eclass destroyed
219 * fmap [0 .. nblock-1] holds sorted order
220 * bhtab[0 .. 2+(nblock/32)] destroyed
223 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
224 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
225 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
226 #define WORD_BH(zz) bhtab[(zz) >> 5]
227 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
229 static
230 void fallbackSort(uint32_t* fmap,
231 uint32_t* eclass,
232 uint32_t* bhtab,
233 int32_t nblock)
235 int32_t ftab[257];
236 int32_t ftabCopy[256];
237 int32_t H, i, j, k, l, r, cc, cc1;
238 int32_t nNotDone;
239 int32_t nBhtab;
240 uint8_t* eclass8 = (uint8_t*)eclass;
243 * Initial 1-char radix sort to generate
244 * initial fmap and initial BH bits.
246 for (i = 0; i < 257; i++) ftab[i] = 0;
247 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
248 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
250 j = ftab[0]; /* bbox: optimized */
251 for (i = 1; i < 257; i++) {
252 j += ftab[i];
253 ftab[i] = j;
256 for (i = 0; i < nblock; i++) {
257 j = eclass8[i];
258 k = ftab[j] - 1;
259 ftab[j] = k;
260 fmap[k] = i;
263 nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
264 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
265 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
268 * Inductively refine the buckets. Kind-of an
269 * "exponential radix sort" (!), inspired by the
270 * Manber-Myers suffix array construction algorithm.
273 /*-- set sentinel bits for block-end detection --*/
274 for (i = 0; i < 32; i++) {
275 SET_BH(nblock + 2*i);
276 CLEAR_BH(nblock + 2*i + 1);
279 /*-- the log(N) loop --*/
280 H = 1;
281 while (1) {
282 j = 0;
283 for (i = 0; i < nblock; i++) {
284 if (ISSET_BH(i))
285 j = i;
286 k = fmap[i] - H;
287 if (k < 0)
288 k += nblock;
289 eclass[k] = j;
292 nNotDone = 0;
293 r = -1;
294 while (1) {
296 /*-- find the next non-singleton bucket --*/
297 k = r + 1;
298 while (ISSET_BH(k) && UNALIGNED_BH(k))
299 k++;
300 if (ISSET_BH(k)) {
301 while (WORD_BH(k) == 0xffffffff) k += 32;
302 while (ISSET_BH(k)) k++;
304 l = k - 1;
305 if (l >= nblock)
306 break;
307 while (!ISSET_BH(k) && UNALIGNED_BH(k))
308 k++;
309 if (!ISSET_BH(k)) {
310 while (WORD_BH(k) == 0x00000000) k += 32;
311 while (!ISSET_BH(k)) k++;
313 r = k - 1;
314 if (r >= nblock)
315 break;
317 /*-- now [l, r] bracket current bucket --*/
318 if (r > l) {
319 nNotDone += (r - l + 1);
320 fallbackQSort3(fmap, eclass, l, r);
322 /*-- scan bucket and generate header bits-- */
323 cc = -1;
324 for (i = l; i <= r; i++) {
325 cc1 = eclass[fmap[i]];
326 if (cc != cc1) {
327 SET_BH(i);
328 cc = cc1;
334 H *= 2;
335 if (H > nblock || nNotDone == 0)
336 break;
340 * Reconstruct the original block in
341 * eclass8 [0 .. nblock-1], since the
342 * previous phase destroyed it.
344 j = 0;
345 for (i = 0; i < nblock; i++) {
346 while (ftabCopy[j] == 0)
347 j++;
348 ftabCopy[j]--;
349 eclass8[fmap[i]] = (uint8_t)j;
351 AssertH(j < 256, 1005);
354 #undef SET_BH
355 #undef CLEAR_BH
356 #undef ISSET_BH
357 #undef WORD_BH
358 #undef UNALIGNED_BH
361 /*---------------------------------------------*/
362 /*--- The main, O(N^2 log(N)) sorting ---*/
363 /*--- algorithm. Faster for "normal" ---*/
364 /*--- non-repetitive blocks. ---*/
365 /*---------------------------------------------*/
367 /*---------------------------------------------*/
368 static
369 NOINLINE
370 int mainGtU(
371 uint32_t i1,
372 uint32_t i2,
373 uint8_t* block,
374 uint16_t* quadrant,
375 uint32_t nblock,
376 int32_t* budget)
378 int32_t k;
379 uint8_t c1, c2;
380 uint16_t s1, s2;
382 /* Loop unrolling here is actually very useful
383 * (generated code is much simpler),
384 * code size increase is only 270 bytes (i386)
385 * but speeds up compression 10% overall
388 #if CONFIG_BZIP2_FEATURE_SPEED >= 1
390 #define TIMES_8(code) \
391 code; code; code; code; \
392 code; code; code; code;
393 #define TIMES_12(code) \
394 code; code; code; code; \
395 code; code; code; code; \
396 code; code; code; code;
398 #else
400 #define TIMES_8(code) \
402 int nn = 8; \
403 do { \
404 code; \
405 } while (--nn); \
407 #define TIMES_12(code) \
409 int nn = 12; \
410 do { \
411 code; \
412 } while (--nn); \
415 #endif
417 AssertD(i1 != i2, "mainGtU");
418 TIMES_12(
419 c1 = block[i1]; c2 = block[i2];
420 if (c1 != c2) return (c1 > c2);
421 i1++; i2++;
424 k = nblock + 8;
426 do {
427 TIMES_8(
428 c1 = block[i1]; c2 = block[i2];
429 if (c1 != c2) return (c1 > c2);
430 s1 = quadrant[i1]; s2 = quadrant[i2];
431 if (s1 != s2) return (s1 > s2);
432 i1++; i2++;
435 if (i1 >= nblock) i1 -= nblock;
436 if (i2 >= nblock) i2 -= nblock;
438 (*budget)--;
439 k -= 8;
440 } while (k >= 0);
442 return False;
444 #undef TIMES_8
445 #undef TIMES_12
447 /*---------------------------------------------*/
449 * Knuth's increments seem to work better
450 * than Incerpi-Sedgewick here. Possibly
451 * because the number of elems to sort is
452 * usually small, typically <= 20.
454 static
455 const int32_t incs[14] = {
456 1, 4, 13, 40, 121, 364, 1093, 3280,
457 9841, 29524, 88573, 265720,
458 797161, 2391484
461 static
462 void mainSimpleSort(uint32_t* ptr,
463 uint8_t* block,
464 uint16_t* quadrant,
465 int32_t nblock,
466 int32_t lo,
467 int32_t hi,
468 int32_t d,
469 int32_t* budget)
471 int32_t i, j, h, bigN, hp;
472 uint32_t v;
474 bigN = hi - lo + 1;
475 if (bigN < 2) return;
477 hp = 0;
478 while (incs[hp] < bigN) hp++;
479 hp--;
481 for (; hp >= 0; hp--) {
482 h = incs[hp];
484 i = lo + h;
485 while (1) {
486 /*-- copy 1 --*/
487 if (i > hi) break;
488 v = ptr[i];
489 j = i;
490 while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
491 ptr[j] = ptr[j-h];
492 j = j - h;
493 if (j <= (lo + h - 1)) break;
495 ptr[j] = v;
496 i++;
498 /* 1.5% overall speedup, +290 bytes */
499 #if CONFIG_BZIP2_FEATURE_SPEED >= 3
500 /*-- copy 2 --*/
501 if (i > hi) break;
502 v = ptr[i];
503 j = i;
504 while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
505 ptr[j] = ptr[j-h];
506 j = j - h;
507 if (j <= (lo + h - 1)) break;
509 ptr[j] = v;
510 i++;
512 /*-- copy 3 --*/
513 if (i > hi) break;
514 v = ptr[i];
515 j = i;
516 while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
517 ptr[j] = ptr[j-h];
518 j = j - h;
519 if (j <= (lo + h - 1)) break;
521 ptr[j] = v;
522 i++;
523 #endif
524 if (*budget < 0) return;
530 /*---------------------------------------------*/
532 * The following is an implementation of
533 * an elegant 3-way quicksort for strings,
534 * described in a paper "Fast Algorithms for
535 * Sorting and Searching Strings", by Robert
536 * Sedgewick and Jon L. Bentley.
539 static
540 ALWAYS_INLINE
541 uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
543 uint8_t t;
544 if (a > b) {
545 t = a;
546 a = b;
547 b = t;
549 /* here b >= a */
550 if (b > c) {
551 b = c;
552 if (a > b)
553 b = a;
555 return b;
558 #define mpush(lz,hz,dz) \
560 stackLo[sp] = lz; \
561 stackHi[sp] = hz; \
562 stackD [sp] = dz; \
563 sp++; \
566 #define mpop(lz,hz,dz) \
568 sp--; \
569 lz = stackLo[sp]; \
570 hz = stackHi[sp]; \
571 dz = stackD [sp]; \
574 #define mnextsize(az) (nextHi[az] - nextLo[az])
576 #define mnextswap(az,bz) \
578 int32_t tz; \
579 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
580 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
581 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
584 #define MAIN_QSORT_SMALL_THRESH 20
585 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
586 #define MAIN_QSORT_STACK_SIZE 100
588 static
589 void mainQSort3(uint32_t* ptr,
590 uint8_t* block,
591 uint16_t* quadrant,
592 int32_t nblock,
593 int32_t loSt,
594 int32_t hiSt,
595 int32_t dSt,
596 int32_t* budget)
598 int32_t unLo, unHi, ltLo, gtHi, n, m, med;
599 int32_t sp, lo, hi, d;
601 int32_t stackLo[MAIN_QSORT_STACK_SIZE];
602 int32_t stackHi[MAIN_QSORT_STACK_SIZE];
603 int32_t stackD [MAIN_QSORT_STACK_SIZE];
605 int32_t nextLo[3];
606 int32_t nextHi[3];
607 int32_t nextD [3];
609 sp = 0;
610 mpush(loSt, hiSt, dSt);
612 while (sp > 0) {
613 AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
615 mpop(lo, hi, d);
616 if (hi - lo < MAIN_QSORT_SMALL_THRESH
617 || d > MAIN_QSORT_DEPTH_THRESH
619 mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
620 if (*budget < 0)
621 return;
622 continue;
624 med = (int32_t) mmed3(block[ptr[lo ] + d],
625 block[ptr[hi ] + d],
626 block[ptr[(lo+hi) >> 1] + d]);
628 unLo = ltLo = lo;
629 unHi = gtHi = hi;
631 while (1) {
632 while (1) {
633 if (unLo > unHi)
634 break;
635 n = ((int32_t)block[ptr[unLo]+d]) - med;
636 if (n == 0) {
637 mswap(ptr[unLo], ptr[ltLo]);
638 ltLo++;
639 unLo++;
640 continue;
642 if (n > 0) break;
643 unLo++;
645 while (1) {
646 if (unLo > unHi)
647 break;
648 n = ((int32_t)block[ptr[unHi]+d]) - med;
649 if (n == 0) {
650 mswap(ptr[unHi], ptr[gtHi]);
651 gtHi--;
652 unHi--;
653 continue;
655 if (n < 0) break;
656 unHi--;
658 if (unLo > unHi)
659 break;
660 mswap(ptr[unLo], ptr[unHi]);
661 unLo++;
662 unHi--;
665 AssertD(unHi == unLo-1, "mainQSort3(2)");
667 if (gtHi < ltLo) {
668 mpush(lo, hi, d + 1);
669 continue;
672 n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n);
673 m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m);
675 n = lo + unLo - ltLo - 1;
676 m = hi - (gtHi - unHi) + 1;
678 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
679 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
680 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
682 if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
683 if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2);
684 if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
686 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
687 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
689 mpush(nextLo[0], nextHi[0], nextD[0]);
690 mpush(nextLo[1], nextHi[1], nextD[1]);
691 mpush(nextLo[2], nextHi[2], nextD[2]);
695 #undef mpush
696 #undef mpop
697 #undef mnextsize
698 #undef mnextswap
699 #undef MAIN_QSORT_SMALL_THRESH
700 #undef MAIN_QSORT_DEPTH_THRESH
701 #undef MAIN_QSORT_STACK_SIZE
704 /*---------------------------------------------*/
705 /* Pre:
706 * nblock > N_OVERSHOOT
707 * block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
708 * ((uint8_t*)block32) [0 .. nblock-1] holds block
709 * ptr exists for [0 .. nblock-1]
711 * Post:
712 * ((uint8_t*)block32) [0 .. nblock-1] holds block
713 * All other areas of block32 destroyed
714 * ftab[0 .. 65536] destroyed
715 * ptr [0 .. nblock-1] holds sorted order
716 * if (*budget < 0), sorting was abandoned
719 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
720 #define SETMASK (1 << 21)
721 #define CLEARMASK (~(SETMASK))
723 static NOINLINE
724 void mainSort(EState* state,
725 uint32_t* ptr,
726 uint8_t* block,
727 uint16_t* quadrant,
728 uint32_t* ftab,
729 int32_t nblock,
730 int32_t* budget)
732 int32_t i, j, k, ss, sb;
733 uint8_t c1;
734 int32_t numQSorted;
735 uint16_t s;
736 Bool bigDone[256];
737 /* bbox: moved to EState to save stack
738 int32_t runningOrder[256];
739 int32_t copyStart[256];
740 int32_t copyEnd [256];
742 #define runningOrder (state->mainSort__runningOrder)
743 #define copyStart (state->mainSort__copyStart)
744 #define copyEnd (state->mainSort__copyEnd)
746 /*-- set up the 2-byte frequency table --*/
747 /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
748 memset(ftab, 0, 65537 * sizeof(ftab[0]));
750 j = block[0] << 8;
751 i = nblock - 1;
752 /* 3%, +300 bytes */
753 #if CONFIG_BZIP2_FEATURE_SPEED >= 2
754 for (; i >= 3; i -= 4) {
755 quadrant[i] = 0;
756 j = (j >> 8) | (((uint16_t)block[i]) << 8);
757 ftab[j]++;
758 quadrant[i-1] = 0;
759 j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
760 ftab[j]++;
761 quadrant[i-2] = 0;
762 j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
763 ftab[j]++;
764 quadrant[i-3] = 0;
765 j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
766 ftab[j]++;
768 #endif
769 for (; i >= 0; i--) {
770 quadrant[i] = 0;
771 j = (j >> 8) | (((uint16_t)block[i]) << 8);
772 ftab[j]++;
775 /*-- (emphasises close relationship of block & quadrant) --*/
776 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
777 block [nblock+i] = block[i];
778 quadrant[nblock+i] = 0;
781 /*-- Complete the initial radix sort --*/
782 j = ftab[0]; /* bbox: optimized */
783 for (i = 1; i <= 65536; i++) {
784 j += ftab[i];
785 ftab[i] = j;
788 s = block[0] << 8;
789 i = nblock - 1;
790 #if CONFIG_BZIP2_FEATURE_SPEED >= 2
791 for (; i >= 3; i -= 4) {
792 s = (s >> 8) | (block[i] << 8);
793 j = ftab[s] - 1;
794 ftab[s] = j;
795 ptr[j] = i;
796 s = (s >> 8) | (block[i-1] << 8);
797 j = ftab[s] - 1;
798 ftab[s] = j;
799 ptr[j] = i-1;
800 s = (s >> 8) | (block[i-2] << 8);
801 j = ftab[s] - 1;
802 ftab[s] = j;
803 ptr[j] = i-2;
804 s = (s >> 8) | (block[i-3] << 8);
805 j = ftab[s] - 1;
806 ftab[s] = j;
807 ptr[j] = i-3;
809 #endif
810 for (; i >= 0; i--) {
811 s = (s >> 8) | (block[i] << 8);
812 j = ftab[s] - 1;
813 ftab[s] = j;
814 ptr[j] = i;
818 * Now ftab contains the first loc of every small bucket.
819 * Calculate the running order, from smallest to largest
820 * big bucket.
822 for (i = 0; i <= 255; i++) {
823 bigDone [i] = False;
824 runningOrder[i] = i;
828 int32_t vv;
829 /* bbox: was: int32_t h = 1; */
830 /* do h = 3 * h + 1; while (h <= 256); */
831 uint32_t h = 364;
833 do {
834 /*h = h / 3;*/
835 h = (h * 171) >> 9; /* bbox: fast h/3 */
836 for (i = h; i <= 255; i++) {
837 vv = runningOrder[i];
838 j = i;
839 while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
840 runningOrder[j] = runningOrder[j-h];
841 j = j - h;
842 if (j <= (h - 1))
843 goto zero;
845 zero:
846 runningOrder[j] = vv;
848 } while (h != 1);
852 * The main sorting loop.
855 numQSorted = 0;
857 for (i = 0; i <= 255; i++) {
860 * Process big buckets, starting with the least full.
861 * Basically this is a 3-step process in which we call
862 * mainQSort3 to sort the small buckets [ss, j], but
863 * also make a big effort to avoid the calls if we can.
865 ss = runningOrder[i];
868 * Step 1:
869 * Complete the big bucket [ss] by quicksorting
870 * any unsorted small buckets [ss, j], for j != ss.
871 * Hopefully previous pointer-scanning phases have already
872 * completed many of the small buckets [ss, j], so
873 * we don't have to sort them at all.
875 for (j = 0; j <= 255; j++) {
876 if (j != ss) {
877 sb = (ss << 8) + j;
878 if (!(ftab[sb] & SETMASK)) {
879 int32_t lo = ftab[sb] & CLEARMASK;
880 int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
881 if (hi > lo) {
882 mainQSort3(
883 ptr, block, quadrant, nblock,
884 lo, hi, BZ_N_RADIX, budget
886 if (*budget < 0) return;
887 numQSorted += (hi - lo + 1);
890 ftab[sb] |= SETMASK;
894 AssertH(!bigDone[ss], 1006);
897 * Step 2:
898 * Now scan this big bucket [ss] so as to synthesise the
899 * sorted order for small buckets [t, ss] for all t,
900 * including, magically, the bucket [ss,ss] too.
901 * This will avoid doing Real Work in subsequent Step 1's.
904 for (j = 0; j <= 255; j++) {
905 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
906 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
908 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
909 k = ptr[j] - 1;
910 if (k < 0)
911 k += nblock;
912 c1 = block[k];
913 if (!bigDone[c1])
914 ptr[copyStart[c1]++] = k;
916 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
917 k = ptr[j]-1;
918 if (k < 0)
919 k += nblock;
920 c1 = block[k];
921 if (!bigDone[c1])
922 ptr[copyEnd[c1]--] = k;
926 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
927 * Necessity for this case is demonstrated by compressing
928 * a sequence of approximately 48.5 million of character
929 * 251; 1.0.0/1.0.1 will then die here. */
930 AssertH((copyStart[ss]-1 == copyEnd[ss]) \
931 || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007);
933 for (j = 0; j <= 255; j++)
934 ftab[(j << 8) + ss] |= SETMASK;
937 * Step 3:
938 * The [ss] big bucket is now done. Record this fact,
939 * and update the quadrant descriptors. Remember to
940 * update quadrants in the overshoot area too, if
941 * necessary. The "if (i < 255)" test merely skips
942 * this updating for the last bucket processed, since
943 * updating for the last bucket is pointless.
945 * The quadrant array provides a way to incrementally
946 * cache sort orderings, as they appear, so as to
947 * make subsequent comparisons in fullGtU() complete
948 * faster. For repetitive blocks this makes a big
949 * difference (but not big enough to be able to avoid
950 * the fallback sorting mechanism, exponential radix sort).
952 * The precise meaning is: at all times:
954 * for 0 <= i < nblock and 0 <= j <= nblock
956 * if block[i] != block[j],
958 * then the relative values of quadrant[i] and
959 * quadrant[j] are meaningless.
961 * else {
962 * if quadrant[i] < quadrant[j]
963 * then the string starting at i lexicographically
964 * precedes the string starting at j
966 * else if quadrant[i] > quadrant[j]
967 * then the string starting at j lexicographically
968 * precedes the string starting at i
970 * else
971 * the relative ordering of the strings starting
972 * at i and j has not yet been determined.
975 bigDone[ss] = True;
977 if (i < 255) {
978 int32_t bbStart = ftab[ss << 8] & CLEARMASK;
979 int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
980 int32_t shifts = 0;
982 while ((bbSize >> shifts) > 65534) shifts++;
984 for (j = bbSize-1; j >= 0; j--) {
985 int32_t a2update = ptr[bbStart + j];
986 uint16_t qVal = (uint16_t)(j >> shifts);
987 quadrant[a2update] = qVal;
988 if (a2update < BZ_N_OVERSHOOT)
989 quadrant[a2update + nblock] = qVal;
991 AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
994 #undef runningOrder
995 #undef copyStart
996 #undef copyEnd
999 #undef BIGFREQ
1000 #undef SETMASK
1001 #undef CLEARMASK
1004 /*---------------------------------------------*/
1005 /* Pre:
1006 * nblock > 0
1007 * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1008 * ((uint8_t*)arr2)[0 .. nblock-1] holds block
1009 * arr1 exists for [0 .. nblock-1]
1011 * Post:
1012 * ((uint8_t*)arr2) [0 .. nblock-1] holds block
1013 * All other areas of block destroyed
1014 * ftab[0 .. 65536] destroyed
1015 * arr1[0 .. nblock-1] holds sorted order
1017 static NOINLINE
1018 void BZ2_blockSort(EState* s)
1020 /* In original bzip2 1.0.4, it's a parameter, but 30
1021 * (which was the default) should work ok. */
1022 enum { wfact = 30 };
1024 uint32_t* ptr = s->ptr;
1025 uint8_t* block = s->block;
1026 uint32_t* ftab = s->ftab;
1027 int32_t nblock = s->nblock;
1028 uint16_t* quadrant;
1029 int32_t budget;
1030 int32_t i;
1032 if (nblock < 10000) {
1033 fallbackSort(s->arr1, s->arr2, ftab, nblock);
1034 } else {
1035 /* Calculate the location for quadrant, remembering to get
1036 * the alignment right. Assumes that &(block[0]) is at least
1037 * 2-byte aligned -- this should be ok since block is really
1038 * the first section of arr2.
1040 i = nblock + BZ_N_OVERSHOOT;
1041 if (i & 1) i++;
1042 quadrant = (uint16_t*)(&(block[i]));
1044 /* (wfact-1) / 3 puts the default-factor-30
1045 * transition point at very roughly the same place as
1046 * with v0.1 and v0.9.0.
1047 * Not that it particularly matters any more, since the
1048 * resulting compressed stream is now the same regardless
1049 * of whether or not we use the main sort or fallback sort.
1051 budget = nblock * ((wfact-1) / 3);
1053 mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
1054 if (budget < 0) {
1055 fallbackSort(s->arr1, s->arr2, ftab, nblock);
1059 s->origPtr = -1;
1060 for (i = 0; i < s->nblock; i++)
1061 if (ptr[i] == 0) {
1062 s->origPtr = i;
1063 break;
1066 AssertH(s->origPtr != -1, 1003);
1070 /*-------------------------------------------------------------*/
1071 /*--- end blocksort.c ---*/
1072 /*-------------------------------------------------------------*/