1 /* $NetBSD: blocksort.c,v 1.1.1.2 2012/05/07 00:41:45 wiz Exp $ */
4 /*-------------------------------------------------------------*/
5 /*--- Block sorting machinery ---*/
6 /*--- blocksort.c ---*/
7 /*-------------------------------------------------------------*/
9 /* ------------------------------------------------------------------
10 This file is part of bzip2/libbzip2, a program and library for
11 lossless, block-sorting data compression.
13 bzip2/libbzip2 version 1.0.6 of 6 September 2010
14 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
16 Please read the WARNING, DISCLAIMER and PATENTS sections in the
19 This program is released under the terms of the license contained
21 ------------------------------------------------------------------ */
24 #include "bzlib_private.h"
26 /*---------------------------------------------*/
27 /*--- Fallback O(N log(N)^2) sorting ---*/
28 /*--- algorithm, for repetitive blocks ---*/
29 /*---------------------------------------------*/
31 /*---------------------------------------------*/
34 void fallbackSimpleSort ( UInt32
* fmap
,
45 for ( i
= hi
-4; i
>= lo
; i
-- ) {
48 for ( j
= i
+4; j
<= hi
&& ec_tmp
> eclass
[fmap
[j
]]; j
+= 4 )
54 for ( i
= hi
-1; i
>= lo
; i
-- ) {
57 for ( j
= i
+1; j
<= hi
&& ec_tmp
> eclass
[fmap
[j
]]; j
++ )
64 /*---------------------------------------------*/
65 #define fswap(zz1, zz2) \
66 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
68 #define fvswap(zzp1, zzp2, zzn) \
70 Int32 yyp1 = (zzp1); \
71 Int32 yyp2 = (zzp2); \
74 fswap(fmap[yyp1], fmap[yyp2]); \
75 yyp1++; yyp2++; yyn--; \
80 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
82 #define fpush(lz,hz) { stackLo[sp] = lz; \
86 #define fpop(lz,hz) { sp--; \
90 #define FALLBACK_QSORT_SMALL_THRESH 10
91 #define FALLBACK_QSORT_STACK_SIZE 100
95 void fallbackQSort3 ( UInt32
* fmap
,
100 Int32 unLo
, unHi
, ltLo
, gtHi
, n
, m
;
103 Int32 stackLo
[FALLBACK_QSORT_STACK_SIZE
];
104 Int32 stackHi
[FALLBACK_QSORT_STACK_SIZE
];
109 fpush ( loSt
, hiSt
);
113 AssertH ( sp
< FALLBACK_QSORT_STACK_SIZE
- 1, 1004 );
116 if (hi
- lo
< FALLBACK_QSORT_SMALL_THRESH
) {
117 fallbackSimpleSort ( fmap
, eclass
, lo
, hi
);
121 /* Random partitioning. Median of 3 sometimes fails to
122 avoid bad cases. Median of 9 seems to help but
123 looks rather expensive. This too seems to work but
124 is cheaper. Guidance for the magic constants
125 7621 and 32768 is taken from Sedgewick's algorithms
128 r
= ((r
* 7621) + 1) % 32768;
130 if (r3
== 0) med
= eclass
[fmap
[lo
]]; else
131 if (r3
== 1) med
= eclass
[fmap
[(lo
+hi
)>>1]]; else
132 med
= eclass
[fmap
[hi
]];
139 if (unLo
> unHi
) break;
140 n
= (Int32
)eclass
[fmap
[unLo
]] - (Int32
)med
;
142 fswap(fmap
[unLo
], fmap
[ltLo
]);
150 if (unLo
> unHi
) break;
151 n
= (Int32
)eclass
[fmap
[unHi
]] - (Int32
)med
;
153 fswap(fmap
[unHi
], fmap
[gtHi
]);
160 if (unLo
> unHi
) break;
161 fswap(fmap
[unLo
], fmap
[unHi
]); unLo
++; unHi
--;
164 AssertD ( unHi
== unLo
-1, "fallbackQSort3(2)" );
166 if (gtHi
< ltLo
) continue;
168 n
= fmin(ltLo
-lo
, unLo
-ltLo
); fvswap(lo
, unLo
-n
, n
);
169 m
= fmin(hi
-gtHi
, gtHi
-unHi
); fvswap(unLo
, hi
-m
+1, m
);
171 n
= lo
+ unLo
- ltLo
- 1;
172 m
= hi
- (gtHi
- unHi
) + 1;
174 if (n
- lo
> hi
- m
) {
189 #undef FALLBACK_QSORT_SMALL_THRESH
190 #undef FALLBACK_QSORT_STACK_SIZE
193 /*---------------------------------------------*/
196 eclass exists for [0 .. nblock-1]
197 ((UChar*)eclass) [0 .. nblock-1] holds block
198 ptr exists for [0 .. nblock-1]
201 ((UChar*)eclass) [0 .. nblock-1] holds block
202 All other areas of eclass destroyed
203 fmap [0 .. nblock-1] holds sorted order
204 bhtab [ 0 .. 2+(nblock/32) ] destroyed
207 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
208 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
209 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
210 #define WORD_BH(zz) bhtab[(zz) >> 5]
211 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
214 void fallbackSort ( UInt32
* fmap
,
222 Int32 H
, i
, j
, k
, l
, r
, cc
, cc1
;
225 UChar
* eclass8
= (UChar
*)eclass
;
228 Initial 1-char radix sort to generate
229 initial fmap and initial BH bits.
232 VPrintf0 ( " bucket sorting ...\n" );
233 for (i
= 0; i
< 257; i
++) ftab
[i
] = 0;
234 for (i
= 0; i
< nblock
; i
++) ftab
[eclass8
[i
]]++;
235 for (i
= 0; i
< 256; i
++) ftabCopy
[i
] = ftab
[i
];
236 for (i
= 1; i
< 257; i
++) ftab
[i
] += ftab
[i
-1];
238 for (i
= 0; i
< nblock
; i
++) {
245 nBhtab
= 2 + (nblock
/ 32);
246 for (i
= 0; i
< nBhtab
; i
++) bhtab
[i
] = 0;
247 for (i
= 0; i
< 256; i
++) SET_BH(ftab
[i
]);
250 Inductively refine the buckets. Kind-of an
251 "exponential radix sort" (!), inspired by the
252 Manber-Myers suffix array construction algorithm.
255 /*-- set sentinel bits for block-end detection --*/
256 for (i
= 0; i
< 32; i
++) {
257 SET_BH(nblock
+ 2*i
);
258 CLEAR_BH(nblock
+ 2*i
+ 1);
261 /*-- the log(N) loop --*/
266 VPrintf1 ( " depth %6d has ", H
);
269 for (i
= 0; i
< nblock
; i
++) {
270 if (ISSET_BH(i
)) j
= i
;
271 k
= fmap
[i
] - H
; if (k
< 0) k
+= nblock
;
279 /*-- find the next non-singleton bucket --*/
281 while (ISSET_BH(k
) && UNALIGNED_BH(k
)) k
++;
283 while (WORD_BH(k
) == 0xffffffff) k
+= 32;
284 while (ISSET_BH(k
)) k
++;
287 if (l
>= nblock
) break;
288 while (!ISSET_BH(k
) && UNALIGNED_BH(k
)) k
++;
290 while (WORD_BH(k
) == 0x00000000) k
+= 32;
291 while (!ISSET_BH(k
)) k
++;
294 if (r
>= nblock
) break;
296 /*-- now [l, r] bracket current bucket --*/
298 nNotDone
+= (r
- l
+ 1);
299 fallbackQSort3 ( fmap
, eclass
, l
, r
);
301 /*-- scan bucket and generate header bits-- */
303 for (i
= l
; i
<= r
; i
++) {
304 cc1
= eclass
[fmap
[i
]];
305 if (cc
!= cc1
) { SET_BH(i
); cc
= cc1
; };
311 VPrintf1 ( "%6d unresolved strings\n", nNotDone
);
314 if (H
> nblock
|| nNotDone
== 0) break;
318 Reconstruct the original block in
319 eclass8 [0 .. nblock-1], since the
320 previous phase destroyed it.
323 VPrintf0 ( " reconstructing block ...\n" );
325 for (i
= 0; i
< nblock
; i
++) {
326 while (ftabCopy
[j
] == 0) j
++;
328 eclass8
[fmap
[i
]] = (UChar
)j
;
330 AssertH ( j
< 256, 1005 );
340 /*---------------------------------------------*/
341 /*--- The main, O(N^2 log(N)) sorting ---*/
342 /*--- algorithm. Faster for "normal" ---*/
343 /*--- non-repetitive blocks. ---*/
344 /*---------------------------------------------*/
346 /*---------------------------------------------*/
349 Bool
mainGtU ( UInt32 i1
,
360 AssertD ( i1
!= i2
, "mainGtU" );
362 c1
= block
[i1
]; c2
= block
[i2
];
363 if (c1
!= c2
) return (c1
> c2
);
366 c1
= block
[i1
]; c2
= block
[i2
];
367 if (c1
!= c2
) return (c1
> c2
);
370 c1
= block
[i1
]; c2
= block
[i2
];
371 if (c1
!= c2
) return (c1
> c2
);
374 c1
= block
[i1
]; c2
= block
[i2
];
375 if (c1
!= c2
) return (c1
> c2
);
378 c1
= block
[i1
]; c2
= block
[i2
];
379 if (c1
!= c2
) return (c1
> c2
);
382 c1
= block
[i1
]; c2
= block
[i2
];
383 if (c1
!= c2
) return (c1
> c2
);
386 c1
= block
[i1
]; c2
= block
[i2
];
387 if (c1
!= c2
) return (c1
> c2
);
390 c1
= block
[i1
]; c2
= block
[i2
];
391 if (c1
!= c2
) return (c1
> c2
);
394 c1
= block
[i1
]; c2
= block
[i2
];
395 if (c1
!= c2
) return (c1
> c2
);
398 c1
= block
[i1
]; c2
= block
[i2
];
399 if (c1
!= c2
) return (c1
> c2
);
402 c1
= block
[i1
]; c2
= block
[i2
];
403 if (c1
!= c2
) return (c1
> c2
);
406 c1
= block
[i1
]; c2
= block
[i2
];
407 if (c1
!= c2
) return (c1
> c2
);
414 c1
= block
[i1
]; c2
= block
[i2
];
415 if (c1
!= c2
) return (c1
> c2
);
416 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
417 if (s1
!= s2
) return (s1
> s2
);
420 c1
= block
[i1
]; c2
= block
[i2
];
421 if (c1
!= c2
) return (c1
> c2
);
422 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
423 if (s1
!= s2
) return (s1
> s2
);
426 c1
= block
[i1
]; c2
= block
[i2
];
427 if (c1
!= c2
) return (c1
> c2
);
428 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
429 if (s1
!= s2
) return (s1
> s2
);
432 c1
= block
[i1
]; c2
= block
[i2
];
433 if (c1
!= c2
) return (c1
> c2
);
434 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
435 if (s1
!= s2
) return (s1
> s2
);
438 c1
= block
[i1
]; c2
= block
[i2
];
439 if (c1
!= c2
) return (c1
> c2
);
440 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
441 if (s1
!= s2
) return (s1
> s2
);
444 c1
= block
[i1
]; c2
= block
[i2
];
445 if (c1
!= c2
) return (c1
> c2
);
446 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
447 if (s1
!= s2
) return (s1
> s2
);
450 c1
= block
[i1
]; c2
= block
[i2
];
451 if (c1
!= c2
) return (c1
> c2
);
452 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
453 if (s1
!= s2
) return (s1
> s2
);
456 c1
= block
[i1
]; c2
= block
[i2
];
457 if (c1
!= c2
) return (c1
> c2
);
458 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
459 if (s1
!= s2
) return (s1
> s2
);
462 if (i1
>= nblock
) i1
-= nblock
;
463 if (i2
>= nblock
) i2
-= nblock
;
474 /*---------------------------------------------*/
476 Knuth's increments seem to work better
477 than Incerpi-Sedgewick here. Possibly
478 because the number of elems to sort is
479 usually small, typically <= 20.
482 Int32 incs
[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
483 9841, 29524, 88573, 265720,
487 void mainSimpleSort ( UInt32
* ptr
,
496 Int32 i
, j
, h
, bigN
, hp
;
500 if (bigN
< 2) return;
503 while (incs
[hp
] < bigN
) hp
++;
506 for (; hp
>= 0; hp
--) {
517 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
521 if (j
<= (lo
+ h
- 1)) break;
531 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
535 if (j
<= (lo
+ h
- 1)) break;
545 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
549 if (j
<= (lo
+ h
- 1)) break;
554 if (*budget
< 0) return;
560 /*---------------------------------------------*/
562 The following is an implementation of
563 an elegant 3-way quicksort for strings,
564 described in a paper "Fast Algorithms for
565 Sorting and Searching Strings", by Robert
566 Sedgewick and Jon L. Bentley.
569 #define mswap(zz1, zz2) \
570 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
572 #define mvswap(zzp1, zzp2, zzn) \
574 Int32 yyp1 = (zzp1); \
575 Int32 yyp2 = (zzp2); \
578 mswap(ptr[yyp1], ptr[yyp2]); \
579 yyp1++; yyp2++; yyn--; \
585 UChar
mmed3 ( UChar a
, UChar b
, UChar c
)
588 if (a
> b
) { t
= a
; a
= b
; b
= t
; };
596 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
598 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
603 #define mpop(lz,hz,dz) { sp--; \
609 #define mnextsize(az) (nextHi[az]-nextLo[az])
611 #define mnextswap(az,bz) \
613 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
614 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
615 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
618 #define MAIN_QSORT_SMALL_THRESH 20
619 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
620 #define MAIN_QSORT_STACK_SIZE 100
623 void mainQSort3 ( UInt32
* ptr
,
632 Int32 unLo
, unHi
, ltLo
, gtHi
, n
, m
, med
;
635 Int32 stackLo
[MAIN_QSORT_STACK_SIZE
];
636 Int32 stackHi
[MAIN_QSORT_STACK_SIZE
];
637 Int32 stackD
[MAIN_QSORT_STACK_SIZE
];
644 mpush ( loSt
, hiSt
, dSt
);
648 AssertH ( sp
< MAIN_QSORT_STACK_SIZE
- 2, 1001 );
651 if (hi
- lo
< MAIN_QSORT_SMALL_THRESH
||
652 d
> MAIN_QSORT_DEPTH_THRESH
) {
653 mainSimpleSort ( ptr
, block
, quadrant
, nblock
, lo
, hi
, d
, budget
);
654 if (*budget
< 0) return;
659 mmed3 ( block
[ptr
[ lo
]+d
],
661 block
[ptr
[ (lo
+hi
)>>1 ]+d
] );
668 if (unLo
> unHi
) break;
669 n
= ((Int32
)block
[ptr
[unLo
]+d
]) - med
;
671 mswap(ptr
[unLo
], ptr
[ltLo
]);
672 ltLo
++; unLo
++; continue;
678 if (unLo
> unHi
) break;
679 n
= ((Int32
)block
[ptr
[unHi
]+d
]) - med
;
681 mswap(ptr
[unHi
], ptr
[gtHi
]);
682 gtHi
--; unHi
--; continue;
687 if (unLo
> unHi
) break;
688 mswap(ptr
[unLo
], ptr
[unHi
]); unLo
++; unHi
--;
691 AssertD ( unHi
== unLo
-1, "mainQSort3(2)" );
698 n
= mmin(ltLo
-lo
, unLo
-ltLo
); mvswap(lo
, unLo
-n
, n
);
699 m
= mmin(hi
-gtHi
, gtHi
-unHi
); mvswap(unLo
, hi
-m
+1, m
);
701 n
= lo
+ unLo
- ltLo
- 1;
702 m
= hi
- (gtHi
- unHi
) + 1;
704 nextLo
[0] = lo
; nextHi
[0] = n
; nextD
[0] = d
;
705 nextLo
[1] = m
; nextHi
[1] = hi
; nextD
[1] = d
;
706 nextLo
[2] = n
+1; nextHi
[2] = m
-1; nextD
[2] = d
+1;
708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
710 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
712 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
713 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
715 mpush (nextLo
[0], nextHi
[0], nextD
[0]);
716 mpush (nextLo
[1], nextHi
[1], nextD
[1]);
717 mpush (nextLo
[2], nextHi
[2], nextD
[2]);
728 #undef MAIN_QSORT_SMALL_THRESH
729 #undef MAIN_QSORT_DEPTH_THRESH
730 #undef MAIN_QSORT_STACK_SIZE
733 /*---------------------------------------------*/
736 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
737 ((UChar*)block32) [0 .. nblock-1] holds block
738 ptr exists for [0 .. nblock-1]
741 ((UChar*)block32) [0 .. nblock-1] holds block
742 All other areas of block32 destroyed
743 ftab [0 .. 65536 ] destroyed
744 ptr [0 .. nblock-1] holds sorted order
745 if (*budget < 0), sorting was abandoned
748 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
749 #define SETMASK (1 << 21)
750 #define CLEARMASK (~(SETMASK))
753 void mainSort ( UInt32
* ptr
,
761 Int32 i
, j
, k
, ss
, sb
;
762 Int32 runningOrder
[256];
764 Int32 copyStart
[256];
769 if (verb
>= 4) VPrintf0 ( " main sort initialise ...\n" );
771 /*-- set up the 2-byte frequency table --*/
772 for (i
= 65536; i
>= 0; i
--) ftab
[i
] = 0;
776 for (; i
>= 3; i
-= 4) {
778 j
= (j
>> 8) | ( ((UInt16
)block
[i
]) << 8);
781 j
= (j
>> 8) | ( ((UInt16
)block
[i
-1]) << 8);
784 j
= (j
>> 8) | ( ((UInt16
)block
[i
-2]) << 8);
787 j
= (j
>> 8) | ( ((UInt16
)block
[i
-3]) << 8);
790 for (; i
>= 0; i
--) {
792 j
= (j
>> 8) | ( ((UInt16
)block
[i
]) << 8);
796 /*-- (emphasises close relationship of block & quadrant) --*/
797 for (i
= 0; i
< BZ_N_OVERSHOOT
; i
++) {
798 block
[nblock
+i
] = block
[i
];
799 quadrant
[nblock
+i
] = 0;
802 if (verb
>= 4) VPrintf0 ( " bucket sorting ...\n" );
804 /*-- Complete the initial radix sort --*/
805 for (i
= 1; i
<= 65536; i
++) ftab
[i
] += ftab
[i
-1];
809 for (; i
>= 3; i
-= 4) {
810 s
= (s
>> 8) | (block
[i
] << 8);
814 s
= (s
>> 8) | (block
[i
-1] << 8);
818 s
= (s
>> 8) | (block
[i
-2] << 8);
822 s
= (s
>> 8) | (block
[i
-3] << 8);
827 for (; i
>= 0; i
--) {
828 s
= (s
>> 8) | (block
[i
] << 8);
835 Now ftab contains the first loc of every small bucket.
836 Calculate the running order, from smallest to largest
839 for (i
= 0; i
<= 255; i
++) {
847 do h
= 3 * h
+ 1; while (h
<= 256);
850 for (i
= h
; i
<= 255; i
++) {
851 vv
= runningOrder
[i
];
853 while ( BIGFREQ(runningOrder
[j
-h
]) > BIGFREQ(vv
) ) {
854 runningOrder
[j
] = runningOrder
[j
-h
];
856 if (j
<= (h
- 1)) goto zero
;
859 runningOrder
[j
] = vv
;
865 The main sorting loop.
870 for (i
= 0; i
<= 255; i
++) {
873 Process big buckets, starting with the least full.
874 Basically this is a 3-step process in which we call
875 mainQSort3 to sort the small buckets [ss, j], but
876 also make a big effort to avoid the calls if we can.
878 ss
= runningOrder
[i
];
882 Complete the big bucket [ss] by quicksorting
883 any unsorted small buckets [ss, j], for j != ss.
884 Hopefully previous pointer-scanning phases have already
885 completed many of the small buckets [ss, j], so
886 we don't have to sort them at all.
888 for (j
= 0; j
<= 255; j
++) {
891 if ( ! (ftab
[sb
] & SETMASK
) ) {
892 Int32 lo
= ftab
[sb
] & CLEARMASK
;
893 Int32 hi
= (ftab
[sb
+1] & CLEARMASK
) - 1;
896 VPrintf4 ( " qsort [0x%x, 0x%x] "
898 ss
, j
, numQSorted
, hi
- lo
+ 1 );
900 ptr
, block
, quadrant
, nblock
,
901 lo
, hi
, BZ_N_RADIX
, budget
903 numQSorted
+= (hi
- lo
+ 1);
904 if (*budget
< 0) return;
911 AssertH ( !bigDone
[ss
], 1006 );
915 Now scan this big bucket [ss] so as to synthesise the
916 sorted order for small buckets [t, ss] for all t,
917 including, magically, the bucket [ss,ss] too.
918 This will avoid doing Real Work in subsequent Step 1's.
921 for (j
= 0; j
<= 255; j
++) {
922 copyStart
[j
] = ftab
[(j
<< 8) + ss
] & CLEARMASK
;
923 copyEnd
[j
] = (ftab
[(j
<< 8) + ss
+ 1] & CLEARMASK
) - 1;
925 for (j
= ftab
[ss
<< 8] & CLEARMASK
; j
< copyStart
[ss
]; j
++) {
926 k
= ptr
[j
]-1; if (k
< 0) k
+= nblock
;
929 ptr
[ copyStart
[c1
]++ ] = k
;
931 for (j
= (ftab
[(ss
+1) << 8] & CLEARMASK
) - 1; j
> copyEnd
[ss
]; j
--) {
932 k
= ptr
[j
]-1; if (k
< 0) k
+= nblock
;
935 ptr
[ copyEnd
[c1
]-- ] = k
;
939 AssertH ( (copyStart
[ss
]-1 == copyEnd
[ss
])
941 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
942 Necessity for this case is demonstrated by compressing
943 a sequence of approximately 48.5 million of character
944 251; 1.0.0/1.0.1 will then die here. */
945 (copyStart
[ss
] == 0 && copyEnd
[ss
] == nblock
-1),
948 for (j
= 0; j
<= 255; j
++) ftab
[(j
<< 8) + ss
] |= SETMASK
;
952 The [ss] big bucket is now done. Record this fact,
953 and update the quadrant descriptors. Remember to
954 update quadrants in the overshoot area too, if
955 necessary. The "if (i < 255)" test merely skips
956 this updating for the last bucket processed, since
957 updating for the last bucket is pointless.
959 The quadrant array provides a way to incrementally
960 cache sort orderings, as they appear, so as to
961 make subsequent comparisons in fullGtU() complete
962 faster. For repetitive blocks this makes a big
963 difference (but not big enough to be able to avoid
964 the fallback sorting mechanism, exponential radix sort).
966 The precise meaning is: at all times:
968 for 0 <= i < nblock and 0 <= j <= nblock
970 if block[i] != block[j],
972 then the relative values of quadrant[i] and
973 quadrant[j] are meaningless.
976 if quadrant[i] < quadrant[j]
977 then the string starting at i lexicographically
978 precedes the string starting at j
980 else if quadrant[i] > quadrant[j]
981 then the string starting at j lexicographically
982 precedes the string starting at i
985 the relative ordering of the strings starting
986 at i and j has not yet been determined.
992 Int32 bbStart
= ftab
[ss
<< 8] & CLEARMASK
;
993 Int32 bbSize
= (ftab
[(ss
+1) << 8] & CLEARMASK
) - bbStart
;
996 while ((bbSize
>> shifts
) > 65534) shifts
++;
998 for (j
= bbSize
-1; j
>= 0; j
--) {
999 Int32 a2update
= ptr
[bbStart
+ j
];
1000 UInt16 qVal
= (UInt16
)(j
>> shifts
);
1001 quadrant
[a2update
] = qVal
;
1002 if (a2update
< BZ_N_OVERSHOOT
)
1003 quadrant
[a2update
+ nblock
] = qVal
;
1005 AssertH ( ((bbSize
-1) >> shifts
) <= 65535, 1002 );
1011 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1012 nblock
, numQSorted
, nblock
- numQSorted
);
1020 /*---------------------------------------------*/
1023 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1024 ((UChar*)arr2) [0 .. nblock-1] holds block
1025 arr1 exists for [0 .. nblock-1]
1028 ((UChar*)arr2) [0 .. nblock-1] holds block
1029 All other areas of block destroyed
1030 ftab [ 0 .. 65536 ] destroyed
1031 arr1 [0 .. nblock-1] holds sorted order
1033 void BZ2_blockSort ( EState
* s
)
1035 UInt32
* ptr
= s
->ptr
;
1036 UChar
* block
= s
->block
;
1037 UInt32
* ftab
= s
->ftab
;
1038 Int32 nblock
= s
->nblock
;
1039 Int32 verb
= s
->verbosity
;
1040 Int32 wfact
= s
->workFactor
;
1046 if (nblock
< 10000) {
1047 fallbackSort ( s
->arr1
, s
->arr2
, ftab
, nblock
, verb
);
1049 /* Calculate the location for quadrant, remembering to get
1050 the alignment right. Assumes that &(block[0]) is at least
1051 2-byte aligned -- this should be ok since block is really
1052 the first section of arr2.
1054 i
= nblock
+BZ_N_OVERSHOOT
;
1056 quadrant
= (UInt16
*)(&(block
[i
]));
1058 /* (wfact-1) / 3 puts the default-factor-30
1059 transition point at very roughly the same place as
1060 with v0.1 and v0.9.0.
1061 Not that it particularly matters any more, since the
1062 resulting compressed stream is now the same regardless
1063 of whether or not we use the main sort or fallback sort.
1065 if (wfact
< 1 ) wfact
= 1;
1066 if (wfact
> 100) wfact
= 100;
1067 budgetInit
= nblock
* ((wfact
-1) / 3);
1068 budget
= budgetInit
;
1070 mainSort ( ptr
, block
, quadrant
, ftab
, nblock
, verb
, &budget
);
1072 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1073 budgetInit
- budget
,
1075 (float)(budgetInit
- budget
) /
1076 (float)(nblock
==0 ? 1 : nblock
) );
1079 VPrintf0 ( " too repetitive; using fallback"
1080 " sorting algorithm\n" );
1081 fallbackSort ( s
->arr1
, s
->arr2
, ftab
, nblock
, verb
);
1086 for (i
= 0; i
< s
->nblock
; i
++)
1088 { s
->origPtr
= i
; break; };
1090 AssertH( s
->origPtr
!= -1, 1003 );
1094 /*-------------------------------------------------------------*/
1095 /*--- end blocksort.c ---*/
1096 /*-------------------------------------------------------------*/