1 /*************************************************************************
3 * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
21 *************************************************************************/
24 * Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
25 * Additional work by David Walters
27 * OPCODE - Optimized Collision Detection
28 * Copyright (C) 2001 Pierre Terdiman
29 * Homepage: http://www.codercorner.com/Opcode.htm
31 * This version does complete radix sort, not "classical" SAP. So, we
32 * have no temporal coherence, but are able to handle any movement
33 * velocities equally well.
36 #include <ode/common.h>
37 #include <ode/collision_space.h>
38 #include <ode/collision.h>
42 #include "collision_kernel.h"
43 #include "collision_space_internal.h"
45 // Reference counting helper for radix sort global data.
46 //static void RadixSortRef();
47 //static void RadixSortDeref();
50 // --------------------------------------------------------------------------
52 // --------------------------------------------------------------------------
54 struct RaixSortContext
57 RaixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL
), mPrimaryRanks(NULL
) {}
58 ~RaixSortContext() { FreeRanks(); }
60 // OPCODE's Radix Sorting, returns a list of indices in sorted order
61 const uint32
* RadixSort( const float* input2
, uint32 nb
);
65 void AllocateRanks(sizeint nNewSize
);
67 void ReallocateRanksIfNecessary(sizeint nNewSize
);
70 void SetCurrentSize(sizeint nValue
) { mCurrentSize
= nValue
; }
71 sizeint
GetCurrentSize() const { return mCurrentSize
; }
73 void SetCurrentUtilization(sizeint nValue
) { mCurrentUtilization
= nValue
; }
74 sizeint
GetCurrentUtilization() const { return mCurrentUtilization
; }
76 uint32
*GetRanks1() const { return mPrimaryRanks
; }
77 uint32
*GetRanks2() const { return mRanksBuffer
+ ((mRanksBuffer
+ mCurrentSize
) - mPrimaryRanks
); }
78 void SwapRanks() { mPrimaryRanks
= GetRanks2(); }
80 bool AreRanksValid() const { return mRanksValid
; }
81 void InvalidateRanks() { mRanksValid
= false; }
82 void ValidateRanks() { mRanksValid
= true; }
85 sizeint mCurrentSize
; //!< Current size of the indices list
86 sizeint mCurrentUtilization
; //!< Current utilization of the indices list
88 uint32
* mRanksBuffer
; //!< Two lists allocated sequentially in a single block
89 uint32
* mPrimaryRanks
;
92 void RaixSortContext::AllocateRanks(sizeint nNewSize
)
94 dIASSERT(GetCurrentSize() == 0);
96 mRanksBuffer
= new uint32
[2 * nNewSize
];
97 mPrimaryRanks
= mRanksBuffer
;
99 SetCurrentSize(nNewSize
);
102 void RaixSortContext::FreeRanks()
106 delete[] mRanksBuffer
;
109 void RaixSortContext::ReallocateRanksIfNecessary(sizeint nNewSize
)
111 sizeint nCurUtilization
= GetCurrentUtilization();
113 if (nNewSize
!= nCurUtilization
)
115 sizeint nCurSize
= GetCurrentSize();
117 if ( nNewSize
> nCurSize
)
119 // Free previously used ram
122 // Get some fresh one
123 AllocateRanks(nNewSize
);
127 SetCurrentUtilization(nNewSize
);
131 // --------------------------------------------------------------------------
133 // --------------------------------------------------------------------------
135 struct dxSAPSpace
: public dxSpace
137 // Constructor / Destructor
138 dxSAPSpace( dSpaceID _space
, int sortaxis
);
142 virtual dxGeom
* getGeom(int i
);
143 virtual void add(dxGeom
* g
);
144 virtual void remove(dxGeom
* g
);
145 virtual void dirty(dxGeom
* g
);
146 virtual void computeAABB();
147 virtual void cleanGeoms();
148 virtual void collide( void *data
, dNearCallback
*callback
);
149 virtual void collide2( void *data
, dxGeom
*geom
, dNearCallback
*callback
);
153 //--------------------------------------------------------------------------
154 // Local Declarations
155 //--------------------------------------------------------------------------
157 //! A generic couple structure
160 uint32 id0
; //!< First index of the pair
161 uint32 id1
; //!< Second index of the pair
163 // Default and Value Constructor
165 Pair( uint32 i0
, uint32 i1
) : id0( i0
), id1( i1
) {}
168 //--------------------------------------------------------------------------
170 //--------------------------------------------------------------------------
173 * Complete box pruning.
174 * Returns a list of overlapping pairs of boxes, each box of the pair
175 * belongs to the same set.
177 * @param count [in] number of boxes.
178 * @param geoms [in] geoms of boxes.
179 * @param pairs [out] array of overlapping pairs.
181 void BoxPruning( int count
, const dxGeom
** geoms
, dArray
< Pair
>& pairs
);
184 //--------------------------------------------------------------------------
185 // Implementation Data
186 //--------------------------------------------------------------------------
188 // We have two lists (arrays of pointers) to dirty and clean
189 // geoms. Each geom knows it's index into the corresponding list
190 // (see macros above).
191 dArray
<dxGeom
*> DirtyList
; // dirty geoms
192 dArray
<dxGeom
*> GeomList
; // clean geoms
194 // For SAP, we ultimately separate "normal" geoms and the ones that have
195 // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
196 // infinite geoms anyway).
197 dArray
<dxGeom
*> TmpGeomList
; // temporary for normal geoms
198 dArray
<dxGeom
*> TmpInfGeomList
; // temporary for geoms with infinite AABBs
200 // Our sorting axes. (X,Z,Y is often best). Stored *2 for minor speedup
201 // Axis indices into geom's aabb are: min=idx, max=idx+1
206 // pruning position array scratch pad
207 // NOTE: this is float not dReal because of the OPCODE radix sorter
208 dArray
< float > poslist
;
209 RaixSortContext sortContext
;
213 dSpaceID
dSweepAndPruneSpaceCreate( dxSpace
* space
, int axisorder
) {
214 return new dxSAPSpace( space
, axisorder
);
//==============================================================================

// True when the geom's enable flags say it should take part in collision tests.
#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)

// HACK: We abuse 'next' and 'tome' members of dxGeom to store indices into dirty/geom lists.
#define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next_ex = (dxGeom*)(sizeint)(idx); }
#define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome_ex = (dxGeom**)(sizeint)(idx); }
#define GEOM_GET_DIRTY_IDX(g) ((int)(sizeint)(g)->next_ex)
#define GEOM_GET_GEOM_IDX(g) ((int)(sizeint)(g)->tome_ex)
#define GEOM_INVALID_IDX (-1)
231 * A bit of repetitive work - similar to collideAABBs, but doesn't check
232 * if AABBs intersect (because SAP returns pairs with overlapping AABBs).
234 static void collideGeomsNoAABBs( dxGeom
*g1
, dxGeom
*g2
, void *data
, dNearCallback
*callback
)
236 dIASSERT( (g1
->gflags
& GEOM_AABB_BAD
)==0 );
237 dIASSERT( (g2
->gflags
& GEOM_AABB_BAD
)==0 );
239 // no contacts if both geoms on the same body, and the body is not 0
240 if (g1
->body
== g2
->body
&& g1
->body
) return;
242 // test if the category and collide bitfields match
243 if ( ((g1
->category_bits
& g2
->collide_bits
) ||
244 (g2
->category_bits
& g1
->collide_bits
)) == 0) {
248 dReal
*bounds1
= g1
->aabb
;
249 dReal
*bounds2
= g2
->aabb
;
251 // check if either object is able to prove that it doesn't intersect the
253 if (g1
->AABBTest (g2
,bounds2
) == 0) return;
254 if (g2
->AABBTest (g1
,bounds1
) == 0) return;
256 // the objects might actually intersect - call the space callback function
257 callback (data
,g1
,g2
);
261 dxSAPSpace::dxSAPSpace( dSpaceID _space
, int axisorder
) : dxSpace( _space
)
263 type
= dSweepAndPruneSpaceClass
;
265 // Init AABB to infinity
266 aabb
[0] = -dInfinity
;
268 aabb
[2] = -dInfinity
;
270 aabb
[4] = -dInfinity
;
273 ax0idx
= ( ( axisorder
) & 3 ) << 1;
274 ax1idx
= ( ( axisorder
>> 2 ) & 3 ) << 1;
275 ax2idx
= ( ( axisorder
>> 4 ) & 3 ) << 1;
278 dxSAPSpace::~dxSAPSpace()
280 CHECK_NOT_LOCKED(this);
282 // note that destroying each geom will call remove()
283 for ( ; DirtyList
.size(); dGeomDestroy( DirtyList
[ 0 ] ) ) {}
284 for ( ; GeomList
.size(); dGeomDestroy( GeomList
[ 0 ] ) ) {}
288 for ( ; DirtyList
.size(); remove( DirtyList
[ 0 ] ) ) {}
289 for ( ; GeomList
.size(); remove( GeomList
[ 0 ] ) ) {}
293 dxGeom
* dxSAPSpace::getGeom( int i
)
295 dUASSERT( i
>= 0 && i
< count
, "index out of range" );
296 int dirtySize
= DirtyList
.size();
300 return GeomList
[i
-dirtySize
];
303 void dxSAPSpace::add( dxGeom
* g
)
305 CHECK_NOT_LOCKED (this);
307 dUASSERT(g
->tome_ex
== 0 && g
->next_ex
== 0, "geom is already in a space");
311 GEOM_SET_DIRTY_IDX( g
, DirtyList
.size() );
312 GEOM_SET_GEOM_IDX( g
, GEOM_INVALID_IDX
);
318 void dxSAPSpace::remove( dxGeom
* g
)
320 CHECK_NOT_LOCKED(this);
322 dUASSERT(g
->parent_space
== this,"object is not in this space");
325 int dirtyIdx
= GEOM_GET_DIRTY_IDX(g
);
326 int geomIdx
= GEOM_GET_GEOM_IDX(g
);
327 // must be in one list, not in both
329 (dirtyIdx
==GEOM_INVALID_IDX
&& geomIdx
>=0 && geomIdx
<GeomList
.size()) ||
330 (geomIdx
==GEOM_INVALID_IDX
&& dirtyIdx
>=0 && dirtyIdx
<DirtyList
.size()),
331 "geom indices messed up" );
332 if( dirtyIdx
!= GEOM_INVALID_IDX
) {
333 // we're in dirty list, remove
334 int dirtySize
= DirtyList
.size();
335 if (dirtyIdx
!= dirtySize
-1) {
336 dxGeom
* lastG
= DirtyList
[dirtySize
-1];
337 DirtyList
[dirtyIdx
] = lastG
;
338 GEOM_SET_DIRTY_IDX(lastG
,dirtyIdx
);
340 GEOM_SET_DIRTY_IDX(g
,GEOM_INVALID_IDX
);
341 DirtyList
.setSize( dirtySize
-1 );
343 // we're in geom list, remove
344 int geomSize
= GeomList
.size();
345 if (geomIdx
!= geomSize
-1) {
346 dxGeom
* lastG
= GeomList
[geomSize
-1];
347 GeomList
[geomIdx
] = lastG
;
348 GEOM_SET_GEOM_IDX(lastG
,geomIdx
);
350 GEOM_SET_GEOM_IDX(g
,GEOM_INVALID_IDX
);
351 GeomList
.setSize( geomSize
-1 );
357 void dxSAPSpace::dirty( dxGeom
* g
)
360 dUASSERT(g
->parent_space
== this, "object is not in this space");
362 // check if already dirtied
363 int dirtyIdx
= GEOM_GET_DIRTY_IDX(g
);
364 if( dirtyIdx
!= GEOM_INVALID_IDX
)
367 int geomIdx
= GEOM_GET_GEOM_IDX(g
);
368 dUASSERT( geomIdx
>=0 && geomIdx
<GeomList
.size(), "geom indices messed up" );
370 // remove from geom list, place last in place of this
371 int geomSize
= GeomList
.size();
372 if (geomIdx
!= geomSize
-1) {
373 dxGeom
* lastG
= GeomList
[geomSize
-1];
374 GeomList
[geomIdx
] = lastG
;
375 GEOM_SET_GEOM_IDX(lastG
,geomIdx
);
377 GeomList
.setSize( geomSize
-1 );
380 GEOM_SET_GEOM_IDX( g
, GEOM_INVALID_IDX
);
381 GEOM_SET_DIRTY_IDX( g
, DirtyList
.size() );
385 void dxSAPSpace::computeAABB()
390 void dxSAPSpace::cleanGeoms()
392 int dirtySize
= DirtyList
.size();
396 // compute the AABBs of all dirty geoms, clear the dirty flags,
397 // remove from dirty list, place into geom list
400 int geomSize
= GeomList
.size();
401 GeomList
.setSize( geomSize
+ dirtySize
); // ensure space in geom list
403 for( int i
= 0; i
< dirtySize
; ++i
) {
404 dxGeom
* g
= DirtyList
[i
];
406 ((dxSpace
*)g
)->cleanGeoms();
410 dIASSERT((g
->gflags
& GEOM_AABB_BAD
) == 0);
412 g
->gflags
&= ~GEOM_DIRTY
;
414 // remove from dirty list, add to geom list
415 GEOM_SET_DIRTY_IDX( g
, GEOM_INVALID_IDX
);
416 GEOM_SET_GEOM_IDX( g
, geomSize
+ i
);
417 GeomList
[geomSize
+i
] = g
;
420 DirtyList
.setSize( 0 );
425 void dxSAPSpace::collide( void *data
, dNearCallback
*callback
)
433 // by now all geoms are in GeomList, and DirtyList must be empty
434 int geom_count
= GeomList
.size();
435 dUASSERT( geom_count
== count
, "geom counts messed up" );
437 // separate all ENABLED geoms into infinite AABBs and normal AABBs
438 TmpGeomList
.setSize(0);
439 TmpInfGeomList
.setSize(0);
440 int axis0max
= ax0idx
+ 1;
441 for( int i
= 0; i
< geom_count
; ++i
) {
442 dxGeom
* g
= GeomList
[i
];
443 if( !GEOM_ENABLED(g
) ) // skip disabled ones
445 const dReal
& amax
= g
->aabb
[axis0max
];
446 if( amax
== dInfinity
) // HACK? probably not...
447 TmpInfGeomList
.push( g
);
449 TmpGeomList
.push( g
);
452 // do SAP on normal AABBs
453 dArray
< Pair
> overlapBoxes
;
454 int tmp_geom_count
= TmpGeomList
.size();
455 if ( tmp_geom_count
> 0 )
457 // Generate a list of overlapping boxes
458 BoxPruning( tmp_geom_count
, (const dxGeom
**)TmpGeomList
.data(), overlapBoxes
);
461 // collide overlapping
462 int overlapCount
= overlapBoxes
.size();
463 for( int j
= 0; j
< overlapCount
; ++j
)
465 const Pair
& pair
= overlapBoxes
[ j
];
466 dxGeom
* g1
= TmpGeomList
[ pair
.id0
];
467 dxGeom
* g2
= TmpGeomList
[ pair
.id1
];
468 collideGeomsNoAABBs( g1
, g2
, data
, callback
);
471 int infSize
= TmpInfGeomList
.size();
472 int normSize
= TmpGeomList
.size();
475 for ( m
= 0; m
< infSize
; ++m
)
477 dxGeom
* g1
= TmpInfGeomList
[ m
];
479 // collide infinite ones
480 for( n
= m
+1; n
< infSize
; ++n
) {
481 dxGeom
* g2
= TmpInfGeomList
[n
];
482 collideGeomsNoAABBs( g1
, g2
, data
, callback
);
485 // collide infinite ones with normal ones
486 for( n
= 0; n
< normSize
; ++n
) {
487 dxGeom
* g2
= TmpGeomList
[n
];
488 collideGeomsNoAABBs( g1
, g2
, data
, callback
);
495 void dxSAPSpace::collide2( void *data
, dxGeom
*geom
, dNearCallback
*callback
)
497 dAASSERT (geom
&& callback
);
499 // TODO: This is just a simple N^2 implementation
504 geom
->recomputeAABB();
506 // intersect bounding boxes
507 int geom_count
= GeomList
.size();
508 for ( int i
= 0; i
< geom_count
; ++i
) {
509 dxGeom
* g
= GeomList
[i
];
510 if ( GEOM_ENABLED(g
) )
511 collideAABBs (g
,geom
,data
,callback
);
518 void dxSAPSpace::BoxPruning( int count
, const dxGeom
** geoms
, dArray
< Pair
>& pairs
)
520 // Size the poslist (+1 for infinity end cap)
521 poslist
.setSize( count
);
523 // 1) Build main list using the primary axis
524 // NOTE: uses floats instead of dReals because that's what radix sort wants
525 for( int i
= 0; i
< count
; ++i
)
526 poslist
[ i
] = (float)TmpGeomList
[i
]->aabb
[ ax0idx
];
529 const uint32
* Sorted
= sortContext
.RadixSort( poslist
.data(), count
);
532 const uint32
* const LastSorted
= Sorted
+ count
;
533 const uint32
* RunningAddress
= Sorted
;
537 while ( Sorted
< LastSorted
)
539 IndexPair
.id0
= *Sorted
++;
541 // empty, this loop just advances RunningAddress
542 for (bExitLoop
= false; poslist
[*RunningAddress
++] < poslist
[IndexPair
.id0
]; )
544 if (RunningAddress
== LastSorted
)
551 if ( bExitLoop
|| RunningAddress
== LastSorted
) // Not a bug!!!
556 const float idx0ax0max
= (float)geoms
[IndexPair
.id0
]->aabb
[ax0idx
+1]; // To avoid wrong decisions caused by rounding errors, cast the AABB element to float similarly as we did at the function beginning
557 const dReal idx0ax1max
= geoms
[IndexPair
.id0
]->aabb
[ax1idx
+1];
558 const dReal idx0ax2max
= geoms
[IndexPair
.id0
]->aabb
[ax2idx
+1];
560 for (const uint32
* RunningAddress2
= RunningAddress
; poslist
[ IndexPair
.id1
= *RunningAddress2
++ ] <= idx0ax0max
; )
562 const dReal
* aabb0
= geoms
[ IndexPair
.id0
]->aabb
;
563 const dReal
* aabb1
= geoms
[ IndexPair
.id1
]->aabb
;
566 if ( idx0ax1max
>= aabb1
[ax1idx
] && aabb1
[ax1idx
+1] >= aabb0
[ax1idx
]
567 && idx0ax2max
>= aabb1
[ax2idx
] && aabb1
[ax2idx
+1] >= aabb0
[ax2idx
] )
569 pairs
.push( IndexPair
);
572 if (RunningAddress2
== LastSorted
)
578 } // while ( RunningAddress < LastSorted && Sorted < LastSorted )
582 //==============================================================================
584 //------------------------------------------------------------------------------
586 //------------------------------------------------------------------------------
// Declares CurCount/PerformPass for the given radix pass and skips the pass
// when every input value shares the same byte at that position.
#define CHECK_PASS_VALIDITY(pass)                                                           \
    /* Shortcut to current counters */                                                      \
    const uint32* CurCount = &mHistogram[pass<<8];                                          \
                                                                                            \
    /* Reset flag. The sorting pass is supposed to be performed. (default) */               \
    bool PerformPass = true;                                                                \
                                                                                            \
    /* Check pass validity */                                                               \
                                                                                            \
    /* If all values have the same byte, sorting is useless. */                             \
    /* It may happen when sorting bytes or words instead of dwords. */                      \
    /* This routine actually sorts words faster than dwords, and bytes */                   \
    /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */             \
    /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
                                                                                            \
    /* Get first byte */                                                                    \
    uint8 UniqueVal = *(((const uint8*)input)+pass);                                        \
                                                                                            \
    /* Check that byte's counter */                                                         \
    if(CurCount[UniqueVal]==nb) PerformPass=false;
611 // WARNING ONLY SORTS IEEE FLOATING-POINT VALUES
612 const uint32
* RaixSortContext::RadixSort( const float* input2
, uint32 nb
)
614 union _type_cast_union
616 _type_cast_union(const float *floats
): asFloats(floats
) {}
617 _type_cast_union(const uint32
*uints32
): asUInts32(uints32
) {}
619 const float *asFloats
;
620 const uint32
*asUInts32
;
621 const uint8
*asUInts8
;
624 const uint32
* input
= _type_cast_union(input2
).asUInts32
;
626 // Resize lists if needed
627 ReallocateRanksIfNecessary(nb
);
629 // Allocate histograms & offsets on the stack
630 uint32 mHistogram
[256*4];
633 // Create histograms (counters). Counters for all passes are created in one run.
634 // Pros: read input buffer once instead of four times
635 // Cons: mHistogram is 4Kb instead of 1Kb
636 // Floating-point values are always supposed to be signed values, so there's only one code path there.
637 // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
638 // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
639 // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
640 // wouldn't work with mixed positive/negative values....
642 /* Clear counters/histograms */
643 memset(mHistogram
, 0, 256*4*sizeof(uint32
));
645 /* Prepare to count */
646 const uint8
* p
= _type_cast_union(input
).asUInts8
;
647 const uint8
* pe
= &p
[nb
*4];
648 uint32
* h0
= &mHistogram
[0]; /* Histogram for first pass (LSB) */
649 uint32
* h1
= &mHistogram
[256]; /* Histogram for second pass */
650 uint32
* h2
= &mHistogram
[512]; /* Histogram for third pass */
651 uint32
* h3
= &mHistogram
[768]; /* Histogram for last pass (MSB) */
653 bool AlreadySorted
= true; /* Optimism... */
655 if (!AreRanksValid())
657 /* Prepare for temporal coherence */
658 const float* Running
= input2
;
659 float PrevVal
= *Running
;
663 /* Read input input2 in previous sorted order */
664 float Val
= *Running
++;
665 /* Check whether already sorted or not */
666 if(Val
<PrevVal
) { AlreadySorted
= false; break; } /* Early out */
667 /* Update for next iteration */
670 /* Create histograms */
671 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
674 /* If all input values are already sorted, we just have to return and leave the */
675 /* previous list unchanged. That way the routine may take advantage of temporal */
676 /* coherence, for example when used to sort transparent faces. */
679 uint32
* const Ranks1
= GetRanks1();
680 for(uint32 i
=0;i
<nb
;i
++) Ranks1
[i
] = i
;
686 /* Prepare for temporal coherence */
687 uint32
* const Ranks1
= GetRanks1();
689 uint32
* Indices
= Ranks1
;
690 float PrevVal
= input2
[*Indices
];
694 /* Read input input2 in previous sorted order */
695 float Val
= input2
[*Indices
++];
696 /* Check whether already sorted or not */
697 if(Val
<PrevVal
) { AlreadySorted
= false; break; } /* Early out */
698 /* Update for next iteration */
701 /* Create histograms */
702 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
705 /* If all input values are already sorted, we just have to return and leave the */
706 /* previous list unchanged. That way the routine may take advantage of temporal */
707 /* coherence, for example when used to sort transparent faces. */
708 if(AlreadySorted
) { return Ranks1
; }
711 /* Else there has been an early out and we must finish computing the histograms */
714 /* Create histograms without the previous overhead */
715 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
719 // Compute #negative values involved if needed
720 uint32 NbNegativeValues
= 0;
722 // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
723 // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
724 // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
725 uint32
* h3
= &mHistogram
[768];
726 for(uint32 i
=128;i
<256;i
++) NbNegativeValues
+= h3
[i
]; // 768 for last histogram, 128 for negative part
728 // Radix sort, j is the pass number (0=LSB, 3=MSB)
729 for(uint32 j
=0;j
<4;j
++)
731 // Should we care about negative values?
734 // Here we deal with positive values only
735 CHECK_PASS_VALIDITY(j
);
739 uint32
* const Ranks2
= GetRanks2();
742 for(uint32 i
=1;i
<256;i
++) mLink
[i
] = mLink
[i
-1] + CurCount
[i
-1];
744 // Perform Radix Sort
745 const uint8
* InputBytes
= _type_cast_union(input
).asUInts8
;
747 if (!AreRanksValid())
749 for(uint32 i
=0;i
<nb
;i
++)
751 *mLink
[InputBytes
[i
<<2]]++ = i
;
758 uint32
* const Ranks1
= GetRanks1();
760 uint32
* Indices
= Ranks1
;
761 uint32
* const IndicesEnd
= Ranks1
+ nb
;
762 while(Indices
!=IndicesEnd
)
764 uint32 id
= *Indices
++;
765 *mLink
[InputBytes
[id
<<2]]++ = id
;
769 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
775 // This is a special case to correctly handle negative values
776 CHECK_PASS_VALIDITY(j
);
780 uint32
* const Ranks2
= GetRanks2();
782 // Create biased offsets, in order for negative numbers to be sorted as well
783 mLink
[0] = Ranks2
+ NbNegativeValues
; // First positive number takes place after the negative ones
784 for(uint32 i
=1;i
<128;i
++) mLink
[i
] = mLink
[i
-1] + CurCount
[i
-1]; // 1 to 128 for positive numbers
786 // We must reverse the sorting order for negative numbers!
788 for(uint32 i
=0;i
<127;i
++) mLink
[254-i
] = mLink
[255-i
] + CurCount
[255-i
]; // Fixing the wrong order for negative values
789 for(uint32 i
=128;i
<256;i
++) mLink
[i
] += CurCount
[i
]; // Fixing the wrong place for negative values
791 // Perform Radix Sort
792 if (!AreRanksValid())
794 for(uint32 i
=0;i
<nb
;i
++)
796 uint32 Radix
= input
[i
]>>24; // Radix byte, same as above. AND is useless here (uint32).
797 // ### cmp to be killed. Not good. Later.
798 if(Radix
<128) *mLink
[Radix
]++ = i
; // Number is positive, same as above
799 else *(--mLink
[Radix
]) = i
; // Number is negative, flip the sorting order
806 uint32
* const Ranks1
= GetRanks1();
808 for(uint32 i
=0;i
<nb
;i
++)
810 uint32 Radix
= input
[Ranks1
[i
]]>>24; // Radix byte, same as above. AND is useless here (uint32).
811 // ### cmp to be killed. Not good. Later.
812 if(Radix
<128) *mLink
[Radix
]++ = Ranks1
[i
]; // Number is positive, same as above
813 else *(--mLink
[Radix
]) = Ranks1
[i
]; // Number is negative, flip the sorting order
816 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
821 // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
824 if (!AreRanksValid())
826 uint32
* const Ranks2
= GetRanks2();
828 for(uint32 i
=0;i
<nb
;i
++)
837 uint32
* const Ranks1
= GetRanks1();
838 uint32
* const Ranks2
= GetRanks2();
839 for(uint32 i
=0;i
<nb
;i
++) Ranks2
[i
] = Ranks1
[nb
-i
-1];
842 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
850 uint32
* const Ranks1
= GetRanks1();