1 /*************************************************************************
3 * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
21 *************************************************************************/
24 * Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
25 * Additional work by David Walters
27 * OPCODE - Optimized Collision Detection
28 * Copyright (C) 2001 Pierre Terdiman
29 * Homepage: http://www.codercorner.com/Opcode.htm
31 * This version does complete radix sort, not "classical" SAP. So, we
32 * have no temporal coherence, but are able to handle any movement
33 * velocities equally well.
#include <float.h>
#include <string.h>

#include <ode/common.h>
#include <ode/collision_space.h>
#include <ode/collision.h>

#include "collision_kernel.h"
#include "collision_space_internal.h"
45 // Reference counting helper for radix sort global data.
46 //static void RadixSortRef();
47 //static void RadixSortDeref();
50 // --------------------------------------------------------------------------
52 // --------------------------------------------------------------------------
54 struct RaixSortContext
57 RaixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL
), mPrimaryRanks(NULL
) {}
58 ~RaixSortContext() { FreeRanks(); }
60 // OPCODE's Radix Sorting, returns a list of indices in sorted order
61 const uint32
* RadixSort( const float* input2
, uint32 nb
);
65 void AllocateRanks(sizeint nNewSize
);
67 void ReallocateRanksIfNecessary(sizeint nNewSize
);
70 void SetCurrentSize(sizeint nValue
) { mCurrentSize
= nValue
; }
71 sizeint
GetCurrentSize() const { return mCurrentSize
; }
73 void SetCurrentUtilization(sizeint nValue
) { mCurrentUtilization
= nValue
; }
74 sizeint
GetCurrentUtilization() const { return mCurrentUtilization
; }
76 uint32
*GetRanks1() const { return mPrimaryRanks
; }
77 uint32
*GetRanks2() const { return mRanksBuffer
+ ((mRanksBuffer
+ mCurrentSize
) - mPrimaryRanks
); }
78 void SwapRanks() { mPrimaryRanks
= GetRanks2(); }
80 bool AreRanksValid() const { return mRanksValid
; }
81 void InvalidateRanks() { mRanksValid
= false; }
82 void ValidateRanks() { mRanksValid
= true; }
85 sizeint mCurrentSize
; //!< Current size of the indices list
86 sizeint mCurrentUtilization
; //!< Current utilization of the indices list
88 uint32
* mRanksBuffer
; //!< Two lists allocated sequentially in a single block
89 uint32
* mPrimaryRanks
;
92 void RaixSortContext::AllocateRanks(sizeint nNewSize
)
94 dIASSERT(GetCurrentSize() == 0);
96 mRanksBuffer
= new uint32
[2 * nNewSize
];
97 mPrimaryRanks
= mRanksBuffer
;
99 SetCurrentSize(nNewSize
);
102 void RaixSortContext::FreeRanks()
106 delete[] mRanksBuffer
;
109 void RaixSortContext::ReallocateRanksIfNecessary(sizeint nNewSize
)
111 sizeint nCurUtilization
= GetCurrentUtilization();
113 if (nNewSize
!= nCurUtilization
)
115 sizeint nCurSize
= GetCurrentSize();
117 if ( nNewSize
> nCurSize
)
119 // Free previously used ram
122 // Get some fresh one
123 AllocateRanks(nNewSize
);
127 SetCurrentUtilization(nNewSize
);
131 // --------------------------------------------------------------------------
133 // --------------------------------------------------------------------------
135 struct dxSAPSpace
: public dxSpace
137 // Constructor / Destructor
138 dxSAPSpace( dSpaceID _space
, int sortaxis
);
142 virtual dxGeom
* getGeom(int i
);
143 virtual void add(dxGeom
* g
);
144 virtual void remove(dxGeom
* g
);
145 virtual void dirty(dxGeom
* g
);
146 virtual void computeAABB();
147 virtual void cleanGeoms();
148 virtual void collide( void *data
, dNearCallback
*callback
);
149 virtual void collide2( void *data
, dxGeom
*geom
, dNearCallback
*callback
);
153 //--------------------------------------------------------------------------
154 // Local Declarations
155 //--------------------------------------------------------------------------
157 //! A generic couple structure
160 uint32 id0
; //!< First index of the pair
161 uint32 id1
; //!< Second index of the pair
163 // Default and Value Constructor
165 Pair( uint32 i0
, uint32 i1
) : id0( i0
), id1( i1
) {}
168 //--------------------------------------------------------------------------
170 //--------------------------------------------------------------------------
173 * Complete box pruning.
174 * Returns a list of overlapping pairs of boxes, each box of the pair
175 * belongs to the same set.
177 * @param count [in] number of boxes.
178 * @param geoms [in] geoms of boxes.
179 * @param pairs [out] array of overlapping pairs.
181 void BoxPruning( int count
, const dxGeom
** geoms
, dArray
< Pair
>& pairs
);
184 //--------------------------------------------------------------------------
185 // Implementation Data
186 //--------------------------------------------------------------------------
188 // We have two lists (arrays of pointers) to dirty and clean
189 // geoms. Each geom knows it's index into the corresponding list
190 // (see macros above).
191 dArray
<dxGeom
*> DirtyList
; // dirty geoms
192 dArray
<dxGeom
*> GeomList
; // clean geoms
194 // For SAP, we ultimately separate "normal" geoms and the ones that have
195 // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
196 // infinite geoms anyway).
197 dArray
<dxGeom
*> TmpGeomList
; // temporary for normal geoms
198 dArray
<dxGeom
*> TmpInfGeomList
; // temporary for geoms with infinite AABBs
200 // Our sorting axes. (X,Z,Y is often best). Stored *2 for minor speedup
201 // Axis indices into geom's aabb are: min=idx, max=idx+1
206 // pruning position array scratch pad
207 // NOTE: this is float not dReal because of the OPCODE radix sorter
208 dArray
< float > poslist
;
209 RaixSortContext sortContext
;
213 dSpaceID
dSweepAndPruneSpaceCreate( dxSpace
* space
, int axisorder
) {
214 return new dxSAPSpace( space
, axisorder
);
218 //==============================================================================
// A geom takes part in collision iff its enable-related flag bits match the test value.
#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)

// HACK: We abuse 'next' and 'tome' members of dxGeom to store indices into dirty/geom lists.
#define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next_ex = (dxGeom*)(sizeint)(idx); }
#define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome_ex = (dxGeom**)(sizeint)(idx); }
#define GEOM_GET_DIRTY_IDX(g) ((int)(sizeint)(g)->next_ex)
#define GEOM_GET_GEOM_IDX(g) ((int)(sizeint)(g)->tome_ex)
#define GEOM_INVALID_IDX (-1)
231 * A bit of repetitive work - similar to collideAABBs, but doesn't check
232 * if AABBs intersect (because SAP returns pairs with overlapping AABBs).
234 static void collideGeomsNoAABBs( dxGeom
*g1
, dxGeom
*g2
, void *data
, dNearCallback
*callback
)
236 dIASSERT( (g1
->gflags
& GEOM_AABB_BAD
)==0 );
237 dIASSERT( (g2
->gflags
& GEOM_AABB_BAD
)==0 );
239 // no contacts if both geoms on the same body, and the body is not 0
240 if (g1
->body
== g2
->body
&& g1
->body
) return;
242 // test if the category and collide bitfields match
243 if ( ((g1
->category_bits
& g2
->collide_bits
) ||
244 (g2
->category_bits
& g1
->collide_bits
)) == 0) {
248 dReal
*bounds1
= g1
->aabb
;
249 dReal
*bounds2
= g2
->aabb
;
251 // check if either object is able to prove that it doesn't intersect the
253 if (g1
->AABBTest (g2
,bounds2
) == 0) return;
254 if (g2
->AABBTest (g1
,bounds1
) == 0) return;
256 // the objects might actually intersect - call the space callback function
257 callback (data
,g1
,g2
);
261 dxSAPSpace::dxSAPSpace( dSpaceID _space
, int axisorder
) : dxSpace( _space
)
263 type
= dSweepAndPruneSpaceClass
;
265 // Init AABB to infinity
266 aabb
[0] = -dInfinity
;
268 aabb
[2] = -dInfinity
;
270 aabb
[4] = -dInfinity
;
273 ax0idx
= ( ( axisorder
) & 3 ) << 1;
274 ax1idx
= ( ( axisorder
>> 2 ) & 3 ) << 1;
275 ax2idx
= ( ( axisorder
>> 4 ) & 3 ) << 1;
278 dxSAPSpace::~dxSAPSpace()
280 CHECK_NOT_LOCKED(this);
282 // note that destroying each geom will call remove()
283 for ( ; DirtyList
.size(); dGeomDestroy( DirtyList
[ 0 ] ) ) {}
284 for ( ; GeomList
.size(); dGeomDestroy( GeomList
[ 0 ] ) ) {}
288 for ( ; DirtyList
.size(); remove( DirtyList
[ 0 ] ) ) {}
289 for ( ; GeomList
.size(); remove( GeomList
[ 0 ] ) ) {}
293 dxGeom
* dxSAPSpace::getGeom( int i
)
295 dUASSERT( i
>= 0 && i
< count
, "index out of range" );
296 int dirtySize
= DirtyList
.size();
300 return GeomList
[i
-dirtySize
];
303 void dxSAPSpace::add( dxGeom
* g
)
305 CHECK_NOT_LOCKED (this);
307 dUASSERT(g
->tome_ex
== 0 && g
->next_ex
== 0, "geom is already in a space");
311 GEOM_SET_DIRTY_IDX( g
, DirtyList
.size() );
312 GEOM_SET_GEOM_IDX( g
, GEOM_INVALID_IDX
);
318 void dxSAPSpace::remove( dxGeom
* g
)
320 CHECK_NOT_LOCKED(this);
322 dUASSERT(g
->parent_space
== this,"object is not in this space");
325 int dirtyIdx
= GEOM_GET_DIRTY_IDX(g
);
326 int geomIdx
= GEOM_GET_GEOM_IDX(g
);
327 // must be in one list, not in both
329 (dirtyIdx
==GEOM_INVALID_IDX
&& geomIdx
>=0 && geomIdx
<GeomList
.size()) ||
330 (geomIdx
==GEOM_INVALID_IDX
&& dirtyIdx
>=0 && dirtyIdx
<DirtyList
.size()),
331 "geom indices messed up" );
332 if( dirtyIdx
!= GEOM_INVALID_IDX
) {
333 // we're in dirty list, remove
334 int dirtySize
= DirtyList
.size();
335 if (dirtyIdx
!= dirtySize
-1) {
336 dxGeom
* lastG
= DirtyList
[dirtySize
-1];
337 DirtyList
[dirtyIdx
] = lastG
;
338 GEOM_SET_DIRTY_IDX(lastG
,dirtyIdx
);
340 GEOM_SET_DIRTY_IDX(g
,GEOM_INVALID_IDX
);
341 DirtyList
.setSize( dirtySize
-1 );
343 // we're in geom list, remove
344 int geomSize
= GeomList
.size();
345 if (geomIdx
!= geomSize
-1) {
346 dxGeom
* lastG
= GeomList
[geomSize
-1];
347 GeomList
[geomIdx
] = lastG
;
348 GEOM_SET_GEOM_IDX(lastG
,geomIdx
);
350 GEOM_SET_GEOM_IDX(g
,GEOM_INVALID_IDX
);
351 GeomList
.setSize( geomSize
-1 );
355 dUASSERT((g
->next_ex
= 0, true), "Needed for an assertion check only");
360 void dxSAPSpace::dirty( dxGeom
* g
)
363 dUASSERT(g
->parent_space
== this, "object is not in this space");
365 // check if already dirtied
366 int dirtyIdx
= GEOM_GET_DIRTY_IDX(g
);
367 if( dirtyIdx
!= GEOM_INVALID_IDX
)
370 int geomIdx
= GEOM_GET_GEOM_IDX(g
);
371 dUASSERT( geomIdx
>=0 && geomIdx
<GeomList
.size(), "geom indices messed up" );
373 // remove from geom list, place last in place of this
374 int geomSize
= GeomList
.size();
375 if (geomIdx
!= geomSize
-1) {
376 dxGeom
* lastG
= GeomList
[geomSize
-1];
377 GeomList
[geomIdx
] = lastG
;
378 GEOM_SET_GEOM_IDX(lastG
,geomIdx
);
380 GeomList
.setSize( geomSize
-1 );
383 GEOM_SET_GEOM_IDX( g
, GEOM_INVALID_IDX
);
384 GEOM_SET_DIRTY_IDX( g
, DirtyList
.size() );
388 void dxSAPSpace::computeAABB()
393 void dxSAPSpace::cleanGeoms()
395 int dirtySize
= DirtyList
.size();
399 // compute the AABBs of all dirty geoms, clear the dirty flags,
400 // remove from dirty list, place into geom list
403 int geomSize
= GeomList
.size();
404 GeomList
.setSize( geomSize
+ dirtySize
); // ensure space in geom list
406 for( int i
= 0; i
< dirtySize
; ++i
) {
407 dxGeom
* g
= DirtyList
[i
];
409 ((dxSpace
*)g
)->cleanGeoms();
413 dIASSERT((g
->gflags
& GEOM_AABB_BAD
) == 0);
415 g
->gflags
&= ~GEOM_DIRTY
;
417 // remove from dirty list, add to geom list
418 GEOM_SET_DIRTY_IDX( g
, GEOM_INVALID_IDX
);
419 GEOM_SET_GEOM_IDX( g
, geomSize
+ i
);
420 GeomList
[geomSize
+i
] = g
;
423 DirtyList
.setSize( 0 );
428 void dxSAPSpace::collide( void *data
, dNearCallback
*callback
)
436 // by now all geoms are in GeomList, and DirtyList must be empty
437 int geom_count
= GeomList
.size();
438 dUASSERT( geom_count
== count
, "geom counts messed up" );
440 // separate all ENABLED geoms into infinite AABBs and normal AABBs
441 TmpGeomList
.setSize(0);
442 TmpInfGeomList
.setSize(0);
443 int axis0max
= ax0idx
+ 1;
444 for( int i
= 0; i
< geom_count
; ++i
) {
445 dxGeom
* g
= GeomList
[i
];
446 if( !GEOM_ENABLED(g
) ) // skip disabled ones
448 const dReal
& amax
= g
->aabb
[axis0max
];
449 if( amax
== dInfinity
) // HACK? probably not...
450 TmpInfGeomList
.push( g
);
452 TmpGeomList
.push( g
);
455 // do SAP on normal AABBs
456 dArray
< Pair
> overlapBoxes
;
457 int tmp_geom_count
= TmpGeomList
.size();
458 if ( tmp_geom_count
> 0 )
460 // Generate a list of overlapping boxes
461 BoxPruning( tmp_geom_count
, (const dxGeom
**)TmpGeomList
.data(), overlapBoxes
);
464 // collide overlapping
465 int overlapCount
= overlapBoxes
.size();
466 for( int j
= 0; j
< overlapCount
; ++j
)
468 const Pair
& pair
= overlapBoxes
[ j
];
469 dxGeom
* g1
= TmpGeomList
[ pair
.id0
];
470 dxGeom
* g2
= TmpGeomList
[ pair
.id1
];
471 collideGeomsNoAABBs( g1
, g2
, data
, callback
);
474 int infSize
= TmpInfGeomList
.size();
475 int normSize
= TmpGeomList
.size();
478 for ( m
= 0; m
< infSize
; ++m
)
480 dxGeom
* g1
= TmpInfGeomList
[ m
];
482 // collide infinite ones
483 for( n
= m
+1; n
< infSize
; ++n
) {
484 dxGeom
* g2
= TmpInfGeomList
[n
];
485 collideGeomsNoAABBs( g1
, g2
, data
, callback
);
488 // collide infinite ones with normal ones
489 for( n
= 0; n
< normSize
; ++n
) {
490 dxGeom
* g2
= TmpGeomList
[n
];
491 collideGeomsNoAABBs( g1
, g2
, data
, callback
);
498 void dxSAPSpace::collide2( void *data
, dxGeom
*geom
, dNearCallback
*callback
)
500 dAASSERT (geom
&& callback
);
502 // TODO: This is just a simple N^2 implementation
507 geom
->recomputeAABB();
509 // intersect bounding boxes
510 int geom_count
= GeomList
.size();
511 for ( int i
= 0; i
< geom_count
; ++i
) {
512 dxGeom
* g
= GeomList
[i
];
513 if ( GEOM_ENABLED(g
) )
514 collideAABBs (g
,geom
,data
,callback
);
521 void dxSAPSpace::BoxPruning( int count
, const dxGeom
** geoms
, dArray
< Pair
>& pairs
)
523 // Size the poslist (+1 for infinity end cap)
524 poslist
.setSize( count
);
526 // 1) Build main list using the primary axis
527 // NOTE: uses floats instead of dReals because that's what radix sort wants
528 for( int i
= 0; i
< count
; ++i
)
529 poslist
[ i
] = (float)TmpGeomList
[i
]->aabb
[ ax0idx
];
532 const uint32
* Sorted
= sortContext
.RadixSort( poslist
.data(), count
);
535 const uint32
* const LastSorted
= Sorted
+ count
;
536 const uint32
* RunningAddress
= Sorted
;
540 while ( Sorted
< LastSorted
)
542 IndexPair
.id0
= *Sorted
++;
544 // empty, this loop just advances RunningAddress
545 for (bExitLoop
= false; poslist
[*RunningAddress
++] < poslist
[IndexPair
.id0
]; )
547 if (RunningAddress
== LastSorted
)
554 if ( bExitLoop
|| RunningAddress
== LastSorted
) // Not a bug!!!
559 const float idx0ax0max
= (float)geoms
[IndexPair
.id0
]->aabb
[ax0idx
+1]; // To avoid wrong decisions caused by rounding errors, cast the AABB element to float similarly as we did at the function beginning
560 const dReal idx0ax1max
= geoms
[IndexPair
.id0
]->aabb
[ax1idx
+1];
561 const dReal idx0ax2max
= geoms
[IndexPair
.id0
]->aabb
[ax2idx
+1];
563 for (const uint32
* RunningAddress2
= RunningAddress
; poslist
[ IndexPair
.id1
= *RunningAddress2
++ ] <= idx0ax0max
; )
565 const dReal
* aabb0
= geoms
[ IndexPair
.id0
]->aabb
;
566 const dReal
* aabb1
= geoms
[ IndexPair
.id1
]->aabb
;
569 if ( idx0ax1max
>= aabb1
[ax1idx
] && aabb1
[ax1idx
+1] >= aabb0
[ax1idx
]
570 && idx0ax2max
>= aabb1
[ax2idx
] && aabb1
[ax2idx
+1] >= aabb0
[ax2idx
] )
572 pairs
.push( IndexPair
);
575 if (RunningAddress2
== LastSorted
)
581 } // while ( RunningAddress < LastSorted && Sorted < LastSorted )
585 //==============================================================================
587 //------------------------------------------------------------------------------
589 //------------------------------------------------------------------------------
// Expands to the per-pass validity check used by RadixSort: declares
// 'CurCount' (the histogram slice for this pass) and 'PerformPass', which
// is cleared when every input value carries the same byte at this position
// (sorting on that byte would be a no-op).
#define CHECK_PASS_VALIDITY(pass)                                                           \
    /* Shortcut to current counters */                                                      \
    const uint32* CurCount = &mHistogram[pass<<8];                                          \
                                                                                            \
    /* Reset flag. The sorting pass is supposed to be performed. (default) */               \
    bool PerformPass = true;                                                                \
                                                                                            \
    /* Check pass validity */                                                               \
                                                                                            \
    /* If all values have the same byte, sorting is useless. */                             \
    /* It may happen when sorting bytes or words instead of dwords. */                      \
    /* This routine actually sorts words faster than dwords, and bytes */                   \
    /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */             \
    /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
                                                                                            \
    /* Get first byte */                                                                    \
    uint8 UniqueVal = *(((const uint8*)input)+pass);                                        \
                                                                                            \
    /* Check that byte's counter */                                                         \
    if(CurCount[UniqueVal]==nb) PerformPass=false;
614 // WARNING ONLY SORTS IEEE FLOATING-POINT VALUES
615 const uint32
* RaixSortContext::RadixSort( const float* input2
, uint32 nb
)
617 union _type_cast_union
619 _type_cast_union(const float *floats
): asFloats(floats
) {}
620 _type_cast_union(const uint32
*uints32
): asUInts32(uints32
) {}
622 const float *asFloats
;
623 const uint32
*asUInts32
;
624 const uint8
*asUInts8
;
627 const uint32
* input
= _type_cast_union(input2
).asUInts32
;
629 // Resize lists if needed
630 ReallocateRanksIfNecessary(nb
);
632 // Allocate histograms & offsets on the stack
633 uint32 mHistogram
[256*4];
636 // Create histograms (counters). Counters for all passes are created in one run.
637 // Pros: read input buffer once instead of four times
638 // Cons: mHistogram is 4Kb instead of 1Kb
639 // Floating-point values are always supposed to be signed values, so there's only one code path there.
640 // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
641 // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
642 // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
643 // wouldn't work with mixed positive/negative values....
645 /* Clear counters/histograms */
646 memset(mHistogram
, 0, 256*4*sizeof(uint32
));
648 /* Prepare to count */
649 const uint8
* p
= _type_cast_union(input
).asUInts8
;
650 const uint8
* pe
= &p
[nb
*4];
651 uint32
* h0
= &mHistogram
[0]; /* Histogram for first pass (LSB) */
652 uint32
* h1
= &mHistogram
[256]; /* Histogram for second pass */
653 uint32
* h2
= &mHistogram
[512]; /* Histogram for third pass */
654 uint32
* h3
= &mHistogram
[768]; /* Histogram for last pass (MSB) */
656 bool AlreadySorted
= true; /* Optimism... */
658 if (!AreRanksValid())
660 /* Prepare for temporal coherence */
661 const float* Running
= input2
;
662 float PrevVal
= *Running
;
666 /* Read input input2 in previous sorted order */
667 float Val
= *Running
++;
668 /* Check whether already sorted or not */
669 if(Val
<PrevVal
) { AlreadySorted
= false; break; } /* Early out */
670 /* Update for next iteration */
673 /* Create histograms */
674 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
677 /* If all input values are already sorted, we just have to return and leave the */
678 /* previous list unchanged. That way the routine may take advantage of temporal */
679 /* coherence, for example when used to sort transparent faces. */
682 uint32
* const Ranks1
= GetRanks1();
683 for(uint32 i
=0;i
<nb
;i
++) Ranks1
[i
] = i
;
689 /* Prepare for temporal coherence */
690 uint32
* const Ranks1
= GetRanks1();
692 uint32
* Indices
= Ranks1
;
693 float PrevVal
= input2
[*Indices
];
697 /* Read input input2 in previous sorted order */
698 float Val
= input2
[*Indices
++];
699 /* Check whether already sorted or not */
700 if(Val
<PrevVal
) { AlreadySorted
= false; break; } /* Early out */
701 /* Update for next iteration */
704 /* Create histograms */
705 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
708 /* If all input values are already sorted, we just have to return and leave the */
709 /* previous list unchanged. That way the routine may take advantage of temporal */
710 /* coherence, for example when used to sort transparent faces. */
711 if(AlreadySorted
) { return Ranks1
; }
714 /* Else there has been an early out and we must finish computing the histograms */
717 /* Create histograms without the previous overhead */
718 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
722 // Compute #negative values involved if needed
723 uint32 NbNegativeValues
= 0;
725 // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
726 // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
727 // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
728 uint32
* h3
= &mHistogram
[768];
729 for(uint32 i
=128;i
<256;i
++) NbNegativeValues
+= h3
[i
]; // 768 for last histogram, 128 for negative part
731 // Radix sort, j is the pass number (0=LSB, 3=MSB)
732 for(uint32 j
=0;j
<4;j
++)
734 // Should we care about negative values?
737 // Here we deal with positive values only
738 CHECK_PASS_VALIDITY(j
);
742 uint32
* const Ranks2
= GetRanks2();
745 for(uint32 i
=1;i
<256;i
++) mLink
[i
] = mLink
[i
-1] + CurCount
[i
-1];
747 // Perform Radix Sort
748 const uint8
* InputBytes
= _type_cast_union(input
).asUInts8
;
750 if (!AreRanksValid())
752 for(uint32 i
=0;i
<nb
;i
++)
754 *mLink
[InputBytes
[i
<<2]]++ = i
;
761 uint32
* const Ranks1
= GetRanks1();
763 uint32
* Indices
= Ranks1
;
764 uint32
* const IndicesEnd
= Ranks1
+ nb
;
765 while(Indices
!=IndicesEnd
)
767 uint32 id
= *Indices
++;
768 *mLink
[InputBytes
[id
<<2]]++ = id
;
772 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
778 // This is a special case to correctly handle negative values
779 CHECK_PASS_VALIDITY(j
);
783 uint32
* const Ranks2
= GetRanks2();
785 // Create biased offsets, in order for negative numbers to be sorted as well
786 mLink
[0] = Ranks2
+ NbNegativeValues
; // First positive number takes place after the negative ones
787 for(uint32 i
=1;i
<128;i
++) mLink
[i
] = mLink
[i
-1] + CurCount
[i
-1]; // 1 to 128 for positive numbers
789 // We must reverse the sorting order for negative numbers!
791 for(uint32 i
=0;i
<127;i
++) mLink
[254-i
] = mLink
[255-i
] + CurCount
[255-i
]; // Fixing the wrong order for negative values
792 for(uint32 i
=128;i
<256;i
++) mLink
[i
] += CurCount
[i
]; // Fixing the wrong place for negative values
794 // Perform Radix Sort
795 if (!AreRanksValid())
797 for(uint32 i
=0;i
<nb
;i
++)
799 uint32 Radix
= input
[i
]>>24; // Radix byte, same as above. AND is useless here (uint32).
800 // ### cmp to be killed. Not good. Later.
801 if(Radix
<128) *mLink
[Radix
]++ = i
; // Number is positive, same as above
802 else *(--mLink
[Radix
]) = i
; // Number is negative, flip the sorting order
809 uint32
* const Ranks1
= GetRanks1();
811 for(uint32 i
=0;i
<nb
;i
++)
813 uint32 Radix
= input
[Ranks1
[i
]]>>24; // Radix byte, same as above. AND is useless here (uint32).
814 // ### cmp to be killed. Not good. Later.
815 if(Radix
<128) *mLink
[Radix
]++ = Ranks1
[i
]; // Number is positive, same as above
816 else *(--mLink
[Radix
]) = Ranks1
[i
]; // Number is negative, flip the sorting order
819 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
824 // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
827 if (!AreRanksValid())
829 uint32
* const Ranks2
= GetRanks2();
831 for(uint32 i
=0;i
<nb
;i
++)
840 uint32
* const Ranks1
= GetRanks1();
841 uint32
* const Ranks2
= GetRanks2();
842 for(uint32 i
=0;i
<nb
;i
++) Ranks2
[i
] = Ranks1
[nb
-i
-1];
845 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
853 uint32
* const Ranks1
= GetRanks1();