Changed: Header inclusion order corrected
[ode.git] / ode / src / collision_sapspace.cpp
blobd85388f48d36e4eef4213f2f70d1368687a53b56
1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
24 * Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
25 * Additional work by David Walters
26 * Original code:
27 * OPCODE - Optimized Collision Detection
28 * Copyright (C) 2001 Pierre Terdiman
29 * Homepage: http://www.codercorner.com/Opcode.htm
31 * This version does complete radix sort, not "classical" SAP. So, we
32 * have no temporal coherence, but are able to handle any movement
33 * velocities equally well.
36 #include <ode/common.h>
37 #include <ode/matrix.h>
38 #include <ode/collision_space.h>
39 #include <ode/collision.h>
41 #include "config.h"
42 #include "collision_kernel.h"
43 #include "collision_space_internal.h"
45 // Reference counting helper for radix sort global data.
46 //static void RadixSortRef();
47 //static void RadixSortDeref();
50 // --------------------------------------------------------------------------
51 // Radix Sort Context
52 // --------------------------------------------------------------------------
54 struct RaixSortContext
56 public:
57 RaixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL), mPrimaryRanks(NULL) {}
58 ~RaixSortContext() { FreeRanks(); }
60 // OPCODE's Radix Sorting, returns a list of indices in sorted order
61 const uint32* RadixSort( const float* input2, uint32 nb );
63 private:
64 void FreeRanks();
65 void AllocateRanks(size_t nNewSize);
67 void ReallocateRanksIfNecessary(size_t nNewSize);
69 private:
70 void SetCurrentSize(size_t nValue) { mCurrentSize = nValue; }
71 size_t GetCurrentSize() const { return mCurrentSize; }
73 void SetCurrentUtilization(size_t nValue) { mCurrentUtilization = nValue; }
74 size_t GetCurrentUtilization() const { return mCurrentUtilization; }
76 uint32 *GetRanks1() const { return mPrimaryRanks; }
77 uint32 *GetRanks2() const { return mRanksBuffer + ((mRanksBuffer + mCurrentSize) - mPrimaryRanks); }
78 void SwapRanks() { mPrimaryRanks = GetRanks2(); }
80 bool AreRanksValid() const { return mRanksValid; }
81 void InvalidateRanks() { mRanksValid = false; }
82 void ValidateRanks() { mRanksValid = true; }
84 private:
85 size_t mCurrentSize; //!< Current size of the indices list
86 size_t mCurrentUtilization; //!< Current utilization of the indices list
87 bool mRanksValid;
88 uint32* mRanksBuffer; //!< Two lists allocated sequentially in a single block
89 uint32* mPrimaryRanks;
92 void RaixSortContext::AllocateRanks(size_t nNewSize)
94 dIASSERT(GetCurrentSize() == 0);
96 mRanksBuffer = new uint32[2 * nNewSize];
97 mPrimaryRanks = mRanksBuffer;
99 SetCurrentSize(nNewSize);
102 void RaixSortContext::FreeRanks()
104 SetCurrentSize(0);
106 delete[] mRanksBuffer;
109 void RaixSortContext::ReallocateRanksIfNecessary(size_t nNewSize)
111 size_t nCurUtilization = GetCurrentUtilization();
113 if (nNewSize != nCurUtilization)
115 size_t nCurSize = GetCurrentSize();
117 if ( nNewSize > nCurSize )
119 // Free previously used ram
120 FreeRanks();
122 // Get some fresh one
123 AllocateRanks(nNewSize);
126 InvalidateRanks();
127 SetCurrentUtilization(nNewSize);
131 // --------------------------------------------------------------------------
132 // SAP space code
133 // --------------------------------------------------------------------------
135 struct dxSAPSpace : public dxSpace
137 // Constructor / Destructor
138 dxSAPSpace( dSpaceID _space, int sortaxis );
139 ~dxSAPSpace();
141 // dxSpace
142 virtual dxGeom* getGeom(int i);
143 virtual void add(dxGeom* g);
144 virtual void remove(dxGeom* g);
145 virtual void dirty(dxGeom* g);
146 virtual void computeAABB();
147 virtual void cleanGeoms();
148 virtual void collide( void *data, dNearCallback *callback );
149 virtual void collide2( void *data, dxGeom *geom, dNearCallback *callback );
151 private:
153 //--------------------------------------------------------------------------
154 // Local Declarations
155 //--------------------------------------------------------------------------
157 //! A generic couple structure
158 struct Pair
160 uint32 id0; //!< First index of the pair
161 uint32 id1; //!< Second index of the pair
163 // Default and Value Constructor
164 Pair() {}
165 Pair( uint32 i0, uint32 i1 ) : id0( i0 ), id1( i1 ) {}
168 //--------------------------------------------------------------------------
169 // Helpers
170 //--------------------------------------------------------------------------
173 * Complete box pruning.
174 * Returns a list of overlapping pairs of boxes, each box of the pair
175 * belongs to the same set.
177 * @param count [in] number of boxes.
178 * @param geoms [in] geoms of boxes.
179 * @param pairs [out] array of overlapping pairs.
181 void BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs );
184 //--------------------------------------------------------------------------
185 // Implementation Data
186 //--------------------------------------------------------------------------
188 // We have two lists (arrays of pointers) to dirty and clean
189 // geoms. Each geom knows it's index into the corresponding list
190 // (see macros above).
191 dArray<dxGeom*> DirtyList; // dirty geoms
192 dArray<dxGeom*> GeomList; // clean geoms
194 // For SAP, we ultimately separate "normal" geoms and the ones that have
195 // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
196 // infinite geoms anyway).
197 dArray<dxGeom*> TmpGeomList; // temporary for normal geoms
198 dArray<dxGeom*> TmpInfGeomList; // temporary for geoms with infinite AABBs
200 // Our sorting axes. (X,Z,Y is often best). Stored *2 for minor speedup
201 // Axis indices into geom's aabb are: min=idx, max=idx+1
202 uint32 ax0idx;
203 uint32 ax1idx;
204 uint32 ax2idx;
206 // pruning position array scratch pad
207 // NOTE: this is float not dReal because of the OPCODE radix sorter
208 dArray< float > poslist;
209 RaixSortContext sortContext;
212 // Creation
213 dSpaceID dSweepAndPruneSpaceCreate( dxSpace* space, int axisorder ) {
214 return new dxSAPSpace( space, axisorder );
218 //==============================================================================
// A geom takes part in collision only when its enable-related flag bits
// match the expected test value.
#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)

// HACK: We abuse 'next' and 'tome' members of dxGeom to store indice into dirty/geom lists.
// 'next' carries the geom's index in DirtyList, 'tome' its index in GeomList;
// the unused slot holds GEOM_INVALID_IDX, so a geom is in exactly one list.
#define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next = (dxGeom*)(size_t)(idx); }
#define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome = (dxGeom**)(size_t)(idx); }
#define GEOM_GET_DIRTY_IDX(g) ((int)(size_t)(g)->next)
#define GEOM_GET_GEOM_IDX(g) ((int)(size_t)(g)->tome)
#define GEOM_INVALID_IDX (-1)
231 * A bit of repetitive work - similar to collideAABBs, but doesn't check
232 * if AABBs intersect (because SAP returns pairs with overlapping AABBs).
234 static void collideGeomsNoAABBs( dxGeom *g1, dxGeom *g2, void *data, dNearCallback *callback )
236 dIASSERT( (g1->gflags & GEOM_AABB_BAD)==0 );
237 dIASSERT( (g2->gflags & GEOM_AABB_BAD)==0 );
239 // no contacts if both geoms on the same body, and the body is not 0
240 if (g1->body == g2->body && g1->body) return;
242 // test if the category and collide bitfields match
243 if ( ((g1->category_bits & g2->collide_bits) ||
244 (g2->category_bits & g1->collide_bits)) == 0) {
245 return;
248 dReal *bounds1 = g1->aabb;
249 dReal *bounds2 = g2->aabb;
251 // check if either object is able to prove that it doesn't intersect the
252 // AABB of the other
253 if (g1->AABBTest (g2,bounds2) == 0) return;
254 if (g2->AABBTest (g1,bounds1) == 0) return;
256 // the objects might actually intersect - call the space callback function
257 callback (data,g1,g2);
261 dxSAPSpace::dxSAPSpace( dSpaceID _space, int axisorder ) : dxSpace( _space )
263 type = dSweepAndPruneSpaceClass;
265 // Init AABB to infinity
266 aabb[0] = -dInfinity;
267 aabb[1] = dInfinity;
268 aabb[2] = -dInfinity;
269 aabb[3] = dInfinity;
270 aabb[4] = -dInfinity;
271 aabb[5] = dInfinity;
273 ax0idx = ( ( axisorder ) & 3 ) << 1;
274 ax1idx = ( ( axisorder >> 2 ) & 3 ) << 1;
275 ax2idx = ( ( axisorder >> 4 ) & 3 ) << 1;
278 dxSAPSpace::~dxSAPSpace()
280 CHECK_NOT_LOCKED(this);
281 if ( cleanup ) {
282 // note that destroying each geom will call remove()
283 for ( ; DirtyList.size(); dGeomDestroy( DirtyList[ 0 ] ) ) {}
284 for ( ; GeomList.size(); dGeomDestroy( GeomList[ 0 ] ) ) {}
286 else {
287 // just unhook them
288 for ( ; DirtyList.size(); remove( DirtyList[ 0 ] ) ) {}
289 for ( ; GeomList.size(); remove( GeomList[ 0 ] ) ) {}
293 dxGeom* dxSAPSpace::getGeom( int i )
295 dUASSERT( i >= 0 && i < count, "index out of range" );
296 int dirtySize = DirtyList.size();
297 if( i < dirtySize )
298 return DirtyList[i];
299 else
300 return GeomList[i-dirtySize];
303 void dxSAPSpace::add( dxGeom* g )
305 CHECK_NOT_LOCKED (this);
306 dAASSERT(g);
307 dUASSERT(g->parent_space == 0 && g->next == 0, "geom is already in a space");
309 g->gflags |= GEOM_DIRTY | GEOM_AABB_BAD;
311 // add to dirty list
312 GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
313 GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
314 DirtyList.push( g );
316 g->parent_space = this;
317 this->count++;
319 dGeomMoved(this);
322 void dxSAPSpace::remove( dxGeom* g )
324 CHECK_NOT_LOCKED(this);
325 dAASSERT(g);
326 dUASSERT(g->parent_space == this,"object is not in this space");
328 // remove
329 int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
330 int geomIdx = GEOM_GET_GEOM_IDX(g);
331 // must be in one list, not in both
332 dUASSERT(
333 (dirtyIdx==GEOM_INVALID_IDX && geomIdx>=0 && geomIdx<GeomList.size()) ||
334 (geomIdx==GEOM_INVALID_IDX && dirtyIdx>=0 && dirtyIdx<DirtyList.size()),
335 "geom indices messed up" );
336 if( dirtyIdx != GEOM_INVALID_IDX ) {
337 // we're in dirty list, remove
338 int dirtySize = DirtyList.size();
339 dxGeom* lastG = DirtyList[dirtySize-1];
340 DirtyList[dirtyIdx] = lastG;
341 GEOM_SET_DIRTY_IDX(lastG,dirtyIdx);
342 GEOM_SET_DIRTY_IDX(g,GEOM_INVALID_IDX);
343 DirtyList.setSize( dirtySize-1 );
344 } else {
345 // we're in geom list, remove
346 int geomSize = GeomList.size();
347 dxGeom* lastG = GeomList[geomSize-1];
348 GeomList[geomIdx] = lastG;
349 GEOM_SET_GEOM_IDX(lastG,geomIdx);
350 GEOM_SET_GEOM_IDX(g,GEOM_INVALID_IDX);
351 GeomList.setSize( geomSize-1 );
353 count--;
355 // safeguard
356 g->parent_space = 0;
358 // the bounding box of this space (and that of all the parents) may have
359 // changed as a consequence of the removal.
360 dGeomMoved(this);
363 void dxSAPSpace::dirty( dxGeom* g )
365 dAASSERT(g);
366 dUASSERT(g->parent_space == this,"object is not in this space");
368 // check if already dirtied
369 int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
370 if( dirtyIdx != GEOM_INVALID_IDX )
371 return;
373 int geomIdx = GEOM_GET_GEOM_IDX(g);
374 dUASSERT( geomIdx>=0 && geomIdx<GeomList.size(), "geom indices messed up" );
376 // remove from geom list, place last in place of this
377 int geomSize = GeomList.size();
378 dxGeom* lastG = GeomList[geomSize-1];
379 GeomList[geomIdx] = lastG;
380 GEOM_SET_GEOM_IDX(lastG,geomIdx);
381 GeomList.setSize( geomSize-1 );
383 // add to dirty list
384 GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
385 GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
386 DirtyList.push( g );
389 void dxSAPSpace::computeAABB()
391 // TODO?
394 void dxSAPSpace::cleanGeoms()
396 int dirtySize = DirtyList.size();
397 if( !dirtySize )
398 return;
400 // compute the AABBs of all dirty geoms, clear the dirty flags,
401 // remove from dirty list, place into geom list
402 lock_count++;
404 int geomSize = GeomList.size();
405 GeomList.setSize( geomSize + dirtySize ); // ensure space in geom list
407 for( int i = 0; i < dirtySize; ++i ) {
408 dxGeom* g = DirtyList[i];
409 if( IS_SPACE(g) ) {
410 ((dxSpace*)g)->cleanGeoms();
412 g->recomputeAABB();
413 g->gflags &= (~(GEOM_DIRTY|GEOM_AABB_BAD));
414 // remove from dirty list, add to geom list
415 GEOM_SET_DIRTY_IDX( g, GEOM_INVALID_IDX );
416 GEOM_SET_GEOM_IDX( g, geomSize + i );
417 GeomList[geomSize+i] = g;
419 // clear dirty list
420 DirtyList.setSize( 0 );
422 lock_count--;
425 void dxSAPSpace::collide( void *data, dNearCallback *callback )
427 dAASSERT (callback);
429 lock_count++;
431 cleanGeoms();
433 // by now all geoms are in GeomList, and DirtyList must be empty
434 int geom_count = GeomList.size();
435 dUASSERT( geom_count == count, "geom counts messed up" );
437 // separate all ENABLED geoms into infinite AABBs and normal AABBs
438 TmpGeomList.setSize(0);
439 TmpInfGeomList.setSize(0);
440 int axis0max = ax0idx + 1;
441 for( int i = 0; i < geom_count; ++i ) {
442 dxGeom* g = GeomList[i];
443 if( !GEOM_ENABLED(g) ) // skip disabled ones
444 continue;
445 const dReal& amax = g->aabb[axis0max];
446 if( amax == dInfinity ) // HACK? probably not...
447 TmpInfGeomList.push( g );
448 else
449 TmpGeomList.push( g );
452 // do SAP on normal AABBs
453 dArray< Pair > overlapBoxes;
454 int tmp_geom_count = TmpGeomList.size();
455 if ( tmp_geom_count > 0 )
457 // Size the poslist (+1 for infinity end cap)
458 poslist.setSize( tmp_geom_count + 1 );
460 // Generate a list of overlapping boxes
461 BoxPruning( tmp_geom_count, (const dxGeom**)TmpGeomList.data(), overlapBoxes );
464 // collide overlapping
465 int overlapCount = overlapBoxes.size();
466 for( int j = 0; j < overlapCount; ++j )
468 const Pair& pair = overlapBoxes[ j ];
469 dxGeom* g1 = TmpGeomList[ pair.id0 ];
470 dxGeom* g2 = TmpGeomList[ pair.id1 ];
471 collideGeomsNoAABBs( g1, g2, data, callback );
474 int infSize = TmpInfGeomList.size();
475 int normSize = TmpGeomList.size();
476 int m, n;
478 for ( m = 0; m < infSize; ++m )
480 dxGeom* g1 = TmpInfGeomList[ m ];
482 // collide infinite ones
483 for( n = m+1; n < infSize; ++n ) {
484 dxGeom* g2 = TmpInfGeomList[n];
485 collideGeomsNoAABBs( g1, g2, data, callback );
488 // collide infinite ones with normal ones
489 for( n = 0; n < normSize; ++n ) {
490 dxGeom* g2 = TmpGeomList[n];
491 collideGeomsNoAABBs( g1, g2, data, callback );
495 lock_count--;
498 void dxSAPSpace::collide2( void *data, dxGeom *geom, dNearCallback *callback )
500 dAASSERT (geom && callback);
502 // TODO: This is just a simple N^2 implementation
504 lock_count++;
506 cleanGeoms();
507 geom->recomputeAABB();
509 // intersect bounding boxes
510 int geom_count = GeomList.size();
511 for ( int i = 0; i < geom_count; ++i ) {
512 dxGeom* g = GeomList[i];
513 if ( GEOM_ENABLED(g) )
514 collideAABBs (g,geom,data,callback);
517 lock_count--;
521 void dxSAPSpace::BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs )
523 // 1) Build main list using the primary axis
524 // NOTE: uses floats instead of dReals because that's what radix sort wants
525 for( int i = 0; i < count; ++i )
526 poslist[ i ] = (float)TmpGeomList[i]->aabb[ ax0idx ];
527 poslist[ count++ ] = FLT_MAX;
529 // 2) Sort the list
530 const uint32* Sorted = sortContext.RadixSort( poslist.data(), count );
532 // 3) Prune the list
533 const uint32* const LastSorted = Sorted + count;
534 const uint32* RunningAddress = Sorted;
536 Pair IndexPair;
537 while ( RunningAddress < LastSorted && Sorted < LastSorted )
539 IndexPair.id0 = *Sorted++;
541 // empty, this loop just advances RunningAddress
542 while ( poslist[*RunningAddress++] < poslist[IndexPair.id0] ) {}
544 if ( RunningAddress < LastSorted )
546 const uint32* RunningAddress2 = RunningAddress;
548 const dReal idx0ax0max = geoms[IndexPair.id0]->aabb[ax0idx+1];
549 const dReal idx0ax1max = geoms[IndexPair.id0]->aabb[ax1idx+1];
550 const dReal idx0ax2max = geoms[IndexPair.id0]->aabb[ax2idx+1];
552 while ( poslist[ IndexPair.id1 = *RunningAddress2++ ] <= idx0ax0max )
554 const dReal* aabb0 = geoms[ IndexPair.id0 ]->aabb;
555 const dReal* aabb1 = geoms[ IndexPair.id1 ]->aabb;
557 // Intersection?
558 if ( idx0ax1max >= aabb1[ax1idx] && aabb1[ax1idx+1] >= aabb0[ax1idx] )
559 if ( idx0ax2max >= aabb1[ax2idx] && aabb1[ax2idx+1] >= aabb0[ax2idx] )
561 pairs.push( IndexPair );
566 }; // while ( RunningAddress < LastSorted && Sorted < LastSorted )
570 //==============================================================================
572 //------------------------------------------------------------------------------
573 // Radix Sort
574 //------------------------------------------------------------------------------
// Shared prologue of every radix pass: binds CurCount to this pass's
// histogram slice and clears PerformPass when all input values share the
// same byte at this position (the pass would be a no-op permutation).
// NOTE: expands to declarations and relies on `input`, `nb` and
// `mHistogram` being in scope at the expansion site.
#define CHECK_PASS_VALIDITY(pass) \
	/* Shortcut to current counters */ \
	uint32* CurCount = &mHistogram[pass<<8]; \
	/* Reset flag. The sorting pass is supposed to be performed. (default) */ \
	bool PerformPass = true; \
	/* Check pass validity */ \
	/* If all values have the same byte, sorting is useless. */ \
	/* It may happen when sorting bytes or words instead of dwords. */ \
	/* This routine actually sorts words faster than dwords, and bytes */ \
	/* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \
	/* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
	/* Get first byte */ \
	uint8 UniqueVal = *(((uint8*)input)+pass); \
	/* Check that byte's counter */ \
	if(CurCount[UniqueVal]==nb) PerformPass=false;
599 // WARNING ONLY SORTS IEEE FLOATING-POINT VALUES
600 const uint32* RaixSortContext::RadixSort( const float* input2, uint32 nb )
602 uint32* input = (uint32*)input2;
604 // Resize lists if needed
605 ReallocateRanksIfNecessary(nb);
607 // Allocate histograms & offsets on the stack
608 uint32 mHistogram[256*4];
609 uint32* mLink[256];
611 // Create histograms (counters). Counters for all passes are created in one run.
612 // Pros: read input buffer once instead of four times
613 // Cons: mHistogram is 4Kb instead of 1Kb
614 // Floating-point values are always supposed to be signed values, so there's only one code path there.
615 // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
616 // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
617 // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
618 // wouldn't work with mixed positive/negative values....
620 /* Clear counters/histograms */
621 memset(mHistogram, 0, 256*4*sizeof(uint32));
623 /* Prepare to count */
624 uint8* p = (uint8*)input;
625 uint8* pe = &p[nb*4];
626 uint32* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */
627 uint32* h1= &mHistogram[256]; /* Histogram for second pass */
628 uint32* h2= &mHistogram[512]; /* Histogram for third pass */
629 uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */
631 bool AlreadySorted = true; /* Optimism... */
633 if (!AreRanksValid())
635 /* Prepare for temporal coherence */
636 float* Running = (float*)input2;
637 float PrevVal = *Running;
639 while(p!=pe)
641 /* Read input input2 in previous sorted order */
642 float Val = *Running++;
643 /* Check whether already sorted or not */
644 if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
645 /* Update for next iteration */
646 PrevVal = Val;
648 /* Create histograms */
649 h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
652 /* If all input values are already sorted, we just have to return and leave the */
653 /* previous list unchanged. That way the routine may take advantage of temporal */
654 /* coherence, for example when used to sort transparent faces. */
655 if(AlreadySorted)
657 uint32* const Ranks1 = GetRanks1();
658 for(uint32 i=0;i<nb;i++) Ranks1[i] = i;
659 return Ranks1;
662 else
664 /* Prepare for temporal coherence */
665 uint32* const Ranks1 = GetRanks1();
667 uint32* Indices = Ranks1;
668 float PrevVal = (float)input2[*Indices];
670 while(p!=pe)
672 /* Read input input2 in previous sorted order */
673 float Val = (float)input2[*Indices++];
674 /* Check whether already sorted or not */
675 if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
676 /* Update for next iteration */
677 PrevVal = Val;
679 /* Create histograms */
680 h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
683 /* If all input values are already sorted, we just have to return and leave the */
684 /* previous list unchanged. That way the routine may take advantage of temporal */
685 /* coherence, for example when used to sort transparent faces. */
686 if(AlreadySorted) { return Ranks1; }
689 /* Else there has been an early out and we must finish computing the histograms */
690 while(p!=pe)
692 /* Create histograms without the previous overhead */
693 h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
697 // Compute #negative values involved if needed
698 uint32 NbNegativeValues = 0;
700 // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
701 // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
702 // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
703 uint32* h3= &mHistogram[768];
704 for(uint32 i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
706 // Radix sort, j is the pass number (0=LSB, 3=MSB)
707 for(uint32 j=0;j<4;j++)
709 // Should we care about negative values?
710 if(j!=3)
712 // Here we deal with positive values only
713 CHECK_PASS_VALIDITY(j);
715 if(PerformPass)
717 uint32* const Ranks2 = GetRanks2();
718 // Create offsets
719 mLink[0] = Ranks2;
720 for(uint32 i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
722 // Perform Radix Sort
723 uint8* InputBytes = (uint8*)input;
724 InputBytes += j;
725 if (!AreRanksValid())
727 for(uint32 i=0;i<nb;i++)
729 *mLink[InputBytes[i<<2]]++ = i;
732 ValidateRanks();
734 else
736 uint32* const Ranks1 = GetRanks1();
738 uint32* Indices = Ranks1;
739 uint32* const IndicesEnd = Ranks1 + nb;
740 while(Indices!=IndicesEnd)
742 uint32 id = *Indices++;
743 *mLink[InputBytes[id<<2]]++ = id;
747 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
748 SwapRanks();
751 else
753 // This is a special case to correctly handle negative values
754 CHECK_PASS_VALIDITY(j);
756 if(PerformPass)
758 uint32* const Ranks2 = GetRanks2();
760 // Create biased offsets, in order for negative numbers to be sorted as well
761 mLink[0] = Ranks2 + NbNegativeValues; // First positive number takes place after the negative ones
762 for(uint32 i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
764 // We must reverse the sorting order for negative numbers!
765 mLink[255] = Ranks2;
766 for(uint32 i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
767 for(uint32 i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values
769 // Perform Radix Sort
770 if (!AreRanksValid())
772 for(uint32 i=0;i<nb;i++)
774 uint32 Radix = input[i]>>24; // Radix byte, same as above. AND is useless here (uint32).
775 // ### cmp to be killed. Not good. Later.
776 if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above
777 else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order
780 ValidateRanks();
782 else
784 uint32* const Ranks1 = GetRanks1();
786 for(uint32 i=0;i<nb;i++)
788 uint32 Radix = input[Ranks1[i]]>>24; // Radix byte, same as above. AND is useless here (uint32).
789 // ### cmp to be killed. Not good. Later.
790 if(Radix<128) *mLink[Radix]++ = Ranks1[i]; // Number is positive, same as above
791 else *(--mLink[Radix]) = Ranks1[i]; // Number is negative, flip the sorting order
794 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
795 SwapRanks();
797 else
799 // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
800 if(UniqueVal>=128)
802 if (!AreRanksValid())
804 uint32* const Ranks2 = GetRanks2();
805 // ###Possible?
806 for(uint32 i=0;i<nb;i++)
808 Ranks2[i] = nb-i-1;
811 ValidateRanks();
813 else
815 uint32* const Ranks1 = GetRanks1();
816 uint32* const Ranks2 = GetRanks2();
817 for(uint32 i=0;i<nb;i++) Ranks2[i] = Ranks1[nb-i-1];
820 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
821 SwapRanks();
827 // Return indices
828 uint32* const Ranks1 = GetRanks1();
829 return Ranks1;