Cosmetic: code corrections in QuickStep
[ode.git] / ode / src / collision_sapspace.cpp
blob76258bf7a408c2c85960735a496c74c8030e0f8e
1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
24 * Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
25 * Additional work by David Walters
26 * Original code:
27 * OPCODE - Optimized Collision Detection
28 * Copyright (C) 2001 Pierre Terdiman
29 * Homepage: http://www.codercorner.com/Opcode.htm
31 * This version does complete radix sort, not "classical" SAP. So, we
32 * have no temporal coherence, but are able to handle any movement
33 * velocities equally well.
36 #include <ode/common.h>
37 #include <ode/collision_space.h>
38 #include <ode/collision.h>
40 #include "config.h"
41 #include "matrix.h"
42 #include "collision_kernel.h"
43 #include "collision_space_internal.h"
45 // Reference counting helper for radix sort global data.
46 //static void RadixSortRef();
47 //static void RadixSortDeref();
50 // --------------------------------------------------------------------------
51 // Radix Sort Context
52 // --------------------------------------------------------------------------
54 struct RaixSortContext
56 public:
57 RaixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL), mPrimaryRanks(NULL) {}
58 ~RaixSortContext() { FreeRanks(); }
60 // OPCODE's Radix Sorting, returns a list of indices in sorted order
61 const uint32* RadixSort( const float* input2, uint32 nb );
63 private:
64 void FreeRanks();
65 void AllocateRanks(sizeint nNewSize);
67 void ReallocateRanksIfNecessary(sizeint nNewSize);
69 private:
70 void SetCurrentSize(sizeint nValue) { mCurrentSize = nValue; }
71 sizeint GetCurrentSize() const { return mCurrentSize; }
73 void SetCurrentUtilization(sizeint nValue) { mCurrentUtilization = nValue; }
74 sizeint GetCurrentUtilization() const { return mCurrentUtilization; }
76 uint32 *GetRanks1() const { return mPrimaryRanks; }
77 uint32 *GetRanks2() const { return mRanksBuffer + ((mRanksBuffer + mCurrentSize) - mPrimaryRanks); }
78 void SwapRanks() { mPrimaryRanks = GetRanks2(); }
80 bool AreRanksValid() const { return mRanksValid; }
81 void InvalidateRanks() { mRanksValid = false; }
82 void ValidateRanks() { mRanksValid = true; }
84 private:
85 sizeint mCurrentSize; //!< Current size of the indices list
86 sizeint mCurrentUtilization; //!< Current utilization of the indices list
87 bool mRanksValid;
88 uint32* mRanksBuffer; //!< Two lists allocated sequentially in a single block
89 uint32* mPrimaryRanks;
92 void RaixSortContext::AllocateRanks(sizeint nNewSize)
94 dIASSERT(GetCurrentSize() == 0);
96 mRanksBuffer = new uint32[2 * nNewSize];
97 mPrimaryRanks = mRanksBuffer;
99 SetCurrentSize(nNewSize);
102 void RaixSortContext::FreeRanks()
104 SetCurrentSize(0);
106 delete[] mRanksBuffer;
109 void RaixSortContext::ReallocateRanksIfNecessary(sizeint nNewSize)
111 sizeint nCurUtilization = GetCurrentUtilization();
113 if (nNewSize != nCurUtilization)
115 sizeint nCurSize = GetCurrentSize();
117 if ( nNewSize > nCurSize )
119 // Free previously used ram
120 FreeRanks();
122 // Get some fresh one
123 AllocateRanks(nNewSize);
126 InvalidateRanks();
127 SetCurrentUtilization(nNewSize);
131 // --------------------------------------------------------------------------
132 // SAP space code
133 // --------------------------------------------------------------------------
135 struct dxSAPSpace : public dxSpace
137 // Constructor / Destructor
138 dxSAPSpace( dSpaceID _space, int sortaxis );
139 ~dxSAPSpace();
141 // dxSpace
142 virtual dxGeom* getGeom(int i);
143 virtual void add(dxGeom* g);
144 virtual void remove(dxGeom* g);
145 virtual void dirty(dxGeom* g);
146 virtual void computeAABB();
147 virtual void cleanGeoms();
148 virtual void collide( void *data, dNearCallback *callback );
149 virtual void collide2( void *data, dxGeom *geom, dNearCallback *callback );
151 private:
153 //--------------------------------------------------------------------------
154 // Local Declarations
155 //--------------------------------------------------------------------------
157 //! A generic couple structure
158 struct Pair
160 uint32 id0; //!< First index of the pair
161 uint32 id1; //!< Second index of the pair
163 // Default and Value Constructor
164 Pair() {}
165 Pair( uint32 i0, uint32 i1 ) : id0( i0 ), id1( i1 ) {}
168 //--------------------------------------------------------------------------
169 // Helpers
170 //--------------------------------------------------------------------------
173 * Complete box pruning.
174 * Returns a list of overlapping pairs of boxes, each box of the pair
175 * belongs to the same set.
177 * @param count [in] number of boxes.
178 * @param geoms [in] geoms of boxes.
179 * @param pairs [out] array of overlapping pairs.
181 void BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs );
184 //--------------------------------------------------------------------------
185 // Implementation Data
186 //--------------------------------------------------------------------------
188 // We have two lists (arrays of pointers) to dirty and clean
189 // geoms. Each geom knows it's index into the corresponding list
190 // (see macros above).
191 dArray<dxGeom*> DirtyList; // dirty geoms
192 dArray<dxGeom*> GeomList; // clean geoms
194 // For SAP, we ultimately separate "normal" geoms and the ones that have
195 // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
196 // infinite geoms anyway).
197 dArray<dxGeom*> TmpGeomList; // temporary for normal geoms
198 dArray<dxGeom*> TmpInfGeomList; // temporary for geoms with infinite AABBs
200 // Our sorting axes. (X,Z,Y is often best). Stored *2 for minor speedup
201 // Axis indices into geom's aabb are: min=idx, max=idx+1
202 uint32 ax0idx;
203 uint32 ax1idx;
204 uint32 ax2idx;
206 // pruning position array scratch pad
207 // NOTE: this is float not dReal because of the OPCODE radix sorter
208 dArray< float > poslist;
209 RaixSortContext sortContext;
212 // Creation
213 dSpaceID dSweepAndPruneSpaceCreate( dxSpace* space, int axisorder ) {
214 return new dxSAPSpace( space, axisorder );
218 //==============================================================================
// A geom participates in collision only when its enable-related flag bits
// match the expected test value.
#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)

// HACK: We abuse 'next' and 'tome' members of dxGeom to store indices into dirty/geom lists.
// A geom is in exactly one list at a time; the index for the other list is
// GEOM_INVALID_IDX. The casts round-trip an int through the pointer members.
#define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next_ex = (dxGeom*)(sizeint)(idx); }
#define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome_ex = (dxGeom**)(sizeint)(idx); }
#define GEOM_GET_DIRTY_IDX(g) ((int)(sizeint)(g)->next_ex)
#define GEOM_GET_GEOM_IDX(g) ((int)(sizeint)(g)->tome_ex)
// Sentinel meaning "not present in that list".
#define GEOM_INVALID_IDX (-1)
231 * A bit of repetitive work - similar to collideAABBs, but doesn't check
232 * if AABBs intersect (because SAP returns pairs with overlapping AABBs).
234 static void collideGeomsNoAABBs( dxGeom *g1, dxGeom *g2, void *data, dNearCallback *callback )
236 dIASSERT( (g1->gflags & GEOM_AABB_BAD)==0 );
237 dIASSERT( (g2->gflags & GEOM_AABB_BAD)==0 );
239 // no contacts if both geoms on the same body, and the body is not 0
240 if (g1->body == g2->body && g1->body) return;
242 // test if the category and collide bitfields match
243 if ( ((g1->category_bits & g2->collide_bits) ||
244 (g2->category_bits & g1->collide_bits)) == 0) {
245 return;
248 dReal *bounds1 = g1->aabb;
249 dReal *bounds2 = g2->aabb;
251 // check if either object is able to prove that it doesn't intersect the
252 // AABB of the other
253 if (g1->AABBTest (g2,bounds2) == 0) return;
254 if (g2->AABBTest (g1,bounds1) == 0) return;
256 // the objects might actually intersect - call the space callback function
257 callback (data,g1,g2);
261 dxSAPSpace::dxSAPSpace( dSpaceID _space, int axisorder ) : dxSpace( _space )
263 type = dSweepAndPruneSpaceClass;
265 // Init AABB to infinity
266 aabb[0] = -dInfinity;
267 aabb[1] = dInfinity;
268 aabb[2] = -dInfinity;
269 aabb[3] = dInfinity;
270 aabb[4] = -dInfinity;
271 aabb[5] = dInfinity;
273 ax0idx = ( ( axisorder ) & 3 ) << 1;
274 ax1idx = ( ( axisorder >> 2 ) & 3 ) << 1;
275 ax2idx = ( ( axisorder >> 4 ) & 3 ) << 1;
278 dxSAPSpace::~dxSAPSpace()
280 CHECK_NOT_LOCKED(this);
281 if ( cleanup ) {
282 // note that destroying each geom will call remove()
283 for ( ; DirtyList.size(); dGeomDestroy( DirtyList[ 0 ] ) ) {}
284 for ( ; GeomList.size(); dGeomDestroy( GeomList[ 0 ] ) ) {}
286 else {
287 // just unhook them
288 for ( ; DirtyList.size(); remove( DirtyList[ 0 ] ) ) {}
289 for ( ; GeomList.size(); remove( GeomList[ 0 ] ) ) {}
293 dxGeom* dxSAPSpace::getGeom( int i )
295 dUASSERT( i >= 0 && i < count, "index out of range" );
296 int dirtySize = DirtyList.size();
297 if( i < dirtySize )
298 return DirtyList[i];
299 else
300 return GeomList[i-dirtySize];
303 void dxSAPSpace::add( dxGeom* g )
305 CHECK_NOT_LOCKED (this);
306 dAASSERT(g);
307 dUASSERT(g->tome_ex == 0 && g->next_ex == 0, "geom is already in a space");
310 // add to dirty list
311 GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
312 GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
313 DirtyList.push( g );
315 dxSpace::add(g);
318 void dxSAPSpace::remove( dxGeom* g )
320 CHECK_NOT_LOCKED(this);
321 dAASSERT(g);
322 dUASSERT(g->parent_space == this,"object is not in this space");
324 // remove
325 int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
326 int geomIdx = GEOM_GET_GEOM_IDX(g);
327 // must be in one list, not in both
328 dUASSERT(
329 (dirtyIdx==GEOM_INVALID_IDX && geomIdx>=0 && geomIdx<GeomList.size()) ||
330 (geomIdx==GEOM_INVALID_IDX && dirtyIdx>=0 && dirtyIdx<DirtyList.size()),
331 "geom indices messed up" );
332 if( dirtyIdx != GEOM_INVALID_IDX ) {
333 // we're in dirty list, remove
334 int dirtySize = DirtyList.size();
335 if (dirtyIdx != dirtySize-1) {
336 dxGeom* lastG = DirtyList[dirtySize-1];
337 DirtyList[dirtyIdx] = lastG;
338 GEOM_SET_DIRTY_IDX(lastG,dirtyIdx);
340 GEOM_SET_DIRTY_IDX(g,GEOM_INVALID_IDX);
341 DirtyList.setSize( dirtySize-1 );
342 } else {
343 // we're in geom list, remove
344 int geomSize = GeomList.size();
345 if (geomIdx != geomSize-1) {
346 dxGeom* lastG = GeomList[geomSize-1];
347 GeomList[geomIdx] = lastG;
348 GEOM_SET_GEOM_IDX(lastG,geomIdx);
350 GEOM_SET_GEOM_IDX(g,GEOM_INVALID_IDX);
351 GeomList.setSize( geomSize-1 );
354 dxSpace::remove(g);
357 void dxSAPSpace::dirty( dxGeom* g )
359 dAASSERT(g);
360 dUASSERT(g->parent_space == this, "object is not in this space");
362 // check if already dirtied
363 int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
364 if( dirtyIdx != GEOM_INVALID_IDX )
365 return;
367 int geomIdx = GEOM_GET_GEOM_IDX(g);
368 dUASSERT( geomIdx>=0 && geomIdx<GeomList.size(), "geom indices messed up" );
370 // remove from geom list, place last in place of this
371 int geomSize = GeomList.size();
372 if (geomIdx != geomSize-1) {
373 dxGeom* lastG = GeomList[geomSize-1];
374 GeomList[geomIdx] = lastG;
375 GEOM_SET_GEOM_IDX(lastG,geomIdx);
377 GeomList.setSize( geomSize-1 );
379 // add to dirty list
380 GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
381 GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
382 DirtyList.push( g );
385 void dxSAPSpace::computeAABB()
387 // TODO?
390 void dxSAPSpace::cleanGeoms()
392 int dirtySize = DirtyList.size();
393 if( !dirtySize )
394 return;
396 // compute the AABBs of all dirty geoms, clear the dirty flags,
397 // remove from dirty list, place into geom list
398 lock_count++;
400 int geomSize = GeomList.size();
401 GeomList.setSize( geomSize + dirtySize ); // ensure space in geom list
403 for( int i = 0; i < dirtySize; ++i ) {
404 dxGeom* g = DirtyList[i];
405 if( IS_SPACE(g) ) {
406 ((dxSpace*)g)->cleanGeoms();
409 g->recomputeAABB();
410 dIASSERT((g->gflags & GEOM_AABB_BAD) == 0);
412 g->gflags &= ~GEOM_DIRTY;
414 // remove from dirty list, add to geom list
415 GEOM_SET_DIRTY_IDX( g, GEOM_INVALID_IDX );
416 GEOM_SET_GEOM_IDX( g, geomSize + i );
417 GeomList[geomSize+i] = g;
419 // clear dirty list
420 DirtyList.setSize( 0 );
422 lock_count--;
425 void dxSAPSpace::collide( void *data, dNearCallback *callback )
427 dAASSERT (callback);
429 lock_count++;
431 cleanGeoms();
433 // by now all geoms are in GeomList, and DirtyList must be empty
434 int geom_count = GeomList.size();
435 dUASSERT( geom_count == count, "geom counts messed up" );
437 // separate all ENABLED geoms into infinite AABBs and normal AABBs
438 TmpGeomList.setSize(0);
439 TmpInfGeomList.setSize(0);
440 int axis0max = ax0idx + 1;
441 for( int i = 0; i < geom_count; ++i ) {
442 dxGeom* g = GeomList[i];
443 if( !GEOM_ENABLED(g) ) // skip disabled ones
444 continue;
445 const dReal& amax = g->aabb[axis0max];
446 if( amax == dInfinity ) // HACK? probably not...
447 TmpInfGeomList.push( g );
448 else
449 TmpGeomList.push( g );
452 // do SAP on normal AABBs
453 dArray< Pair > overlapBoxes;
454 int tmp_geom_count = TmpGeomList.size();
455 if ( tmp_geom_count > 0 )
457 // Generate a list of overlapping boxes
458 BoxPruning( tmp_geom_count, (const dxGeom**)TmpGeomList.data(), overlapBoxes );
461 // collide overlapping
462 int overlapCount = overlapBoxes.size();
463 for( int j = 0; j < overlapCount; ++j )
465 const Pair& pair = overlapBoxes[ j ];
466 dxGeom* g1 = TmpGeomList[ pair.id0 ];
467 dxGeom* g2 = TmpGeomList[ pair.id1 ];
468 collideGeomsNoAABBs( g1, g2, data, callback );
471 int infSize = TmpInfGeomList.size();
472 int normSize = TmpGeomList.size();
473 int m, n;
475 for ( m = 0; m < infSize; ++m )
477 dxGeom* g1 = TmpInfGeomList[ m ];
479 // collide infinite ones
480 for( n = m+1; n < infSize; ++n ) {
481 dxGeom* g2 = TmpInfGeomList[n];
482 collideGeomsNoAABBs( g1, g2, data, callback );
485 // collide infinite ones with normal ones
486 for( n = 0; n < normSize; ++n ) {
487 dxGeom* g2 = TmpGeomList[n];
488 collideGeomsNoAABBs( g1, g2, data, callback );
492 lock_count--;
495 void dxSAPSpace::collide2( void *data, dxGeom *geom, dNearCallback *callback )
497 dAASSERT (geom && callback);
499 // TODO: This is just a simple N^2 implementation
501 lock_count++;
503 cleanGeoms();
504 geom->recomputeAABB();
506 // intersect bounding boxes
507 int geom_count = GeomList.size();
508 for ( int i = 0; i < geom_count; ++i ) {
509 dxGeom* g = GeomList[i];
510 if ( GEOM_ENABLED(g) )
511 collideAABBs (g,geom,data,callback);
514 lock_count--;
518 void dxSAPSpace::BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs )
520 // Size the poslist (+1 for infinity end cap)
521 poslist.setSize( count );
523 // 1) Build main list using the primary axis
524 // NOTE: uses floats instead of dReals because that's what radix sort wants
525 for( int i = 0; i < count; ++i )
526 poslist[ i ] = (float)TmpGeomList[i]->aabb[ ax0idx ];
528 // 2) Sort the list
529 const uint32* Sorted = sortContext.RadixSort( poslist.data(), count );
531 // 3) Prune the list
532 const uint32* const LastSorted = Sorted + count;
533 const uint32* RunningAddress = Sorted;
535 bool bExitLoop;
536 Pair IndexPair;
537 while ( Sorted < LastSorted )
539 IndexPair.id0 = *Sorted++;
541 // empty, this loop just advances RunningAddress
542 for (bExitLoop = false; poslist[*RunningAddress++] < poslist[IndexPair.id0]; )
544 if (RunningAddress == LastSorted)
546 bExitLoop = true;
547 break;
551 if ( bExitLoop || RunningAddress == LastSorted) // Not a bug!!!
553 break;
556 const float idx0ax0max = (float)geoms[IndexPair.id0]->aabb[ax0idx+1]; // To avoid wrong decisions caused by rounding errors, cast the AABB element to float similarly as we did at the function beginning
557 const dReal idx0ax1max = geoms[IndexPair.id0]->aabb[ax1idx+1];
558 const dReal idx0ax2max = geoms[IndexPair.id0]->aabb[ax2idx+1];
560 for (const uint32* RunningAddress2 = RunningAddress; poslist[ IndexPair.id1 = *RunningAddress2++ ] <= idx0ax0max; )
562 const dReal* aabb0 = geoms[ IndexPair.id0 ]->aabb;
563 const dReal* aabb1 = geoms[ IndexPair.id1 ]->aabb;
565 // Intersection?
566 if ( idx0ax1max >= aabb1[ax1idx] && aabb1[ax1idx+1] >= aabb0[ax1idx]
567 && idx0ax2max >= aabb1[ax2idx] && aabb1[ax2idx+1] >= aabb0[ax2idx] )
569 pairs.push( IndexPair );
572 if (RunningAddress2 == LastSorted)
574 break;
578 } // while ( RunningAddress < LastSorted && Sorted < LastSorted )
582 //==============================================================================
584 //------------------------------------------------------------------------------
585 // Radix Sort
586 //------------------------------------------------------------------------------
// CHECK_PASS_VALIDITY(pass) expands to declarations in the ENCLOSING scope:
//   CurCount    - pointer to the 256 counters of histogram 'pass'
//   PerformPass - false when the whole pass can be skipped
//   UniqueVal   - the 'pass'-th byte of the first input value
// Because it declares names, each expansion must live in its own block.
#define CHECK_PASS_VALIDITY(pass) \
/* Shortcut to current counters */ \
const uint32* CurCount = &mHistogram[pass<<8]; \
/* Reset flag. The sorting pass is supposed to be performed. (default) */ \
bool PerformPass = true; \
/* Check pass validity */ \
/* If all values have the same byte, sorting is useless. */ \
/* It may happen when sorting bytes or words instead of dwords. */ \
/* This routine actually sorts words faster than dwords, and bytes */ \
/* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \
/* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
/* Get first byte */ \
uint8 UniqueVal = *(((const uint8*)input)+pass); \
/* Check that byte's counter */ \
if(CurCount[UniqueVal]==nb) PerformPass=false;
611 // WARNING ONLY SORTS IEEE FLOATING-POINT VALUES
612 const uint32* RaixSortContext::RadixSort( const float* input2, uint32 nb )
614 union _type_cast_union
616 _type_cast_union(const float *floats): asFloats(floats) {}
617 _type_cast_union(const uint32 *uints32): asUInts32(uints32) {}
619 const float *asFloats;
620 const uint32 *asUInts32;
621 const uint8 *asUInts8;
624 const uint32* input = _type_cast_union(input2).asUInts32;
626 // Resize lists if needed
627 ReallocateRanksIfNecessary(nb);
629 // Allocate histograms & offsets on the stack
630 uint32 mHistogram[256*4];
631 uint32* mLink[256];
633 // Create histograms (counters). Counters for all passes are created in one run.
634 // Pros: read input buffer once instead of four times
635 // Cons: mHistogram is 4Kb instead of 1Kb
636 // Floating-point values are always supposed to be signed values, so there's only one code path there.
637 // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
638 // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
639 // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
640 // wouldn't work with mixed positive/negative values....
642 /* Clear counters/histograms */
643 memset(mHistogram, 0, 256*4*sizeof(uint32));
645 /* Prepare to count */
646 const uint8* p = _type_cast_union(input).asUInts8;
647 const uint8* pe = &p[nb*4];
648 uint32* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */
649 uint32* h1= &mHistogram[256]; /* Histogram for second pass */
650 uint32* h2= &mHistogram[512]; /* Histogram for third pass */
651 uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */
653 bool AlreadySorted = true; /* Optimism... */
655 if (!AreRanksValid())
657 /* Prepare for temporal coherence */
658 const float* Running = input2;
659 float PrevVal = *Running;
661 while(p!=pe)
663 /* Read input input2 in previous sorted order */
664 float Val = *Running++;
665 /* Check whether already sorted or not */
666 if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
667 /* Update for next iteration */
668 PrevVal = Val;
670 /* Create histograms */
671 h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
674 /* If all input values are already sorted, we just have to return and leave the */
675 /* previous list unchanged. That way the routine may take advantage of temporal */
676 /* coherence, for example when used to sort transparent faces. */
677 if(AlreadySorted)
679 uint32* const Ranks1 = GetRanks1();
680 for(uint32 i=0;i<nb;i++) Ranks1[i] = i;
681 return Ranks1;
684 else
686 /* Prepare for temporal coherence */
687 uint32* const Ranks1 = GetRanks1();
689 uint32* Indices = Ranks1;
690 float PrevVal = input2[*Indices];
692 while(p!=pe)
694 /* Read input input2 in previous sorted order */
695 float Val = input2[*Indices++];
696 /* Check whether already sorted or not */
697 if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
698 /* Update for next iteration */
699 PrevVal = Val;
701 /* Create histograms */
702 h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
705 /* If all input values are already sorted, we just have to return and leave the */
706 /* previous list unchanged. That way the routine may take advantage of temporal */
707 /* coherence, for example when used to sort transparent faces. */
708 if(AlreadySorted) { return Ranks1; }
711 /* Else there has been an early out and we must finish computing the histograms */
712 while(p!=pe)
714 /* Create histograms without the previous overhead */
715 h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
719 // Compute #negative values involved if needed
720 uint32 NbNegativeValues = 0;
722 // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
723 // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
724 // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
725 uint32* h3= &mHistogram[768];
726 for(uint32 i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
728 // Radix sort, j is the pass number (0=LSB, 3=MSB)
729 for(uint32 j=0;j<4;j++)
731 // Should we care about negative values?
732 if(j!=3)
734 // Here we deal with positive values only
735 CHECK_PASS_VALIDITY(j);
737 if(PerformPass)
739 uint32* const Ranks2 = GetRanks2();
740 // Create offsets
741 mLink[0] = Ranks2;
742 for(uint32 i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
744 // Perform Radix Sort
745 const uint8* InputBytes = _type_cast_union(input).asUInts8;
746 InputBytes += j;
747 if (!AreRanksValid())
749 for(uint32 i=0;i<nb;i++)
751 *mLink[InputBytes[i<<2]]++ = i;
754 ValidateRanks();
756 else
758 uint32* const Ranks1 = GetRanks1();
760 uint32* Indices = Ranks1;
761 uint32* const IndicesEnd = Ranks1 + nb;
762 while(Indices!=IndicesEnd)
764 uint32 id = *Indices++;
765 *mLink[InputBytes[id<<2]]++ = id;
769 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
770 SwapRanks();
773 else
775 // This is a special case to correctly handle negative values
776 CHECK_PASS_VALIDITY(j);
778 if(PerformPass)
780 uint32* const Ranks2 = GetRanks2();
782 // Create biased offsets, in order for negative numbers to be sorted as well
783 mLink[0] = Ranks2 + NbNegativeValues; // First positive number takes place after the negative ones
784 for(uint32 i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
786 // We must reverse the sorting order for negative numbers!
787 mLink[255] = Ranks2;
788 for(uint32 i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
789 for(uint32 i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values
791 // Perform Radix Sort
792 if (!AreRanksValid())
794 for(uint32 i=0;i<nb;i++)
796 uint32 Radix = input[i]>>24; // Radix byte, same as above. AND is useless here (uint32).
797 // ### cmp to be killed. Not good. Later.
798 if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above
799 else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order
802 ValidateRanks();
804 else
806 uint32* const Ranks1 = GetRanks1();
808 for(uint32 i=0;i<nb;i++)
810 uint32 Radix = input[Ranks1[i]]>>24; // Radix byte, same as above. AND is useless here (uint32).
811 // ### cmp to be killed. Not good. Later.
812 if(Radix<128) *mLink[Radix]++ = Ranks1[i]; // Number is positive, same as above
813 else *(--mLink[Radix]) = Ranks1[i]; // Number is negative, flip the sorting order
816 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
817 SwapRanks();
819 else
821 // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
822 if(UniqueVal>=128)
824 if (!AreRanksValid())
826 uint32* const Ranks2 = GetRanks2();
827 // ###Possible?
828 for(uint32 i=0;i<nb;i++)
830 Ranks2[i] = nb-i-1;
833 ValidateRanks();
835 else
837 uint32* const Ranks1 = GetRanks1();
838 uint32* const Ranks2 = GetRanks2();
839 for(uint32 i=0;i<nb;i++) Ranks2[i] = Ranks1[nb-i-1];
842 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
843 SwapRanks();
849 // Return indices
850 uint32* const Ranks1 = GetRanks1();
851 return Ranks1;