/*************************************************************************
 *                                                                       *
 * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
 * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
 *                                                                       *
 * This library is free software; you can redistribute it and/or         *
 * modify it under the terms of EITHER:                                  *
 *   (1) The GNU Lesser General Public License as published by the Free  *
 *       Software Foundation; either version 2.1 of the License, or (at  *
 *       your option) any later version. The text of the GNU Lesser      *
 *       General Public License is included with this library in the     *
 *       file LICENSE.TXT.                                               *
 *   (2) The BSD-style license that is included with this library in     *
 *       the file LICENSE-BSD.TXT.                                       *
 *                                                                       *
 * This library is distributed in the hope that it will be useful,       *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
 * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
 *                                                                       *
 *************************************************************************/

/*
 *  Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
 *  Additional work by David Walters
 *
 *  Original code:
 *      OPCODE - Optimized Collision Detection
 *      Copyright (C) 2001 Pierre Terdiman
 *      Homepage: http://www.codercorner.com/Opcode.htm
 *
 *  This version does a complete radix sort, not "classical" SAP. So, we
 *  have no temporal coherence, but are able to handle any movement
 *  velocities equally well.
 */
#include <ode/common.h>
#include <ode/collision_space.h>
#include <ode/collision.h>
#include <string.h> // memset() is used by the radix sorter below

#include "config.h"
#include "matrix.h"
#include "collision_kernel.h"
#include "collision_space_internal.h"

// Reference counting helper for radix sort global data.
//static void RadixSortRef();
//static void RadixSortDeref();
// --------------------------------------------------------------------------
// Radix Sort Context
// --------------------------------------------------------------------------

struct RaixSortContext
{
public:
    RaixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL), mPrimaryRanks(NULL) {}
    ~RaixSortContext() { FreeRanks(); }

    // OPCODE's Radix Sorting, returns a list of indices in sorted order
    const uint32* RadixSort( const float* input2, uint32 nb );

private:
    void FreeRanks();
    void AllocateRanks(sizeint nNewSize);

    void ReallocateRanksIfNecessary(sizeint nNewSize);

private:
    void SetCurrentSize(sizeint nValue) { mCurrentSize = nValue; }
    sizeint GetCurrentSize() const { return mCurrentSize; }

    void SetCurrentUtilization(sizeint nValue) { mCurrentUtilization = nValue; }
    sizeint GetCurrentUtilization() const { return mCurrentUtilization; }

    uint32 *GetRanks1() const { return mPrimaryRanks; }
    uint32 *GetRanks2() const { return mRanksBuffer + ((mRanksBuffer + mCurrentSize) - mPrimaryRanks); }
    void SwapRanks() { mPrimaryRanks = GetRanks2(); }

    bool AreRanksValid() const { return mRanksValid; }
    void InvalidateRanks() { mRanksValid = false; }
    void ValidateRanks() { mRanksValid = true; }

private:
    sizeint mCurrentSize; //!< Current size of the indices list
    sizeint mCurrentUtilization; //!< Current utilization of the indices list
    bool mRanksValid;
    uint32* mRanksBuffer; //!< Two lists allocated sequentially in a single block
    uint32* mPrimaryRanks; //!< The half of mRanksBuffer that currently holds valid ranks
};
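
// Note on the rank buffers: mRanksBuffer holds two index arrays of
// mCurrentSize entries laid out back to back, and mPrimaryRanks points at
// whichever half currently holds the valid ranks. GetRanks2() reflects
// mPrimaryRanks across the block: with B = mRanksBuffer and P = mPrimaryRanks,
// B + ((B + size) - P) evaluates to B + size when P == B, and to B when
// P == B + size. SwapRanks() thus ping-pongs between the halves with no copy.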
void RaixSortContext::AllocateRanks(sizeint nNewSize)
{
    dIASSERT(GetCurrentSize() == 0);

    mRanksBuffer = new uint32[2 * nNewSize];
    mPrimaryRanks = mRanksBuffer;

    SetCurrentSize(nNewSize);
}

void RaixSortContext::FreeRanks()
{
    SetCurrentSize(0);

    delete[] mRanksBuffer;
}

void RaixSortContext::ReallocateRanksIfNecessary(sizeint nNewSize)
{
    sizeint nCurUtilization = GetCurrentUtilization();

    if (nNewSize != nCurUtilization)
    {
        sizeint nCurSize = GetCurrentSize();

        if ( nNewSize > nCurSize )
        {
            // Free the previously used memory
            FreeRanks();

            // Allocate a fresh block
            AllocateRanks(nNewSize);
        }

        InvalidateRanks();
        SetCurrentUtilization(nNewSize);
    }
}
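
// Note that any change in the sorted element count - growth or shrinkage -
// calls InvalidateRanks(), so the next RadixSort() rebuilds its ranks from
// scratch instead of reusing the previous ordering for temporal coherence.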
// --------------------------------------------------------------------------
// SAP space code
// --------------------------------------------------------------------------

struct dxSAPSpace : public dxSpace
{
    // Constructor / Destructor
    dxSAPSpace( dSpaceID _space, int axisorder );
    ~dxSAPSpace();

    // dxSpace
    virtual dxGeom* getGeom(int i);
    virtual void add(dxGeom* g);
    virtual void remove(dxGeom* g);
    virtual void dirty(dxGeom* g);
    virtual void computeAABB();
    virtual void cleanGeoms();
    virtual void collide( void *data, dNearCallback *callback );
    virtual void collide2( void *data, dxGeom *geom, dNearCallback *callback );

private:

    //--------------------------------------------------------------------------
    // Local Declarations
    //--------------------------------------------------------------------------

    //! A generic couple structure
    struct Pair
    {
        uint32 id0; //!< First index of the pair
        uint32 id1; //!< Second index of the pair

        // Default and Value Constructor
        Pair() {}
        Pair( uint32 i0, uint32 i1 ) : id0( i0 ), id1( i1 ) {}
    };

    //--------------------------------------------------------------------------
    // Helpers
    //--------------------------------------------------------------------------

    /**
     * Complete box pruning.
     * Returns a list of overlapping pairs of boxes; each box of the pair
     * belongs to the same set.
     *
     * @param count [in] number of boxes.
     * @param geoms [in] geoms of boxes.
     * @param pairs [out] array of overlapping pairs.
     */
    void BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs );

    //--------------------------------------------------------------------------
    // Implementation Data
    //--------------------------------------------------------------------------

    // We have two lists (arrays of pointers) of dirty and clean geoms.
    // Each geom knows its index into the corresponding list
    // (see the GEOM_*_IDX macros below).
    dArray<dxGeom*> DirtyList; // dirty geoms
    dArray<dxGeom*> GeomList;  // clean geoms

    // For SAP, we ultimately separate "normal" geoms and the ones that have
    // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
    // infinite geoms anyway).
    dArray<dxGeom*> TmpGeomList;    // temporary for normal geoms
    dArray<dxGeom*> TmpInfGeomList; // temporary for geoms with infinite AABBs

    // Our sorting axes. (X,Z,Y is often best.) Stored *2 for a minor speedup.
    // Axis indices into a geom's aabb are: min=idx, max=idx+1.
    uint32 ax0idx;
    uint32 ax1idx;
    uint32 ax2idx;

    // pruning position array scratch pad
    // NOTE: this is float, not dReal, because of the OPCODE radix sorter
    dArray< float > poslist;
    RaixSortContext sortContext;
};
// Creation
dSpaceID dSweepAndPruneSpaceCreate( dxSpace* space, int axisorder ) {
    return new dxSAPSpace( space, axisorder );
}

//==============================================================================
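
// Usage sketch (not exercised in this file; assumes the public dSAP_AXES_*
// axis-order constants that ode/collision_space.h is expected to define,
// e.g. dSAP_AXES_XZY == ((0)|(2<<2)|(1<<4))):
//
//   dSpaceID sap = dSweepAndPruneSpaceCreate( 0, dSAP_AXES_XZY );
//   // ... add geoms, then once per simulation step:
//   dSpaceCollide( sap, userData, &nearCallback );
//   // ...
//   dSpaceDestroy( sap );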
#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)

// HACK: We abuse the 'next_ex' and 'tome_ex' members of dxGeom to store indices into the dirty/geom lists.
#define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next_ex = (dxGeom*)(sizeint)(idx); }
#define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome_ex = (dxGeom**)(sizeint)(idx); }
#define GEOM_GET_DIRTY_IDX(g) ((int)(sizeint)(g)->next_ex)
#define GEOM_GET_GEOM_IDX(g) ((int)(sizeint)(g)->tome_ex)
#define GEOM_INVALID_IDX (-1)
/**
 * A bit of repetitive work - similar to collideAABBs, but doesn't check
 * if AABBs intersect (because SAP returns pairs with overlapping AABBs).
 */
static void collideGeomsNoAABBs( dxGeom *g1, dxGeom *g2, void *data, dNearCallback *callback )
{
    dIASSERT( (g1->gflags & GEOM_AABB_BAD)==0 );
    dIASSERT( (g2->gflags & GEOM_AABB_BAD)==0 );

    // no contacts if both geoms are on the same body, and the body is not 0
    if (g1->body == g2->body && g1->body) return;

    // test if the category and collide bitfields match
    if ( ((g1->category_bits & g2->collide_bits) ||
          (g2->category_bits & g1->collide_bits)) == 0) {
        return;
    }

    dReal *bounds1 = g1->aabb;
    dReal *bounds2 = g2->aabb;

    // check if either object is able to prove that it doesn't intersect the
    // AABB of the other
    if (g1->AABBTest (g2,bounds2) == 0) return;
    if (g2->AABBTest (g1,bounds1) == 0) return;

    // the objects might actually intersect - call the space callback function
    callback (data,g1,g2);
}
dxSAPSpace::dxSAPSpace( dSpaceID _space, int axisorder ) : dxSpace( _space )
{
    type = dSweepAndPruneSpaceClass;

    // Init AABB to infinity
    aabb[0] = -dInfinity;
    aabb[1] = dInfinity;
    aabb[2] = -dInfinity;
    aabb[3] = dInfinity;
    aabb[4] = -dInfinity;
    aabb[5] = dInfinity;

    ax0idx = ( ( axisorder ) & 3 ) << 1;
    ax1idx = ( ( axisorder >> 2 ) & 3 ) << 1;
    ax2idx = ( ( axisorder >> 4 ) & 3 ) << 1;
}
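
// axisorder packs three 2-bit axis indices: primary axis in bits 0-1,
// secondary in bits 2-3, tertiary in bits 4-5. Each index is stored doubled
// (the "<< 1") because a geom's aabb interleaves bounds per axis, so axis
// index idx has its minimum at aabb[2*idx] and maximum at aabb[2*idx+1].
// For example, assuming dSAP_AXES_XZY == ((0)|(2<<2)|(1<<4)), this gives
// ax0idx = 0 (X), ax1idx = 4 (Z), ax2idx = 2 (Y).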
dxSAPSpace::~dxSAPSpace()
{
    CHECK_NOT_LOCKED(this);
    if ( cleanup ) {
        // note that destroying each geom will call remove()
        for ( ; DirtyList.size(); dGeomDestroy( DirtyList[ 0 ] ) ) {}
        for ( ; GeomList.size(); dGeomDestroy( GeomList[ 0 ] ) ) {}
    }
    else {
        // just unhook them
        for ( ; DirtyList.size(); remove( DirtyList[ 0 ] ) ) {}
        for ( ; GeomList.size(); remove( GeomList[ 0 ] ) ) {}
    }
}
dxGeom* dxSAPSpace::getGeom( int i )
{
    dUASSERT( i >= 0 && i < count, "index out of range" );
    int dirtySize = DirtyList.size();
    if( i < dirtySize )
        return DirtyList[i];
    else
        return GeomList[i-dirtySize];
}
void dxSAPSpace::add( dxGeom* g )
{
    CHECK_NOT_LOCKED (this);
    dAASSERT(g);
    dUASSERT(g->tome_ex == 0 && g->next_ex == 0, "geom is already in a space");

    // add to dirty list
    GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
    GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
    DirtyList.push( g );

    dxSpace::add(g);
}
void dxSAPSpace::remove( dxGeom* g )
{
    CHECK_NOT_LOCKED(this);
    dAASSERT(g);
    dUASSERT(g->parent_space == this,"object is not in this space");

    // remove
    int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
    int geomIdx = GEOM_GET_GEOM_IDX(g);
    // must be in one list, not in both
    dUASSERT(
        (dirtyIdx==GEOM_INVALID_IDX && geomIdx>=0 && geomIdx<GeomList.size()) ||
        (geomIdx==GEOM_INVALID_IDX && dirtyIdx>=0 && dirtyIdx<DirtyList.size()),
        "geom indices messed up" );
    if( dirtyIdx != GEOM_INVALID_IDX ) {
        // we're in the dirty list, remove
        int dirtySize = DirtyList.size();
        if (dirtyIdx != dirtySize-1) {
            dxGeom* lastG = DirtyList[dirtySize-1];
            DirtyList[dirtyIdx] = lastG;
            GEOM_SET_DIRTY_IDX(lastG,dirtyIdx);
        }
        GEOM_SET_DIRTY_IDX(g,GEOM_INVALID_IDX);
        DirtyList.setSize( dirtySize-1 );
    } else {
        // we're in the geom list, remove
        int geomSize = GeomList.size();
        if (geomIdx != geomSize-1) {
            dxGeom* lastG = GeomList[geomSize-1];
            GeomList[geomIdx] = lastG;
            GEOM_SET_GEOM_IDX(lastG,geomIdx);
        }
        GEOM_SET_GEOM_IDX(g,GEOM_INVALID_IDX);
        GeomList.setSize( geomSize-1 );
    }
    g->tome_ex = 0;
    dUASSERT((g->next_ex = 0, true), "Needed for an assertion check only");

    dxSpace::remove(g);
}
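
// Both branches above use the swap-with-last idiom: the final element is moved
// into the vacated slot and its stored index is patched, so removal is O(1)
// without preserving order - which is fine, since neither list's order carries
// any meaning (dirty() below relies on the same trick).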
void dxSAPSpace::dirty( dxGeom* g )
{
    dAASSERT(g);
    dUASSERT(g->parent_space == this, "object is not in this space");

    // check if already dirtied
    int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
    if( dirtyIdx != GEOM_INVALID_IDX )
        return;

    int geomIdx = GEOM_GET_GEOM_IDX(g);
    dUASSERT( geomIdx>=0 && geomIdx<GeomList.size(), "geom indices messed up" );

    // remove from geom list, place last in place of this
    int geomSize = GeomList.size();
    if (geomIdx != geomSize-1) {
        dxGeom* lastG = GeomList[geomSize-1];
        GeomList[geomIdx] = lastG;
        GEOM_SET_GEOM_IDX(lastG,geomIdx);
    }
    GeomList.setSize( geomSize-1 );

    // add to dirty list
    GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
    GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
    DirtyList.push( g );
}
void dxSAPSpace::computeAABB()
{
    // TODO?
}
void dxSAPSpace::cleanGeoms()
{
    int dirtySize = DirtyList.size();
    if( !dirtySize )
        return;

    // compute the AABBs of all dirty geoms, clear the dirty flags,
    // remove from dirty list, place into geom list
    lock_count++;

    int geomSize = GeomList.size();
    GeomList.setSize( geomSize + dirtySize ); // ensure space in geom list

    for( int i = 0; i < dirtySize; ++i ) {
        dxGeom* g = DirtyList[i];
        if( IS_SPACE(g) ) {
            ((dxSpace*)g)->cleanGeoms();
        }

        g->recomputeAABB();
        dIASSERT((g->gflags & GEOM_AABB_BAD) == 0);

        g->gflags &= ~GEOM_DIRTY;

        // remove from dirty list, add to geom list
        GEOM_SET_DIRTY_IDX( g, GEOM_INVALID_IDX );
        GEOM_SET_GEOM_IDX( g, geomSize + i );
        GeomList[geomSize+i] = g;
    }
    // clear dirty list
    DirtyList.setSize( 0 );

    lock_count--;
}
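
// Raising lock_count while the lists are restructured makes CHECK_NOT_LOCKED()
// in add()/remove() fire if anything tries to mutate this space before the
// loop above has finished patching indices.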
void dxSAPSpace::collide( void *data, dNearCallback *callback )
{
    dAASSERT (callback);

    lock_count++;

    cleanGeoms();

    // by now all geoms are in GeomList, and DirtyList must be empty
    int geom_count = GeomList.size();
    dUASSERT( geom_count == count, "geom counts messed up" );

    // separate all ENABLED geoms into infinite AABBs and normal AABBs
    TmpGeomList.setSize(0);
    TmpInfGeomList.setSize(0);
    int axis0max = ax0idx + 1;
    for( int i = 0; i < geom_count; ++i ) {
        dxGeom* g = GeomList[i];
        if( !GEOM_ENABLED(g) ) // skip disabled ones
            continue;
        const dReal& amax = g->aabb[axis0max];
        if( amax == dInfinity ) // HACK? probably not...
            TmpInfGeomList.push( g );
        else
            TmpGeomList.push( g );
    }

    // do SAP on normal AABBs
    dArray< Pair > overlapBoxes;
    int tmp_geom_count = TmpGeomList.size();
    if ( tmp_geom_count > 0 )
    {
        // Generate a list of overlapping boxes
        BoxPruning( tmp_geom_count, (const dxGeom**)TmpGeomList.data(), overlapBoxes );
    }

    // collide overlapping
    int overlapCount = overlapBoxes.size();
    for( int j = 0; j < overlapCount; ++j )
    {
        const Pair& pair = overlapBoxes[ j ];
        dxGeom* g1 = TmpGeomList[ pair.id0 ];
        dxGeom* g2 = TmpGeomList[ pair.id1 ];
        collideGeomsNoAABBs( g1, g2, data, callback );
    }

    int infSize = TmpInfGeomList.size();
    int normSize = TmpGeomList.size();
    int m, n;

    for ( m = 0; m < infSize; ++m )
    {
        dxGeom* g1 = TmpInfGeomList[ m ];

        // collide infinite ones
        for( n = m+1; n < infSize; ++n ) {
            dxGeom* g2 = TmpInfGeomList[n];
            collideGeomsNoAABBs( g1, g2, data, callback );
        }

        // collide infinite ones with normal ones
        for( n = 0; n < normSize; ++n ) {
            dxGeom* g2 = TmpGeomList[n];
            collideGeomsNoAABBs( g1, g2, data, callback );
        }
    }

    lock_count--;
}
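
// Cost summary for one collide() pass with k "normal" geoms and m geoms whose
// AABBs are infinite (e.g. planes): one linear-time radix sort over k values
// plus the SAP-pruned overlap pairs, then m*(m-1)/2 infinite-vs-infinite tests
// and m*k infinite-vs-normal tests - so keeping m small matters.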
void dxSAPSpace::collide2( void *data, dxGeom *geom, dNearCallback *callback )
{
    dAASSERT (geom && callback);

    // TODO: This is just a simple linear scan that ignores the SAP data.
    lock_count++;

    cleanGeoms();
    geom->recomputeAABB();

    // intersect bounding boxes
    int geom_count = GeomList.size();
    for ( int i = 0; i < geom_count; ++i ) {
        dxGeom* g = GeomList[i];
        if ( GEOM_ENABLED(g) )
            collideAABBs (g,geom,data,callback);
    }

    lock_count--;
}
void dxSAPSpace::BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs )
{
    // Size the poslist scratch pad
    poslist.setSize( count );

    // 1) Build the main list using the primary axis
    //  NOTE: uses floats instead of dReals because that's what the radix sort wants
    for( int i = 0; i < count; ++i )
        poslist[ i ] = (float)geoms[i]->aabb[ ax0idx ];

    // 2) Sort the list
    const uint32* Sorted = sortContext.RadixSort( poslist.data(), count );

    // 3) Prune the list
    const uint32* const LastSorted = Sorted + count;
    const uint32* RunningAddress = Sorted;

    bool bExitLoop;
    Pair IndexPair;
    while ( Sorted < LastSorted )
    {
        IndexPair.id0 = *Sorted++;

        // empty body; this loop just advances RunningAddress
        for (bExitLoop = false; poslist[*RunningAddress++] < poslist[IndexPair.id0]; )
        {
            if (RunningAddress == LastSorted)
            {
                bExitLoop = true;
                break;
            }
        }

        if ( bExitLoop || RunningAddress == LastSorted) // Not a bug!!!
        {
            break;
        }

        // To avoid wrong decisions caused by rounding errors, cast the AABB
        // element to float the same way it was cast when poslist was filled.
        const float idx0ax0max = (float)geoms[IndexPair.id0]->aabb[ax0idx+1];
        const dReal idx0ax1max = geoms[IndexPair.id0]->aabb[ax1idx+1];
        const dReal idx0ax2max = geoms[IndexPair.id0]->aabb[ax2idx+1];

        for (const uint32* RunningAddress2 = RunningAddress; poslist[ IndexPair.id1 = *RunningAddress2++ ] <= idx0ax0max; )
        {
            const dReal* aabb0 = geoms[ IndexPair.id0 ]->aabb;
            const dReal* aabb1 = geoms[ IndexPair.id1 ]->aabb;

            // Intersection?
            if ( idx0ax1max >= aabb1[ax1idx] && aabb1[ax1idx+1] >= aabb0[ax1idx]
              && idx0ax2max >= aabb1[ax2idx] && aabb1[ax2idx+1] >= aabb0[ax2idx] )
            {
                pairs.push( IndexPair );
            }

            if (RunningAddress2 == LastSorted)
            {
                break;
            }
        }
    } // while ( Sorted < LastSorted )
}
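
// The sweep above sorts along the primary axis only: for each box, candidates
// are the boxes whose axis-0 minimum lies between its own minimum and its
// axis-0 maximum, and the other two axes are tested explicitly per candidate.
// For example, boxes with primary-axis ranges [0,2], [1,5], [4,6] yield the
// sweep candidates (0,1) and (1,2); (0,2) is rejected without ever touching
// the secondary axes because 4 > 2.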
//==============================================================================

//------------------------------------------------------------------------------
// Radix Sort
//------------------------------------------------------------------------------
#define CHECK_PASS_VALIDITY(pass) \
    /* Shortcut to current counters */ \
    const uint32* CurCount = &mHistogram[pass<<8]; \
 \
    /* Reset flag. The sorting pass is supposed to be performed. (default) */ \
    bool PerformPass = true; \
 \
    /* Check pass validity */ \
 \
    /* If all values have the same byte, sorting is useless. */ \
    /* It may happen when sorting bytes or words instead of dwords. */ \
    /* This routine actually sorts words faster than dwords, and bytes */ \
    /* faster than words. Standard running time (O(4*n)) is reduced to O(2*n) */ \
    /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
 \
    /* Get first byte */ \
    uint8 UniqueVal = *(((const uint8*)input)+pass); \
 \
    /* Check that byte's counter */ \
    if(CurCount[UniqueVal]==nb) PerformPass=false;
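
// CHECK_PASS_VALIDITY is deliberately a macro rather than a function: it
// declares CurCount, PerformPass and UniqueVal directly in the invoking
// scope, and the pass loop below reads all three after expanding it.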
// WARNING: ONLY SORTS IEEE FLOATING-POINT VALUES
const uint32* RaixSortContext::RadixSort( const float* input2, uint32 nb )
{
    union _type_cast_union
    {
        _type_cast_union(const float *floats): asFloats(floats) {}
        _type_cast_union(const uint32 *uints32): asUInts32(uints32) {}

        const float *asFloats;
        const uint32 *asUInts32;
        const uint8 *asUInts8;
    };

    const uint32* input = _type_cast_union(input2).asUInts32;
    // Resize lists if needed
    ReallocateRanksIfNecessary(nb);

    // Allocate histograms & offsets on the stack
    uint32 mHistogram[256*4];
    uint32* mLink[256];

    // Create histograms (counters). Counters for all passes are created in one run.
    // Pros: read input buffer once instead of four times
    // Cons: mHistogram is 4Kb instead of 1Kb
    // Floating-point values are always supposed to be signed values, so there's only one code path there.
    // Please note the floating-point comparison needed for temporal coherence! Although the resulting asm code
    // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
    // generation Pentiums.... We can't make the comparison on integer representations because, as Chris said,
    // it just wouldn't work with mixed positive/negative values....
    { // scope block: keeps p, pe and h0..h3 local so h3 can be re-declared below
        /* Clear counters/histograms */
        memset(mHistogram, 0, 256*4*sizeof(uint32));

        /* Prepare to count */
        const uint8* p = _type_cast_union(input).asUInts8;
        const uint8* pe = &p[nb*4];
        uint32* h0 = &mHistogram[0];   /* Histogram for first pass (LSB) */
        uint32* h1 = &mHistogram[256]; /* Histogram for second pass */
        uint32* h2 = &mHistogram[512]; /* Histogram for third pass */
        uint32* h3 = &mHistogram[768]; /* Histogram for last pass (MSB) */

        bool AlreadySorted = true; /* Optimism... */
        if (!AreRanksValid())
        {
            /* Prepare for temporal coherence */
            const float* Running = input2;
            float PrevVal = *Running;

            while(p!=pe)
            {
                /* Read the input buffer in previous sorted order */
                float Val = *Running++;
                /* Check whether already sorted or not */
                if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
                /* Update for next iteration */
                PrevVal = Val;

                /* Create histograms */
                h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
            }

            /* If all input values are already sorted, we just have to return and leave the */
            /* previous list unchanged. That way the routine may take advantage of temporal */
            /* coherence, for example when used to sort transparent faces. */
            if(AlreadySorted)
            {
                uint32* const Ranks1 = GetRanks1();
                for(uint32 i=0;i<nb;i++) Ranks1[i] = i;
                return Ranks1;
            }
        }
        else
        {
            /* Prepare for temporal coherence */
            uint32* const Ranks1 = GetRanks1();

            uint32* Indices = Ranks1;
            float PrevVal = input2[*Indices];

            while(p!=pe)
            {
                /* Read the input buffer in previous sorted order */
                float Val = input2[*Indices++];
                /* Check whether already sorted or not */
                if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
                /* Update for next iteration */
                PrevVal = Val;

                /* Create histograms */
                h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
            }

            /* If all input values are already sorted, we just have to return and leave the */
            /* previous list unchanged. That way the routine may take advantage of temporal */
            /* coherence, for example when used to sort transparent faces. */
            if(AlreadySorted) { return Ranks1; }
        }

        /* Else there has been an early out and we must finish computing the histograms */
        while(p!=pe)
        {
            /* Create histograms without the previous overhead */
            h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
        }
    } // end of histogram scope block
    // Compute the number of negative values involved, if needed
    uint32 NbNegativeValues = 0;

    // An efficient way to compute the number of negative values we'll have to deal with is simply to sum the 128
    // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
    // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
    uint32* h3 = &mHistogram[768];
    for(uint32 i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
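
    // Why the last (MSB) pass gets special treatment: IEEE-754 floats are
    // sign-magnitude, so the bit patterns of positive floats order like
    // unsigned integers, while negative floats order in reverse (a larger
    // magnitude means a smaller value). The biased offsets below place all
    // negatives before all positives and fill the negative buckets backwards,
    // which is the *(--mLink[Radix]) case in the code.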
    // Radix sort, j is the pass number (0=LSB, 3=MSB)
    for(uint32 j=0;j<4;j++)
    {
        // Should we care about negative values?
        if(j!=3)
        {
            // Here we deal with positive values only
            CHECK_PASS_VALIDITY(j);

            if(PerformPass)
            {
                uint32* const Ranks2 = GetRanks2();
                // Create offsets
                mLink[0] = Ranks2;
                for(uint32 i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];

                // Perform Radix Sort
                const uint8* InputBytes = _type_cast_union(input).asUInts8;
                InputBytes += j;
                if (!AreRanksValid())
                {
                    for(uint32 i=0;i<nb;i++)
                    {
                        *mLink[InputBytes[i<<2]]++ = i;
                    }

                    ValidateRanks();
                }
                else
                {
                    uint32* const Ranks1 = GetRanks1();

                    uint32* Indices = Ranks1;
                    uint32* const IndicesEnd = Ranks1 + nb;
                    while(Indices!=IndicesEnd)
                    {
                        uint32 id = *Indices++;
                        *mLink[InputBytes[id<<2]]++ = id;
                    }
                }

                // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
                SwapRanks();
            }
        }
        else
        {
            // This is a special case to correctly handle negative values
            CHECK_PASS_VALIDITY(j);

            if(PerformPass)
            {
                uint32* const Ranks2 = GetRanks2();

                // Create biased offsets, in order for negative numbers to be sorted as well
                mLink[0] = Ranks2 + NbNegativeValues; // First positive number takes place after the negative ones
                for(uint32 i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers

                // We must reverse the sorting order for negative numbers!
                mLink[255] = Ranks2;
                for(uint32 i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
                for(uint32 i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values

                // Perform Radix Sort
                if (!AreRanksValid())
                {
                    for(uint32 i=0;i<nb;i++)
                    {
                        uint32 Radix = input[i]>>24; // Radix byte, same as above. AND is useless here (uint32).
                        // ### cmp to be killed. Not good. Later.
                        if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above
                        else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order
                    }

                    ValidateRanks();
                }
                else
                {
                    uint32* const Ranks1 = GetRanks1();

                    for(uint32 i=0;i<nb;i++)
                    {
                        uint32 Radix = input[Ranks1[i]]>>24; // Radix byte, same as above. AND is useless here (uint32).
                        // ### cmp to be killed. Not good. Later.
                        if(Radix<128) *mLink[Radix]++ = Ranks1[i]; // Number is positive, same as above
                        else *(--mLink[Radix]) = Ranks1[i]; // Number is negative, flip the sorting order
                    }
                }

                // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
                SwapRanks();
            }
            else
            {
                // The pass is useless, yet we still have to reverse the order of the current list if all values are negative.
                if(UniqueVal>=128)
                {
                    if (!AreRanksValid())
                    {
                        uint32* const Ranks2 = GetRanks2();
                        // ###Possible?
                        for(uint32 i=0;i<nb;i++)
                        {
                            Ranks2[i] = nb-i-1;
                        }

                        ValidateRanks();
                    }
                    else
                    {
                        uint32* const Ranks1 = GetRanks1();
                        uint32* const Ranks2 = GetRanks2();
                        for(uint32 i=0;i<nb;i++) Ranks2[i] = Ranks1[nb-i-1];
                    }

                    // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
                    SwapRanks();
                }
            }
        }
    }

    // Return indices
    uint32* const Ranks1 = GetRanks1();
    return Ranks1;
}