1 /*************************************************************************
3 * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
21 *************************************************************************/
/*
 * Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
 * Additional work by David Walters
 *
 * OPCODE - Optimized Collision Detection
 * Copyright (C) 2001 Pierre Terdiman
 * Homepage: http://www.codercorner.com/Opcode.htm
 *
 * This version does complete radix sort, not "classical" SAP. So, we
 * have no temporal coherence, but are able to handle any movement
 * velocities equally well.
 */
36 #include <ode/common.h>
37 #include <ode/matrix.h>
38 #include <ode/collision_space.h>
39 #include <ode/collision.h>
42 #include "collision_kernel.h"
43 #include "collision_space_internal.h"
45 // Reference counting helper for radix sort global data.
46 //static void RadixSortRef();
47 //static void RadixSortDeref();
50 // --------------------------------------------------------------------------
52 // --------------------------------------------------------------------------
// Radix-sort context adapted from Pierre Terdiman's OPCODE radix sorter.
// Owns a pair of index ("rank") buffers used alternately by the sort passes.
// NOTE(review): "RaixSortContext" (sic) is a typo for "RadixSortContext",
// kept because the name is part of the interface used below.
// NOTE(review): the struct's braces, access specifiers, the FreeRanks()
// declaration and the mRanksValid member declaration are not visible in
// this chunk — presumably dropped by extraction; confirm against full file.
struct RaixSortContext
    // Starts empty; ranks are invalid until the first sort populates them.
    RaixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL), mPrimaryRanks(NULL) {}
    ~RaixSortContext() { FreeRanks(); }

    // OPCODE's Radix Sorting, returns a list of indices in sorted order
    const uint32* RadixSort( const float* input2, uint32 nb );

    // Allocates a fresh double-sized rank buffer for nNewSize elements.
    void AllocateRanks(size_t nNewSize);
    // Grows (never shrinks) the rank buffers when nNewSize exceeds capacity.
    void ReallocateRanksIfNecessary(size_t nNewSize);

    void SetCurrentSize(size_t nValue) { mCurrentSize = nValue; }
    size_t GetCurrentSize() const { return mCurrentSize; }

    void SetCurrentUtilization(size_t nValue) { mCurrentUtilization = nValue; }
    size_t GetCurrentUtilization() const { return mCurrentUtilization; }

    uint32 *GetRanks1() const { return mPrimaryRanks; }
    // The two rank lists live back-to-back inside mRanksBuffer; this returns
    // whichever half mPrimaryRanks does NOT currently point at.
    uint32 *GetRanks2() const { return mRanksBuffer + ((mRanksBuffer + mCurrentSize) - mPrimaryRanks); }
    // After a sort pass the freshly written list becomes the primary one.
    void SwapRanks() { mPrimaryRanks = GetRanks2(); }

    // Temporal-coherence flag: when true, the primary ranks hold the
    // previous sorted order and RadixSort may early-out on sorted input.
    bool AreRanksValid() const { return mRanksValid; }
    void InvalidateRanks() { mRanksValid = false; }
    void ValidateRanks() { mRanksValid = true; }

    size_t mCurrentSize; //!< Current size of the indices list
    size_t mCurrentUtilization; //!< Current utilization of the indices list

    uint32* mRanksBuffer; //!< Two lists allocated sequentially in a single block
    uint32* mPrimaryRanks;
// Allocates the double rank buffer (2 * nNewSize entries — two index lists
// stored back-to-back) and points the primary list at the first half.
// Precondition: no buffer is currently held (current size must be 0).
void RaixSortContext::AllocateRanks(size_t nNewSize)
    dIASSERT(GetCurrentSize() == 0);

    // One allocation backs both rank lists; GetRanks2() derives the other half.
    mRanksBuffer = new uint32[2 * nNewSize];
    mPrimaryRanks = mRanksBuffer;

    SetCurrentSize(nNewSize);
// Releases the rank storage allocated by AllocateRanks().
// NOTE(review): a SetCurrentSize(0) statement is expected here to satisfy
// the dIASSERT in AllocateRanks(); it is not visible in this chunk —
// presumably dropped by extraction; confirm against the full file.
void RaixSortContext::FreeRanks()
    delete[] mRanksBuffer;
// Ensures the rank buffers can hold nNewSize entries. Storage is only
// reallocated when the request exceeds the current capacity; otherwise only
// the utilization counter is updated.
// NOTE(review): the FreeRanks() call implied by the "Free previously used
// ram" comment (and any InvalidateRanks() bookkeeping) is not visible in
// this chunk — presumably dropped by extraction; confirm against full file.
void RaixSortContext::ReallocateRanksIfNecessary(size_t nNewSize)
    size_t nCurUtilization = GetCurrentUtilization();

    if (nNewSize != nCurUtilization)
        size_t nCurSize = GetCurrentSize();

        if ( nNewSize > nCurSize )
            // Free previously used ram
            // Get some fresh one
            AllocateRanks(nNewSize);

        SetCurrentUtilization(nNewSize);
131 // --------------------------------------------------------------------------
133 // --------------------------------------------------------------------------
// Sweep-and-prune collision space: finds overlapping pairs by radix-sorting
// box minima along a primary axis and pruning on the two remaining axes.
// NOTE(review): the struct braces, the Pair struct header and its default
// constructor, the destructor declaration, and the ax0idx/ax1idx/ax2idx
// member declarations are not visible in this chunk — presumably dropped by
// extraction; confirm against the full file.
struct dxSAPSpace : public dxSpace
    // Constructor / Destructor
    dxSAPSpace( dSpaceID _space, int sortaxis );

    // dxSpace virtual interface
    virtual dxGeom* getGeom(int i);
    virtual void add(dxGeom* g);
    virtual void remove(dxGeom* g);
    virtual void dirty(dxGeom* g);
    virtual void computeAABB();
    virtual void cleanGeoms();
    virtual void collide( void *data, dNearCallback *callback );
    virtual void collide2( void *data, dxGeom *geom, dNearCallback *callback );

    //--------------------------------------------------------------------------
    // Local Declarations
    //--------------------------------------------------------------------------

    //! A generic couple structure (one overlapping pair of box indices)
    uint32 id0; //!< First index of the pair
    uint32 id1; //!< Second index of the pair

    // Default and Value Constructor
    Pair( uint32 i0, uint32 i1 ) : id0( i0 ), id1( i1 ) {}

    //--------------------------------------------------------------------------

    //--------------------------------------------------------------------------

    /**
     * Complete box pruning.
     * Returns a list of overlapping pairs of boxes, each box of the pair
     * belongs to the same set.
     *
     * @param count [in] number of boxes.
     * @param geoms [in] geoms of boxes.
     * @param pairs [out] array of overlapping pairs.
     */
    void BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs );

    //--------------------------------------------------------------------------
    // Implementation Data
    //--------------------------------------------------------------------------

    // We have two lists (arrays of pointers) to dirty and clean
    // geoms. Each geom knows it's index into the corresponding list
    // (see macros above).
    dArray<dxGeom*> DirtyList; // dirty geoms
    dArray<dxGeom*> GeomList; // clean geoms

    // For SAP, we ultimately separate "normal" geoms and the ones that have
    // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
    // infinite geoms anyway).
    dArray<dxGeom*> TmpGeomList; // temporary for normal geoms
    dArray<dxGeom*> TmpInfGeomList; // temporary for geoms with infinite AABBs

    // Our sorting axes. (X,Z,Y is often best). Stored *2 for minor speedup
    // Axis indices into geom's aabb are: min=idx, max=idx+1

    // pruning position array scratch pad
    // NOTE: this is float not dReal because of the OPCODE radix sorter
    dArray< float > poslist;
    RaixSortContext sortContext;
213 dSpaceID
dSweepAndPruneSpaceCreate( dxSpace
* space
, int axisorder
) {
214 return new dxSAPSpace( space
, axisorder
);
218 //==============================================================================
220 #define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)
222 // HACK: We abuse 'next' and 'tome' members of dxGeom to store indice into dirty/geom lists.
223 #define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next = (dxGeom*)(size_t)(idx); }
224 #define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome = (dxGeom**)(size_t)(idx); }
225 #define GEOM_GET_DIRTY_IDX(g) ((int)(size_t)(g)->next)
226 #define GEOM_GET_GEOM_IDX(g) ((int)(size_t)(g)->tome)
227 #define GEOM_INVALID_IDX (-1)
/*
 * A bit of repetitive work - similar to collideAABBs, but doesn't check
 * if AABBs intersect (because SAP returns pairs with overlapping AABBs).
 * NOTE(review): the body of the category/collide bitfield check (its early
 * return and closing brace) is not visible in this chunk — presumably
 * dropped by extraction; confirm against the full file.
 */
static void collideGeomsNoAABBs( dxGeom *g1, dxGeom *g2, void *data, dNearCallback *callback )
    // SAP only feeds us cleaned geoms, so cached AABBs must be valid here.
    dIASSERT( (g1->gflags & GEOM_AABB_BAD)==0 );
    dIASSERT( (g2->gflags & GEOM_AABB_BAD)==0 );

    // no contacts if both geoms on the same body, and the body is not 0
    if (g1->body == g2->body && g1->body) return;

    // test if the category and collide bitfields match
    if ( ((g1->category_bits & g2->collide_bits) ||
          (g2->category_bits & g1->collide_bits)) == 0) {

    dReal *bounds1 = g1->aabb;
    dReal *bounds2 = g2->aabb;

    // check if either object is able to prove that it doesn't intersect the
    // other's AABB
    if (g1->AABBTest (g2,bounds2) == 0) return;
    if (g2->AABBTest (g1,bounds1) == 0) return;

    // the objects might actually intersect - call the space callback function
    callback (data,g1,g2);
// Constructs the space and decodes the packed axis order into aabb indices.
// NOTE(review): only the aabb minima slots [0]/[2]/[4] are initialized in
// this chunk; the matching maxima assignments ([1]/[3]/[5]) appear to have
// been dropped by extraction — confirm against the full file.
dxSAPSpace::dxSAPSpace( dSpaceID _space, int axisorder ) : dxSpace( _space )
    type = dSweepAndPruneSpaceClass;

    // Init AABB to infinity
    aabb[0] = -dInfinity;
    aabb[2] = -dInfinity;
    aabb[4] = -dInfinity;

    // Two bits per axis, pre-doubled (<<1) so each value indexes the aabb[]
    // "min" slot directly; the matching "max" slot is idx+1.
    ax0idx = ( ( axisorder ) & 3 ) << 1;
    ax1idx = ( ( axisorder >> 2 ) & 3 ) << 1;
    ax2idx = ( ( axisorder >> 4 ) & 3 ) << 1;
// Empties the space on destruction. Each dGeomDestroy()/remove() pops the
// geom at the front of its list (via this space's remove()), so the loops
// iterate until both lists are empty.
// NOTE(review): lines between the destroy loops and the remove loops are
// missing from this chunk — presumably an if/else selecting destroy-vs-
// detach based on the space's cleanup mode; confirm against the full file.
dxSAPSpace::~dxSAPSpace()
    CHECK_NOT_LOCKED(this);

    // note that destroying each geom will call remove()
    for ( ; DirtyList.size(); dGeomDestroy( DirtyList[ 0 ] ) ) {}
    for ( ; GeomList.size(); dGeomDestroy( GeomList[ 0 ] ) ) {}

    for ( ; DirtyList.size(); remove( DirtyList[ 0 ] ) ) {}
    for ( ; GeomList.size(); remove( GeomList[ 0 ] ) ) {}
// Returns the i'th geom in the space, with dirty geoms logically ordered
// before clean ones (hence the i - dirtySize offset into GeomList).
// NOTE(review): the branch returning DirtyList[i] when i < dirtySize is not
// visible in this chunk — presumably dropped by extraction; as shown only
// the clean-list lookup remains. Confirm against the full file.
dxGeom* dxSAPSpace::getGeom( int i )
    dUASSERT( i >= 0 && i < count, "index out of range" );

    int dirtySize = DirtyList.size();

    return GeomList[i-dirtySize];
// Adds a geom to this space. New geoms always start on the dirty list with
// a stale AABB; cleanGeoms() migrates them into GeomList later.
// NOTE(review): the DirtyList.push(g) call, the count increment and the
// usual dGeomMoved-style notification are not visible in this chunk —
// presumably dropped by extraction; confirm against the full file.
void dxSAPSpace::add( dxGeom* g )
    CHECK_NOT_LOCKED (this);

    dUASSERT(g->parent_space == 0 && g->next == 0, "geom is already in a space");

    // mark AABB as invalid until cleanGeoms() recomputes it
    g->gflags |= GEOM_DIRTY | GEOM_AABB_BAD;

    // cache the geom's slot: appended to the dirty list, absent from GeomList
    GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
    GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );

    g->parent_space = this;
// Removes a geom from whichever list (dirty or clean) currently holds it.
// The hole is filled with the list's last element, making removal O(1).
// NOTE(review): the dUASSERT( opener for the two-list sanity condition and
// the "} else {" between the dirty-list and geom-list halves are not
// visible in this chunk — presumably dropped by extraction; confirm.
void dxSAPSpace::remove( dxGeom* g )
    CHECK_NOT_LOCKED(this);

    dUASSERT(g->parent_space == this,"object is not in this space");

    // each geom caches its index into exactly one of the two lists
    int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
    int geomIdx = GEOM_GET_GEOM_IDX(g);
    // must be in one list, not in both
    (dirtyIdx==GEOM_INVALID_IDX && geomIdx>=0 && geomIdx<GeomList.size()) ||
    (geomIdx==GEOM_INVALID_IDX && dirtyIdx>=0 && dirtyIdx<DirtyList.size()),
    "geom indices messed up" );
    if( dirtyIdx != GEOM_INVALID_IDX ) {
        // we're in dirty list, remove
        // (fill the freed slot with the last element, fix its cached index)
        int dirtySize = DirtyList.size();
        dxGeom* lastG = DirtyList[dirtySize-1];
        DirtyList[dirtyIdx] = lastG;
        GEOM_SET_DIRTY_IDX(lastG,dirtyIdx);
        GEOM_SET_DIRTY_IDX(g,GEOM_INVALID_IDX);
        DirtyList.setSize( dirtySize-1 );
        // we're in geom list, remove
        int geomSize = GeomList.size();
        dxGeom* lastG = GeomList[geomSize-1];
        GeomList[geomIdx] = lastG;
        GEOM_SET_GEOM_IDX(lastG,geomIdx);
        GEOM_SET_GEOM_IDX(g,GEOM_INVALID_IDX);
        GeomList.setSize( geomSize-1 );

    // the bounding box of this space (and that of all the parents) may have
    // changed as a consequence of the removal.
// Moves a geom from the clean list back onto the dirty list, so that
// cleanGeoms() recomputes its AABB. No-op when the geom is already dirty.
// NOTE(review): the early return inside the "already dirtied" check and the
// DirtyList.push(g) call are not visible in this chunk — presumably dropped
// by extraction; confirm against the full file.
void dxSAPSpace::dirty( dxGeom* g )
    dUASSERT(g->parent_space == this,"object is not in this space");

    // check if already dirtied
    int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
    if( dirtyIdx != GEOM_INVALID_IDX )

    int geomIdx = GEOM_GET_GEOM_IDX(g);
    dUASSERT( geomIdx>=0 && geomIdx<GeomList.size(), "geom indices messed up" );

    // remove from geom list, place last in place of this
    int geomSize = GeomList.size();
    dxGeom* lastG = GeomList[geomSize-1];
    GeomList[geomIdx] = lastG;
    GEOM_SET_GEOM_IDX(lastG,geomIdx);
    GeomList.setSize( geomSize-1 );

    // switch the cached indices: no GeomList slot, appended to DirtyList
    GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
    GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
// Recomputes this space's own AABB.
// NOTE(review): the function body is not visible in this chunk (original
// lines dropped by extraction) — confirm against the full file.
void dxSAPSpace::computeAABB()
// Flushes the dirty list: clears each dirty geom's flags and appends it to
// GeomList, leaving DirtyList empty afterwards.
// NOTE(review): the empty-list early out, the per-geom recomputeAABB() call
// and the IS_SPACE guard around the nested cleanGeoms() cast are not
// visible in this chunk — presumably dropped by extraction; confirm.
void dxSAPSpace::cleanGeoms()
    int dirtySize = DirtyList.size();

    // compute the AABBs of all dirty geoms, clear the dirty flags,
    // remove from dirty list, place into geom list
    int geomSize = GeomList.size();
    GeomList.setSize( geomSize + dirtySize ); // ensure space in geom list

    for( int i = 0; i < dirtySize; ++i ) {
        dxGeom* g = DirtyList[i];

        // contained spaces must clean their own contents first
        ((dxSpace*)g)->cleanGeoms();

        g->gflags &= (~(GEOM_DIRTY|GEOM_AABB_BAD));

        // remove from dirty list, add to geom list
        GEOM_SET_DIRTY_IDX( g, GEOM_INVALID_IDX );
        GEOM_SET_GEOM_IDX( g, geomSize + i );
        GeomList[geomSize+i] = g;

    DirtyList.setSize( 0 );
// Generates all colliding pairs within this space. Strategy: split enabled
// geoms into finite-AABB and infinite-AABB sets, run box pruning (SAP) on
// the finite set, then brute-force the infinite geoms against each other
// and against the finite ones.
// NOTE(review): the lock_count bookkeeping, the cleanGeoms() call, the
// declarations of loop variables m and n, the disabled-geom "continue" and
// the else between the push() calls are not visible in this chunk —
// presumably dropped by extraction; confirm against the full file.
void dxSAPSpace::collide( void *data, dNearCallback *callback )
    // by now all geoms are in GeomList, and DirtyList must be empty
    int geom_count = GeomList.size();
    dUASSERT( geom_count == count, "geom counts messed up" );

    // separate all ENABLED geoms into infinite AABBs and normal AABBs
    TmpGeomList.setSize(0);
    TmpInfGeomList.setSize(0);
    int axis0max = ax0idx + 1;
    for( int i = 0; i < geom_count; ++i ) {
        dxGeom* g = GeomList[i];
        if( !GEOM_ENABLED(g) ) // skip disabled ones
        const dReal& amax = g->aabb[axis0max];
        if( amax == dInfinity ) // HACK? probably not...
            TmpInfGeomList.push( g );
            TmpGeomList.push( g );

    // do SAP on normal AABBs
    dArray< Pair > overlapBoxes;
    int tmp_geom_count = TmpGeomList.size();
    if ( tmp_geom_count > 0 )
        // Size the poslist (+1 for infinity end cap)
        poslist.setSize( tmp_geom_count + 1 );

        // Generate a list of overlapping boxes
        BoxPruning( tmp_geom_count, (const dxGeom**)TmpGeomList.data(), overlapBoxes );

    // collide overlapping
    int overlapCount = overlapBoxes.size();
    for( int j = 0; j < overlapCount; ++j )
        const Pair& pair = overlapBoxes[ j ];
        dxGeom* g1 = TmpGeomList[ pair.id0 ];
        dxGeom* g2 = TmpGeomList[ pair.id1 ];
        collideGeomsNoAABBs( g1, g2, data, callback );

    int infSize = TmpInfGeomList.size();
    int normSize = TmpGeomList.size();

    for ( m = 0; m < infSize; ++m )
        dxGeom* g1 = TmpInfGeomList[ m ];

        // collide infinite ones
        for( n = m+1; n < infSize; ++n ) {
            dxGeom* g2 = TmpInfGeomList[n];
            collideGeomsNoAABBs( g1, g2, data, callback );

        // collide infinite ones with normal ones
        for( n = 0; n < normSize; ++n ) {
            dxGeom* g2 = TmpGeomList[n];
            collideGeomsNoAABBs( g1, g2, data, callback );
// Collides a single external geom against every enabled geom in this space
// via pairwise AABB tests (no SAP pass for the one-vs-many case).
// NOTE(review): lock bookkeeping and a cleanGeoms() call expected before
// the loop are not visible in this chunk — presumably dropped; confirm.
void dxSAPSpace::collide2( void *data, dxGeom *geom, dNearCallback *callback )
    dAASSERT (geom && callback);

    // TODO: This is just a simple N^2 implementation

    // ensure the external geom's cached AABB is current before testing
    geom->recomputeAABB();

    // intersect bounding boxes
    int geom_count = GeomList.size();
    for ( int i = 0; i < geom_count; ++i ) {
        dxGeom* g = GeomList[i];
        if ( GEOM_ENABLED(g) )
            collideAABBs (g,geom,data,callback);
// Complete box pruning (OPCODE-style sweep and prune): radix-sorts box
// minima along the primary axis, then for each box scans forward while
// candidate minima are <= its primary-axis max, testing the two secondary
// axes for overlap before emitting the pair into 'pairs'.
// NOTE(review): the local declaration of IndexPair and several braces are
// not visible in this chunk — presumably dropped by extraction; confirm.
// NOTE(review): the fill loop reads TmpGeomList rather than the 'geoms'
// parameter; at the only call site visible here (collide()) they are the
// same array, but the asymmetry is worth confirming against the full file.
void dxSAPSpace::BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs )
    // 1) Build main list using the primary axis
    // NOTE: uses floats instead of dReals because that's what radix sort wants
    for( int i = 0; i < count; ++i )
        poslist[ i ] = (float)TmpGeomList[i]->aabb[ ax0idx ];
    // infinity end cap guarantees the RunningAddress advance loop terminates
    poslist[ count++ ] = FLT_MAX;

    // 2) Sort the minima; Sorted lists indices in ascending poslist order
    const uint32* Sorted = sortContext.RadixSort( poslist.data(), count );

    // 3) Prune the sorted list
    const uint32* const LastSorted = Sorted + count;
    const uint32* RunningAddress = Sorted;

    while ( RunningAddress < LastSorted && Sorted < LastSorted )
        IndexPair.id0 = *Sorted++;

        // empty, this loop just advances RunningAddress
        while ( poslist[*RunningAddress++] < poslist[IndexPair.id0] ) {}

        if ( RunningAddress < LastSorted )
            const uint32* RunningAddress2 = RunningAddress;

            // cache this box's maxima on all three sorting axes
            const dReal idx0ax0max = geoms[IndexPair.id0]->aabb[ax0idx+1];
            const dReal idx0ax1max = geoms[IndexPair.id0]->aabb[ax1idx+1];
            const dReal idx0ax2max = geoms[IndexPair.id0]->aabb[ax2idx+1];

            // scan forward while candidates may still overlap on the primary axis
            while ( poslist[ IndexPair.id1 = *RunningAddress2++ ] <= idx0ax0max )
                const dReal* aabb0 = geoms[ IndexPair.id0 ]->aabb;
                const dReal* aabb1 = geoms[ IndexPair.id1 ]->aabb;

                // interval-overlap tests on the two remaining axes
                if ( idx0ax1max >= aabb1[ax1idx] && aabb1[ax1idx+1] >= aabb0[ax1idx] )
                    if ( idx0ax2max >= aabb1[ax2idx] && aabb1[ax2idx+1] >= aabb0[ax2idx] )
                        pairs.push( IndexPair );

    }; // while ( RunningAddress < LastSorted && Sorted < LastSorted )
570 //==============================================================================
572 //------------------------------------------------------------------------------
574 //------------------------------------------------------------------------------
578 #define CHECK_PASS_VALIDITY(pass) \
579 /* Shortcut to current counters */ \
580 uint32* CurCount = &mHistogram[pass<<8]; \
582 /* Reset flag. The sorting pass is supposed to be performed. (default) */ \
583 bool PerformPass = true; \
585 /* Check pass validity */ \
587 /* If all values have the same byte, sorting is useless. */ \
588 /* It may happen when sorting bytes or words instead of dwords. */ \
589 /* This routine actually sorts words faster than dwords, and bytes */ \
590 /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \
591 /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
593 /* Get first byte */ \
594 uint8 UniqueVal = *(((uint8*)input)+pass); \
596 /* Check that byte's counter */ \
597 if(CurCount[UniqueVal]==nb) PerformPass=false;
599 // WARNING ONLY SORTS IEEE FLOATING-POINT VALUES
600 const uint32
* RaixSortContext::RadixSort( const float* input2
, uint32 nb
)
602 uint32
* input
= (uint32
*)input2
;
604 // Resize lists if needed
605 ReallocateRanksIfNecessary(nb
);
607 // Allocate histograms & offsets on the stack
608 uint32 mHistogram
[256*4];
611 // Create histograms (counters). Counters for all passes are created in one run.
612 // Pros: read input buffer once instead of four times
613 // Cons: mHistogram is 4Kb instead of 1Kb
614 // Floating-point values are always supposed to be signed values, so there's only one code path there.
615 // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
616 // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
617 // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
618 // wouldn't work with mixed positive/negative values....
620 /* Clear counters/histograms */
621 memset(mHistogram
, 0, 256*4*sizeof(uint32
));
623 /* Prepare to count */
624 uint8
* p
= (uint8
*)input
;
625 uint8
* pe
= &p
[nb
*4];
626 uint32
* h0
= &mHistogram
[0]; /* Histogram for first pass (LSB) */
627 uint32
* h1
= &mHistogram
[256]; /* Histogram for second pass */
628 uint32
* h2
= &mHistogram
[512]; /* Histogram for third pass */
629 uint32
* h3
= &mHistogram
[768]; /* Histogram for last pass (MSB) */
631 bool AlreadySorted
= true; /* Optimism... */
633 if (!AreRanksValid())
635 /* Prepare for temporal coherence */
636 float* Running
= (float*)input2
;
637 float PrevVal
= *Running
;
641 /* Read input input2 in previous sorted order */
642 float Val
= *Running
++;
643 /* Check whether already sorted or not */
644 if(Val
<PrevVal
) { AlreadySorted
= false; break; } /* Early out */
645 /* Update for next iteration */
648 /* Create histograms */
649 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
652 /* If all input values are already sorted, we just have to return and leave the */
653 /* previous list unchanged. That way the routine may take advantage of temporal */
654 /* coherence, for example when used to sort transparent faces. */
657 uint32
* const Ranks1
= GetRanks1();
658 for(uint32 i
=0;i
<nb
;i
++) Ranks1
[i
] = i
;
664 /* Prepare for temporal coherence */
665 uint32
* const Ranks1
= GetRanks1();
667 uint32
* Indices
= Ranks1
;
668 float PrevVal
= (float)input2
[*Indices
];
672 /* Read input input2 in previous sorted order */
673 float Val
= (float)input2
[*Indices
++];
674 /* Check whether already sorted or not */
675 if(Val
<PrevVal
) { AlreadySorted
= false; break; } /* Early out */
676 /* Update for next iteration */
679 /* Create histograms */
680 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
683 /* If all input values are already sorted, we just have to return and leave the */
684 /* previous list unchanged. That way the routine may take advantage of temporal */
685 /* coherence, for example when used to sort transparent faces. */
686 if(AlreadySorted
) { return Ranks1
; }
689 /* Else there has been an early out and we must finish computing the histograms */
692 /* Create histograms without the previous overhead */
693 h0
[*p
++]++; h1
[*p
++]++; h2
[*p
++]++; h3
[*p
++]++;
697 // Compute #negative values involved if needed
698 uint32 NbNegativeValues
= 0;
700 // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
701 // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
702 // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
703 uint32
* h3
= &mHistogram
[768];
704 for(uint32 i
=128;i
<256;i
++) NbNegativeValues
+= h3
[i
]; // 768 for last histogram, 128 for negative part
706 // Radix sort, j is the pass number (0=LSB, 3=MSB)
707 for(uint32 j
=0;j
<4;j
++)
709 // Should we care about negative values?
712 // Here we deal with positive values only
713 CHECK_PASS_VALIDITY(j
);
717 uint32
* const Ranks2
= GetRanks2();
720 for(uint32 i
=1;i
<256;i
++) mLink
[i
] = mLink
[i
-1] + CurCount
[i
-1];
722 // Perform Radix Sort
723 uint8
* InputBytes
= (uint8
*)input
;
725 if (!AreRanksValid())
727 for(uint32 i
=0;i
<nb
;i
++)
729 *mLink
[InputBytes
[i
<<2]]++ = i
;
736 uint32
* const Ranks1
= GetRanks1();
738 uint32
* Indices
= Ranks1
;
739 uint32
* const IndicesEnd
= Ranks1
+ nb
;
740 while(Indices
!=IndicesEnd
)
742 uint32 id
= *Indices
++;
743 *mLink
[InputBytes
[id
<<2]]++ = id
;
747 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
753 // This is a special case to correctly handle negative values
754 CHECK_PASS_VALIDITY(j
);
758 uint32
* const Ranks2
= GetRanks2();
760 // Create biased offsets, in order for negative numbers to be sorted as well
761 mLink
[0] = Ranks2
+ NbNegativeValues
; // First positive number takes place after the negative ones
762 for(uint32 i
=1;i
<128;i
++) mLink
[i
] = mLink
[i
-1] + CurCount
[i
-1]; // 1 to 128 for positive numbers
764 // We must reverse the sorting order for negative numbers!
766 for(uint32 i
=0;i
<127;i
++) mLink
[254-i
] = mLink
[255-i
] + CurCount
[255-i
]; // Fixing the wrong order for negative values
767 for(uint32 i
=128;i
<256;i
++) mLink
[i
] += CurCount
[i
]; // Fixing the wrong place for negative values
769 // Perform Radix Sort
770 if (!AreRanksValid())
772 for(uint32 i
=0;i
<nb
;i
++)
774 uint32 Radix
= input
[i
]>>24; // Radix byte, same as above. AND is useless here (uint32).
775 // ### cmp to be killed. Not good. Later.
776 if(Radix
<128) *mLink
[Radix
]++ = i
; // Number is positive, same as above
777 else *(--mLink
[Radix
]) = i
; // Number is negative, flip the sorting order
784 uint32
* const Ranks1
= GetRanks1();
786 for(uint32 i
=0;i
<nb
;i
++)
788 uint32 Radix
= input
[Ranks1
[i
]]>>24; // Radix byte, same as above. AND is useless here (uint32).
789 // ### cmp to be killed. Not good. Later.
790 if(Radix
<128) *mLink
[Radix
]++ = Ranks1
[i
]; // Number is positive, same as above
791 else *(--mLink
[Radix
]) = Ranks1
[i
]; // Number is negative, flip the sorting order
794 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
799 // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
802 if (!AreRanksValid())
804 uint32
* const Ranks2
= GetRanks2();
806 for(uint32 i
=0;i
<nb
;i
++)
815 uint32
* const Ranks1
= GetRanks1();
816 uint32
* const Ranks2
= GetRanks2();
817 for(uint32 i
=0;i
<nb
;i
++) Ranks2
[i
] = Ranks1
[nb
-i
-1];
820 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
828 uint32
* const Ranks1
= GetRanks1();