/*************************************************************************
 *                                                                       *
 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
 * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
 *                                                                       *
 * This library is free software; you can redistribute it and/or         *
 * modify it under the terms of EITHER:                                  *
 *   (1) The GNU Lesser General Public License as published by the Free  *
 *       Software Foundation; either version 2.1 of the License, or (at  *
 *       your option) any later version. The text of the GNU Lesser      *
 *       General Public License is included with this library in the     *
 *       file LICENSE.TXT.                                               *
 *   (2) The BSD-style license that is included with this library in     *
 *       the file LICENSE-BSD.TXT.                                       *
 *                                                                       *
 * This library is distributed in the hope that it will be useful,       *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
 * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
 *                                                                       *
 *************************************************************************/
#include <ode/odeconfig.h>
#include <ode/rotation.h>
#include <ode/timer.h>
#include <ode/error.h>
#include "joints/joint.h"
#include "threadingutils.h"

#define dMIN(A,B) ((A)>(B) ? (B) : (A))
#define dMAX(A,B) ((B)>(A) ? (B) : (A))

//****************************************************************************

#define IFTIMING(x) ((void)0)
struct dJointWithInfo1
{
    dxJoint *joint;
    dxJoint::Info1 info;
};

    RCE_RHS = dxJoint::GI2_RHS,
    RCE_CFM = dxJoint::GI2_CFM,

    // Elements for array reuse
    RCE__RHS_CFM_MAX = dxJoint::GI2__RHS_CFM_MAX,
    RLE__RHS_LAMBDA_MAX = RCE__RHS_CFM_MAX,

    LHE_LO = dxJoint::GI2_LO,
    LHE_HI = dxJoint::GI2_HI,

    LHE__LO_HI_MAX = dxJoint::GI2__LO_HI_MAX,
enum dxJacobiVectorElement
{
    JVE__L_MIN = JVE__MIN + dDA__L_MIN,

    JVE_LX = JVE__L_MIN + dSA_X,
    JVE_LY = JVE__L_MIN + dSA_Y,
    JVE_LZ = JVE__L_MIN + dSA_Z,

    JVE__L_MAX = JVE__L_MIN + dSA__MAX,

    JVE__A_MIN = JVE__MIN + dDA__A_MIN,

    JVE_AX = JVE__A_MIN + dSA_X,
    JVE_AY = JVE__A_MIN + dSA_Y,
    JVE_AZ = JVE__A_MIN + dSA_Z,

    JVE__A_MAX = JVE__A_MIN + dSA__MAX,

    JVE__MAX = JVE__MIN + dDA__MAX,

    JVE__L_COUNT = JVE__L_MAX - JVE__L_MIN,
    JVE__A_COUNT = JVE__A_MAX - JVE__A_MIN,
};
enum dxJacobiMatrixElement
{
    JME__J_MIN = JME__MIN,
    JME__JL_MIN = JME__J_MIN + JVE__L_MIN,

    JME_JLX = JME__J_MIN + JVE_LX,
    JME_JLY = JME__J_MIN + JVE_LY,
    JME_JLZ = JME__J_MIN + JVE_LZ,

    JME__JL_MAX = JME__J_MIN + JVE__L_MAX,

    JME__JA_MIN = JME__J_MIN + JVE__A_MIN,

    JME_JAX = JME__J_MIN + JVE_AX,
    JME_JAY = JME__J_MIN + JVE_AY,
    JME_JAZ = JME__J_MIN + JVE_AZ,

    JME__JA_MAX = JME__J_MIN + JVE__A_MAX,
    JME__J_MAX = JME__J_MIN + JVE__MAX,

    JME__MAX = JME__J_MAX,

    JME__J_COUNT = JME__J_MAX - JME__J_MIN,
};
    JIM__L_MIN = JIM__MIN + dMD_LINEAR * dV3E__MAX,

    JIM__L_AXES_MIN = JIM__L_MIN + dV3E__AXES_MIN,

    JIM_LX = JIM__L_MIN + dV3E_X,
    JIM_LY = JIM__L_MIN + dV3E_Y,
    JIM_LZ = JIM__L_MIN + dV3E_Z,

    JIM__L_AXES_MAX = JIM__L_MIN + dV3E__AXES_MAX,

    JIM_LPAD = JIM__L_MIN + dV3E_PAD,

    JIM__L_MAX = JIM__L_MIN + dV3E__MAX,

    JIM__A_MIN = JIM__MIN + dMD_ANGULAR * dV3E__MAX,

    JIM__A_AXES_MIN = JIM__A_MIN + dV3E__AXES_MIN,

    JIM_AX = JIM__A_MIN + dV3E_X,
    JIM_AY = JIM__A_MIN + dV3E_Y,
    JIM_AZ = JIM__A_MIN + dV3E_Z,

    JIM__A_AXES_MAX = JIM__A_MIN + dV3E__AXES_MAX,

    JIM_APAD = JIM__A_MIN + dV3E_PAD,

    JIM__A_MAX = JIM__A_MIN + dV3E__MAX,

    JIM__MAX = JIM__MIN + dMD__MAX * dV3E__MAX,
enum dxContactForceElement
{
    CFE__DYNAMICS_MIN = CFE__MIN,

    CFE__L_MIN = CFE__DYNAMICS_MIN + dDA__L_MIN,

    CFE_LX = CFE__DYNAMICS_MIN + dDA_LX,
    CFE_LY = CFE__DYNAMICS_MIN + dDA_LY,
    CFE_LZ = CFE__DYNAMICS_MIN + dDA_LZ,

    CFE__L_MAX = CFE__DYNAMICS_MIN + dDA__L_MAX,

    CFE__A_MIN = CFE__DYNAMICS_MIN + dDA__A_MIN,

    CFE_AX = CFE__DYNAMICS_MIN + dDA_AX,
    CFE_AY = CFE__DYNAMICS_MIN + dDA_AY,
    CFE_AZ = CFE__DYNAMICS_MIN + dDA_AZ,

    CFE__A_MAX = CFE__DYNAMICS_MIN + dDA__A_MAX,

    CFE__DYNAMICS_MAX = CFE__DYNAMICS_MIN + dDA__MAX,

    CFE__MAX = CFE__DYNAMICS_MAX,
};
#define AMATRIX_ALIGNMENT dMAX(64, EFFICIENT_ALIGNMENT)
#define INVI_ALIGNMENT dMAX(32, EFFICIENT_ALIGNMENT)
#define JINVM_ALIGNMENT dMAX(64, EFFICIENT_ALIGNMENT)
struct dxStepperStage0Outputs
{
    sizeint ji_start;
    sizeint ji_end;
    unsigned int m;
    unsigned int nub;
};

struct dxStepperStage1CallContext
{
    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, void *stageMemArenaState, dReal *invI, dJointWithInfo1 *jointinfos)
    {
        m_stepperCallContext = stepperCallContext;
        m_stageMemArenaState = stageMemArenaState;
        m_jointinfos = jointinfos;
    }

    const dxStepperProcessingCallContext *m_stepperCallContext;
    void *m_stageMemArenaState;
    dJointWithInfo1 *m_jointinfos;
    dxStepperStage0Outputs m_stage0Outputs;
};
struct dxStepperStage0BodiesCallContext
{
    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, dReal *invI)
    {
        m_stepperCallContext = stepperCallContext;
        m_inertiaBodyIndex = 0;
    }

    const dxStepperProcessingCallContext *m_stepperCallContext;
    atomicord32 m_tagsTaken;
    atomicord32 m_gravityTaken;
    volatile atomicord32 m_inertiaBodyIndex;
};
struct dxStepperStage0JointsCallContext
{
    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, dJointWithInfo1 *jointinfos, dxStepperStage0Outputs *stage0Outputs)
    {
        m_stepperCallContext = stepperCallContext;
        m_jointinfos = jointinfos;
        m_stage0Outputs = stage0Outputs;
    }

    const dxStepperProcessingCallContext *m_stepperCallContext;
    dJointWithInfo1 *m_jointinfos;
    dxStepperStage0Outputs *m_stage0Outputs;
};
static int dxStepIsland_Stage0_Bodies_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
// static int dxStepIsland_Stage0_Joints_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
static int dxStepIsland_Stage1_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);

static void dxStepIsland_Stage0_Bodies(dxStepperStage0BodiesCallContext *callContext);
static void dxStepIsland_Stage0_Joints(dxStepperStage0JointsCallContext *callContext);
static void dxStepIsland_Stage1(dxStepperStage1CallContext *callContext);
struct dxStepperLocalContext
{
    void Initialize(dReal *invI, dJointWithInfo1 *jointinfos, unsigned int nj,
        unsigned int m, unsigned int nub, const unsigned int *mindex, int *findex,
        dReal *J, dReal *A, dReal *pairsRhsCfm, dReal *pairsLoHi,
        atomicord32 *bodyStartJoints, atomicord32 *bodyJointLinks)
    {
        m_jointinfos = jointinfos;
        m_pairsRhsCfm = pairsRhsCfm;
        m_pairsLoHi = pairsLoHi;
        m_bodyStartJoints = bodyStartJoints;
        m_bodyJointLinks = bodyJointLinks;
    }

    dJointWithInfo1 *m_jointinfos;
    const unsigned int *m_mindex;
    dReal *m_pairsRhsCfm;
    atomicord32 *m_bodyStartJoints;
    atomicord32 *m_bodyJointLinks;
};
struct dxStepperStage2CallContext
{
    void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext,
        dReal *JinvM, dReal *rhs_tmp)
    {
        m_stepperCallContext = callContext;
        m_localContext = localContext;
    }

    const dxStepperProcessingCallContext *m_stepperCallContext;
    const dxStepperLocalContext *m_localContext;
    volatile atomicord32 m_ji_J;
    volatile atomicord32 m_ji_Ainit;
    volatile atomicord32 m_ji_JinvM;
    volatile atomicord32 m_ji_Aaddjb;
    volatile atomicord32 m_bi_rhs_tmp;
    volatile atomicord32 m_ji_rhs;
};
struct dxStepperStage3CallContext
{
    void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext,
        void *stage1MemArenaState)
    {
        m_stepperCallContext = callContext;
        m_localContext = localContext;
        m_stage1MemArenaState = stage1MemArenaState;
    }

    const dxStepperProcessingCallContext *m_stepperCallContext;
    const dxStepperLocalContext *m_localContext;
    void *m_stage1MemArenaState;
};
struct dxStepperStage4CallContext
{
    void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext/*,
        void *stage3MemarenaState*/)
    {
        m_stepperCallContext = callContext;
        m_localContext = localContext;
        // m_stage3MemarenaState = stage3MemarenaState;
        m_bi_constrForce = 0;
    }

    const dxStepperProcessingCallContext *m_stepperCallContext;
    const dxStepperLocalContext *m_localContext;
    // void *m_stage3MemarenaState;
    volatile atomicord32 m_bi_constrForce;
};
static int dxStepIsland_Stage2a_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
static int dxStepIsland_Stage2aSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
static int dxStepIsland_Stage2b_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
static int dxStepIsland_Stage2bSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
static int dxStepIsland_Stage2c_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
static int dxStepIsland_Stage3_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);

static void dxStepIsland_Stage2a(dxStepperStage2CallContext *callContext);
static void dxStepIsland_Stage2b(dxStepperStage2CallContext *callContext);
static void dxStepIsland_Stage2c(dxStepperStage2CallContext *callContext);
static void dxStepIsland_Stage3(dxStepperStage3CallContext *callContext);

static int dxStepIsland_Stage4_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
static void dxStepIsland_Stage4(dxStepperStage4CallContext *stage4CallContext);
//****************************************************************************
// special matrix multipliers

// this assumes the 4th and 8th rows of B and C are zero.
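// MultiplyAddJinvMxJToA() accumulates one block of A += (J*invM) * J': for every
// pair of rows (one from JinvMRow, one from JRow) it adds the dot product of the
// six dynamics components into the corresponding element of A. The padded row
// layout (JIM__MAX/JME__MAX reals per row) is what the note above about the
// zeroed 4th and 8th entries refers to.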
void MultiplyAddJinvMxJToA (dReal *Arow, const dReal *JinvMRow, const dReal *JRow,
    unsigned int infomJinvM, unsigned int infomJ, unsigned int mskip)
{
    dIASSERT (infomJinvM > 0 && infomJ > 0 && Arow && JinvMRow && JRow);
    const unsigned int mskip_munus_infomJ_plus_1 = mskip - infomJ + 1;
    dIASSERT(mskip >= infomJ);

    dReal *currA = Arow;
    const dReal *currJinvM = JinvMRow;
    for (unsigned int i = infomJinvM; ; ) {
        dReal JiM0 = currJinvM[JIM_LX];
        dReal JiM1 = currJinvM[JIM_LY];
        dReal JiM2 = currJinvM[JIM_LZ];
        dReal JiM4 = currJinvM[JIM_AX];
        dReal JiM5 = currJinvM[JIM_AY];
        dReal JiM6 = currJinvM[JIM_AZ];

        const dReal *currJ = JRow;
        for (unsigned int j = infomJ; ; ) {
            dReal sum;
            sum  = JiM0 * currJ[JME_JLX];
            sum += JiM1 * currJ[JME_JLY];
            sum += JiM2 * currJ[JME_JLZ];
            sum += JiM4 * currJ[JME_JAX];
            sum += JiM5 * currJ[JME_JAY];
            sum += JiM6 * currJ[JME_JAZ];

            *currA += sum;

            if (--j == 0) {
                break;
            }

            ++currA;
            currJ += JME__MAX;
        }

        if (--i == 0) {
            break;
        }

        currJinvM += JIM__MAX;
        currA += mskip_munus_infomJ_plus_1;
    }
}
// this assumes the 4th and 8th rows of B are zero.
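// MultiplySubJxRhsTmpFromRHS() subtracts J*rhs_tmp from the right-hand-side
// entries of one constraint row block: for each of the infom rows it removes the
// dot product of that row's Jacobian (six dynamics components) with the body's
// rhs_tmp entry from the RHS value stored in the rhs/cfm pair array.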
void MultiplySubJxRhsTmpFromRHS (dReal *rowRhsCfm, const dReal *JRow, const dReal *rowRhsTmp, unsigned int infom)
{
    dIASSERT (infom > 0 && rowRhsCfm && JRow && rowRhsTmp);

    dReal *currRhs = rowRhsCfm + RCE_RHS;
    const dReal *currJ = JRow;
    const dReal RT_LX = rowRhsTmp[dDA_LX], RT_LY = rowRhsTmp[dDA_LY], RT_LZ = rowRhsTmp[dDA_LZ];
    const dReal RT_AX = rowRhsTmp[dDA_AX], RT_AY = rowRhsTmp[dDA_AY], RT_AZ = rowRhsTmp[dDA_AZ];
    for (unsigned int i = infom; ; ) {
        dReal sum;
        sum  = currJ[JME_JLX] * RT_LX;
        sum += currJ[JME_JLY] * RT_LY;
        sum += currJ[JME_JLZ] * RT_LZ;
        sum += currJ[JME_JAX] * RT_AX;
        sum += currJ[JME_JAY] * RT_AY;
        sum += currJ[JME_JAZ] * RT_AZ;

        *currRhs -= sum;

        if (--i == 0) {
            break;
        }

        currRhs += RCE__RHS_CFM_MAX;
        currJ += JME__MAX;
    }
}
void MultiplyAddJxLambdaToCForce(dReal cforce[CFE__MAX],
    const dReal *JRow, const dReal *rowRhsLambda, unsigned int infom,
    dJointFeedback *fb/*=NULL*/, unsigned jointBodyIndex)
{
    dIASSERT (infom > 0 && cforce && JRow && rowRhsLambda);

    dReal sumLX = 0, sumLY = 0, sumLZ = 0, sumAX = 0, sumAY = 0, sumAZ = 0;

    const dReal *currJ = JRow, *currLambda = rowRhsLambda + RLE_LAMBDA;
    for (unsigned int k = infom; ; ) {
        const dReal lambda = *currLambda;
        sumLX += currJ[JME_JLX] * lambda;
        sumLY += currJ[JME_JLY] * lambda;
        sumLZ += currJ[JME_JLZ] * lambda;
        sumAX += currJ[JME_JAX] * lambda;
        sumAY += currJ[JME_JAY] * lambda;
        sumAZ += currJ[JME_JAZ] * lambda;

        if (--k == 0) {
            break;
        }

        currJ += JME__MAX;
        currLambda += RLE__RHS_LAMBDA_MAX;
    }

    if (fb != NULL) {
        if (jointBodyIndex == dJCB__MIN) {
            fb->f1[dV3E_X] = sumLX;
            fb->f1[dV3E_Y] = sumLY;
            fb->f1[dV3E_Z] = sumLZ;
            fb->t1[dV3E_X] = sumAX;
            fb->t1[dV3E_Y] = sumAY;
            fb->t1[dV3E_Z] = sumAZ;
        }
        else {
            dIASSERT(jointBodyIndex == dJCB__MIN + 1);
            dSASSERT(dJCB__MAX == 2);

            fb->f2[dV3E_X] = sumLX;
            fb->f2[dV3E_Y] = sumLY;
            fb->f2[dV3E_Z] = sumLZ;
            fb->t2[dV3E_X] = sumAX;
            fb->t2[dV3E_Y] = sumAY;
            fb->t2[dV3E_Z] = sumAZ;
        }
    }

    cforce[CFE_LX] += sumLX;
    cforce[CFE_LY] += sumLY;
    cforce[CFE_LZ] += sumLZ;
    cforce[CFE_AX] += sumAX;
    cforce[CFE_AY] += sumAY;
    cforce[CFE_AZ] += sumAZ;
}
//****************************************************************************

void dxStepIsland(const dxStepperProcessingCallContext *callContext)
{
    IFTIMING(dTimerStart("preprocessing"));

    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
    dxWorld *world = callContext->m_world;
    unsigned int nb = callContext->m_islandBodiesCount;
    unsigned int _nj = callContext->m_islandJointsCount;

    dReal *invI = memarena->AllocateOveralignedArray<dReal>(dM3E__MAX * (sizeint)nb, INVI_ALIGNMENT);
    // Reserve twice as much memory and start from the middle so that regardless of
    // what direction the array grows to there would be sufficient room available.
    const sizeint ji_reserve_count = 2 * (sizeint)_nj;
    dJointWithInfo1 *const jointinfos = memarena->AllocateArray<dJointWithInfo1>(ji_reserve_count);

    const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
    dIASSERT(allowedThreads != 0);

    void *stagesMemArenaState = memarena->SaveState();

    dxStepperStage1CallContext *stage1CallContext = (dxStepperStage1CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage1CallContext));
    stage1CallContext->Initialize(callContext, stagesMemArenaState, invI, jointinfos);

    dxStepperStage0BodiesCallContext *stage0BodiesCallContext = (dxStepperStage0BodiesCallContext *)memarena->AllocateBlock(sizeof(dxStepperStage0BodiesCallContext));
    stage0BodiesCallContext->Initialize(callContext, invI);

    dxStepperStage0JointsCallContext *stage0JointsCallContext = (dxStepperStage0JointsCallContext *)memarena->AllocateBlock(sizeof(dxStepperStage0JointsCallContext));
    stage0JointsCallContext->Initialize(callContext, jointinfos, &stage1CallContext->m_stage0Outputs);

    if (allowedThreads == 1)
    {
        dxStepIsland_Stage0_Bodies(stage0BodiesCallContext);
        dxStepIsland_Stage0_Joints(stage0JointsCallContext);
        dxStepIsland_Stage1(stage1CallContext);
    }
    else
    {
        unsigned bodyThreads = allowedThreads;
        unsigned jointThreads = 1;

        dCallReleaseeID stage1CallReleasee;
        world->PostThreadedCallForUnawareReleasee(NULL, &stage1CallReleasee, bodyThreads + jointThreads, callContext->m_finalReleasee,
            NULL, &dxStepIsland_Stage1_Callback, stage1CallContext, 0, "StepIsland Stage1");

        world->PostThreadedCallsGroup(NULL, bodyThreads, stage1CallReleasee, &dxStepIsland_Stage0_Bodies_Callback, stage0BodiesCallContext, "StepIsland Stage0-Bodies");

        dxStepIsland_Stage0_Joints(stage0JointsCallContext);
        world->AlterThreadedCallDependenciesCount(stage1CallReleasee, -1);
        dIASSERT(jointThreads == 1);
    }
}
int dxStepIsland_Stage0_Bodies_Callback(void *_callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    (void)callThisReleasee; // unused
    dxStepperStage0BodiesCallContext *callContext = (dxStepperStage0BodiesCallContext *)_callContext;
    dxStepIsland_Stage0_Bodies(callContext);
    return 1;
}
void dxStepIsland_Stage0_Bodies(dxStepperStage0BodiesCallContext *callContext)
{
    dxBody * const *body = callContext->m_stepperCallContext->m_islandBodiesStart;
    unsigned int nb = callContext->m_stepperCallContext->m_islandBodiesCount;

    if (ThrsafeExchange(&callContext->m_tagsTaken, 1) == 0)
    {
        // number all bodies in the body list - set their tag values
        for (unsigned int i = 0; i < nb; i++) body[i]->tag = i;
    }

    if (ThrsafeExchange(&callContext->m_gravityTaken, 1) == 0)
    {
        dxWorld *world = callContext->m_stepperCallContext->m_world;

        // add the gravity force to all bodies
        // since gravity does normally have only one component it's more efficient
        // to run three loops for each individual component
        dxBody *const *const bodyend = body + nb;
        dReal gravity_x = world->gravity[0];
        for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
            dxBody *b = *bodycurr;
            if ((b->flags & dxBodyNoGravity) == 0) {
                b->facc[dV3E_X] += b->mass.mass * gravity_x;
            }
        }
        dReal gravity_y = world->gravity[1];
        for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
            dxBody *b = *bodycurr;
            if ((b->flags & dxBodyNoGravity) == 0) {
                b->facc[dV3E_Y] += b->mass.mass * gravity_y;
            }
        }
        dReal gravity_z = world->gravity[2];
        for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
            dxBody *b = *bodycurr;
            if ((b->flags & dxBodyNoGravity) == 0) {
                b->facc[dV3E_Z] += b->mass.mass * gravity_z;
            }
        }
    }

    // for all bodies, compute the inertia tensor and its inverse in the global
    // frame, and compute the rotational force and add it to the torque
    // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
    dReal *invIrow = callContext->m_invI;
    unsigned int bodyIndex = ThrsafeIncrementIntUpToLimit(&callContext->m_inertiaBodyIndex, nb);

    for (unsigned int i = 0; i != nb; invIrow += dM3E__MAX, ++i) {
        if (i == bodyIndex) {
            dMatrix3 tmp;
            dxBody *b = body[i];

            // compute inverse inertia tensor in global frame
            dMultiply2_333 (tmp, b->invI, b->posr.R);
            dMultiply0_333 (invIrow, b->posr.R, tmp);

            // Don't apply gyroscopic torques to bodies
            // if not flagged or the body is kinematic
            if ((b->flags & dxBodyGyroscopic) && (b->invMass > 0)) {
                dMatrix3 I;
                // compute inertia tensor in global frame
                dMultiply2_333 (tmp,b->mass.I,b->posr.R);
                dMultiply0_333 (I,b->posr.R,tmp);
                // compute rotational force

                // Explicit computation
                dMultiply0_331 (tmp,I,b->avel);
                dSubtractVectorCross3(b->tacc,b->avel,tmp);

                // Do the implicit computation based on
                //"Stabilizing Gyroscopic Forces in Rigid Multibody Simulations"
                // (Lacoursière 2006)
                dReal h = callContext->m_stepperCallContext->m_stepSize; // Step size
                dVector3 L; // Compute angular momentum
                dMultiply0_331(L, I, b->avel);

                // Compute a new effective 'inertia tensor'
                // for the implicit step: the cross-product
                // matrix of the angular momentum plus the
                // old tensor scaled by the timestep.
                // Itild may not be symmetric pos-definite,
                // but we can still use it to compute implicit
                // gyroscopic torques.
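                // In effect the code below forms
                //   Itild = I + h * S(L)            (S(L) written by dSetCrossMatrixMinus)
                //   tau0  = (I * Itild^-1 - E) * (L / h)
                // and adds tau0 to the torque accumulator, i.e. the stabilized
                // gyroscopic torque from the reference above.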
                dMatrix3 Itild = { 0 };
                dSetCrossMatrixMinus(Itild, L, dV3E__MAX);
                for (int ii = dM3E__MIN; ii != dM3E__MAX; ++ii) {
                    Itild[ii] = Itild[ii] * h + I[ii];
                }

                // Scale momentum by inverse time to get
                // a sort of "torque"
                dScaleVector3(L, dRecip(h));

                // Invert the pseudo-tensor
                dMatrix3 itInv;
                // This is a closed-form inversion.
                // It's probably not numerically stable
                // when dealing with small masses with
                // a large asymmetry.
                // An LU decomposition might be better.
                if (dInvertMatrix3(itInv, Itild) != 0) {
                    // "Divide" the original tensor
                    // by the pseudo-tensor (on the right)
                    dMultiply0_333(Itild, I, itInv);
                    // Subtract an identity matrix
                    Itild[dM3E_XX] -= 1; Itild[dM3E_YY] -= 1; Itild[dM3E_ZZ] -= 1;

                    // This new inertia matrix rotates the
                    // momentum to get a new set of torques
                    // that will work correctly when applied
                    // to the old inertia matrix as explicit
                    // torques with a semi-implicit update
                    dVector3 tau0;
                    dMultiply0_331(tau0,Itild,L);

                    // Add the gyro torques to the torque
                    // accumulator
                    for (int ii = dSA__MIN; ii != dSA__MAX; ++ii) {
                        b->tacc[dV3E__AXES_MIN + ii] += tau0[dV3E__AXES_MIN + ii];
                    }
                }
            }

            bodyIndex = ThrsafeIncrementIntUpToLimit(&callContext->m_inertiaBodyIndex, nb);
        }
    }
}
// int dxStepIsland_Stage0_Joints_Callback(void *_callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
// {
//     (void)callInstanceIndex; // unused
//     (void)callThisReleasee; // unused
//     dxStepperStage0JointsCallContext *callContext = (dxStepperStage0JointsCallContext *)_callContext;
//     dxStepIsland_Stage0_Joints(callContext);
// }
void dxStepIsland_Stage0_Joints(dxStepperStage0JointsCallContext *callContext)
{
    dxJoint * const *_joint = callContext->m_stepperCallContext->m_islandJointsStart;
    dJointWithInfo1 *jointinfos = callContext->m_jointinfos;
    unsigned int _nj = callContext->m_stepperCallContext->m_islandJointsCount;

    // get m = total constraint dimension, nub = number of unbounded variables.
    // create constraint offset array and number-of-rows array for all joints.
    // the constraints are re-ordered as follows: the purely unbounded
    // constraints, the mixed unbounded + LCP constraints, and last the purely
    // LCP constraints. this assists the LCP solver to put all unbounded
    // variables at the start for a quick factorization.

    // joints with m=0 are inactive and are removed from the joints array
    // entirely, so that the code that follows does not consider them.
    // also number all active joints in the joint list (set their tag values).
    // inactive joints receive a tag value of -1.
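    // The jointinfos buffer holds 2*_nj entries and is filled starting from the
    // middle so the active range can grow in either direction. After the two
    // growth passes below it is laid out as
    //
    //   [ unused | unbounded | mixed | pure LCP | unused ]
    //            ^unb_start  ^mix_start        ^lcp_end
    //                               ^mix_end
    //
    // and [ji_start, ji_end) covers the active joints.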
    sizeint ji_start, ji_end;
    {
        unsigned int mcurr = 0;
        sizeint unb_start, mix_start, mix_end, lcp_end;
        unb_start = mix_start = mix_end = lcp_end = _nj;

        dJointWithInfo1 *jicurr = jointinfos + lcp_end;
        dxJoint *const *const _jend = _joint + _nj;
        dxJoint *const *_jcurr = _joint;
        while (true) {
            // -------------------------------------------------------------------------
            // Switch to growing array forward
            {
                bool fwd_end_reached = false;
                dJointWithInfo1 *jimixend = jointinfos + mix_end;
                while (true) { // jicurr=dest, _jcurr=src
                    if (_jcurr == _jend) {
                        lcp_end = jicurr - jointinfos;
                        fwd_end_reached = true;
                        break;
                    }
                    dxJoint *j = *_jcurr++;
                    j->getInfo1 (&jicurr->info);
                    dIASSERT (/*jicurr->info.m >= 0 && */jicurr->info.m <= 6 && /*jicurr->info.nub >= 0 && */jicurr->info.nub <= jicurr->info.m);
                    if (jicurr->info.m != 0) {
                        mcurr += jicurr->info.m;
                        if (jicurr->info.nub == 0) { // A lcp info - a correct guess!!!
                            jicurr->joint = j;
                            ++jicurr;
                        } else if (jicurr->info.nub < jicurr->info.m) { // A mixed case
                            if (unb_start == mix_start) { // no unbounded infos yet - just move to opposite side of mixed-s
                                unb_start = mix_start = mix_start - 1;
                                dJointWithInfo1 *jimixstart = jointinfos + mix_start;
                                jimixstart->info = jicurr->info;
                                jimixstart->joint = j;
                            } else if (jimixend != jicurr) { // have to swap to the tail of mixed-s
                                dxJoint::Info1 tmp_info = jicurr->info;
                                *jicurr = *jimixend;
                                jimixend->info = tmp_info;
                                jimixend->joint = j;
                                ++jimixend; ++jicurr;
                            } else { // no need to swap as there are no LCP info-s yet
                                jicurr->joint = j;
                                jimixend = jicurr = jicurr + 1;
                            }
                        } else { // A purely unbounded case -- break out and proceed growing in opposite direction
                            unb_start = unb_start - 1;
                            dJointWithInfo1 *jiunbstart = jointinfos + unb_start;
                            jiunbstart->info = jicurr->info;
                            jiunbstart->joint = j;
                            lcp_end = jicurr - jointinfos;
                            mix_end = jimixend - jointinfos;
                            jicurr = jiunbstart - 1;
                            break;
                        }
                    } else {
                        j->tag = -1;
                    }
                }
                if (fwd_end_reached) {
                    break;
                }
            }

            // -------------------------------------------------------------------------
            // Switch to growing array backward
            {
                bool bkw_end_reached = false;
                dJointWithInfo1 *jimixstart = jointinfos + mix_start - 1;
                while (true) { // jicurr=dest, _jcurr=src
                    if (_jcurr == _jend) {
                        unb_start = (jicurr + 1) - jointinfos;
                        mix_start = (jimixstart + 1) - jointinfos;
                        bkw_end_reached = true;
                        break;
                    }
                    dxJoint *j = *_jcurr++;
                    j->getInfo1 (&jicurr->info);
                    dIASSERT (/*jicurr->info.m >= 0 && */jicurr->info.m <= 6 && /*jicurr->info.nub >= 0 && */jicurr->info.nub <= jicurr->info.m);
                    if (jicurr->info.m != 0) {
                        mcurr += jicurr->info.m;
                        if (jicurr->info.nub == jicurr->info.m) { // An unbounded info - a correct guess!!!
                            jicurr->joint = j;
                            --jicurr;
                        } else if (jicurr->info.nub != 0) { // A mixed case
                            if (mix_end == lcp_end) { // no lcp infos yet - just move to opposite side of mixed-s
                                dJointWithInfo1 *jimixend = jointinfos + mix_end;
                                lcp_end = mix_end = mix_end + 1;
                                jimixend->info = jicurr->info;
                                jimixend->joint = j;
                            } else if (jimixstart != jicurr) { // have to swap to the head of mixed-s
                                dxJoint::Info1 tmp_info = jicurr->info;
                                *jicurr = *jimixstart;
                                jimixstart->info = tmp_info;
                                jimixstart->joint = j;
                                --jimixstart; --jicurr;
                            } else { // no need to swap as there are no unbounded info-s yet
                                jicurr->joint = j;
                                jimixstart = jicurr = jicurr - 1;
                            }
                        } else { // A purely lcp case -- break out and proceed growing in opposite direction
                            dJointWithInfo1 *jilcpend = jointinfos + lcp_end;
                            lcp_end = lcp_end + 1;
                            jilcpend->info = jicurr->info;
                            jilcpend->joint = j;
                            unb_start = (jicurr + 1) - jointinfos;
                            mix_start = (jimixstart + 1) - jointinfos;
                            jicurr = jilcpend + 1;
                            break;
                        }
                    } else {
                        j->tag = -1;
                    }
                }
                if (bkw_end_reached) {
                    break;
                }
            }
        }

        callContext->m_stage0Outputs->m = mcurr;
        callContext->m_stage0Outputs->nub = (unsigned)(mix_start - unb_start);
        dIASSERT((sizeint)(mix_start - unb_start) <= (sizeint)UINT_MAX);
        ji_start = unb_start;
        ji_end = lcp_end;
    }

    {
        const dJointWithInfo1 *jicurr = jointinfos + ji_start;
        const dJointWithInfo1 *const jiend = jointinfos + ji_end;
        for (unsigned int i = 0; jicurr != jiend; i++, ++jicurr) {
            jicurr->joint->tag = i;
        }
    }

    callContext->m_stage0Outputs->ji_start = ji_start;
    callContext->m_stage0Outputs->ji_end = ji_end;
}
int dxStepIsland_Stage1_Callback(void *_stage1CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    (void)callThisReleasee; // unused
    dxStepperStage1CallContext *stage1CallContext = (dxStepperStage1CallContext *)_stage1CallContext;
    dxStepIsland_Stage1(stage1CallContext);
    return 1;
}
void dxStepIsland_Stage1(dxStepperStage1CallContext *stage1CallContext)
{
    const dxStepperProcessingCallContext *callContext = stage1CallContext->m_stepperCallContext;
    dJointWithInfo1 *_jointinfos = stage1CallContext->m_jointinfos;
    dReal *invI = stage1CallContext->m_invI;
    sizeint ji_start = stage1CallContext->m_stage0Outputs.ji_start;
    sizeint ji_end = stage1CallContext->m_stage0Outputs.ji_end;
    unsigned int m = stage1CallContext->m_stage0Outputs.m;
    unsigned int nub = stage1CallContext->m_stage0Outputs.nub;

    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;

    memarena->RestoreState(stage1CallContext->m_stageMemArenaState);
    stage1CallContext = NULL; // WARNING! _stage1CallContext is not valid after this point!
    dIVERIFY(stage1CallContext == NULL); // To suppress compiler warnings about unused variable assignment

    unsigned int _nj = callContext->m_islandJointsCount;
    const sizeint ji_reserve_count = 2 * (sizeint)_nj;
    memarena->ShrinkArray<dJointWithInfo1>(_jointinfos, ji_reserve_count, ji_end);

    dJointWithInfo1 *jointinfos = _jointinfos + ji_start;
    unsigned int nj = (unsigned int)(ji_end - ji_start);
    dIASSERT((sizeint)(ji_end - ji_start) <= (sizeint)UINT_MAX);

    unsigned int *mindex = NULL;
    dReal *J = NULL, *A = NULL, *pairsRhsCfm = NULL, *pairsLoHi = NULL;
    int *findex = NULL;
    atomicord32 *bodyStartJoints = NULL, *bodyJointLinks = NULL;

    // if there are constraints, compute constrForce
    if (m > 0) {
        mindex = memarena->AllocateArray<unsigned int>((sizeint)(nj + 1));
        {
            unsigned int *mcurr = mindex;
            unsigned int moffs = 0;
            *mcurr = moffs;
            ++mcurr;

            const dJointWithInfo1 *const jiend = jointinfos + nj;
            for (const dJointWithInfo1 *jicurr = jointinfos; jicurr != jiend; ++jicurr) {
                //dxJoint *joint = jicurr->joint;
                moffs += jicurr->info.m;
                *mcurr = moffs;
                ++mcurr;
            }
        }

        // create a constraint equation right hand side vector `c', a constraint
        // force mixing vector `cfm', and LCP low and high bound vectors, and an
        // `findex' vector.
        findex = memarena->AllocateArray<int>(m);
        J = memarena->AllocateArray<dReal>((sizeint)m * (2 * JME__MAX));
        A = memarena->AllocateOveralignedArray<dReal>((sizeint)m * dPAD(m), AMATRIX_ALIGNMENT);
        pairsRhsCfm = memarena->AllocateArray<dReal>((sizeint)m * RCE__RHS_CFM_MAX);
        pairsLoHi = memarena->AllocateArray<dReal>((sizeint)m * LHE__LO_HI_MAX);
        const unsigned int nb = callContext->m_islandBodiesCount;
        bodyStartJoints = memarena->AllocateArray<atomicord32>(nb);
        bodyJointLinks = memarena->AllocateArray<atomicord32>((sizeint)nj * dJCB__MAX);
        dICHECK(nj < ~((atomicord32)0) / dJCB__MAX); // If larger joint counts are to be used, pointers (or sizeint) need to be stored rather than atomicord32 indices
    }

    dxStepperLocalContext *localContext = (dxStepperLocalContext *)memarena->AllocateBlock(sizeof(dxStepperLocalContext));
    localContext->Initialize(invI, jointinfos, nj, m, nub, mindex, findex, J, A, pairsRhsCfm, pairsLoHi, bodyStartJoints, bodyJointLinks);

    void *stage1MemarenaState = memarena->SaveState();
    dxStepperStage3CallContext *stage3CallContext = (dxStepperStage3CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage3CallContext));
    stage3CallContext->Initialize(callContext, localContext, stage1MemarenaState);

    if (m > 0) {
        dReal *JinvM = memarena->AllocateOveralignedArray<dReal>((sizeint)m * (2 * JIM__MAX), JINVM_ALIGNMENT);
        const unsigned int nb = callContext->m_islandBodiesCount;
        dReal *rhs_tmp = memarena->AllocateArray<dReal>((sizeint)nb * dDA__MAX);

        dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage2CallContext));
        stage2CallContext->Initialize(callContext, localContext, JinvM, rhs_tmp);

        const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
        dIASSERT(allowedThreads != 0);

        if (allowedThreads == 1) {
            IFTIMING(dTimerNow("create J"));
            dxStepIsland_Stage2a(stage2CallContext);
            IFTIMING(dTimerNow("compute Adiag, JinvM and rhs_tmp"));
            dxStepIsland_Stage2b(stage2CallContext);
            IFTIMING(dTimerNow("compute A and rhs"));
            dxStepIsland_Stage2c(stage2CallContext);
            dxStepIsland_Stage3(stage3CallContext);
        }
        else {
            dxWorld *world = callContext->m_world;
            dCallReleaseeID stage3CallReleasee;
            world->PostThreadedCallForUnawareReleasee(NULL, &stage3CallReleasee, 1, callContext->m_finalReleasee,
                NULL, &dxStepIsland_Stage3_Callback, stage3CallContext, 0, "StepIsland Stage3");

            dCallReleaseeID stage2bSyncReleasee;
            world->PostThreadedCall(NULL, &stage2bSyncReleasee, 1, stage3CallReleasee,
                NULL, &dxStepIsland_Stage2bSync_Callback, stage2CallContext, 0, "StepIsland Stage2b Sync");

            dCallReleaseeID stage2aSyncReleasee;
            world->PostThreadedCall(NULL, &stage2aSyncReleasee, allowedThreads, stage2bSyncReleasee,
                NULL, &dxStepIsland_Stage2aSync_Callback, stage2CallContext, 0, "StepIsland Stage2a Sync");

            dIASSERT(allowedThreads > 1); /*if (allowedThreads > 1) */{
                world->PostThreadedCallsGroup(NULL, allowedThreads - 1, stage2aSyncReleasee, &dxStepIsland_Stage2a_Callback, stage2CallContext, "StepIsland Stage2a");
            }

            dxStepIsland_Stage2a(stage2CallContext);
            world->AlterThreadedCallDependenciesCount(stage2aSyncReleasee, -1);
        }
    }
    else {
        dxStepIsland_Stage3(stage3CallContext);
    }
}
int dxStepIsland_Stage2a_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    (void)callThisReleasee; // unused
    dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
    dxStepIsland_Stage2a(stage2CallContext);
    return 1;
}
void dxStepIsland_Stage2a(dxStepperStage2CallContext *stage2CallContext)
{
    const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
    const dxStepperLocalContext *localContext = stage2CallContext->m_localContext;
    dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
    unsigned int nj = localContext->m_nj;
    const unsigned int *mindex = localContext->m_mindex;

    const dReal stepsizeRecip = dRecip(callContext->m_stepSize);
    dxWorld *world = callContext->m_world;

    int *findex = localContext->m_findex;
    dReal *J = localContext->m_J;
    dReal *pairsRhsCfm = localContext->m_pairsRhsCfm;
    dReal *pairsLoHi = localContext->m_pairsLoHi;

    // get jacobian data from constraints. a (2*m)x8 matrix will be created
    // to store the two jacobian blocks from each constraint. it has this
    // format:
    //
    //   l l l 0 a a a 0  \    .
    //   l l l 0 a a a 0   }-- jacobian body 1 block for joint 0 (3 rows)
    //   l l l 0 a a a 0  /
    //   l l l 0 a a a 0  \    .
    //   l l l 0 a a a 0   }-- jacobian body 2 block for joint 0 (3 rows)
    //   l l l 0 a a a 0  /
    //   l l l 0 a a a 0  }--- jacobian body 1 block for joint 1 (1 row)
    //   l l l 0 a a a 0  }--- jacobian body 2 block for joint 1 (1 row)
    //   etc...
    //
    //   (lll) = linear jacobian data
    //   (aaa) = angular jacobian data
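    //
    // Each Jacobian row is stored JME__MAX dReals wide; the zero columns shown
    // above are padding slots, which is what the earlier "4th and 8th entries
    // are zero" notes on the matrix multipliers rely on.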
    const dReal worldERP = world->global_erp;
    const dReal worldCFM = world->global_cfm;

    unsigned ji;
    while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_J, nj)) != nj) {
        const unsigned ofsi = mindex[ji];
        const unsigned int infom = mindex[ji + 1] - ofsi;

        dReal *const JRow = J + (sizeint)ofsi * (2 * JME__MAX);
        dReal *rowRhsCfm = pairsRhsCfm + (sizeint)ofsi * RCE__RHS_CFM_MAX;
        dReal *rowLoHi = pairsLoHi + (sizeint)ofsi * LHE__LO_HI_MAX;

        dSetZero (JRow, infom * (2 * JME__MAX));

        dReal *const endRhsCfm = rowRhsCfm + infom * RCE__RHS_CFM_MAX;
        for (dReal *currRhsCfm = rowRhsCfm; currRhsCfm != endRhsCfm; currRhsCfm += RCE__RHS_CFM_MAX) {
            currRhsCfm[RCE_RHS] = REAL(0.0);
            currRhsCfm[RCE_CFM] = worldCFM;
        }

        dReal *const endLoHi = rowLoHi + infom * LHE__LO_HI_MAX;
        for (dReal *currLoHi = rowLoHi; currLoHi != endLoHi; currLoHi += LHE__LO_HI_MAX) {
            currLoHi[LHE_LO] = -dInfinity;
            currLoHi[LHE_HI] = dInfinity;
        }

        int *findexRow = findex + ofsi;
        dSetValue(findexRow, infom, -1);

        dxJoint *joint = jointinfos[ji].joint;
        joint->getInfo2(stepsizeRecip, worldERP, JME__MAX, JRow + JME__J_MIN, JRow + infom * JME__MAX + JME__J_MIN, RCE__RHS_CFM_MAX, rowRhsCfm, rowLoHi, findexRow);
        dSASSERT((int)LHE__LO_HI_MAX == RCE__RHS_CFM_MAX); // To make sure same step fits for both pairs in the call to dxJoint::getInfo2() above

        // findex iteration is compact and is not going to pollute caches - do it first
        {
            // adjust returned findex values for global index numbering
            int *const findicesEnd = findexRow + infom;
            for (int *findexCurr = findexRow; findexCurr != findicesEnd; ++findexCurr) {
                int fival = *findexCurr;
                if (fival != -1) {
                    *findexCurr = fival + ofsi;
                }
            }
        }

        {
            dReal *const endRhsCfm = rowRhsCfm + infom * RCE__RHS_CFM_MAX;
            for (dReal *currRhsCfm = rowRhsCfm; currRhsCfm != endRhsCfm; currRhsCfm += RCE__RHS_CFM_MAX) {
                currRhsCfm[RCE_RHS] *= stepsizeRecip;
                currRhsCfm[RCE_CFM] *= stepsizeRecip;
            }
        }
    }
}
int dxStepIsland_Stage2aSync_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
    const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
    const unsigned allowedThreads = callContext->m_stepperAllowedThreads;

    dIASSERT(allowedThreads > 1); /*if (allowedThreads > 1) */{ // The allowed thread count is greater than one as otherwise current function would not be scheduled for execution from the previous stage
        dxWorld *world = callContext->m_world;
        world->AlterThreadedCallDependenciesCount(callThisReleasee, allowedThreads - 1);
        world->PostThreadedCallsGroup(NULL, allowedThreads - 1, callThisReleasee, &dxStepIsland_Stage2b_Callback, stage2CallContext, "StepIsland Stage2b");
    }

    dxStepIsland_Stage2b(stage2CallContext);

    return 1;
}
int dxStepIsland_Stage2b_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    (void)callThisReleasee; // unused
    dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
    dxStepIsland_Stage2b(stage2CallContext);
    return 1;
}
void dxStepIsland_Stage2b(dxStepperStage2CallContext *stage2CallContext)
{
    const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
    const dxStepperLocalContext *localContext = stage2CallContext->m_localContext;
    dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
    unsigned int nj = localContext->m_nj;
    const unsigned int *mindex = localContext->m_mindex;

    {
        // This code depends on cfm elements and therefore must be in different sub-stage
        // from Jacobian construction in Stage2a to ensure proper synchronization
        // and avoid accessing numbers being modified.

        dReal *A = localContext->m_A;
        const dReal *pairsRhsCfm = localContext->m_pairsRhsCfm;
        const unsigned m = localContext->m_m;

        const unsigned int mskip = dPAD(m);

        unsigned ji;
        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_Ainit, nj)) != nj) {
            const unsigned ofsi = mindex[ji];
            const unsigned int infom = mindex[ji + 1] - ofsi;

            dReal *Arow = A + (sizeint)mskip * ofsi;
            dSetZero(Arow, (sizeint)mskip * infom);
            dReal *Adiag = Arow + ofsi;
            const dReal *rowRfsCrm = pairsRhsCfm + (sizeint)ofsi * RCE__RHS_CFM_MAX;
            for (unsigned int i = 0; i != infom; Adiag += mskip, ++i) {
                Adiag[i] = (rowRfsCrm + i * RCE__RHS_CFM_MAX)[RCE_CFM];
            }
        }
    }

    {
        // This code depends on J elements and therefore must be in different sub-stage
        // from Jacobian construction in Stage2a to ensure proper synchronization
        // and avoid accessing numbers being modified.

        const dReal *invI = localContext->m_invI;
        const dReal *J = localContext->m_J;
        dReal *JinvM = stage2CallContext->m_JinvM;

        // compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
        // format as J so we just go through the constraints in J multiplying by
        // the appropriate scalars and matrices.
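        // For each constraint row and each attached body b this produces
        //   JinvM linear part  = J_linear  * (1 / mass_b)
        //   JinvM angular part = J_angular * invI_b   (world-frame inverse inertia)
        // which is what gets multiplied against J' in Stage2c.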
        unsigned ji;
        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_JinvM, nj)) != nj) {
            const unsigned ofsi = mindex[ji];
            const unsigned int infom = mindex[ji + 1] - ofsi;

            dReal *Jdst = JinvM + (sizeint)ofsi * (2 * JIM__MAX);
            dSetZero(Jdst, infom * (2 * JIM__MAX));

            const dReal *Jsrc = J + (sizeint)ofsi * (2 * JME__MAX);
            dxJoint *joint = jointinfos[ji].joint;

            dxBody *jb0 = joint->node[0].body;
            if (true || jb0 != NULL) { // -- always true
                dReal body_invMass0 = jb0->invMass;
                const dReal *body_invI0 = invI + (sizeint)(unsigned int)jb0->tag * dM3E__MAX;
                for (unsigned int j = infom; j != 0; --j) {
                    for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) Jdst[JIM__L_AXES_MIN + k] = Jsrc[JME__JL_MIN + k] * body_invMass0;
                    dMultiply0_133(Jdst + JIM__A_AXES_MIN, Jsrc + JME__JA_MIN, body_invI0);
                    Jsrc += JME__MAX;
                    Jdst += JIM__MAX;
                }
            }

            dxBody *jb1 = joint->node[1].body;
            if (jb1 != NULL) {
                dReal body_invMass1 = jb1->invMass;
                const dReal *body_invI1 = invI + (sizeint)(unsigned int)jb1->tag * dM3E__MAX;
                for (unsigned int j = infom; j != 0; --j) {
                    for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) Jdst[JIM__L_AXES_MIN + k] = Jsrc[JME__JL_MIN + k] * body_invMass1;
                    dMultiply0_133 (Jdst + JIM__A_AXES_MIN, Jsrc + JME__JA_MIN, body_invI1);
                    Jsrc += JME__MAX;
                    Jdst += JIM__MAX;
                }
            }
        }
    }

    {
        // This code reads facc/tacc fields of body objects which (the fields)
        // may be modified by dxJoint::getInfo2(). Therefore the code must be
        // in different sub-stage from Jacobian construction in Stage2a
        // to ensure proper synchronization and avoid accessing numbers being modified.

        dxBody * const *const body = callContext->m_islandBodiesStart;
        const unsigned int nb = callContext->m_islandBodiesCount;
        const dReal *invI = localContext->m_invI;
        atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
        dReal *rhs_tmp = stage2CallContext->m_rhs_tmp;

        // compute the right hand side `rhs'
        const dReal stepsizeRecip = dRecip(callContext->m_stepSize);

        // put v/h + invM*fe into rhs_tmp
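        // For every body b the loop below stores the padded dynamics pair
        //   rhs_tmp[b].linear  = facc_b * invMass_b + lvel_b / h
        //   rhs_tmp[b].angular = invI_b * tacc_b    + avel_b / h
        // i.e. exactly v/h + invM*fe.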
        unsigned bi;
        while ((bi = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_bi_rhs_tmp, nb)) != nb) {
            dReal *tmp1curr = rhs_tmp + (sizeint)bi * dDA__MAX;
            const dReal *invIrow = invI + (sizeint)bi * dM3E__MAX;
            dxBody *b = body[bi];
            // dSetZero(tmp1curr, 8); -- not needed
            for (unsigned int j = dSA__MIN; j != dSA__MAX; ++j) tmp1curr[dDA__L_MIN + j] = b->facc[dV3E__AXES_MIN + j] * b->invMass + b->lvel[dV3E__AXES_MIN + j] * stepsizeRecip;
            dMultiply0_331 (tmp1curr + dDA__A_MIN, invIrow, b->tacc);
            for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) tmp1curr[dDA__A_MIN + k] += b->avel[dV3E__AXES_MIN + k] * stepsizeRecip;

            // Initialize body start joint indices -- this will be needed later for building body related joint list in dxStepIsland_Stage2c
            bodyStartJoints[bi] = 0;
        }
    }
}
int dxStepIsland_Stage2bSync_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
    const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
    const unsigned allowedThreads = callContext->m_stepperAllowedThreads;

    dIASSERT(allowedThreads > 1); /*if (allowedThreads > 1) */{ // The allowed thread count is greater than one as otherwise current function would not be scheduled for execution from the previous stage
        dxWorld *world = callContext->m_world;
        world->AlterThreadedCallDependenciesCount(callThisReleasee, allowedThreads - 1);
        world->PostThreadedCallsGroup(NULL, allowedThreads - 1, callThisReleasee, &dxStepIsland_Stage2c_Callback, stage2CallContext, "StepIsland Stage2c");
    }

    dxStepIsland_Stage2c(stage2CallContext);

    return 1;
}
int dxStepIsland_Stage2c_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    (void)callThisReleasee; // unused
    dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
    dxStepIsland_Stage2c(stage2CallContext);
    return 1;
}
void dxStepIsland_Stage2c(dxStepperStage2CallContext *stage2CallContext)
{
    //const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
    const dxStepperLocalContext *localContext = stage2CallContext->m_localContext;
    dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
    unsigned int nj = localContext->m_nj;
    const unsigned int *mindex = localContext->m_mindex;

    {
        // This code depends on A elements and JinvM elements and therefore
        // must be in a different sub-stage from A initialization and JinvM calculation in Stage2b
        // to ensure proper synchronization and avoid accessing numbers being modified.

        dReal *A = localContext->m_A;
        const dReal *JinvM = stage2CallContext->m_JinvM;
        const dReal *J = localContext->m_J;
        const unsigned m = localContext->m_m;

        // now compute A = JinvM * J'. A's rows and columns are grouped by joint,
        // i.e. in the same way as the rows of J. block (i,j) of A is only nonzero
        // if joints i and j have at least one body in common.
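        // Block (i,j) of A therefore accumulates, over every body shared by
        // joints i and j,
        //   A[i][j] += J_i * invM_b * J_j'
        // which is what the MultiplyAddJinvMxJToA() calls below add in, one
        // shared body at a time.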
        const unsigned int mskip = dPAD(m);

        unsigned ji;
        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_Aaddjb, nj)) != nj) {
            const unsigned ofsi = mindex[ji];
            const unsigned int infom = mindex[ji + 1] - ofsi;

            dReal *Arow = A + (sizeint)mskip * ofsi;
            const dReal *JinvMRow = JinvM + (sizeint)ofsi * (2 * JIM__MAX);
            dxJoint *joint = jointinfos[ji].joint;

            dxBody *jb0 = joint->node[0].body;
            if (true || jb0 != NULL) { // -- always true
                // compute diagonal block of A
                const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);
                MultiplyAddJinvMxJToA (Arow + ofsi, JinvMRow, JRow, infom, infom, mskip);

                for (dxJointNode *n0 = (ji != 0 ? jb0->firstjoint : NULL); n0; n0 = n0->next) {
                    // if joint was tagged as -1 then it is an inactive (m=0 or disabled)
                    // joint that should not be considered
                    int j0 = n0->joint->tag;
                    if (j0 != -1 && (unsigned)j0 < ji) {
                        const unsigned int jiother_ofsi = mindex[j0];
                        const unsigned int jiother_infom = mindex[j0 + 1] - jiother_ofsi;
                        const dJointWithInfo1 *jiother = jointinfos + j0;
                        unsigned int smart_infom = (jiother->joint->node[1].body == jb0) ? jiother_infom : 0;

                        const dReal *JOther = J + ((sizeint)jiother_ofsi * 2 + smart_infom) * JME__MAX;
                        MultiplyAddJinvMxJToA (Arow + jiother_ofsi, JinvMRow, JOther, infom, jiother_infom, mskip);
                    }
                }
            }

            dxBody *jb1 = joint->node[1].body;
            dIASSERT(jb1 != jb0);
            if (jb1 != NULL) {
                const dReal *JinvMOther = JinvMRow + infom * JIM__MAX;
                // compute diagonal block of A
                const dReal *JRow = J + ((sizeint)ofsi * 2 + infom) * JME__MAX;
                MultiplyAddJinvMxJToA (Arow + ofsi, JinvMOther, JRow, infom, infom, mskip);

                for (dxJointNode *n1 = (ji != 0 ? jb1->firstjoint : NULL); n1; n1 = n1->next) {
                    // if joint was tagged as -1 then it is an inactive (m=0 or disabled)
                    // joint that should not be considered
                    int j1 = n1->joint->tag;
                    if (j1 != -1 && (unsigned)j1 < ji) {
                        const unsigned int jiother_ofsi = mindex[j1];
                        const unsigned int jiother_infom = mindex[j1 + 1] - jiother_ofsi;
                        const dJointWithInfo1 *jiother = jointinfos + j1;
                        unsigned int smart_infom = (jiother->joint->node[1].body == jb1) ? jiother_infom : 0;

                        const dReal *JOther = J + ((sizeint)jiother_ofsi * 2 + smart_infom) * JME__MAX;
                        MultiplyAddJinvMxJToA (Arow + jiother_ofsi, JinvMOther, JOther, infom, jiother_infom, mskip);
                    }
                }
            }
        }
    }
    {
        // This code depends on rhs_tmp elements and therefore must be in
        // different sub-stage from rhs_tmp calculation in Stage2b to ensure
        // proper synchronization and avoid accessing numbers being modified.

        const dReal *J = localContext->m_J;
        const dReal *rhs_tmp = stage2CallContext->m_rhs_tmp;
        dReal *pairsRhsCfm = localContext->m_pairsRhsCfm;
        atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
        atomicord32 *bodyJointLinks = localContext->m_bodyJointLinks;

        // compute the right hand side `rhs'
        // put J*rhs_tmp into rhs
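        // After this subtraction each constraint row holds
        //   rhs = c/h - J * (v/h + invM*fe)
        // since Stage2a already scaled the getInfo2() right-hand side by 1/h and
        // Stage2b filled rhs_tmp with v/h + invM*fe.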
        unsigned ji;
        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_rhs, nj)) != nj) {
            const unsigned ofsi = mindex[ji];
            const unsigned int infom = mindex[ji + 1] - ofsi;

            dReal *currRhsCfm = pairsRhsCfm + (sizeint)ofsi * RCE__RHS_CFM_MAX;
            const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);

            dxJoint *joint = jointinfos[ji].joint;

            dxBody *jb0 = joint->node[0].body;
            if (true || jb0 != NULL) { // -- always true
                unsigned bodyIndex = (unsigned)jb0->tag;
                MultiplySubJxRhsTmpFromRHS (currRhsCfm, JRow, rhs_tmp + (sizeint)bodyIndex * dDA__MAX, infom);

                // Link joints connected to each body into a list to be used on results incorporation. The bodyStartJoints have been initialized in dxStepIsland_Stage2b.
                const atomicord32 linkIndex = (atomicord32)((sizeint)ji * dJCB__MAX + dJCB_FIRST_BODY); // It is asserted at links buffer allocation that the indices can't overflow atomicord32
                for (atomicord32 oldStartIndex = bodyStartJoints[bodyIndex]; ; oldStartIndex = bodyStartJoints[bodyIndex]) {
                    bodyJointLinks[linkIndex] = oldStartIndex;
                    if (ThrsafeCompareExchange(&bodyStartJoints[bodyIndex], oldStartIndex, linkIndex + 1)) { // The link index is stored incremented to allow 0 as end indicator
                        break;
                    }
                }
            }

            dxBody *jb1 = joint->node[1].body;
            if (jb1 != NULL) {
                unsigned bodyIndex = (unsigned)jb1->tag;
                MultiplySubJxRhsTmpFromRHS (currRhsCfm, JRow + infom * JME__MAX, rhs_tmp + (sizeint)bodyIndex * dDA__MAX, infom);

                // Link joints connected to each body into a list to be used on results incorporation. The bodyStartJoints have been initialized in dxStepIsland_Stage2b
                const atomicord32 linkIndex = (atomicord32)((sizeint)ji * dJCB__MAX + dJCB_SECOND_BODY); // It is asserted at links buffer allocation that the indices can't overflow atomicord32
                for (atomicord32 oldStartIndex = bodyStartJoints[bodyIndex]; ; oldStartIndex = bodyStartJoints[bodyIndex]) {
                    bodyJointLinks[linkIndex] = oldStartIndex;
                    if (ThrsafeCompareExchange(&bodyStartJoints[bodyIndex], oldStartIndex, linkIndex + 1)) { // The link index is stored incremented to allow 0 as end indicator
                        break;
                    }
                }
            }
        }
    }
}
int dxStepIsland_Stage3_Callback(void *_stage3CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    (void)callThisReleasee; // unused
    dxStepperStage3CallContext *stage3CallContext = (dxStepperStage3CallContext *)_stage3CallContext;
    dxStepIsland_Stage3(stage3CallContext);
    return 1;
}
void dxStepIsland_Stage3(dxStepperStage3CallContext *stage3CallContext)
{
    const dxStepperProcessingCallContext *callContext = stage3CallContext->m_stepperCallContext;
    const dxStepperLocalContext *localContext = stage3CallContext->m_localContext;

    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
    memarena->RestoreState(stage3CallContext->m_stage1MemArenaState);
    stage3CallContext = NULL; // WARNING! stage3CallContext is not valid after this point!
    dIVERIFY(stage3CallContext == NULL); // To suppress unused variable assignment warnings

    unsigned int m = localContext->m_m;
    unsigned int nub = localContext->m_nub;
    //const unsigned int *mindex = localContext->m_mindex;
    int *findex = localContext->m_findex;
    dReal *A = localContext->m_A;
    dReal *pairsRhsLambda = localContext->m_pairsRhsCfm; // Reuse cfm buffer for lambdas as the former values are not needed anymore
    dReal *pairsLoHi = localContext->m_pairsLoHi;

    if (m > 0) {
        BEGIN_STATE_SAVE(memarena, lcpstate) {
            IFTIMING(dTimerNow ("solve LCP problem"));

            // solve the LCP problem and get lambda.
            // this will destroy A but that's OK
            dxSolveLCP (memarena, m, A, pairsRhsLambda, NULL, nub, pairsLoHi, findex);
            dSASSERT((int)RLE__RHS_LAMBDA_MAX == PBX__MAX && (int)RLE_RHS == PBX_B && (int)RLE_LAMBDA == PBX_X);
            dSASSERT((int)LHE__LO_HI_MAX == PLH__MAX && (int)LHE_LO == PLH_LO && (int)LHE_HI == PLH_HI);

        } END_STATE_SAVE(memarena, lcpstate);
    }

    // void *stage3MemarenaState = memarena->SaveState();

    dxStepperStage4CallContext *stage4CallContext = (dxStepperStage4CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage4CallContext));
    stage4CallContext->Initialize(callContext, localContext/*, stage3MemarenaState*/);

    const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
    dIASSERT(allowedThreads != 0);

    if (allowedThreads == 1) {
        IFTIMING(dTimerNow ("compute and apply constraint force"));
        dxStepIsland_Stage4(stage4CallContext);
        IFTIMING(dTimerEnd());
        IFTIMING(dTimerReport(stdout,1));
    }
    else {
        dCallReleaseeID finalReleasee = callContext->m_finalReleasee;
        dxWorld *world = callContext->m_world;
        world->AlterThreadedCallDependenciesCount(finalReleasee, allowedThreads - 1);
        world->PostThreadedCallsGroup(NULL, allowedThreads - 1, finalReleasee, &dxStepIsland_Stage4_Callback, stage4CallContext, "StepIsland Stage4");
        // Note: Adding another dependency for the finalReleasee is not necessary as it already depends on the current call
        dxStepIsland_Stage4(stage4CallContext);
    }
}
int dxStepIsland_Stage4_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
{
    (void)callInstanceIndex; // unused
    (void)callThisReleasee; // unused
    dxStepperStage4CallContext *stage4CallContext = (dxStepperStage4CallContext *)_stage4CallContext;
    dxStepIsland_Stage4(stage4CallContext);
    return 1;
}
void dxStepIsland_Stage4(dxStepperStage4CallContext *stage4CallContext)
{
    const dxStepperProcessingCallContext *callContext = stage4CallContext->m_stepperCallContext;
    const dxStepperLocalContext *localContext = stage4CallContext->m_localContext;

    const dReal stepSize = callContext->m_stepSize;
    dxBody *const *bodies = callContext->m_islandBodiesStart;
    dReal *invI = localContext->m_invI;
    dJointWithInfo1 *jointInfos = localContext->m_jointinfos;
    dReal *J = localContext->m_J;
    dReal *pairsRhsLambda = localContext->m_pairsRhsCfm;
    const unsigned int *mIndex = localContext->m_mindex;
    atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
    atomicord32 *bodyJointLinks = localContext->m_bodyJointLinks;
    const unsigned int nb = callContext->m_islandBodiesCount;

    unsigned bi;
    while ((bi = ThrsafeIncrementIntUpToLimit(&stage4CallContext->m_bi_constrForce, nb)) != nb) {
        dVector3 angularForceAccumulator;
        dxBody *b = bodies[bi];
        const dReal *invIrow = invI + (sizeint)bi * dM3E__MAX;
        dReal body_invMass_mul_stepSize = stepSize * b->invMass;

        dReal bodyConstrForce[CFE__MAX];
        bool constrForceAvailable = false;

        unsigned linkIndex = bodyStartJoints != NULL ? bodyStartJoints[bi] : 0;
        if (linkIndex != 0) {
            dSetZero(bodyConstrForce, dARRAY_SIZE(bodyConstrForce));
        }

        // compute the constraint force as constrForce = J'*lambda
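        // Every link in this body's joint list contributes J' * lambda for the
        // corresponding Jacobian block, so bodyConstrForce ends up holding the
        // total constraint force/torque on the body; the velocity update below
        // then applies
        //   lvel += stepSize * invMass * (constrForce_linear  + facc)
        //   avel += stepSize * invI    * (constrForce_angular + tacc)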
        for (; linkIndex != 0; constrForceAvailable = true, linkIndex = bodyJointLinks[linkIndex - 1]) {
            unsigned jointIndex = (linkIndex - 1) / dJCB__MAX;
            unsigned jointBodyIndex = (linkIndex - 1) % dJCB__MAX;

            const dJointWithInfo1 *currJointInfo = jointInfos + jointIndex;
            unsigned ofsi = mIndex[jointIndex];
            dIASSERT(dIN_RANGE(jointIndex, 0, localContext->m_nj));

            const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);
            const dReal *rowRhsLambda = pairsRhsLambda + (sizeint)ofsi * RLE__RHS_LAMBDA_MAX;

            dxJoint *joint = currJointInfo->joint;
            const unsigned int infom = currJointInfo->info.m;

            // unsigned jRowExtraOffset = jointBodyIndex * infom * JME__MAX;
            unsigned jRowExtraOffset = jointBodyIndex != dJCB__MIN ? infom * JME__MAX : 0;
            dSASSERT(dJCB__MAX == 2);

            dJointFeedback *fb = joint->feedback;
            MultiplyAddJxLambdaToCForce(bodyConstrForce, JRow + jRowExtraOffset, rowRhsLambda, infom, fb, jointBodyIndex);
        }

        // compute the velocity update
        if (constrForceAvailable) {
            // add fe to cforce and multiply cforce by stepSize
            for (unsigned int j = dSA__MIN; j != dSA__MAX; ++j) {
                b->lvel[dV3E__AXES_MIN + j] += (bodyConstrForce[CFE__L_MIN + j] + b->facc[dV3E__AXES_MIN + j]) * body_invMass_mul_stepSize;
            }
            for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) {
                angularForceAccumulator[dV3E__AXES_MIN + k] = (bodyConstrForce[CFE__A_MIN + k] + b->tacc[dV3E__AXES_MIN + k]) * stepSize;
            }
        }
        else {
            // add fe to cforce and multiply cforce by stepSize
            dAddVectorScaledVector3(b->lvel, b->lvel, b->facc, body_invMass_mul_stepSize);
            dCopyScaledVector3(angularForceAccumulator, b->tacc, stepSize);
        }

        dMultiplyAdd0_331 (b->avel, invIrow, angularForceAccumulator + dV3E__AXES_MIN);

        // update the position and orientation from the new linear/angular velocity
        // (over the given time step)
        dxStepBody (b, stepSize);

        // zero all force accumulators
        dZeroVector3(b->facc);
        dZeroVector3(b->tacc);
    }
}
//****************************************************************************

sizeint dxEstimateStepMemoryRequirements (dxBody * const *body, unsigned int nb, dxJoint * const *_joint, unsigned int _nj)
{
    (void)body; // unused
    unsigned int nj, m;

    {
        unsigned int njcurr = 0, mcurr = 0;
        dxJoint::SureMaxInfo info;
        dxJoint *const *const _jend = _joint + _nj;
        for (dxJoint *const *_jcurr = _joint; _jcurr != _jend; ++_jcurr) {
            dxJoint *j = *_jcurr;
            j->getSureMaxInfo (&info);

            unsigned int jm = info.max_m;
            if (jm > 0) {
                njcurr++;
                mcurr += jm;
            }
        }
        nj = njcurr; m = mcurr;
    }

    sizeint res = 0;

    res += dOVERALIGNED_SIZE(sizeof(dReal) * dM3E__MAX * nb, INVI_ALIGNMENT); // for invI

    {
        sizeint sub1_res1 = dEFFICIENT_SIZE(sizeof(dJointWithInfo1) * 2 * _nj); // for initial jointinfos

        // The array can't grow right more than by nj
        sizeint sub1_res2 = dEFFICIENT_SIZE(sizeof(dJointWithInfo1) * ((sizeint)_nj + (sizeint)nj)); // for shrunk jointinfos
        sub1_res2 += dEFFICIENT_SIZE(sizeof(dxStepperLocalContext)); //for dxStepperLocalContext
        if (m > 0) {
            sub1_res2 += dEFFICIENT_SIZE(sizeof(unsigned int) * (nj + 1)); // for mindex
            sub1_res2 += dEFFICIENT_SIZE(sizeof(int) * m); // for findex
            sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * 2 * JME__MAX * m); // for J
            unsigned int mskip = dPAD(m);
            sub1_res2 += dOVERALIGNED_SIZE(sizeof(dReal) * mskip * m, AMATRIX_ALIGNMENT); // for A
            sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * RCE__RHS_CFM_MAX * m); // for pairsRhsCfm
            sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * LHE__LO_HI_MAX * m); // for pairsLoHi
            sub1_res2 += dEFFICIENT_SIZE(sizeof(atomicord32) * nb); // for bodyStartJoints
            sub1_res2 += dEFFICIENT_SIZE(sizeof(atomicord32) * dJCB__MAX * nj); // for bodyJointLinks
        }

        {
            sizeint sub2_res1 = dEFFICIENT_SIZE(sizeof(dxStepperStage3CallContext)); // for dxStepperStage3CallContext

            sizeint sub2_res2 = 0;

            sizeint sub2_res3 = dEFFICIENT_SIZE(sizeof(dxStepperStage4CallContext)); // for dxStepperStage4CallContext

            if (m > 0) {
                sub2_res1 += dOVERALIGNED_SIZE(sizeof(dReal) * 2 * JIM__MAX * m, JINVM_ALIGNMENT); // for JinvM
                sub2_res1 += dEFFICIENT_SIZE(sizeof(dReal) * dDA__MAX * nb); // for rhs_tmp
                sub2_res1 += dEFFICIENT_SIZE(sizeof(dxStepperStage2CallContext)); // for dxStepperStage2CallContext

                sub2_res2 += dxEstimateSolveLCPMemoryReq(m, false);
            }

            sub1_res2 += dMAX(sub2_res1, dMAX(sub2_res2, sub2_res3));
        }

        sizeint sub1_res12_max = dMAX(sub1_res1, sub1_res2);
        sizeint stage01_contexts = dEFFICIENT_SIZE(sizeof(dxStepperStage0BodiesCallContext))
            + dEFFICIENT_SIZE(sizeof(dxStepperStage0JointsCallContext))
            + dEFFICIENT_SIZE(sizeof(dxStepperStage1CallContext));
        res += dMAX(sub1_res12_max, stage01_contexts);
    }
unsigned dxEstimateStepMaxCallCount(
    unsigned /*activeThreadCount*/, unsigned allowedThreadCount)
{
    unsigned result = 1 // dxStepIsland itself
        + (2 * allowedThreadCount + 2) // (dxStepIsland_Stage2a + dxStepIsland_Stage2b) * allowedThreadCount + 2 * dxStepIsland_Stage2?_Sync
        + 1; // dxStepIsland_Stage3