Add a TriMesh to TriMesh collision demo.
[ode.git] / ode / src / step.cpp
blobfcd72098e1582ec44853b4d16dc26944afcafc62
1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
23 #include <ode/odeconfig.h>
24 #include <ode/rotation.h>
25 #include <ode/timer.h>
26 #include <ode/error.h>
27 #include "config.h"
28 #include "odemath.h"
29 #include "matrix.h"
30 #include "objects.h"
31 #include "joints/joint.h"
32 #include "lcp.h"
33 #include "util.h"
34 #include "threadingutils.h"
36 #include <new>
39 #define dMIN(A,B) ((A)>(B) ? (B) : (A))
40 #define dMAX(A,B) ((B)>(A) ? (B) : (A))
42 //****************************************************************************
43 // misc defines
45 //#define TIMING
48 #ifdef TIMING
49 #define IFTIMING(x) x
50 #else
51 #define IFTIMING(x) ((void)0)
52 #endif
55 struct dJointWithInfo1
57 dxJoint *joint;
58 dxJoint::Info1 info;
61 enum dxRHSCFMElement
63 RCE_RHS = dxJoint::GI2_RHS,
64 RCE_CFM = dxJoint::GI2_CFM,
66 // Elements for array reuse
67 RLE_RHS = RCE_RHS,
68 RLE_LAMBDA = RCE_CFM,
70 RCE__RHS_CFM_MAX = dxJoint::GI2__RHS_CFM_MAX,
71 RLE__RHS_LAMBDA_MAX = RCE__RHS_CFM_MAX,
74 enum dxLoHiElement
76 LHE_LO = dxJoint::GI2_LO,
77 LHE_HI = dxJoint::GI2_HI,
79 LHE__LO_HI_MAX = dxJoint::GI2__LO_HI_MAX,
82 enum dxJacobiVectorElement
84 JVE__MIN,
86 JVE__L_MIN = JVE__MIN + dDA__L_MIN,
88 JVE_LX = JVE__L_MIN + dSA_X,
89 JVE_LY = JVE__L_MIN + dSA_Y,
90 JVE_LZ = JVE__L_MIN + dSA_Z,
92 JVE__L_MAX = JVE__L_MIN + dSA__MAX,
94 JVE__A_MIN = JVE__MIN + dDA__A_MIN,
96 JVE_AX = JVE__A_MIN + dSA_X,
97 JVE_AY = JVE__A_MIN + dSA_Y,
98 JVE_AZ = JVE__A_MIN + dSA_Z,
100 JVE__A_MAX = JVE__A_MIN + dSA__MAX,
102 JVE__MAX = JVE__MIN + dDA__MAX,
104 JVE__L_COUNT = JVE__L_MAX - JVE__L_MIN,
105 JVE__A_COUNT = JVE__A_MAX - JVE__A_MIN,
109 enum dxJacobiMatrixElement
111 JME__MIN,
113 JME__J_MIN = JME__MIN,
114 JME__JL_MIN = JME__J_MIN + JVE__L_MIN,
116 JME_JLX = JME__J_MIN + JVE_LX,
117 JME_JLY = JME__J_MIN + JVE_LY,
118 JME_JLZ = JME__J_MIN + JVE_LZ,
120 JME__JL_MAX = JME__J_MIN + JVE__L_MAX,
122 JME__JA_MIN = JME__J_MIN + JVE__A_MIN,
124 JME_JAX = JME__J_MIN + JVE_AX,
125 JME_JAY = JME__J_MIN + JVE_AY,
126 JME_JAZ = JME__J_MIN + JVE_AZ,
128 JME__JA_MAX = JME__J_MIN + JVE__A_MAX,
129 JME__J_MAX = JME__J_MIN + JVE__MAX,
131 JME__MAX = JME__J_MAX,
133 JME__J_COUNT = JME__J_MAX - JME__J_MIN,
136 enum dxJInvMElement
138 JIM__MIN,
140 JIM__L_MIN = JIM__MIN + dMD_LINEAR * dV3E__MAX,
142 JIM__L_AXES_MIN = JIM__L_MIN + dV3E__AXES_MIN,
144 JIM_LX = JIM__L_MIN + dV3E_X,
145 JIM_LY = JIM__L_MIN + dV3E_Y,
146 JIM_LZ = JIM__L_MIN + dV3E_Z,
148 JIM__L_AXES_MAX = JIM__L_MIN + dV3E__AXES_MAX,
150 JIM_LPAD = JIM__L_MIN + dV3E_PAD,
152 JIM__L_MAX = JIM__L_MIN + dV3E__MAX,
154 JIM__A_MIN = JIM__MIN + dMD_ANGULAR * dV3E__MAX,
156 JIM__A_AXES_MIN = JIM__A_MIN + dV3E__AXES_MIN,
158 JIM_AX = JIM__A_MIN + dV3E_X,
159 JIM_AY = JIM__A_MIN + dV3E_Y,
160 JIM_AZ = JIM__A_MIN + dV3E_Z,
162 JIM__A_AXES_MAX = JIM__A_MIN + dV3E__AXES_MAX,
164 JIM_APAD = JIM__A_MIN + dV3E_PAD,
166 JIM__A_MAX = JIM__A_MIN + dV3E__MAX,
168 JIM__MAX = JIM__MIN + dMD__MAX * dV3E__MAX,
171 enum dxContactForceElement
173 CFE__MIN,
175 CFE__DYNAMICS_MIN = CFE__MIN,
177 CFE__L_MIN = CFE__DYNAMICS_MIN + dDA__L_MIN,
179 CFE_LX = CFE__DYNAMICS_MIN + dDA_LX,
180 CFE_LY = CFE__DYNAMICS_MIN + dDA_LY,
181 CFE_LZ = CFE__DYNAMICS_MIN + dDA_LZ,
183 CFE__L_MAX = CFE__DYNAMICS_MIN + dDA__L_MAX,
185 CFE__A_MIN = CFE__DYNAMICS_MIN + dDA__A_MIN,
187 CFE_AX = CFE__DYNAMICS_MIN + dDA_AX,
188 CFE_AY = CFE__DYNAMICS_MIN + dDA_AY,
189 CFE_AZ = CFE__DYNAMICS_MIN + dDA_AZ,
191 CFE__A_MAX = CFE__DYNAMICS_MIN + dDA__A_MAX,
193 CFE__DYNAMICS_MAX = CFE__DYNAMICS_MIN + dDA__MAX,
195 CFE__MAX = CFE__DYNAMICS_MAX,
199 #define AMATRIX_ALIGNMENT dMAX(64, EFFICIENT_ALIGNMENT)
200 #define INVI_ALIGNMENT dMAX(32, EFFICIENT_ALIGNMENT)
201 #define JINVM_ALIGNMENT dMAX(64, EFFICIENT_ALIGNMENT)
203 struct dxStepperStage0Outputs
205 sizeint ji_start;
206 sizeint ji_end;
207 unsigned int m;
208 unsigned int nub;
211 struct dxStepperStage1CallContext
213 void Initialize(const dxStepperProcessingCallContext *stepperCallContext, void *stageMemArenaState, dReal *invI, dJointWithInfo1 *jointinfos)
215 m_stepperCallContext = stepperCallContext;
216 m_stageMemArenaState = stageMemArenaState;
217 m_invI = invI;
218 m_jointinfos = jointinfos;
221 const dxStepperProcessingCallContext *m_stepperCallContext;
222 void *m_stageMemArenaState;
223 dReal *m_invI;
224 dJointWithInfo1 *m_jointinfos;
225 dxStepperStage0Outputs m_stage0Outputs;
228 struct dxStepperStage0BodiesCallContext
230 void Initialize(const dxStepperProcessingCallContext *stepperCallContext, dReal *invI)
232 m_stepperCallContext = stepperCallContext;
233 m_invI = invI;
234 m_tagsTaken = 0;
235 m_gravityTaken = 0;
236 m_inertiaBodyIndex = 0;
239 const dxStepperProcessingCallContext *m_stepperCallContext;
240 dReal *m_invI;
241 atomicord32 m_tagsTaken;
242 atomicord32 m_gravityTaken;
243 volatile atomicord32 m_inertiaBodyIndex;
246 struct dxStepperStage0JointsCallContext
248 void Initialize(const dxStepperProcessingCallContext *stepperCallContext, dJointWithInfo1 *jointinfos, dxStepperStage0Outputs *stage0Outputs)
250 m_stepperCallContext = stepperCallContext;
251 m_jointinfos = jointinfos;
252 m_stage0Outputs = stage0Outputs;
255 const dxStepperProcessingCallContext *m_stepperCallContext;
256 dJointWithInfo1 *m_jointinfos;
257 dxStepperStage0Outputs *m_stage0Outputs;
260 static int dxStepIsland_Stage0_Bodies_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
261 // static int dxStepIsland_Stage0_Joints_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
262 static int dxStepIsland_Stage1_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
264 static void dxStepIsland_Stage0_Bodies(dxStepperStage0BodiesCallContext *callContext);
265 static void dxStepIsland_Stage0_Joints(dxStepperStage0JointsCallContext *callContext);
266 static void dxStepIsland_Stage1(dxStepperStage1CallContext *callContext);
269 struct dxStepperLocalContext
271 void Initialize(dReal *invI, dJointWithInfo1 *jointinfos, unsigned int nj,
272 unsigned int m, unsigned int nub, const unsigned int *mindex, int *findex,
273 dReal *J, dReal *A, dReal *pairsRhsCfm, dReal *pairsLoHi,
274 atomicord32 *bodyStartJoints, atomicord32 *bodyJointLinks)
276 m_invI = invI;
277 m_jointinfos = jointinfos;
278 m_nj = nj;
279 m_m = m;
280 m_nub = nub;
281 m_mindex = mindex;
282 m_findex = findex;
283 m_J = J;
284 m_A = A;
285 m_pairsRhsCfm = pairsRhsCfm;
286 m_pairsLoHi = pairsLoHi;
287 m_bodyStartJoints = bodyStartJoints;
288 m_bodyJointLinks = bodyJointLinks;
291 dReal *m_invI;
292 dJointWithInfo1 *m_jointinfos;
293 unsigned int m_nj;
294 unsigned int m_m;
295 unsigned int m_nub;
296 const unsigned int *m_mindex;
297 int *m_findex;
298 dReal *m_J;
299 dReal *m_A;
300 dReal *m_pairsRhsCfm;
301 dReal *m_pairsLoHi;
302 atomicord32 *m_bodyStartJoints;
303 atomicord32 *m_bodyJointLinks;
306 struct dxStepperStage2CallContext
308 void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext,
309 dReal *JinvM, dReal *rhs_tmp)
311 m_stepperCallContext = callContext;
312 m_localContext = localContext;
313 m_JinvM = JinvM;
314 m_rhs_tmp = rhs_tmp;
315 m_ji_J = 0;
316 m_ji_Ainit = 0;
317 m_ji_JinvM = 0;
318 m_ji_Aaddjb = 0;
319 m_bi_rhs_tmp = 0;
320 m_ji_rhs = 0;
323 const dxStepperProcessingCallContext *m_stepperCallContext;
324 const dxStepperLocalContext *m_localContext;
325 dReal *m_JinvM;
326 dReal *m_rhs_tmp;
327 volatile atomicord32 m_ji_J;
328 volatile atomicord32 m_ji_Ainit;
329 volatile atomicord32 m_ji_JinvM;
330 volatile atomicord32 m_ji_Aaddjb;
331 volatile atomicord32 m_bi_rhs_tmp;
332 volatile atomicord32 m_ji_rhs;
335 struct dxStepperStage3CallContext
337 void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext,
338 void *stage1MemArenaState)
340 m_stepperCallContext = callContext;
341 m_localContext = localContext;
342 m_stage1MemArenaState = stage1MemArenaState;
345 const dxStepperProcessingCallContext *m_stepperCallContext;
346 const dxStepperLocalContext *m_localContext;
347 void *m_stage1MemArenaState;
350 struct dxStepperStage4CallContext
352 void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext/*,
353 void *stage3MemarenaState*/)
355 m_stepperCallContext = callContext;
356 m_localContext = localContext;
357 // m_stage3MemarenaState = stage3MemarenaState;
358 m_bi_constrForce = 0;
361 const dxStepperProcessingCallContext *m_stepperCallContext;
362 const dxStepperLocalContext *m_localContext;
363 // void *m_stage3MemarenaState;
364 volatile atomicord32 m_bi_constrForce;
367 static int dxStepIsland_Stage2a_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
368 static int dxStepIsland_Stage2aSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
369 static int dxStepIsland_Stage2b_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
370 static int dxStepIsland_Stage2bSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
371 static int dxStepIsland_Stage2c_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
372 static int dxStepIsland_Stage3_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
374 static void dxStepIsland_Stage2a(dxStepperStage2CallContext *callContext);
375 static void dxStepIsland_Stage2b(dxStepperStage2CallContext *callContext);
376 static void dxStepIsland_Stage2c(dxStepperStage2CallContext *callContext);
377 static void dxStepIsland_Stage3(dxStepperStage3CallContext *callContext);
379 static int dxStepIsland_Stage4_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
380 static void dxStepIsland_Stage4(dxStepperStage4CallContext *stage4CallContext);
383 //****************************************************************************
384 // special matrix multipliers
387 // this assumes the 4th and 8th rows of B and C are zero.
389 static inline
390 void MultiplyAddJinvMxJToA (dReal *Arow, const dReal *JinvMRow, const dReal *JRow,
391 unsigned int infomJinvM, unsigned int infomJ, unsigned int mskip)
393 dIASSERT (infomJinvM > 0 && infomJ > 0 && Arow && JinvMRow && JRow);
394 const unsigned int mskip_munus_infomJ_plus_1 = mskip - infomJ + 1;
395 dIASSERT(mskip >= infomJ);
396 dReal *currA = Arow;
397 const dReal *currJinvM = JinvMRow;
398 for (unsigned int i = infomJinvM; ; ) {
399 dReal JiM0 = currJinvM[JIM_LX];
400 dReal JiM1 = currJinvM[JIM_LY];
401 dReal JiM2 = currJinvM[JIM_LZ];
402 dReal JiM4 = currJinvM[JIM_AX];
403 dReal JiM5 = currJinvM[JIM_AY];
404 dReal JiM6 = currJinvM[JIM_AZ];
405 const dReal *currJ = JRow;
406 for (unsigned int j = infomJ; ; ) {
407 dReal sum;
408 sum = JiM0 * currJ[JME_JLX];
409 sum += JiM1 * currJ[JME_JLY];
410 sum += JiM2 * currJ[JME_JLZ];
411 sum += JiM4 * currJ[JME_JAX];
412 sum += JiM5 * currJ[JME_JAY];
413 sum += JiM6 * currJ[JME_JAZ];
414 *currA += sum;
415 if (--j == 0) {
416 break;
418 ++currA;
419 currJ += JME__MAX;
421 if (--i == 0) {
422 break;
424 currJinvM += JIM__MAX;
425 currA += mskip_munus_infomJ_plus_1;
430 // this assumes the 4th and 8th rows of B are zero.
432 static inline
433 void MultiplySubJxRhsTmpFromRHS (dReal *rowRhsCfm, const dReal *JRow, const dReal *rowRhsTmp, unsigned int infom)
435 dIASSERT (infom > 0 && rowRhsCfm && JRow && rowRhsTmp);
436 dReal *currRhs = rowRhsCfm + RCE_RHS;
437 const dReal *currJ = JRow;
438 const dReal RT_LX = rowRhsTmp[dDA_LX], RT_LY = rowRhsTmp[dDA_LY], RT_LZ = rowRhsTmp[dDA_LZ];
439 const dReal RT_AX = rowRhsTmp[dDA_AX], RT_AY = rowRhsTmp[dDA_AY], RT_AZ = rowRhsTmp[dDA_AZ];
440 for (unsigned int i = infom; ; ) {
441 dReal sum;
442 sum = currJ[JME_JLX] * RT_LX;
443 sum += currJ[JME_JLY] * RT_LY;
444 sum += currJ[JME_JLZ] * RT_LZ;
445 sum += currJ[JME_JAX] * RT_AX;
446 sum += currJ[JME_JAY] * RT_AY;
447 sum += currJ[JME_JAZ] * RT_AZ;
448 *currRhs -= sum;
449 if (--i == 0) {
450 break;
452 currRhs += RCE__RHS_CFM_MAX;
453 currJ += JME__MAX;
458 static inline
459 void MultiplyAddJxLambdaToCForce(dReal cforce[CFE__MAX],
460 const dReal *JRow, const dReal *rowRhsLambda, unsigned int infom,
461 dJointFeedback *fb/*=NULL*/, unsigned jointBodyIndex)
463 dIASSERT (infom > 0 && cforce && JRow && rowRhsLambda);
464 dReal sumLX = 0, sumLY = 0, sumLZ = 0, sumAX=0, sumAY = 0, sumAZ = 0;
465 const dReal *currJ = JRow, *currLambda = rowRhsLambda + RLE_LAMBDA;
466 for (unsigned int k = infom; ; ) {
467 const dReal lambda = *currLambda;
468 sumLX += currJ[JME_JLX] * lambda;
469 sumLY += currJ[JME_JLY] * lambda;
470 sumLZ += currJ[JME_JLZ] * lambda;
471 sumAX += currJ[JME_JAX] * lambda;
472 sumAY += currJ[JME_JAY] * lambda;
473 sumAZ += currJ[JME_JAZ] * lambda;
474 if (--k == 0) {
475 break;
477 currJ += JME__MAX;
478 currLambda += RLE__RHS_LAMBDA_MAX;
480 if (fb != NULL) {
481 if (jointBodyIndex == dJCB__MIN) {
482 fb->f1[dV3E_X] = sumLX;
483 fb->f1[dV3E_Y] = sumLY;
484 fb->f1[dV3E_Z] = sumLZ;
485 fb->t1[dV3E_X] = sumAX;
486 fb->t1[dV3E_Y] = sumAY;
487 fb->t1[dV3E_Z] = sumAZ;
489 else {
490 dIASSERT(jointBodyIndex == dJCB__MIN + 1);
491 dSASSERT(dJCB__MAX == 2);
493 fb->f2[dV3E_X] = sumLX;
494 fb->f2[dV3E_Y] = sumLY;
495 fb->f2[dV3E_Z] = sumLZ;
496 fb->t2[dV3E_X] = sumAX;
497 fb->t2[dV3E_Y] = sumAY;
498 fb->t2[dV3E_Z] = sumAZ;
501 cforce[CFE_LX] += sumLX;
502 cforce[CFE_LY] += sumLY;
503 cforce[CFE_LZ] += sumLZ;
504 cforce[CFE_AX] += sumAX;
505 cforce[CFE_AY] += sumAY;
506 cforce[CFE_AZ] += sumAZ;
510 //****************************************************************************
512 /*extern */
513 void dxStepIsland(const dxStepperProcessingCallContext *callContext)
515 IFTIMING(dTimerStart("preprocessing"));
517 dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
518 dxWorld *world = callContext->m_world;
519 unsigned int nb = callContext->m_islandBodiesCount;
520 unsigned int _nj = callContext->m_islandJointsCount;
522 dReal *invI = memarena->AllocateOveralignedArray<dReal>(dM3E__MAX * (sizeint)nb, INVI_ALIGNMENT);
523 // Reserve twice as much memory and start from the middle so that regardless of
524 // what direction the array grows to there would be sufficient room available.
525 const sizeint ji_reserve_count = 2 * (sizeint)_nj;
526 dJointWithInfo1 *const jointinfos = memarena->AllocateArray<dJointWithInfo1>(ji_reserve_count);
528 const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
529 dIASSERT(allowedThreads != 0);
531 void *stagesMemArenaState = memarena->SaveState();
533 dxStepperStage1CallContext *stage1CallContext = (dxStepperStage1CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage1CallContext));
534 stage1CallContext->Initialize(callContext, stagesMemArenaState, invI, jointinfos);
536 dxStepperStage0BodiesCallContext *stage0BodiesCallContext = (dxStepperStage0BodiesCallContext *)memarena->AllocateBlock(sizeof(dxStepperStage0BodiesCallContext));
537 stage0BodiesCallContext->Initialize(callContext, invI);
539 dxStepperStage0JointsCallContext *stage0JointsCallContext = (dxStepperStage0JointsCallContext *)memarena->AllocateBlock(sizeof(dxStepperStage0JointsCallContext));
540 stage0JointsCallContext->Initialize(callContext, jointinfos, &stage1CallContext->m_stage0Outputs);
542 if (allowedThreads == 1)
544 dxStepIsland_Stage0_Bodies(stage0BodiesCallContext);
545 dxStepIsland_Stage0_Joints(stage0JointsCallContext);
546 dxStepIsland_Stage1(stage1CallContext);
548 else
550 unsigned bodyThreads = allowedThreads;
551 unsigned jointThreads = 1;
553 dCallReleaseeID stage1CallReleasee;
554 world->PostThreadedCallForUnawareReleasee(NULL, &stage1CallReleasee, bodyThreads + jointThreads, callContext->m_finalReleasee,
555 NULL, &dxStepIsland_Stage1_Callback, stage1CallContext, 0, "StepIsland Stage1");
557 world->PostThreadedCallsGroup(NULL, bodyThreads, stage1CallReleasee, &dxStepIsland_Stage0_Bodies_Callback, stage0BodiesCallContext, "StepIsland Stage0-Bodies");
559 dxStepIsland_Stage0_Joints(stage0JointsCallContext);
560 world->AlterThreadedCallDependenciesCount(stage1CallReleasee, -1);
561 dIASSERT(jointThreads == 1);
565 static
566 int dxStepIsland_Stage0_Bodies_Callback(void *_callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
568 (void)callInstanceIndex; // unused
569 (void)callThisReleasee; // unused
570 dxStepperStage0BodiesCallContext *callContext = (dxStepperStage0BodiesCallContext *)_callContext;
571 dxStepIsland_Stage0_Bodies(callContext);
572 return 1;
575 static
576 void dxStepIsland_Stage0_Bodies(dxStepperStage0BodiesCallContext *callContext)
578 dxBody * const *body = callContext->m_stepperCallContext->m_islandBodiesStart;
579 unsigned int nb = callContext->m_stepperCallContext->m_islandBodiesCount;
581 if (ThrsafeExchange(&callContext->m_tagsTaken, 1) == 0)
583 // number all bodies in the body list - set their tag values
584 for (unsigned int i=0; i<nb; i++) body[i]->tag = i;
587 if (ThrsafeExchange(&callContext->m_gravityTaken, 1) == 0)
589 dxWorld *world = callContext->m_stepperCallContext->m_world;
591 // add the gravity force to all bodies
592 // since gravity does normally have only one component it's more efficient
593 // to run three loops for each individual component
594 dxBody *const *const bodyend = body + nb;
595 dReal gravity_x = world->gravity[0];
596 if (gravity_x) {
597 for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
598 dxBody *b = *bodycurr;
599 if ((b->flags & dxBodyNoGravity) == 0) {
600 b->facc[dV3E_X] += b->mass.mass * gravity_x;
604 dReal gravity_y = world->gravity[1];
605 if (gravity_y) {
606 for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
607 dxBody *b = *bodycurr;
608 if ((b->flags & dxBodyNoGravity) == 0) {
609 b->facc[dV3E_Y] += b->mass.mass * gravity_y;
613 dReal gravity_z = world->gravity[2];
614 if (gravity_z) {
615 for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
616 dxBody *b = *bodycurr;
617 if ((b->flags & dxBodyNoGravity) == 0) {
618 b->facc[dV3E_Z] += b->mass.mass * gravity_z;
624 // for all bodies, compute the inertia tensor and its inverse in the global
625 // frame, and compute the rotational force and add it to the torque
626 // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
628 dReal *invIrow = callContext->m_invI;
629 unsigned int bodyIndex = ThrsafeIncrementIntUpToLimit(&callContext->m_inertiaBodyIndex, nb);
631 for (unsigned int i = 0; i != nb; invIrow += dM3E__MAX, ++i) {
632 if (i == bodyIndex) {
633 dMatrix3 tmp;
634 dxBody *b = body[i];
636 // compute inverse inertia tensor in global frame
637 dMultiply2_333 (tmp, b->invI, b->posr.R);
638 dMultiply0_333 (invIrow, b->posr.R, tmp);
640 // Don't apply gyroscopic torques to bodies
641 // if not flagged or the body is kinematic
642 if ((b->flags & dxBodyGyroscopic) && (b->invMass > 0)) {
643 dMatrix3 I;
644 // compute inertia tensor in global frame
645 dMultiply2_333 (tmp,b->mass.I,b->posr.R);
646 dMultiply0_333 (I,b->posr.R,tmp);
647 // compute rotational force
648 #if 0
649 // Explicit computation
650 dMultiply0_331 (tmp,I,b->avel);
651 dSubtractVectorCross3(b->tacc,b->avel,tmp);
652 #else
653 // Do the implicit computation based on
654 //"Stabilizing Gyroscopic Forces in Rigid Multibody Simulations"
655 // (Lacoursière 2006)
656 dReal h = callContext->m_stepperCallContext->m_stepSize; // Step size
657 dVector3 L; // Compute angular momentum
658 dMultiply0_331(L, I, b->avel);
660 // Compute a new effective 'inertia tensor'
661 // for the implicit step: the cross-product
662 // matrix of the angular momentum plus the
663 // old tensor scaled by the timestep.
664 // Itild may not be symmetric pos-definite,
665 // but we can still use it to compute implicit
666 // gyroscopic torques.
667 dMatrix3 Itild = { 0 };
668 dSetCrossMatrixMinus(Itild, L, dV3E__MAX);
669 for (int ii = dM3E__MIN; ii != dM3E__MAX; ++ii) {
670 Itild[ii] = Itild[ii] * h + I[ii];
673 // Scale momentum by inverse time to get
674 // a sort of "torque"
675 dScaleVector3(L, dRecip(h));
676 // Invert the pseudo-tensor
677 dMatrix3 itInv;
678 // This is a closed-form inversion.
679 // It's probably not numerically stable
680 // when dealing with small masses with
681 // a large asymmetry.
682 // An LU decomposition might be better.
683 if (dInvertMatrix3(itInv, Itild) != 0) {
684 // "Divide" the original tensor
685 // by the pseudo-tensor (on the right)
686 dMultiply0_333(Itild, I, itInv);
687 // Subtract an identity matrix
688 Itild[dM3E_XX] -= 1; Itild[dM3E_YY] -= 1; Itild[dM3E_ZZ] -= 1;
690 // This new inertia matrix rotates the
691 // momentum to get a new set of torques
692 // that will work correctly when applied
693 // to the old inertia matrix as explicit
694 // torques with a semi-implicit update
695 // step.
696 dVector3 tau0;
697 dMultiply0_331(tau0,Itild,L);
699 // Add the gyro torques to the torque
700 // accumulator
701 for (int ii = dSA__MIN; ii != dSA__MAX; ++ii) {
702 b->tacc[dV3E__AXES_MIN + ii] += tau0[dV3E__AXES_MIN + ii];
705 #endif
708 bodyIndex = ThrsafeIncrementIntUpToLimit(&callContext->m_inertiaBodyIndex, nb);
714 // static
715 // int dxStepIsland_Stage0_Joints_Callback(void *_callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
716 // {
717 // (void)callInstanceIndex; // unused
718 // (void)callThisReleasee; // unused
719 // dxStepperStage0JointsCallContext *callContext = (dxStepperStage0JointsCallContext *)_callContext;
720 // dxStepIsland_Stage0_Joints(callContext);
721 // return 1;
722 // }
724 static
725 void dxStepIsland_Stage0_Joints(dxStepperStage0JointsCallContext *callContext)
727 dxJoint * const *_joint = callContext->m_stepperCallContext->m_islandJointsStart;
728 dJointWithInfo1 *jointinfos = callContext->m_jointinfos;
729 unsigned int _nj = callContext->m_stepperCallContext->m_islandJointsCount;
731 // get m = total constraint dimension, nub = number of unbounded variables.
732 // create constraint offset array and number-of-rows array for all joints.
733 // the constraints are re-ordered as follows: the purely unbounded
734 // constraints, the mixed unbounded + LCP constraints, and last the purely
735 // LCP constraints. this assists the LCP solver to put all unbounded
736 // variables at the start for a quick factorization.
738 // joints with m=0 are inactive and are removed from the joints array
739 // entirely, so that the code that follows does not consider them.
740 // also number all active joints in the joint list (set their tag values).
741 // inactive joints receive a tag value of -1.
743 sizeint ji_start, ji_end;
745 unsigned int mcurr = 0;
746 sizeint unb_start, mix_start, mix_end, lcp_end;
747 unb_start = mix_start = mix_end = lcp_end = _nj;
749 dJointWithInfo1 *jicurr = jointinfos + lcp_end;
750 dxJoint *const *const _jend = _joint + _nj;
751 dxJoint *const *_jcurr = _joint;
752 while (true) {
753 // -------------------------------------------------------------------------
754 // Switch to growing array forward
756 bool fwd_end_reached = false;
757 dJointWithInfo1 *jimixend = jointinfos + mix_end;
758 while (true) { // jicurr=dest, _jcurr=src
759 if (_jcurr == _jend) {
760 lcp_end = jicurr - jointinfos;
761 fwd_end_reached = true;
762 break;
764 dxJoint *j = *_jcurr++;
765 j->getInfo1 (&jicurr->info);
766 dIASSERT (/*jicurr->info.m >= 0 && */jicurr->info.m <= 6 && /*jicurr->info.nub >= 0 && */jicurr->info.nub <= jicurr->info.m);
767 if (jicurr->info.m != 0) {
768 mcurr += jicurr->info.m;
769 if (jicurr->info.nub == 0) { // A lcp info - a correct guess!!!
770 jicurr->joint = j;
771 ++jicurr;
772 } else if (jicurr->info.nub < jicurr->info.m) { // A mixed case
773 if (unb_start == mix_start) { // no unbounded infos yet - just move to opposite side of mixed-s
774 unb_start = mix_start = mix_start - 1;
775 dJointWithInfo1 *jimixstart = jointinfos + mix_start;
776 jimixstart->info = jicurr->info;
777 jimixstart->joint = j;
778 } else if (jimixend != jicurr) { // have to swap to the tail of mixed-s
779 dxJoint::Info1 tmp_info = jicurr->info;
780 *jicurr = *jimixend;
781 jimixend->info = tmp_info;
782 jimixend->joint = j;
783 ++jimixend; ++jicurr;
784 } else { // no need to swap as there are no LCP info-s yet
785 jicurr->joint = j;
786 jimixend = jicurr = jicurr + 1;
788 } else { // A purely unbounded case -- break out and proceed growing in opposite direction
789 unb_start = unb_start - 1;
790 dJointWithInfo1 *jiunbstart = jointinfos + unb_start;
791 jiunbstart->info = jicurr->info;
792 jiunbstart->joint = j;
793 lcp_end = jicurr - jointinfos;
794 mix_end = jimixend - jointinfos;
795 jicurr = jiunbstart - 1;
796 break;
798 } else {
799 j->tag = -1;
802 if (fwd_end_reached) {
803 break;
806 // -------------------------------------------------------------------------
807 // Switch to growing array backward
809 bool bkw_end_reached = false;
810 dJointWithInfo1 *jimixstart = jointinfos + mix_start - 1;
811 while (true) { // jicurr=dest, _jcurr=src
812 if (_jcurr == _jend) {
813 unb_start = (jicurr + 1) - jointinfos;
814 mix_start = (jimixstart + 1) - jointinfos;
815 bkw_end_reached = true;
816 break;
818 dxJoint *j = *_jcurr++;
819 j->getInfo1 (&jicurr->info);
820 dIASSERT (/*jicurr->info.m >= 0 && */jicurr->info.m <= 6 && /*jicurr->info.nub >= 0 && */jicurr->info.nub <= jicurr->info.m);
821 if (jicurr->info.m != 0) {
822 mcurr += jicurr->info.m;
823 if (jicurr->info.nub == jicurr->info.m) { // An unbounded info - a correct guess!!!
824 jicurr->joint = j;
825 --jicurr;
826 } else if (jicurr->info.nub != 0) { // A mixed case
827 if (mix_end == lcp_end) { // no lcp infos yet - just move to opposite side of mixed-s
828 dJointWithInfo1 *jimixend = jointinfos + mix_end;
829 lcp_end = mix_end = mix_end + 1;
830 jimixend->info = jicurr->info;
831 jimixend->joint = j;
832 } else if (jimixstart != jicurr) { // have to swap to the head of mixed-s
833 dxJoint::Info1 tmp_info = jicurr->info;
834 *jicurr = *jimixstart;
835 jimixstart->info = tmp_info;
836 jimixstart->joint = j;
837 --jimixstart; --jicurr;
838 } else { // no need to swap as there are no unbounded info-s yet
839 jicurr->joint = j;
840 jimixstart = jicurr = jicurr - 1;
842 } else { // A purely lcp case -- break out and proceed growing in opposite direction
843 dJointWithInfo1 *jilcpend = jointinfos + lcp_end;
844 lcp_end = lcp_end + 1;
845 jilcpend->info = jicurr->info;
846 jilcpend->joint = j;
847 unb_start = (jicurr + 1) - jointinfos;
848 mix_start = (jimixstart + 1) - jointinfos;
849 jicurr = jilcpend + 1;
850 break;
852 } else {
853 j->tag = -1;
856 if (bkw_end_reached) {
857 break;
862 callContext->m_stage0Outputs->m = mcurr;
863 callContext->m_stage0Outputs->nub = (unsigned)(mix_start - unb_start);
864 dIASSERT((sizeint)(mix_start - unb_start) <= (sizeint)UINT_MAX);
865 ji_start = unb_start;
866 ji_end = lcp_end;
870 const dJointWithInfo1 *jicurr = jointinfos + ji_start;
871 const dJointWithInfo1 *const jiend = jointinfos + ji_end;
872 for (unsigned int i = 0; jicurr != jiend; i++, ++jicurr) {
873 jicurr->joint->tag = i;
877 callContext->m_stage0Outputs->ji_start = ji_start;
878 callContext->m_stage0Outputs->ji_end = ji_end;
881 static
882 int dxStepIsland_Stage1_Callback(void *_stage1CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
884 (void)callInstanceIndex; // unused
885 (void)callThisReleasee; // unused
886 dxStepperStage1CallContext *stage1CallContext = (dxStepperStage1CallContext *)_stage1CallContext;
887 dxStepIsland_Stage1(stage1CallContext);
888 return 1;
891 static
892 void dxStepIsland_Stage1(dxStepperStage1CallContext *stage1CallContext)
894 const dxStepperProcessingCallContext *callContext = stage1CallContext->m_stepperCallContext;
895 dJointWithInfo1 *_jointinfos = stage1CallContext->m_jointinfos;
896 dReal *invI = stage1CallContext->m_invI;
897 sizeint ji_start = stage1CallContext->m_stage0Outputs.ji_start;
898 sizeint ji_end = stage1CallContext->m_stage0Outputs.ji_end;
899 unsigned int m = stage1CallContext->m_stage0Outputs.m;
900 unsigned int nub = stage1CallContext->m_stage0Outputs.nub;
902 dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
904 memarena->RestoreState(stage1CallContext->m_stageMemArenaState);
905 stage1CallContext = NULL; // WARNING! _stage1CallContext is not valid after this point!
906 dIVERIFY(stage1CallContext == NULL); // To suppress compiler warnings about unused variable assignment
908 unsigned int _nj = callContext->m_islandJointsCount;
909 const sizeint ji_reserve_count = 2 * (sizeint)_nj;
910 memarena->ShrinkArray<dJointWithInfo1>(_jointinfos, ji_reserve_count, ji_end);
913 dJointWithInfo1 *jointinfos = _jointinfos + ji_start;
914 unsigned int nj = (unsigned int)(ji_end - ji_start);
915 dIASSERT((sizeint)(ji_end - ji_start) <= (sizeint)UINT_MAX);
917 unsigned int *mindex = NULL;
918 dReal *J = NULL, *A = NULL, *pairsRhsCfm = NULL, *pairsLoHi = NULL;
919 int *findex = NULL;
920 atomicord32 *bodyStartJoints = NULL, *bodyJointLinks = NULL;
922 // if there are constraints, compute constrForce
923 if (m > 0) {
924 mindex = memarena->AllocateArray<unsigned int>((sizeint)(nj + 1));
926 unsigned int *mcurr = mindex;
927 unsigned int moffs = 0;
928 mcurr[0] = moffs;
929 mcurr += 1;
931 const dJointWithInfo1 *const jiend = jointinfos + nj;
932 for (const dJointWithInfo1 *jicurr = jointinfos; jicurr != jiend; ++jicurr) {
933 //dxJoint *joint = jicurr->joint;
934 moffs += jicurr->info.m;
935 mcurr[0] = moffs;
936 mcurr += 1;
940 // create a constraint equation right hand side vector `c', a constraint
941 // force mixing vector `cfm', and LCP low and high bound vectors, and an
942 // 'findex' vector.
943 findex = memarena->AllocateArray<int>(m);
944 J = memarena->AllocateArray<dReal>((sizeint)m * (2 * JME__MAX));
945 A = memarena->AllocateOveralignedArray<dReal>((sizeint)m * dPAD(m), AMATRIX_ALIGNMENT);
946 pairsRhsCfm = memarena->AllocateArray<dReal>((sizeint)m * RCE__RHS_CFM_MAX);
947 pairsLoHi = memarena->AllocateArray<dReal>((sizeint)m * LHE__LO_HI_MAX);
948 const unsigned int nb = callContext->m_islandBodiesCount;
949 bodyStartJoints = memarena->AllocateArray<atomicord32>(nb);
950 bodyJointLinks = memarena->AllocateArray<atomicord32>((sizeint)nj * dJCB__MAX);
951 dICHECK(nj < ~((atomicord32)0) / dJCB__MAX); // If larger joint counts are to be used, pointers (or sizeint) need to be stored rather than atomicord32 indices
954 dxStepperLocalContext *localContext = (dxStepperLocalContext *)memarena->AllocateBlock(sizeof(dxStepperLocalContext));
955 localContext->Initialize(invI, jointinfos, nj, m, nub, mindex, findex, J, A, pairsRhsCfm, pairsLoHi, bodyStartJoints, bodyJointLinks);
957 void *stage1MemarenaState = memarena->SaveState();
958 dxStepperStage3CallContext *stage3CallContext = (dxStepperStage3CallContext*)memarena->AllocateBlock(sizeof(dxStepperStage3CallContext));
959 stage3CallContext->Initialize(callContext, localContext, stage1MemarenaState);
961 if (m > 0) {
962 dReal *JinvM = memarena->AllocateOveralignedArray<dReal>((sizeint)m * (2 * JIM__MAX), JINVM_ALIGNMENT);
963 const unsigned int nb = callContext->m_islandBodiesCount;
964 dReal *rhs_tmp = memarena->AllocateArray<dReal>((sizeint)nb * dDA__MAX);
966 dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage2CallContext));
967 stage2CallContext->Initialize(callContext, localContext, JinvM, rhs_tmp);
969 const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
970 dIASSERT(allowedThreads != 0);
972 if (allowedThreads == 1) {
973 IFTIMING(dTimerNow("create J"));
974 dxStepIsland_Stage2a(stage2CallContext);
975 IFTIMING(dTimerNow("compute Adiag, JinvM and rhs_tmp"));
976 dxStepIsland_Stage2b(stage2CallContext);
977 IFTIMING(dTimerNow("compute A and rhs"));
978 dxStepIsland_Stage2c(stage2CallContext);
979 dxStepIsland_Stage3(stage3CallContext);
981 else {
982 dxWorld *world = callContext->m_world;
983 dCallReleaseeID stage3CallReleasee;
984 world->PostThreadedCallForUnawareReleasee(NULL, &stage3CallReleasee, 1, callContext->m_finalReleasee,
985 NULL, &dxStepIsland_Stage3_Callback, stage3CallContext, 0, "StepIsland Stage3");
987 dCallReleaseeID stage2bSyncReleasee;
988 world->PostThreadedCall(NULL, &stage2bSyncReleasee, 1, stage3CallReleasee,
989 NULL, &dxStepIsland_Stage2bSync_Callback, stage2CallContext, 0, "StepIsland Stage2b Sync");
991 dCallReleaseeID stage2aSyncReleasee;
992 world->PostThreadedCall(NULL, &stage2aSyncReleasee, allowedThreads, stage2bSyncReleasee,
993 NULL, &dxStepIsland_Stage2aSync_Callback, stage2CallContext, 0, "StepIsland Stage2a Sync");
995 dIASSERT(allowedThreads > 1); /*if (allowedThreads > 1) */{
996 world->PostThreadedCallsGroup(NULL, allowedThreads - 1, stage2aSyncReleasee, &dxStepIsland_Stage2a_Callback, stage2CallContext, "StepIsland Stage2a");
998 dxStepIsland_Stage2a(stage2CallContext);
999 world->AlterThreadedCallDependenciesCount(stage2aSyncReleasee, -1);
1002 else {
1003 dxStepIsland_Stage3(stage3CallContext);
1008 static
1009 int dxStepIsland_Stage2a_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
1011 (void)callInstanceIndex; // unused
1012 (void)callThisReleasee; // unused
1013 dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
1014 dxStepIsland_Stage2a(stage2CallContext);
1015 return 1;
1018 static
1019 void dxStepIsland_Stage2a(dxStepperStage2CallContext *stage2CallContext)
1021 const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
1022 const dxStepperLocalContext *localContext = stage2CallContext->m_localContext;
1023 dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
1024 unsigned int nj = localContext->m_nj;
1025 const unsigned int *mindex = localContext->m_mindex;
1027 const dReal stepsizeRecip = dRecip(callContext->m_stepSize);
1028 dxWorld *world = callContext->m_world;
1031 int *findex = localContext->m_findex;
1032 dReal *J = localContext->m_J;
1033 dReal *pairsRhsCfm = localContext->m_pairsRhsCfm;
1034 dReal *pairsLoHi = localContext->m_pairsLoHi;
1036 // get jacobian data from constraints. a (2*m)x8 matrix will be created
1037 // to store the two jacobian blocks from each constraint. it has this
1038 // format:
1040 // l l l 0 a a a 0 \ .
1041 // l l l 0 a a a 0 }-- jacobian body 1 block for joint 0 (3 rows)
1042 // l l l 0 a a a 0 /
1043 // l l l 0 a a a 0 \ .
1044 // l l l 0 a a a 0 }-- jacobian body 2 block for joint 0 (3 rows)
1045 // l l l 0 a a a 0 /
1046 // l l l 0 a a a 0 }--- jacobian body 1 block for joint 1 (1 row)
1047 // l l l 0 a a a 0 }--- jacobian body 2 block for joint 1 (1 row)
1048 // etc...
1050 // (lll) = linear jacobian data
1051 // (aaa) = angular jacobian data
1054 const dReal worldERP = world->global_erp;
1055 const dReal worldCFM = world->global_cfm;
1057 unsigned ji;
1058 while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_J, nj)) != nj) {
1059 const unsigned ofsi = mindex[ji];
1060 const unsigned int infom = mindex[ji + 1] - ofsi;
1062 dReal *const JRow = J + (sizeint)ofsi * (2 * JME__MAX);
1063 dReal *rowRhsCfm = pairsRhsCfm + (sizeint)ofsi * RCE__RHS_CFM_MAX;
1064 dReal *rowLoHi = pairsLoHi + (sizeint)ofsi * LHE__LO_HI_MAX;
1066 dSetZero (JRow, infom * (2 * JME__MAX));
1068 dReal *const endRhsCfm = rowRhsCfm + infom * RCE__RHS_CFM_MAX;
1069 for (dReal *currRhsCfm = rowRhsCfm; currRhsCfm != endRhsCfm; currRhsCfm += RCE__RHS_CFM_MAX) {
1070 currRhsCfm[RCE_RHS] = REAL(0.0);
1071 currRhsCfm[RCE_CFM] = worldCFM;
1074 dReal *const endLoHi = rowLoHi + infom * LHE__LO_HI_MAX;
1075 for (dReal *currLoHi = rowLoHi; currLoHi != endLoHi; currLoHi += LHE__LO_HI_MAX) {
1076 currLoHi[LHE_LO] = -dInfinity;
1077 currLoHi[LHE_HI] = dInfinity;
1080 int *findexRow = findex + ofsi;
1081 dSetValue(findexRow, infom, -1);
1083 dxJoint *joint = jointinfos[ji].joint;
1084 joint->getInfo2(stepsizeRecip, worldERP, JME__MAX, JRow + JME__J_MIN, JRow + infom * JME__MAX + JME__J_MIN, RCE__RHS_CFM_MAX, rowRhsCfm, rowLoHi, findexRow);
1085 dSASSERT((int)LHE__LO_HI_MAX == RCE__RHS_CFM_MAX); // To make sure same step fits for both pairs in the call to dxJoint::getInfo2() above
1087 // findex iteration is compact and is not going to pollute caches - do it first
1089 // adjust returned findex values for global index numbering
1090 int *const findicesEnd = findexRow + infom;
1091 for (int *findexCurr = findexRow; findexCurr != findicesEnd; ++findexCurr) {
1092 int fival = *findexCurr;
1093 if (fival != -1) {
1094 *findexCurr = fival + ofsi;
1099 dReal *const endRhsCfm = rowRhsCfm + infom * RCE__RHS_CFM_MAX;
1100 for (dReal *currRhsCfm = rowRhsCfm; currRhsCfm != endRhsCfm; currRhsCfm += RCE__RHS_CFM_MAX) {
1101 currRhsCfm[RCE_RHS] *= stepsizeRecip;
1102 currRhsCfm[RCE_CFM] *= stepsizeRecip;
1109 static
1110 int dxStepIsland_Stage2aSync_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
1112 (void)callInstanceIndex; // unused
1113 dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
1114 const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
1115 const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
1117 dIASSERT(allowedThreads > 1); /*if (allowedThreads > 1) */{ // The allowed thread count is greater than one as otherwise current function would not be scheduled for execution from the previous stage
1118 dxWorld *world = callContext->m_world;
1119 world->AlterThreadedCallDependenciesCount(callThisReleasee, allowedThreads - 1);
1120 world->PostThreadedCallsGroup(NULL, allowedThreads - 1, callThisReleasee, &dxStepIsland_Stage2b_Callback, stage2CallContext, "StepIsland Stage2b");
1122 dxStepIsland_Stage2b(stage2CallContext);
1124 return 1;
1127 static
1128 int dxStepIsland_Stage2b_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
1130 (void)callInstanceIndex; // unused
1131 (void)callThisReleasee; // unused
1132 dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
1133 dxStepIsland_Stage2b(stage2CallContext);
1134 return 1;
1137 static
1138 void dxStepIsland_Stage2b(dxStepperStage2CallContext *stage2CallContext)
1140 const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
1141 const dxStepperLocalContext *localContext = stage2CallContext->m_localContext;
1142 dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
1143 unsigned int nj = localContext->m_nj;
1144 const unsigned int *mindex = localContext->m_mindex;
1147 // Warning!!!
1148 // This code depends on cfm elements and therefore must be in different sub-stage
1149 // from Jacobian construction in Stage2a to ensure proper synchronization
1150 // and avoid accessing numbers being modified.
1151 // Warning!!!
1152 dReal *A = localContext->m_A;
1153 const dReal *pairsRhsCfm = localContext->m_pairsRhsCfm;
1154 const unsigned m = localContext->m_m;
1156 const unsigned int mskip = dPAD(m);
1158 unsigned ji;
1159 while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_Ainit, nj)) != nj) {
1160 const unsigned ofsi = mindex[ji];
1161 const unsigned int infom = mindex[ji + 1] - ofsi;
1163 dReal *Arow = A + (sizeint)mskip * ofsi;
1164 dSetZero(Arow, (sizeint)mskip * infom);
1165 dReal *Adiag = Arow + ofsi;
1166 const dReal *rowRfsCrm = pairsRhsCfm + (sizeint)ofsi * RCE__RHS_CFM_MAX;
1167 for (unsigned int i = 0; i != infom; Adiag += mskip, ++i) {
1168 Adiag[i] = (rowRfsCrm + i * RCE__RHS_CFM_MAX)[RCE_CFM];
1174 // Warning!!!
1175 // This code depends on J elements and therefore must be in different sub-stage
1176 // from Jacobian construction in Stage2a to ensure proper synchronization
1177 // and avoid accessing numbers being modified.
1178 // Warning!!!
1179 const dReal *invI = localContext->m_invI;
1180 const dReal *J = localContext->m_J;
1181 dReal *JinvM = stage2CallContext->m_JinvM;
1183 // compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
1184 // format as J so we just go through the constraints in J multiplying by
1185 // the appropriate scalars and matrices.
1186 unsigned ji;
1187 while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_JinvM, nj)) != nj) {
1188 const unsigned ofsi = mindex[ji];
1189 const unsigned int infom = mindex[ji + 1] - ofsi;
1191 dReal *Jdst = JinvM + (sizeint)ofsi * (2 * JIM__MAX);
1192 dSetZero(Jdst, infom * (2 * JIM__MAX));
1194 const dReal *Jsrc = J + (sizeint)ofsi * (2 * JME__MAX);
1195 dxJoint *joint = jointinfos[ji].joint;
1197 dxBody *jb0 = joint->node[0].body;
1198 if (true || jb0 != NULL) { // -- always true
1199 dReal body_invMass0 = jb0->invMass;
1200 const dReal *body_invI0 = invI + (sizeint)(unsigned int)jb0->tag * dM3E__MAX;
1201 for (unsigned int j = infom; j != 0; --j) {
1202 for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) Jdst[JIM__L_AXES_MIN + k] = Jsrc[JME__JL_MIN + k] * body_invMass0;
1203 dMultiply0_133(Jdst + JIM__A_AXES_MIN, Jsrc + JME__JA_MIN, body_invI0);
1204 Jsrc += JME__MAX;
1205 Jdst += JIM__MAX;
1209 dxBody *jb1 = joint->node[1].body;
1210 if (jb1 != NULL) {
1211 dReal body_invMass1 = jb1->invMass;
1212 const dReal *body_invI1 = invI + (sizeint)(unsigned int)jb1->tag * dM3E__MAX;
1213 for (unsigned int j = infom; j != 0; --j) {
1214 for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) Jdst[JIM__L_AXES_MIN + k] = Jsrc[JME__JL_MIN + k] * body_invMass1;
1215 dMultiply0_133 (Jdst + JIM__A_AXES_MIN, Jsrc + JME__JA_MIN, body_invI1);
1216 Jsrc += JME__MAX;
1217 Jdst += JIM__MAX;
1224 // Warning!!!
1225 // This code reads facc/tacc fields of body objects which (the fields)
1226 // may be modified by dxJoint::getInfo2(). Therefore the code must be
1227 // in different sub-stage from Jacobian construction in Stage2a
1228 // to ensure proper synchronization and avoid accessing numbers being modified.
1229 // Warning!!!
1230 dxBody * const *const body = callContext->m_islandBodiesStart;
1231 const unsigned int nb = callContext->m_islandBodiesCount;
1232 const dReal *invI = localContext->m_invI;
1233 atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
1234 dReal *rhs_tmp = stage2CallContext->m_rhs_tmp;
1236 // compute the right hand side `rhs'
1237 const dReal stepsizeRecip = dRecip(callContext->m_stepSize);
1239 // put v/h + invM*fe into rhs_tmp
1240 unsigned bi;
1241 while ((bi = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_bi_rhs_tmp, nb)) != nb) {
1242 dReal *tmp1curr = rhs_tmp + (sizeint)bi * dDA__MAX;
1243 const dReal *invIrow = invI + (sizeint)bi * dM3E__MAX;
1244 dxBody *b = body[bi];
1245 // dSetZero(tmp1curr, 8); -- not needed
1246 for (unsigned int j = dSA__MIN; j != dSA__MAX; ++j) tmp1curr[dDA__L_MIN + j] = b->facc[dV3E__AXES_MIN + j] * b->invMass + b->lvel[dV3E__AXES_MIN + j] * stepsizeRecip;
1247 dMultiply0_331 (tmp1curr + dDA__A_MIN, invIrow, b->tacc);
1248 for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) tmp1curr[dDA__A_MIN + k] += b->avel[dV3E__AXES_MIN + k] * stepsizeRecip;
1249 // Initialize body start joint indices -- this will be needed later for building body related joint list in dxStepIsland_Stage2c
1250 bodyStartJoints[bi] = 0;
1255 static
1256 int dxStepIsland_Stage2bSync_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
1258 (void)callInstanceIndex; // unused
1259 dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
1260 const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
1261 const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
1263 dIASSERT(allowedThreads > 1); /*if (allowedThreads > 1) */{ // The allowed thread count is greater than one as otherwise current function would not be scheduled for execution from the previous stage
1264 dxWorld *world = callContext->m_world;
1265 world->AlterThreadedCallDependenciesCount(callThisReleasee, allowedThreads - 1);
1266 world->PostThreadedCallsGroup(NULL, allowedThreads - 1, callThisReleasee, &dxStepIsland_Stage2c_Callback, stage2CallContext, "StepIsland Stage2c");
1268 dxStepIsland_Stage2c(stage2CallContext);
1270 return 1;
1274 static
1275 int dxStepIsland_Stage2c_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
1277 (void)callInstanceIndex; // unused
1278 (void)callThisReleasee; // unused
1279 dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)_stage2CallContext;
1280 dxStepIsland_Stage2c(stage2CallContext);
1281 return 1;
1284 static
1285 void dxStepIsland_Stage2c(dxStepperStage2CallContext *stage2CallContext)
1287 //const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
1288 const dxStepperLocalContext *localContext = stage2CallContext->m_localContext;
1289 dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
1290 unsigned int nj = localContext->m_nj;
1291 const unsigned int *mindex = localContext->m_mindex;
1294 // Warning!!!
1295 // This code depends on A elements and JinvM elements and therefore
1296 // must be in a different sub-stage from A initialization and JinvM calculation in Stage2b
1297 // to ensure proper synchronization and avoid accessing numbers being modified.
1298 // Warning!!!
1299 dReal *A = localContext->m_A;
1300 const dReal *JinvM = stage2CallContext->m_JinvM;
1301 const dReal *J = localContext->m_J;
1302 const unsigned m = localContext->m_m;
1304 // now compute A = JinvM * J'. A's rows and columns are grouped by joint,
1305 // i.e. in the same way as the rows of J. block (i,j) of A is only nonzero
1306 // if joints i and j have at least one body in common.
1307 const unsigned int mskip = dPAD(m);
1309 unsigned ji;
1310 while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_Aaddjb, nj)) != nj) {
1311 const unsigned ofsi = mindex[ji];
1312 const unsigned int infom = mindex[ji + 1] - ofsi;
1314 dReal *Arow = A + (sizeint)mskip * ofsi;
1315 const dReal *JinvMRow = JinvM + (sizeint)ofsi * (2 * JIM__MAX);
1316 dxJoint *joint = jointinfos[ji].joint;
1318 dxBody *jb0 = joint->node[0].body;
1319 if (true || jb0 != NULL) { // -- always true
1320 // compute diagonal block of A
1321 const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);
1322 MultiplyAddJinvMxJToA (Arow + ofsi, JinvMRow, JRow, infom, infom, mskip);
1324 for (dxJointNode *n0 = (ji != 0 ? jb0->firstjoint : NULL); n0; n0 = n0->next) {
1325 // if joint was tagged as -1 then it is an inactive (m=0 or disabled)
1326 // joint that should not be considered
1327 int j0 = n0->joint->tag;
1328 if (j0 != -1 && (unsigned)j0 < ji) {
1329 const unsigned int jiother_ofsi = mindex[j0];
1330 const unsigned int jiother_infom = mindex[j0 + 1] - jiother_ofsi;
1331 const dJointWithInfo1 *jiother = jointinfos + j0;
1332 unsigned int smart_infom = (jiother->joint->node[1].body == jb0) ? jiother_infom : 0;
1333 // set block of A
1334 const dReal *JOther = J + ((sizeint)jiother_ofsi * 2 + smart_infom) * JME__MAX;
1335 MultiplyAddJinvMxJToA (Arow + jiother_ofsi, JinvMRow, JOther, infom, jiother_infom, mskip);
1340 dxBody *jb1 = joint->node[1].body;
1341 dIASSERT(jb1 != jb0);
1342 if (jb1 != NULL) {
1343 const dReal *JinvMOther = JinvMRow + infom * JIM__MAX;
1344 // compute diagonal block of A
1345 const dReal *JRow = J + ((sizeint)ofsi * 2 + infom) * JME__MAX;
1346 MultiplyAddJinvMxJToA (Arow + ofsi, JinvMOther, JRow, infom, infom, mskip);
1348 for (dxJointNode *n1 = (ji != 0 ? jb1->firstjoint : NULL); n1; n1 = n1->next) {
1349 // if joint was tagged as -1 then it is an inactive (m=0 or disabled)
1350 // joint that should not be considered
1351 int j1 = n1->joint->tag;
1352 if (j1 != -1 && (unsigned)j1 < ji) {
1353 const unsigned int jiother_ofsi = mindex[j1];
1354 const unsigned int jiother_infom = mindex[j1 + 1] - jiother_ofsi;
1355 const dJointWithInfo1 *jiother = jointinfos + j1;
1356 unsigned int smart_infom = (jiother->joint->node[1].body == jb1) ? jiother_infom : 0;
1357 // set block of A
1358 const dReal *JOther = J + ((sizeint)jiother_ofsi * 2 + smart_infom) * JME__MAX;
1359 MultiplyAddJinvMxJToA (Arow + jiother_ofsi, JinvMOther, JOther, infom, jiother_infom, mskip);
1367 // Warning!!!
1368 // This code depends on rhs_tmp elements and therefore must be in
1369 // different sub-stage from rhs_tmp calculation in Stage2b to ensure
1370 // proper synchronization and avoid accessing numbers being modified.
1371 // Warning!!!
1372 const dReal *J = localContext->m_J;
1373 const dReal *rhs_tmp = stage2CallContext->m_rhs_tmp;
1374 dReal *pairsRhsCfm = localContext->m_pairsRhsCfm;
1375 atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
1376 atomicord32 *bodyJointLinks = localContext->m_bodyJointLinks;
1378 // compute the right hand side `rhs'
1379 // put J*rhs_tmp into rhs
1380 unsigned ji;
1381 while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_rhs, nj)) != nj) {
1382 const unsigned ofsi = mindex[ji];
1383 const unsigned int infom = mindex[ji + 1] - ofsi;
1385 dReal *currRhsCfm = pairsRhsCfm + (sizeint)ofsi * RCE__RHS_CFM_MAX;
1386 const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);
1388 dxJoint *joint = jointinfos[ji].joint;
1390 dxBody *jb0 = joint->node[0].body;
1391 if (true || jb0 != NULL) { // -- always true
1392 unsigned bodyIndex = (unsigned)jb0->tag;
1393 MultiplySubJxRhsTmpFromRHS (currRhsCfm, JRow, rhs_tmp + (sizeint)bodyIndex * dDA__MAX, infom);
1395 // Link joints connected to each body into a list to be used on results incorporation. The bodyStartJoints have been initialized in dxStepIsland_Stage2b.
1396 const atomicord32 linkIndex = (atomicord32)((sizeint)ji * dJCB__MAX + dJCB_FIRST_BODY); // It is asserted at links buffer allocation that the indices can't overflow atomicord32
1397 for (atomicord32 oldStartIndex = bodyStartJoints[bodyIndex]; ; oldStartIndex = bodyStartJoints[bodyIndex]) {
1398 bodyJointLinks[linkIndex] = oldStartIndex;
1399 if (ThrsafeCompareExchange(&bodyStartJoints[bodyIndex], oldStartIndex, linkIndex + 1)) { // The link index is stored incremented to allow 0 as end indicator
1400 break;
1405 dxBody *jb1 = joint->node[1].body;
1406 if (jb1 != NULL) {
1407 unsigned bodyIndex = (unsigned)jb1->tag;
1408 MultiplySubJxRhsTmpFromRHS (currRhsCfm, JRow + infom * JME__MAX, rhs_tmp + (sizeint)bodyIndex * dDA__MAX, infom);
1410 // Link joints connected to each body into a list to be used on results incorporation. The bodyStartJoints have been initialized in dxStepIsland_Stage2b
1411 const atomicord32 linkIndex = (atomicord32)((sizeint)ji * dJCB__MAX + dJCB_SECOND_BODY); // It is asserted at links buffer allocation that the indices can't overflow atomicord32
1412 for (atomicord32 oldStartIndex = bodyStartJoints[bodyIndex]; ; oldStartIndex = bodyStartJoints[bodyIndex]) {
1413 bodyJointLinks[linkIndex] = oldStartIndex;
1414 if (ThrsafeCompareExchange(&bodyStartJoints[bodyIndex], oldStartIndex, linkIndex + 1)) { // The link index is stored incremented to allow 0 as end indicator
1415 break;
1424 static
1425 int dxStepIsland_Stage3_Callback(void *_stage3CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
1427 (void)callInstanceIndex; // unused
1428 (void)callThisReleasee; // unused
1429 dxStepperStage3CallContext *stage3CallContext = (dxStepperStage3CallContext *)_stage3CallContext;
1430 dxStepIsland_Stage3(stage3CallContext);
1431 return 1;
1434 static
1435 void dxStepIsland_Stage3(dxStepperStage3CallContext *stage3CallContext)
1437 const dxStepperProcessingCallContext *callContext = stage3CallContext->m_stepperCallContext;
1438 const dxStepperLocalContext *localContext = stage3CallContext->m_localContext;
1440 dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
1441 memarena->RestoreState(stage3CallContext->m_stage1MemArenaState);
1442 stage3CallContext = NULL; // WARNING! stage3CallContext is not valid after this point!
1443 dIVERIFY(stage3CallContext == NULL); // To suppress unused variable assignment warnings
1445 unsigned int m = localContext->m_m;
1446 unsigned int nub = localContext->m_nub;
1447 //const unsigned int *mindex = localContext->m_mindex;
1448 int *findex = localContext->m_findex;
1449 dReal *A = localContext->m_A;
1450 dReal *pairsRhsLambda = localContext->m_pairsRhsCfm; // Reuse cfm buffer for lambdas as the former values are not needed anymore
1451 dReal *pairsLoHi = localContext->m_pairsLoHi;
1453 if (m > 0) {
1454 BEGIN_STATE_SAVE(memarena, lcpstate) {
1455 IFTIMING(dTimerNow ("solve LCP problem"));
1457 // solve the LCP problem and get lambda.
1458 // this will destroy A but that's OK
1459 dxSolveLCP (memarena, m, A, pairsRhsLambda, NULL, nub, pairsLoHi, findex);
1460 dSASSERT((int)RLE__RHS_LAMBDA_MAX == PBX__MAX && (int)RLE_RHS == PBX_B && (int)RLE_LAMBDA == PBX_X);
1461 dSASSERT((int)LHE__LO_HI_MAX == PLH__MAX && (int)LHE_LO == PLH_LO && (int)LHE_HI == PLH_HI);
1463 } END_STATE_SAVE(memarena, lcpstate);
1466 // void *stage3MemarenaState = memarena->SaveState();
1468 dxStepperStage4CallContext *stage4CallContext = (dxStepperStage4CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage4CallContext));
1469 stage4CallContext->Initialize(callContext, localContext/*, stage3MemarenaState*/);
1471 const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
1472 dIASSERT(allowedThreads != 0);
1474 if (allowedThreads == 1) {
1475 IFTIMING(dTimerNow ("compute and apply constraint force"));
1476 dxStepIsland_Stage4(stage4CallContext);
1477 IFTIMING(dTimerEnd());
1479 if (m > 0) {
1480 IFTIMING(dTimerReport(stdout,1));
1483 else {
1484 dCallReleaseeID finalReleasee = callContext->m_finalReleasee;
1485 dxWorld *world = callContext->m_world;
1486 world->AlterThreadedCallDependenciesCount(finalReleasee, allowedThreads - 1);
1487 world->PostThreadedCallsGroup(NULL, allowedThreads - 1, finalReleasee, &dxStepIsland_Stage4_Callback, stage4CallContext, "StepIsland Stage4");
1488 // Note: Adding another dependency for the finalReleasee is not necessary as it already depends on the current call
1489 dxStepIsland_Stage4(stage4CallContext);
1493 static
1494 int dxStepIsland_Stage4_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
1496 (void)callInstanceIndex; // unused
1497 (void)callThisReleasee; // unused
1498 dxStepperStage4CallContext *stage4CallContext = (dxStepperStage4CallContext *)_stage4CallContext;
1499 dxStepIsland_Stage4(stage4CallContext);
1500 return 1;
1503 static
1504 void dxStepIsland_Stage4(dxStepperStage4CallContext *stage4CallContext)
1506 const dxStepperProcessingCallContext *callContext = stage4CallContext->m_stepperCallContext;
1507 const dxStepperLocalContext *localContext = stage4CallContext->m_localContext;
1509 const dReal stepSize = callContext->m_stepSize;
1510 dxBody *const *bodies = callContext->m_islandBodiesStart;
1511 dReal *invI = localContext->m_invI;
1512 dJointWithInfo1 *jointInfos = localContext->m_jointinfos;
1513 dReal *J = localContext->m_J;
1514 dReal *pairsRhsLambda = localContext->m_pairsRhsCfm;
1515 const unsigned int *mIndex = localContext->m_mindex;
1516 atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
1517 atomicord32 *bodyJointLinks = localContext->m_bodyJointLinks;
1518 const unsigned int nb = callContext->m_islandBodiesCount;
1520 unsigned bi;
1521 while ((bi = ThrsafeIncrementIntUpToLimit(&stage4CallContext->m_bi_constrForce, nb)) != nb) {
1522 dVector3 angularForceAccumulator;
1523 dxBody *b = bodies[bi];
1524 const dReal *invIrow = invI + (sizeint)bi * dM3E__MAX;
1525 dReal body_invMass_mul_stepSize = stepSize * b->invMass;
1527 dReal bodyConstrForce[CFE__MAX];
1528 bool constrForceAvailable = false;
1530 unsigned linkIndex = bodyStartJoints != NULL ? bodyStartJoints[bi] : 0;
1531 if (linkIndex != 0) {
1532 dSetZero(bodyConstrForce, dARRAY_SIZE(bodyConstrForce));
1535 // compute the constraint force as constrForce = J'*lambda
1536 for (; linkIndex != 0; constrForceAvailable = true, linkIndex = bodyJointLinks[linkIndex - 1]) {
1537 unsigned jointIndex = (linkIndex - 1) / dJCB__MAX;
1538 unsigned jointBodyIndex = (linkIndex - 1) % dJCB__MAX;
1540 const dJointWithInfo1 *currJointInfo = jointInfos + jointIndex;
1541 unsigned ofsi = mIndex[jointIndex];
1542 dIASSERT(dIN_RANGE(jointIndex, 0, localContext->m_nj));
1544 const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);
1545 const dReal *rowRhsLambda = pairsRhsLambda + (sizeint)ofsi * RLE__RHS_LAMBDA_MAX;
1547 dxJoint *joint = currJointInfo->joint;
1548 const unsigned int infom = currJointInfo->info.m;
1550 // unsigned jRowExtraOffset = jointBodyIndex * infom * JME__MAX;
1551 unsigned jRowExtraOffset = jointBodyIndex != dJCB__MIN ? infom * JME__MAX : 0;
1552 dSASSERT(dJCB__MAX == 2);
1554 dJointFeedback *fb = joint->feedback;
1555 MultiplyAddJxLambdaToCForce(bodyConstrForce, JRow + jRowExtraOffset, rowRhsLambda, infom, fb, jointBodyIndex);
1558 // compute the velocity update
1559 if (constrForceAvailable) {
1560 // add fe to cforce and multiply cforce by stepSize
1561 for (unsigned int j = dSA__MIN; j != dSA__MAX; ++j) {
1562 b->lvel[dV3E__AXES_MIN + j] += (bodyConstrForce[CFE__L_MIN + j] + b->facc[dV3E__AXES_MIN + j]) * body_invMass_mul_stepSize;
1564 for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) {
1565 angularForceAccumulator[dV3E__AXES_MIN + k] = (bodyConstrForce[CFE__A_MIN + k] + b->tacc[dV3E__AXES_MIN + k]) * stepSize;
1568 else {
1569 // add fe to cforce and multiply cforce by stepSize
1570 dAddVectorScaledVector3(b->lvel, b->lvel, b->facc, body_invMass_mul_stepSize);
1571 dCopyScaledVector3(angularForceAccumulator, b->tacc, stepSize);
1574 dMultiplyAdd0_331 (b->avel, invIrow, angularForceAccumulator + dV3E__AXES_MIN);
1576 // update the position and orientation from the new linear/angular velocity
1577 // (over the given time step)
1578 dxStepBody (b, stepSize);
1580 // zero all force accumulators
1581 dZeroVector3(b->facc);
1582 dZeroVector3(b->tacc);
1587 //****************************************************************************
1589 /*extern */
1590 sizeint dxEstimateStepMemoryRequirements (dxBody * const *body, unsigned int nb, dxJoint * const *_joint, unsigned int _nj)
1592 (void)body; // unused
1593 unsigned int nj, m;
1596 unsigned int njcurr = 0, mcurr = 0;
1597 dxJoint::SureMaxInfo info;
1598 dxJoint *const *const _jend = _joint + _nj;
1599 for (dxJoint *const *_jcurr = _joint; _jcurr != _jend; ++_jcurr) {
1600 dxJoint *j = *_jcurr;
1601 j->getSureMaxInfo (&info);
1603 unsigned int jm = info.max_m;
1604 if (jm > 0) {
1605 njcurr++;
1607 mcurr += jm;
1610 nj = njcurr; m = mcurr;
1613 sizeint res = 0;
1615 res += dOVERALIGNED_SIZE(sizeof(dReal) * dM3E__MAX * nb, INVI_ALIGNMENT); // for invI
1618 sizeint sub1_res1 = dEFFICIENT_SIZE(sizeof(dJointWithInfo1) * 2 * _nj); // for initial jointinfos
1620 // The array can't grow right more than by nj
1621 sizeint sub1_res2 = dEFFICIENT_SIZE(sizeof(dJointWithInfo1) * ((sizeint)_nj + (sizeint)nj)); // for shrunk jointinfos
1622 sub1_res2 += dEFFICIENT_SIZE(sizeof(dxStepperLocalContext)); //for dxStepperLocalContext
1623 if (m > 0) {
1624 sub1_res2 += dEFFICIENT_SIZE(sizeof(unsigned int) * (nj + 1)); // for mindex
1625 sub1_res2 += dEFFICIENT_SIZE(sizeof(int) * m); // for findex
1626 sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * 2 * JME__MAX * m); // for J
1627 unsigned int mskip = dPAD(m);
1628 sub1_res2 += dOVERALIGNED_SIZE(sizeof(dReal) * mskip * m, AMATRIX_ALIGNMENT); // for A
1629 sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * RCE__RHS_CFM_MAX * m); // for pairsRhsCfm
1630 sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * LHE__LO_HI_MAX * m); // for pairsLoHi
1631 sub1_res2 += dEFFICIENT_SIZE(sizeof(atomicord32) * nb); // for bodyStartJoints
1632 sub1_res2 += dEFFICIENT_SIZE(sizeof(atomicord32)* dJCB__MAX * nj); // for bodyJointLinks
1636 sizeint sub2_res1 = dEFFICIENT_SIZE(sizeof(dxStepperStage3CallContext)); // for dxStepperStage3CallContext
1638 sizeint sub2_res2 = 0;
1640 sizeint sub2_res3 = dEFFICIENT_SIZE(sizeof(dxStepperStage4CallContext)); // for dxStepperStage4CallContext
1642 if (m > 0) {
1643 sub2_res1 += dOVERALIGNED_SIZE(sizeof(dReal) * 2 * JIM__MAX * m, JINVM_ALIGNMENT); // for JinvM
1644 sub2_res1 += dEFFICIENT_SIZE(sizeof(dReal) * dDA__MAX * nb); // for rhs_tmp
1645 sub2_res1 += dEFFICIENT_SIZE(sizeof(dxStepperStage2CallContext)); // for dxStepperStage2CallContext
1647 sub2_res2 += dxEstimateSolveLCPMemoryReq(m, false);
1650 sub1_res2 += dMAX(sub2_res1, dMAX(sub2_res2, sub2_res3));
1653 sizeint sub1_res12_max = dMAX(sub1_res1, sub1_res2);
1654 sizeint stage01_contexts = dEFFICIENT_SIZE(sizeof(dxStepperStage0BodiesCallContext))
1655 + dEFFICIENT_SIZE(sizeof(dxStepperStage0JointsCallContext))
1656 + dEFFICIENT_SIZE(sizeof(dxStepperStage1CallContext));
1657 res += dMAX(sub1_res12_max, stage01_contexts);
1660 return res;
1664 /*extern */
1665 unsigned dxEstimateStepMaxCallCount(
1666 unsigned /*activeThreadCount*/, unsigned allowedThreadCount)
1668 unsigned result = 1 // dxStepIsland itself
1669 + (2 * allowedThreadCount + 2) // (dxStepIsland_Stage2a + dxStepIsland_Stage2b) * allowedThreadCount + 2 * dxStepIsland_Stage2?_Sync
1670 + 1; // dxStepIsland_Stage3
1671 return result;