Added: dSpaceSetSublevel/dSpaceGetSublevel and possibility to collide a space as...
[ode.git] / ode / src / step.cpp
blobbe6a266d2da2985450cb654b22dd0fd2acf5f07f
1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
23 #include "objects.h"
24 #include "joint.h"
25 #include <ode/odeconfig.h>
26 #include "config.h"
27 #include <ode/odemath.h>
28 #include <ode/rotation.h>
29 #include <ode/timer.h>
30 #include <ode/error.h>
31 #include <ode/matrix.h>
32 #include "lcp.h"
33 #include "util.h"
35 //****************************************************************************
36 // misc defines
38 #define FAST_FACTOR
39 //#define TIMING
41 // memory allocation system
42 #ifdef dUSE_MALLOC_FOR_ALLOCA
43 unsigned int dMemoryFlag;
44 #define REPORT_OUT_OF_MEMORY fprintf(stderr, "Insufficient memory to complete rigid body simulation. Results will not be accurate.\n")
46 #define ALLOCA(t,v,s) t* v=(t*)malloc(s)
47 #define UNALLOCA(t) free(t)
49 #else
50 #define ALLOCA(t,v,s) t* v=(t*)dALLOCA16(s)
51 #define UNALLOCA(t) /* nothing */
52 #endif
55 //****************************************************************************
56 // debugging - comparison of various vectors and matrices produced by the
57 // slow and fast versions of the stepper.
59 //#define COMPARE_METHODS
61 #ifdef COMPARE_METHODS
62 #include "testing.h"
63 dMatrixComparison comparator;
64 #endif
66 // undef to use the fast decomposition
67 #define DIRECT_CHOLESKY
68 #undef REPORT_ERROR
70 //****************************************************************************
71 // special matrix multipliers
73 // this assumes the 4th and 8th rows of B and C are zero.
75 static void Multiply2_p8r (dReal *A, dReal *B, dReal *C,
76 int p, int r, int Askip)
78 int i,j;
79 dReal sum,*bb,*cc;
80 dIASSERT (p>0 && r>0 && A && B && C);
81 bb = B;
82 for (i=p; i; i--) {
83 cc = C;
84 for (j=r; j; j--) {
85 sum = bb[0]*cc[0];
86 sum += bb[1]*cc[1];
87 sum += bb[2]*cc[2];
88 sum += bb[4]*cc[4];
89 sum += bb[5]*cc[5];
90 sum += bb[6]*cc[6];
91 *(A++) = sum;
92 cc += 8;
94 A += Askip - r;
95 bb += 8;
100 // this assumes the 4th and 8th rows of B and C are zero.
102 static void MultiplyAdd2_p8r (dReal *A, dReal *B, dReal *C,
103 int p, int r, int Askip)
105 int i,j;
106 dReal sum,*bb,*cc;
107 dIASSERT (p>0 && r>0 && A && B && C);
108 bb = B;
109 for (i=p; i; i--) {
110 cc = C;
111 for (j=r; j; j--) {
112 sum = bb[0]*cc[0];
113 sum += bb[1]*cc[1];
114 sum += bb[2]*cc[2];
115 sum += bb[4]*cc[4];
116 sum += bb[5]*cc[5];
117 sum += bb[6]*cc[6];
118 *(A++) += sum;
119 cc += 8;
121 A += Askip - r;
122 bb += 8;
127 // this assumes the 4th and 8th rows of B are zero.
129 static void Multiply0_p81 (dReal *A, dReal *B, dReal *C, int p)
131 int i;
132 dIASSERT (p>0 && A && B && C);
133 dReal sum;
134 for (i=p; i; i--) {
135 sum = B[0]*C[0];
136 sum += B[1]*C[1];
137 sum += B[2]*C[2];
138 sum += B[4]*C[4];
139 sum += B[5]*C[5];
140 sum += B[6]*C[6];
141 *(A++) = sum;
142 B += 8;
147 // this assumes the 4th and 8th rows of B are zero.
149 static void MultiplyAdd0_p81 (dReal *A, dReal *B, dReal *C, int p)
151 int i;
152 dIASSERT (p>0 && A && B && C);
153 dReal sum;
154 for (i=p; i; i--) {
155 sum = B[0]*C[0];
156 sum += B[1]*C[1];
157 sum += B[2]*C[2];
158 sum += B[4]*C[4];
159 sum += B[5]*C[5];
160 sum += B[6]*C[6];
161 *(A++) += sum;
162 B += 8;
167 // this assumes the 4th and 8th rows of B are zero.
169 static void MultiplyAdd1_8q1 (dReal *A, dReal *B, dReal *C, int q)
171 int k;
172 dReal sum;
173 dIASSERT (q>0 && A && B && C);
174 sum = 0;
175 for (k=0; k<q; k++) sum += B[k*8] * C[k];
176 A[0] += sum;
177 sum = 0;
178 for (k=0; k<q; k++) sum += B[1+k*8] * C[k];
179 A[1] += sum;
180 sum = 0;
181 for (k=0; k<q; k++) sum += B[2+k*8] * C[k];
182 A[2] += sum;
183 sum = 0;
184 for (k=0; k<q; k++) sum += B[4+k*8] * C[k];
185 A[4] += sum;
186 sum = 0;
187 for (k=0; k<q; k++) sum += B[5+k*8] * C[k];
188 A[5] += sum;
189 sum = 0;
190 for (k=0; k<q; k++) sum += B[6+k*8] * C[k];
191 A[6] += sum;
195 // this assumes the 4th and 8th rows of B are zero.
197 static void Multiply1_8q1 (dReal *A, dReal *B, dReal *C, int q)
199 int k;
200 dReal sum;
201 dIASSERT (q>0 && A && B && C);
202 sum = 0;
203 for (k=0; k<q; k++) sum += B[k*8] * C[k];
204 A[0] = sum;
205 sum = 0;
206 for (k=0; k<q; k++) sum += B[1+k*8] * C[k];
207 A[1] = sum;
208 sum = 0;
209 for (k=0; k<q; k++) sum += B[2+k*8] * C[k];
210 A[2] = sum;
211 sum = 0;
212 for (k=0; k<q; k++) sum += B[4+k*8] * C[k];
213 A[4] = sum;
214 sum = 0;
215 for (k=0; k<q; k++) sum += B[5+k*8] * C[k];
216 A[5] = sum;
217 sum = 0;
218 for (k=0; k<q; k++) sum += B[6+k*8] * C[k];
219 A[6] = sum;
222 //****************************************************************************
223 // the slow, but sure way
224 // note that this does not do any joint feedback!
226 // given lists of bodies and joints that form an island, perform a first
227 // order timestep.
229 // `body' is the body array, `nb' is the size of the array.
230 // `_joint' is the body array, `nj' is the size of the array.
232 void dInternalStepIsland_x1 (dxWorld *world, dxBody * const *body, int nb,
233 dxJoint * const *_joint, int nj, dReal stepsize)
235 int i,j,k;
236 int n6 = 6*nb;
238 #ifdef TIMING
239 dTimerStart("preprocessing");
240 #endif
242 // number all bodies in the body list - set their tag values
243 for (i=0; i<nb; i++) body[i]->tag = i;
245 // make a local copy of the joint array, because we might want to modify it.
246 // (the "dxJoint *const*" declaration says we're allowed to modify the joints
247 // but not the joint array, because the caller might need it unchanged).
248 ALLOCA(dxJoint*,joint,nj*sizeof(dxJoint*));
249 #ifdef dUSE_MALLOC_FOR_ALLOCA
250 if (joint == NULL) {
251 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
252 return;
254 #endif
255 memcpy (joint,_joint,nj * sizeof(dxJoint*));
257 // for all bodies, compute the inertia tensor and its inverse in the global
258 // frame, and compute the rotational force and add it to the torque
259 // accumulator.
260 // @@@ check computation of rotational force.
261 ALLOCA(dReal,I,3*nb*4*sizeof(dReal));
262 #ifdef dUSE_MALLOC_FOR_ALLOCA
263 if (I == NULL) {
264 UNALLOCA(joint);
265 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
266 return;
268 #endif
269 ALLOCA(dReal,invI,3*nb*4*sizeof(dReal));
270 #ifdef dUSE_MALLOC_FOR_ALLOCA
271 if (invI == NULL) {
272 UNALLOCA(I);
273 UNALLOCA(joint);
274 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
275 return;
277 #endif
279 //dSetZero (I,3*nb*4);
280 //dSetZero (invI,3*nb*4);
281 for (i=0; i<nb; i++) {
282 dReal tmp[12];
283 // compute inertia tensor in global frame
284 dMULTIPLY2_333 (tmp,body[i]->mass.I,body[i]->posr.R);
285 dMULTIPLY0_333 (I+i*12,body[i]->posr.R,tmp);
286 // compute inverse inertia tensor in global frame
287 dMULTIPLY2_333 (tmp,body[i]->invI,body[i]->posr.R);
288 dMULTIPLY0_333 (invI+i*12,body[i]->posr.R,tmp);
289 // compute rotational force
290 dMULTIPLY0_331 (tmp,I+i*12,body[i]->avel);
291 dCROSS (body[i]->tacc,-=,body[i]->avel,tmp);
294 // add the gravity force to all bodies
295 for (i=0; i<nb; i++) {
296 if ((body[i]->flags & dxBodyNoGravity)==0) {
297 body[i]->facc[0] += body[i]->mass.mass * world->gravity[0];
298 body[i]->facc[1] += body[i]->mass.mass * world->gravity[1];
299 body[i]->facc[2] += body[i]->mass.mass * world->gravity[2];
303 // get m = total constraint dimension, nub = number of unbounded variables.
304 // create constraint offset array and number-of-rows array for all joints.
305 // the constraints are re-ordered as follows: the purely unbounded
306 // constraints, the mixed unbounded + LCP constraints, and last the purely
307 // LCP constraints.
309 // joints with m=0 are inactive and are removed from the joints array
310 // entirely, so that the code that follows does not consider them.
311 int m = 0;
312 ALLOCA(dxJoint::Info1,info,nj*sizeof(dxJoint::Info1));
313 #ifdef dUSE_MALLOC_FOR_ALLOCA
314 if (info == NULL) {
315 UNALLOCA(invI);
316 UNALLOCA(I);
317 UNALLOCA(joint);
318 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
319 return;
321 #endif
323 ALLOCA(int,ofs,nj*sizeof(int));
324 #ifdef dUSE_MALLOC_FOR_ALLOCA
325 if (ofs == NULL) {
326 UNALLOCA(info);
327 UNALLOCA(invI);
328 UNALLOCA(I);
329 UNALLOCA(joint);
330 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
331 return;
333 #endif
335 for (i=0, j=0; j<nj; j++) { // i=dest, j=src
336 joint[j]->vtable->getInfo1 (joint[j],info+i);
337 dIASSERT (info[i].m >= 0 && info[i].m <= 6 &&
338 info[i].nub >= 0 && info[i].nub <= info[i].m);
339 if (info[i].m > 0) {
340 joint[i] = joint[j];
341 i++;
344 nj = i;
346 // the purely unbounded constraints
347 for (i=0; i<nj; i++) if (info[i].nub == info[i].m) {
348 ofs[i] = m;
349 m += info[i].m;
351 //int nub = m;
352 // the mixed unbounded + LCP constraints
353 for (i=0; i<nj; i++) if (info[i].nub > 0 && info[i].nub < info[i].m) {
354 ofs[i] = m;
355 m += info[i].m;
357 // the purely LCP constraints
358 for (i=0; i<nj; i++) if (info[i].nub == 0) {
359 ofs[i] = m;
360 m += info[i].m;
363 // create (6*nb,6*nb) inverse mass matrix `invM', and fill it with mass
364 // parameters
365 #ifdef TIMING
366 dTimerNow ("create mass matrix");
367 #endif
368 int nskip = dPAD (n6);
369 ALLOCA(dReal, invM, n6*nskip*sizeof(dReal));
370 #ifdef dUSE_MALLOC_FOR_ALLOCA
371 if (invM == NULL) {
372 UNALLOCA(ofs);
373 UNALLOCA(info);
374 UNALLOCA(invI);
375 UNALLOCA(I);
376 UNALLOCA(joint);
377 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
378 return;
380 #endif
382 dSetZero (invM,n6*nskip);
383 for (i=0; i<nb; i++) {
384 dReal *MM = invM+(i*6)*nskip+(i*6);
385 MM[0] = body[i]->invMass;
386 MM[nskip+1] = body[i]->invMass;
387 MM[2*nskip+2] = body[i]->invMass;
388 MM += 3*nskip+3;
389 for (j=0; j<3; j++) for (k=0; k<3; k++) {
390 MM[j*nskip+k] = invI[i*12+j*4+k];
394 // assemble some body vectors: fe = external forces, v = velocities
395 ALLOCA(dReal,fe,n6*sizeof(dReal));
396 #ifdef dUSE_MALLOC_FOR_ALLOCA
397 if (fe == NULL) {
398 UNALLOCA(invM);
399 UNALLOCA(ofs);
400 UNALLOCA(info);
401 UNALLOCA(invI);
402 UNALLOCA(I);
403 UNALLOCA(joint);
404 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
405 return;
407 #endif
409 ALLOCA(dReal,v,n6*sizeof(dReal));
410 #ifdef dUSE_MALLOC_FOR_ALLOCA
411 if (v == NULL) {
412 UNALLOCA(fe);
413 UNALLOCA(invM);
414 UNALLOCA(ofs);
415 UNALLOCA(info);
416 UNALLOCA(invI);
417 UNALLOCA(I);
418 UNALLOCA(joint);
419 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
420 return;
422 #endif
424 //dSetZero (fe,n6);
425 //dSetZero (v,n6);
426 for (i=0; i<nb; i++) {
427 for (j=0; j<3; j++) fe[i*6+j] = body[i]->facc[j];
428 for (j=0; j<3; j++) fe[i*6+3+j] = body[i]->tacc[j];
429 for (j=0; j<3; j++) v[i*6+j] = body[i]->lvel[j];
430 for (j=0; j<3; j++) v[i*6+3+j] = body[i]->avel[j];
433 // this will be set to the velocity update
434 ALLOCA(dReal,vnew,n6*sizeof(dReal));
435 #ifdef dUSE_MALLOC_FOR_ALLOCA
436 if (vnew == NULL) {
437 UNALLOCA(v);
438 UNALLOCA(fe);
439 UNALLOCA(invM);
440 UNALLOCA(ofs);
441 UNALLOCA(info);
442 UNALLOCA(invI);
443 UNALLOCA(I);
444 UNALLOCA(joint);
445 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
446 return;
448 #endif
449 dSetZero (vnew,n6);
451 // if there are constraints, compute cforce
452 if (m > 0) {
453 // create a constraint equation right hand side vector `c', a constraint
454 // force mixing vector `cfm', and LCP low and high bound vectors, and an
455 // 'findex' vector.
456 ALLOCA(dReal,c,m*sizeof(dReal));
457 #ifdef dUSE_MALLOC_FOR_ALLOCA
458 if (c == NULL) {
459 UNALLOCA(vnew);
460 UNALLOCA(v);
461 UNALLOCA(fe);
462 UNALLOCA(invM);
463 UNALLOCA(ofs);
464 UNALLOCA(info);
465 UNALLOCA(invI);
466 UNALLOCA(I);
467 UNALLOCA(joint);
468 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
469 return;
471 #endif
472 ALLOCA(dReal,cfm,m*sizeof(dReal));
473 #ifdef dUSE_MALLOC_FOR_ALLOCA
474 if (cfm == NULL) {
475 UNALLOCA(c);
476 UNALLOCA(vnew);
477 UNALLOCA(v);
478 UNALLOCA(fe);
479 UNALLOCA(invM);
480 UNALLOCA(ofs);
481 UNALLOCA(info);
482 UNALLOCA(invI);
483 UNALLOCA(I);
484 UNALLOCA(joint);
485 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
486 return;
488 #endif
489 ALLOCA(dReal,lo,m*sizeof(dReal));
490 #ifdef dUSE_MALLOC_FOR_ALLOCA
491 if (lo == NULL) {
492 UNALLOCA(cfm);
493 UNALLOCA(c);
494 UNALLOCA(vnew);
495 UNALLOCA(v);
496 UNALLOCA(fe);
497 UNALLOCA(invM);
498 UNALLOCA(ofs);
499 UNALLOCA(info);
500 UNALLOCA(invI);
501 UNALLOCA(I);
502 UNALLOCA(joint);
503 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
504 return;
506 #endif
507 ALLOCA(dReal,hi,m*sizeof(dReal));
508 #ifdef dUSE_MALLOC_FOR_ALLOCA
509 if (hi == NULL) {
510 UNALLOCA(lo);
511 UNALLOCA(cfm);
512 UNALLOCA(c);
513 UNALLOCA(vnew);
514 UNALLOCA(v);
515 UNALLOCA(fe);
516 UNALLOCA(invM);
517 UNALLOCA(ofs);
518 UNALLOCA(info);
519 UNALLOCA(invI);
520 UNALLOCA(I);
521 UNALLOCA(joint);
522 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
523 return;
525 #endif
526 ALLOCA(int,findex,m*sizeof(int));
527 #ifdef dUSE_MALLOC_FOR_ALLOCA
528 if (findex == NULL) {
529 UNALLOCA(hi);
530 UNALLOCA(lo);
531 UNALLOCA(cfm);
532 UNALLOCA(c);
533 UNALLOCA(vnew);
534 UNALLOCA(v);
535 UNALLOCA(fe);
536 UNALLOCA(invM);
537 UNALLOCA(ofs);
538 UNALLOCA(info);
539 UNALLOCA(invI);
540 UNALLOCA(I);
541 UNALLOCA(joint);
542 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
543 return;
545 #endif
546 dSetZero (c,m);
547 dSetValue (cfm,m,world->global_cfm);
548 dSetValue (lo,m,-dInfinity);
549 dSetValue (hi,m, dInfinity);
550 for (i=0; i<m; i++) findex[i] = -1;
552 // create (m,6*nb) jacobian mass matrix `J', and fill it with constraint
553 // data. also fill the c vector.
554 # ifdef TIMING
555 dTimerNow ("create J");
556 # endif
557 ALLOCA(dReal,J,m*nskip*sizeof(dReal));
558 #ifdef dUSE_MALLOC_FOR_ALLOCA
559 if (J == NULL) {
560 UNALLOCA(findex);
561 UNALLOCA(hi);
562 UNALLOCA(lo);
563 UNALLOCA(cfm);
564 UNALLOCA(c);
565 UNALLOCA(vnew);
566 UNALLOCA(v);
567 UNALLOCA(fe);
568 UNALLOCA(invM);
569 UNALLOCA(ofs);
570 UNALLOCA(info);
571 UNALLOCA(invI);
572 UNALLOCA(I);
573 UNALLOCA(joint);
574 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
575 return;
577 #endif
578 dSetZero (J,m*nskip);
579 dxJoint::Info2 Jinfo;
580 Jinfo.rowskip = nskip;
581 Jinfo.fps = dRecip(stepsize);
582 Jinfo.erp = world->global_erp;
583 for (i=0; i<nj; i++) {
584 Jinfo.J1l = J + nskip*ofs[i] + 6*joint[i]->node[0].body->tag;
585 Jinfo.J1a = Jinfo.J1l + 3;
586 if (joint[i]->node[1].body) {
587 Jinfo.J2l = J + nskip*ofs[i] + 6*joint[i]->node[1].body->tag;
588 Jinfo.J2a = Jinfo.J2l + 3;
590 else {
591 Jinfo.J2l = 0;
592 Jinfo.J2a = 0;
594 Jinfo.c = c + ofs[i];
595 Jinfo.cfm = cfm + ofs[i];
596 Jinfo.lo = lo + ofs[i];
597 Jinfo.hi = hi + ofs[i];
598 Jinfo.findex = findex + ofs[i];
599 joint[i]->vtable->getInfo2 (joint[i],&Jinfo);
600 // adjust returned findex values for global index numbering
601 for (j=0; j<info[i].m; j++) {
602 if (findex[ofs[i] + j] >= 0) findex[ofs[i] + j] += ofs[i];
606 // compute A = J*invM*J'
607 # ifdef TIMING
608 dTimerNow ("compute A");
609 # endif
610 ALLOCA(dReal,JinvM,m*nskip*sizeof(dReal));
611 #ifdef dUSE_MALLOC_FOR_ALLOCA
612 if (JinvM == NULL) {
613 UNALLOCA(J);
614 UNALLOCA(findex);
615 UNALLOCA(hi);
616 UNALLOCA(lo);
617 UNALLOCA(cfm);
618 UNALLOCA(c);
619 UNALLOCA(vnew);
620 UNALLOCA(v);
621 UNALLOCA(fe);
622 UNALLOCA(invM);
623 UNALLOCA(ofs);
624 UNALLOCA(info);
625 UNALLOCA(invI);
626 UNALLOCA(I);
627 UNALLOCA(joint);
628 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
629 return;
631 #endif
632 //dSetZero (JinvM,m*nskip);
633 dMultiply0 (JinvM,J,invM,m,n6,n6);
634 int mskip = dPAD(m);
635 ALLOCA(dReal,A,m*mskip*sizeof(dReal));
636 #ifdef dUSE_MALLOC_FOR_ALLOCA
637 if (A == NULL) {
638 UNALLOCA(JinvM);
639 UNALLOCA(J);
640 UNALLOCA(findex);
641 UNALLOCA(hi);
642 UNALLOCA(lo);
643 UNALLOCA(cfm);
644 UNALLOCA(c);
645 UNALLOCA(vnew);
646 UNALLOCA(v);
647 UNALLOCA(fe);
648 UNALLOCA(invM);
649 UNALLOCA(ofs);
650 UNALLOCA(info);
651 UNALLOCA(invI);
652 UNALLOCA(I);
653 UNALLOCA(joint);
654 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
655 return;
657 #endif
658 //dSetZero (A,m*mskip);
659 dMultiply2 (A,JinvM,J,m,n6,m);
661 // add cfm to the diagonal of A
662 for (i=0; i<m; i++) A[i*mskip+i] += cfm[i] * Jinfo.fps;
664 # ifdef COMPARE_METHODS
665 comparator.nextMatrix (A,m,m,1,"A");
666 # endif
668 // compute `rhs', the right hand side of the equation J*a=c
669 # ifdef TIMING
670 dTimerNow ("compute rhs");
671 # endif
672 ALLOCA(dReal,tmp1,n6*sizeof(dReal));
673 #ifdef dUSE_MALLOC_FOR_ALLOCA
674 if (tmp1 == NULL) {
675 UNALLOCA(A);
676 UNALLOCA(JinvM);
677 UNALLOCA(J);
678 UNALLOCA(findex);
679 UNALLOCA(hi);
680 UNALLOCA(lo);
681 UNALLOCA(cfm);
682 UNALLOCA(c);
683 UNALLOCA(vnew);
684 UNALLOCA(v);
685 UNALLOCA(fe);
686 UNALLOCA(invM);
687 UNALLOCA(ofs);
688 UNALLOCA(info);
689 UNALLOCA(invI);
690 UNALLOCA(I);
691 UNALLOCA(joint);
692 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
693 return;
695 #endif
696 //dSetZero (tmp1,n6);
697 dMultiply0 (tmp1,invM,fe,n6,n6,1);
698 for (i=0; i<n6; i++) tmp1[i] += v[i]/stepsize;
699 ALLOCA(dReal,rhs,m*sizeof(dReal));
700 #ifdef dUSE_MALLOC_FOR_ALLOCA
701 if (rhs == NULL) {
702 UNALLOCA(tmp1);
703 UNALLOCA(A);
704 UNALLOCA(JinvM);
705 UNALLOCA(J);
706 UNALLOCA(findex);
707 UNALLOCA(hi);
708 UNALLOCA(lo);
709 UNALLOCA(cfm);
710 UNALLOCA(c);
711 UNALLOCA(vnew);
712 UNALLOCA(v);
713 UNALLOCA(fe);
714 UNALLOCA(invM);
715 UNALLOCA(ofs);
716 UNALLOCA(info);
717 UNALLOCA(invI);
718 UNALLOCA(I);
719 UNALLOCA(joint);
720 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
721 return;
723 #endif
724 //dSetZero (rhs,m);
725 dMultiply0 (rhs,J,tmp1,m,n6,1);
726 for (i=0; i<m; i++) rhs[i] = c[i]/stepsize - rhs[i];
728 # ifdef COMPARE_METHODS
729 comparator.nextMatrix (c,m,1,0,"c");
730 comparator.nextMatrix (rhs,m,1,0,"rhs");
731 # endif
737 #ifndef DIRECT_CHOLESKY
738 // solve the LCP problem and get lambda.
739 // this will destroy A but that's okay
740 # ifdef TIMING
741 dTimerNow ("solving LCP problem");
742 # endif
743 ALLOCA(dReal,lambda,m*sizeof(dReal));
744 #ifdef dUSE_MALLOC_FOR_ALLOCA
745 if (lambda == NULL) {
746 UNALLOCA(rhs);
747 UNALLOCA(tmp1);
748 UNALLOCA(A);
749 UNALLOCA(JinvM);
750 UNALLOCA(J);
751 UNALLOCA(findex);
752 UNALLOCA(hi);
753 UNALLOCA(lo);
754 UNALLOCA(cfm);
755 UNALLOCA(c);
756 UNALLOCA(vnew);
757 UNALLOCA(v);
758 UNALLOCA(fe);
759 UNALLOCA(invM);
760 UNALLOCA(ofs);
761 UNALLOCA(info);
762 UNALLOCA(invI);
763 UNALLOCA(I);
764 UNALLOCA(joint);
765 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
766 return;
768 #endif
769 ALLOCA(dReal,residual,m*sizeof(dReal));
770 #ifdef dUSE_MALLOC_FOR_ALLOCA
771 if (residual == NULL) {
772 UNALLOCA(lambda);
773 UNALLOCA(rhs);
774 UNALLOCA(tmp1);
775 UNALLOCA(A);
776 UNALLOCA(JinvM);
777 UNALLOCA(J);
778 UNALLOCA(findex);
779 UNALLOCA(hi);
780 UNALLOCA(lo);
781 UNALLOCA(cfm);
782 UNALLOCA(c);
783 UNALLOCA(vnew);
784 UNALLOCA(v);
785 UNALLOCA(fe);
786 UNALLOCA(invM);
787 UNALLOCA(ofs);
788 UNALLOCA(info);
789 UNALLOCA(invI);
790 UNALLOCA(I);
791 UNALLOCA(joint);
792 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
793 return;
795 #endif
796 dSolveLCP (m,A,lambda,rhs,residual,nub,lo,hi,findex);
797 UNALLOCA(residual);
798 UNALLOCA(lambda);
800 #ifdef dUSE_MALLOC_FOR_ALLOCA
801 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY)
802 return;
803 #endif
806 #else
808 // OLD WAY - direct factor and solve
810 // factorize A (L*L'=A)
811 # ifdef TIMING
812 dTimerNow ("factorize A");
813 # endif
814 ALLOCA(dReal,L,m*mskip*sizeof(dReal));
815 #ifdef dUSE_MALLOC_FOR_ALLOCA
816 if (L == NULL) {
817 UNALLOCA(rhs);
818 UNALLOCA(tmp1);
819 UNALLOCA(A);
820 UNALLOCA(JinvM);
821 UNALLOCA(J);
822 UNALLOCA(findex);
823 UNALLOCA(hi);
824 UNALLOCA(lo);
825 UNALLOCA(cfm);
826 UNALLOCA(c);
827 UNALLOCA(vnew);
828 UNALLOCA(v);
829 UNALLOCA(fe);
830 UNALLOCA(invM);
831 UNALLOCA(ofs);
832 UNALLOCA(info);
833 UNALLOCA(invI);
834 UNALLOCA(I);
835 UNALLOCA(joint);
836 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
837 return;
839 #endif
840 memcpy (L,A,m*mskip*sizeof(dReal));
841 if (dFactorCholesky (L,m)==0) dDebug (0,"A is not positive definite");
843 // compute lambda
844 # ifdef TIMING
845 dTimerNow ("compute lambda");
846 # endif
847 ALLOCA(dReal,lambda,m*sizeof(dReal));
848 #ifdef dUSE_MALLOC_FOR_ALLOCA
849 if (lambda == NULL) {
850 UNALLOCA(L);
851 UNALLOCA(rhs);
852 UNALLOCA(tmp1);
853 UNALLOCA(A);
854 UNALLOCA(JinvM);
855 UNALLOCA(J);
856 UNALLOCA(findex);
857 UNALLOCA(hi);
858 UNALLOCA(lo);
859 UNALLOCA(cfm);
860 UNALLOCA(c);
861 UNALLOCA(vnew);
862 UNALLOCA(v);
863 UNALLOCA(fe);
864 UNALLOCA(invM);
865 UNALLOCA(ofs);
866 UNALLOCA(info);
867 UNALLOCA(invI);
868 UNALLOCA(I);
869 UNALLOCA(joint);
870 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
871 return;
873 #endif
874 memcpy (lambda,rhs,m * sizeof(dReal));
875 dSolveCholesky (L,lambda,m);
876 #endif
878 # ifdef COMPARE_METHODS
879 comparator.nextMatrix (lambda,m,1,0,"lambda");
880 # endif
882 // compute the velocity update `vnew'
883 # ifdef TIMING
884 dTimerNow ("compute velocity update");
885 # endif
886 dMultiply1 (tmp1,J,lambda,n6,m,1);
887 for (i=0; i<n6; i++) tmp1[i] += fe[i];
888 dMultiply0 (vnew,invM,tmp1,n6,n6,1);
889 for (i=0; i<n6; i++) vnew[i] = v[i] + stepsize*vnew[i];
891 #ifdef REPORT_ERROR
892 // see if the constraint has worked: compute J*vnew and make sure it equals
893 // `c' (to within a certain tolerance).
894 # ifdef TIMING
895 dTimerNow ("verify constraint equation");
896 # endif
897 dMultiply0 (tmp1,J,vnew,m,n6,1);
898 dReal err = 0;
899 for (i=0; i<m; i++) {
900 err += dFabs(tmp1[i]-c[i]);
902 printf ("total constraint error=%.6e\n",err);
903 #endif
905 UNALLOCA(c);
906 UNALLOCA(cfm);
907 UNALLOCA(lo);
908 UNALLOCA(hi);
909 UNALLOCA(findex);
910 UNALLOCA(J);
911 UNALLOCA(JinvM);
912 UNALLOCA(A);
913 UNALLOCA(tmp1);
914 UNALLOCA(rhs);
915 UNALLOCA(lambda);
916 UNALLOCA(L);
918 else {
919 // no constraints
920 dMultiply0 (vnew,invM,fe,n6,n6,1);
921 for (i=0; i<n6; i++) vnew[i] = v[i] + stepsize*vnew[i];
924 #ifdef COMPARE_METHODS
925 comparator.nextMatrix (vnew,n6,1,0,"vnew");
926 #endif
928 // apply the velocity update to the bodies
929 #ifdef TIMING
930 dTimerNow ("update velocity");
931 #endif
932 for (i=0; i<nb; i++) {
933 for (j=0; j<3; j++) body[i]->lvel[j] = vnew[i*6+j];
934 for (j=0; j<3; j++) body[i]->avel[j] = vnew[i*6+3+j];
937 // update the position and orientation from the new linear/angular velocity
938 // (over the given timestep)
939 #ifdef TIMING
940 dTimerNow ("update position");
941 #endif
942 for (i=0; i<nb; i++) dxStepBody (body[i],stepsize);
944 #ifdef TIMING
945 dTimerNow ("tidy up");
946 #endif
948 // zero all force accumulators
949 for (i=0; i<nb; i++) {
950 body[i]->facc[0] = 0;
951 body[i]->facc[1] = 0;
952 body[i]->facc[2] = 0;
953 body[i]->facc[3] = 0;
954 body[i]->tacc[0] = 0;
955 body[i]->tacc[1] = 0;
956 body[i]->tacc[2] = 0;
957 body[i]->tacc[3] = 0;
960 #ifdef TIMING
961 dTimerEnd();
962 if (m > 0) dTimerReport (stdout,1);
963 #endif
965 UNALLOCA(joint);
966 UNALLOCA(I);
967 UNALLOCA(invI);
968 UNALLOCA(info);
969 UNALLOCA(ofs);
970 UNALLOCA(invM);
971 UNALLOCA(fe);
972 UNALLOCA(v);
973 UNALLOCA(vnew);
976 //****************************************************************************
977 // an optimized version of dInternalStepIsland1()
979 void dInternalStepIsland_x2 (dxWorld *world, dxBody * const *body, int nb,
980 dxJoint * const *_joint, int nj, dReal stepsize)
982 int i,j,k;
983 #ifdef TIMING
984 dTimerStart("preprocessing");
985 #endif
987 dReal stepsize1 = dRecip(stepsize);
989 // number all bodies in the body list - set their tag values
990 for (i=0; i<nb; i++) body[i]->tag = i;
992 // make a local copy of the joint array, because we might want to modify it.
993 // (the "dxJoint *const*" declaration says we're allowed to modify the joints
994 // but not the joint array, because the caller might need it unchanged).
995 ALLOCA(dxJoint*,joint,nj*sizeof(dxJoint*));
996 #ifdef dUSE_MALLOC_FOR_ALLOCA
997 if (joint == NULL) {
998 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
999 return;
1001 #endif
1002 memcpy (joint,_joint,nj * sizeof(dxJoint*));
1004 // for all bodies, compute the inertia tensor and its inverse in the global
1005 // frame, and compute the rotational force and add it to the torque
1006 // accumulator. I and invI are vertically stacked 3x4 matrices, one per body.
1007 // @@@ check computation of rotational force.
1008 #ifdef dGYROSCOPIC
1009 ALLOCA(dReal,I,3*nb*4*sizeof(dReal));
1010 # ifdef dUSE_MALLOC_FOR_ALLOCA
1011 if (I == NULL) {
1012 UNALLOCA(joint);
1013 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1014 return;
1016 # endif
1017 #else
1018 dReal *I = NULL;
1019 #endif // for dGYROSCOPIC
1021 ALLOCA(dReal,invI,3*nb*4*sizeof(dReal));
1022 #ifdef dUSE_MALLOC_FOR_ALLOCA
1023 if (invI == NULL) {
1024 UNALLOCA(I);
1025 UNALLOCA(joint);
1026 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1027 return;
1029 #endif
1031 //dSetZero (I,3*nb*4);
1032 //dSetZero (invI,3*nb*4);
1033 for (i=0; i<nb; i++) {
1034 dReal tmp[12];
1036 // compute inverse inertia tensor in global frame
1037 dMULTIPLY2_333 (tmp,body[i]->invI,body[i]->posr.R);
1038 dMULTIPLY0_333 (invI+i*12,body[i]->posr.R,tmp);
1039 #ifdef dGYROSCOPIC
1040 // compute inertia tensor in global frame
1041 dMULTIPLY2_333 (tmp,body[i]->mass.I,body[i]->posr.R);
1042 dMULTIPLY0_333 (I+i*12,body[i]->posr.R,tmp);
1044 // compute rotational force
1045 dMULTIPLY0_331 (tmp,I+i*12,body[i]->avel);
1046 dCROSS (body[i]->tacc,-=,body[i]->avel,tmp);
1047 #endif
1050 // add the gravity force to all bodies
1051 for (i=0; i<nb; i++) {
1052 if ((body[i]->flags & dxBodyNoGravity)==0) {
1053 body[i]->facc[0] += body[i]->mass.mass * world->gravity[0];
1054 body[i]->facc[1] += body[i]->mass.mass * world->gravity[1];
1055 body[i]->facc[2] += body[i]->mass.mass * world->gravity[2];
1059 // get m = total constraint dimension, nub = number of unbounded variables.
1060 // create constraint offset array and number-of-rows array for all joints.
1061 // the constraints are re-ordered as follows: the purely unbounded
1062 // constraints, the mixed unbounded + LCP constraints, and last the purely
1063 // LCP constraints. this assists the LCP solver to put all unbounded
1064 // variables at the start for a quick factorization.
1066 // joints with m=0 are inactive and are removed from the joints array
1067 // entirely, so that the code that follows does not consider them.
1068 // also number all active joints in the joint list (set their tag values).
1069 // inactive joints receive a tag value of -1.
1071 int m = 0;
1072 ALLOCA(dxJoint::Info1,info,nj*sizeof(dxJoint::Info1));
1073 #ifdef dUSE_MALLOC_FOR_ALLOCA
1074 if (info == NULL) {
1075 UNALLOCA(invI);
1076 UNALLOCA(I);
1077 UNALLOCA(joint);
1078 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1079 return;
1081 #endif
1082 ALLOCA(int,ofs,nj*sizeof(int));
1083 #ifdef dUSE_MALLOC_FOR_ALLOCA
1084 if (ofs == NULL) {
1085 UNALLOCA(info);
1086 UNALLOCA(invI);
1087 UNALLOCA(I);
1088 UNALLOCA(joint);
1089 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1090 return;
1092 #endif
1093 for (i=0, j=0; j<nj; j++) { // i=dest, j=src
1094 joint[j]->vtable->getInfo1 (joint[j],info+i);
1095 dIASSERT (info[i].m >= 0 && info[i].m <= 6 &&
1096 info[i].nub >= 0 && info[i].nub <= info[i].m);
1097 if (info[i].m > 0) {
1098 joint[i] = joint[j];
1099 joint[i]->tag = i;
1100 i++;
1102 else {
1103 joint[j]->tag = -1;
1106 nj = i;
1108 // the purely unbounded constraints
1109 for (i=0; i<nj; i++) if (info[i].nub == info[i].m) {
1110 ofs[i] = m;
1111 m += info[i].m;
1113 int nub = m;
1114 // the mixed unbounded + LCP constraints
1115 for (i=0; i<nj; i++) if (info[i].nub > 0 && info[i].nub < info[i].m) {
1116 ofs[i] = m;
1117 m += info[i].m;
1119 // the purely LCP constraints
1120 for (i=0; i<nj; i++) if (info[i].nub == 0) {
1121 ofs[i] = m;
1122 m += info[i].m;
1125 // this will be set to the force due to the constraints
1126 ALLOCA(dReal,cforce,nb*8*sizeof(dReal));
1127 #ifdef dUSE_MALLOC_FOR_ALLOCA
1128 if (cforce == NULL) {
1129 UNALLOCA(ofs);
1130 UNALLOCA(info);
1131 UNALLOCA(invI);
1132 UNALLOCA(I);
1133 UNALLOCA(joint);
1134 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1135 return;
1137 #endif
1138 dSetZero (cforce,nb*8);
1140 // if there are constraints, compute cforce
1141 if (m > 0) {
1142 // create a constraint equation right hand side vector `c', a constraint
1143 // force mixing vector `cfm', and LCP low and high bound vectors, and an
1144 // 'findex' vector.
1145 ALLOCA(dReal,c,m*sizeof(dReal));
1146 #ifdef dUSE_MALLOC_FOR_ALLOCA
1147 if (c == NULL) {
1148 UNALLOCA(cforce);
1149 UNALLOCA(ofs);
1150 UNALLOCA(info);
1151 UNALLOCA(invI);
1152 UNALLOCA(I);
1153 UNALLOCA(joint);
1154 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1155 return;
1157 #endif
1158 ALLOCA(dReal,cfm,m*sizeof(dReal));
1159 #ifdef dUSE_MALLOC_FOR_ALLOCA
1160 if (cfm == NULL) {
1161 UNALLOCA(c);
1162 UNALLOCA(cforce);
1163 UNALLOCA(ofs);
1164 UNALLOCA(info);
1165 UNALLOCA(invI);
1166 UNALLOCA(I);
1167 UNALLOCA(joint);
1168 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1169 return;
1171 #endif
1172 ALLOCA(dReal,lo,m*sizeof(dReal));
1173 #ifdef dUSE_MALLOC_FOR_ALLOCA
1174 if (lo == NULL) {
1175 UNALLOCA(cfm);
1176 UNALLOCA(c);
1177 UNALLOCA(cforce);
1178 UNALLOCA(ofs);
1179 UNALLOCA(info);
1180 UNALLOCA(invI);
1181 UNALLOCA(I);
1182 UNALLOCA(joint);
1183 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1184 return;
1186 #endif
1187 ALLOCA(dReal,hi,m*sizeof(dReal));
1188 #ifdef dUSE_MALLOC_FOR_ALLOCA
1189 if (hi == NULL) {
1190 UNALLOCA(lo);
1191 UNALLOCA(cfm);
1192 UNALLOCA(c);
1193 UNALLOCA(cforce);
1194 UNALLOCA(ofs);
1195 UNALLOCA(info);
1196 UNALLOCA(invI);
1197 UNALLOCA(I);
1198 UNALLOCA(joint);
1199 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1200 return;
1202 #endif
1203 ALLOCA(int,findex,m*sizeof(int));
1204 #ifdef dUSE_MALLOC_FOR_ALLOCA
1205 if (findex == NULL) {
1206 UNALLOCA(hi);
1207 UNALLOCA(lo);
1208 UNALLOCA(cfm);
1209 UNALLOCA(c);
1210 UNALLOCA(cforce);
1211 UNALLOCA(ofs);
1212 UNALLOCA(info);
1213 UNALLOCA(invI);
1214 UNALLOCA(I);
1215 UNALLOCA(joint);
1216 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1217 return;
1219 #endif
1220 dSetZero (c,m);
1221 dSetValue (cfm,m,world->global_cfm);
1222 dSetValue (lo,m,-dInfinity);
1223 dSetValue (hi,m, dInfinity);
1224 for (i=0; i<m; i++) findex[i] = -1;
1226 // get jacobian data from constraints. a (2*m)x8 matrix will be created
1227 // to store the two jacobian blocks from each constraint. it has this
1228 // format:
1230 // l l l 0 a a a 0 \ .
1231 // l l l 0 a a a 0 }-- jacobian body 1 block for joint 0 (3 rows)
1232 // l l l 0 a a a 0 /
1233 // l l l 0 a a a 0 \ .
1234 // l l l 0 a a a 0 }-- jacobian body 2 block for joint 0 (3 rows)
1235 // l l l 0 a a a 0 /
1236 // l l l 0 a a a 0 }--- jacobian body 1 block for joint 1 (1 row)
1237 // l l l 0 a a a 0 }--- jacobian body 2 block for joint 1 (1 row)
1238 // etc...
1240 // (lll) = linear jacobian data
1241 // (aaa) = angular jacobian data
1243 # ifdef TIMING
1244 dTimerNow ("create J");
1245 # endif
1246 ALLOCA(dReal,J,2*m*8*sizeof(dReal));
1247 #ifdef dUSE_MALLOC_FOR_ALLOCA
1248 if (J == NULL) {
1249 UNALLOCA(findex);
1250 UNALLOCA(hi);
1251 UNALLOCA(lo);
1252 UNALLOCA(cfm);
1253 UNALLOCA(c);
1254 UNALLOCA(cforce);
1255 UNALLOCA(ofs);
1256 UNALLOCA(info);
1257 UNALLOCA(invI);
1258 UNALLOCA(I);
1259 UNALLOCA(joint);
1260 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1261 return;
1263 #endif
1264 dSetZero (J,2*m*8);
1265 dxJoint::Info2 Jinfo;
1266 Jinfo.rowskip = 8;
1267 Jinfo.fps = stepsize1;
1268 Jinfo.erp = world->global_erp;
1269 for (i=0; i<nj; i++) {
1270 Jinfo.J1l = J + 2*8*ofs[i];
1271 Jinfo.J1a = Jinfo.J1l + 4;
1272 Jinfo.J2l = Jinfo.J1l + 8*info[i].m;
1273 Jinfo.J2a = Jinfo.J2l + 4;
1274 Jinfo.c = c + ofs[i];
1275 Jinfo.cfm = cfm + ofs[i];
1276 Jinfo.lo = lo + ofs[i];
1277 Jinfo.hi = hi + ofs[i];
1278 Jinfo.findex = findex + ofs[i];
1279 joint[i]->vtable->getInfo2 (joint[i],&Jinfo);
1280 // adjust returned findex values for global index numbering
1281 for (j=0; j<info[i].m; j++) {
1282 if (findex[ofs[i] + j] >= 0) findex[ofs[i] + j] += ofs[i];
1286 // compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
1287 // format as J so we just go through the constraints in J multiplying by
1288 // the appropriate scalars and matrices.
1289 # ifdef TIMING
1290 dTimerNow ("compute A");
1291 # endif
1292 ALLOCA(dReal,JinvM,2*m*8*sizeof(dReal));
1293 #ifdef dUSE_MALLOC_FOR_ALLOCA
1294 if (JinvM == NULL) {
1295 UNALLOCA(J);
1296 UNALLOCA(findex);
1297 UNALLOCA(hi);
1298 UNALLOCA(lo);
1299 UNALLOCA(cfm);
1300 UNALLOCA(c);
1301 UNALLOCA(cforce);
1302 UNALLOCA(ofs);
1303 UNALLOCA(info);
1304 UNALLOCA(invI);
1305 UNALLOCA(I);
1306 UNALLOCA(joint);
1307 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1308 return;
1310 #endif
1311 dSetZero (JinvM,2*m*8);
1312 for (i=0; i<nj; i++) {
1313 int b = joint[i]->node[0].body->tag;
1314 dReal body_invMass = body[b]->invMass;
1315 dReal *body_invI = invI + b*12;
1316 dReal *Jsrc = J + 2*8*ofs[i];
1317 dReal *Jdst = JinvM + 2*8*ofs[i];
1318 for (j=info[i].m-1; j>=0; j--) {
1319 for (k=0; k<3; k++) Jdst[k] = Jsrc[k] * body_invMass;
1320 dMULTIPLY0_133 (Jdst+4,Jsrc+4,body_invI);
1321 Jsrc += 8;
1322 Jdst += 8;
1324 if (joint[i]->node[1].body) {
1325 b = joint[i]->node[1].body->tag;
1326 body_invMass = body[b]->invMass;
1327 body_invI = invI + b*12;
1328 for (j=info[i].m-1; j>=0; j--) {
1329 for (k=0; k<3; k++) Jdst[k] = Jsrc[k] * body_invMass;
1330 dMULTIPLY0_133 (Jdst+4,Jsrc+4,body_invI);
1331 Jsrc += 8;
1332 Jdst += 8;
1337 // now compute A = JinvM * J'. A's rows and columns are grouped by joint,
1338 // i.e. in the same way as the rows of J. block (i,j) of A is only nonzero
1339 // if joints i and j have at least one body in common. this fact suggests
1340 // the algorithm used to fill A:
1342 // for b = all bodies
1343 // n = number of joints attached to body b
1344 // for i = 1..n
1345 // for j = i+1..n
1346 // ii = actual joint number for i
1347 // jj = actual joint number for j
1348 // // (ii,jj) will be set to all pairs of joints around body b
1349 // compute blockwise: A(ii,jj) += JinvM(ii) * J(jj)'
1351 // this algorithm catches all pairs of joints that have at least one body
1352 // in common. it does not compute the diagonal blocks of A however -
1353 // another similar algorithm does that.
1355 int mskip = dPAD(m);
1356 ALLOCA(dReal,A,m*mskip*sizeof(dReal));
1357 #ifdef dUSE_MALLOC_FOR_ALLOCA
1358 if (A == NULL) {
1359 UNALLOCA(JinvM);
1360 UNALLOCA(J);
1361 UNALLOCA(findex);
1362 UNALLOCA(hi);
1363 UNALLOCA(lo);
1364 UNALLOCA(cfm);
1365 UNALLOCA(c);
1366 UNALLOCA(cforce);
1367 UNALLOCA(ofs);
1368 UNALLOCA(info);
1369 UNALLOCA(invI);
1370 UNALLOCA(I);
1371 UNALLOCA(joint);
1372 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1373 return;
1375 #endif
1376 dSetZero (A,m*mskip);
1377 for (i=0; i<nb; i++) {
1378 for (dxJointNode *n1=body[i]->firstjoint; n1; n1=n1->next) {
1379 for (dxJointNode *n2=n1->next; n2; n2=n2->next) {
1380 // get joint numbers and ensure ofs[j1] >= ofs[j2]
1381 int j1 = n1->joint->tag;
1382 int j2 = n2->joint->tag;
1383 if (ofs[j1] < ofs[j2]) {
1384 int tmp = j1;
1385 j1 = j2;
1386 j2 = tmp;
1389 // if either joint was tagged as -1 then it is an inactive (m=0)
1390 // joint that should not be considered
1391 if (j1==-1 || j2==-1) continue;
1393 // determine if body i is the 1st or 2nd body of joints j1 and j2
1394 int jb1 = (joint[j1]->node[1].body == body[i]);
1395 int jb2 = (joint[j2]->node[1].body == body[i]);
1396 // jb1/jb2 must be 0 for joints with only one body
1397 dIASSERT(joint[j1]->node[1].body || jb1==0);
1398 dIASSERT(joint[j2]->node[1].body || jb2==0);
1400 // set block of A
1401 MultiplyAdd2_p8r (A + ofs[j1]*mskip + ofs[j2],
1402 JinvM + 2*8*ofs[j1] + jb1*8*info[j1].m,
1403 J + 2*8*ofs[j2] + jb2*8*info[j2].m,
1404 info[j1].m,info[j2].m, mskip);
1408 // compute diagonal blocks of A
1409 for (i=0; i<nj; i++) {
1410 Multiply2_p8r (A + ofs[i]*(mskip+1),
1411 JinvM + 2*8*ofs[i],
1412 J + 2*8*ofs[i],
1413 info[i].m,info[i].m, mskip);
1414 if (joint[i]->node[1].body) {
1415 MultiplyAdd2_p8r (A + ofs[i]*(mskip+1),
1416 JinvM + 2*8*ofs[i] + 8*info[i].m,
1417 J + 2*8*ofs[i] + 8*info[i].m,
1418 info[i].m,info[i].m, mskip);
1422 // add cfm to the diagonal of A
1423 for (i=0; i<m; i++) A[i*mskip+i] += cfm[i] * stepsize1;
1425 # ifdef COMPARE_METHODS
1426 comparator.nextMatrix (A,m,m,1,"A");
1427 # endif
1429 // compute the right hand side `rhs'
1430 # ifdef TIMING
1431 dTimerNow ("compute rhs");
1432 # endif
1433 ALLOCA(dReal,tmp1,nb*8*sizeof(dReal));
1434 #ifdef dUSE_MALLOC_FOR_ALLOCA
1435 if (tmp1 == NULL) {
1436 UNALLOCA(A);
1437 UNALLOCA(JinvM);
1438 UNALLOCA(J);
1439 UNALLOCA(findex);
1440 UNALLOCA(hi);
1441 UNALLOCA(lo);
1442 UNALLOCA(cfm);
1443 UNALLOCA(c);
1444 UNALLOCA(cforce);
1445 UNALLOCA(ofs);
1446 UNALLOCA(info);
1447 UNALLOCA(invI);
1448 UNALLOCA(I);
1449 UNALLOCA(joint);
1450 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1451 return;
1453 #endif
1454 //dSetZero (tmp1,nb*8);
1455 // put v/h + invM*fe into tmp1
1456 for (i=0; i<nb; i++) {
1457 dReal body_invMass = body[i]->invMass;
1458 dReal *body_invI = invI + i*12;
1459 for (j=0; j<3; j++) tmp1[i*8+j] = body[i]->facc[j] * body_invMass +
1460 body[i]->lvel[j] * stepsize1;
1461 dMULTIPLY0_331 (tmp1 + i*8 + 4,body_invI,body[i]->tacc);
1462 for (j=0; j<3; j++) tmp1[i*8+4+j] += body[i]->avel[j] * stepsize1;
1464 // put J*tmp1 into rhs
1465 ALLOCA(dReal,rhs,m*sizeof(dReal));
1466 #ifdef dUSE_MALLOC_FOR_ALLOCA
1467 if (rhs == NULL) {
1468 UNALLOCA(tmp1);
1469 UNALLOCA(A);
1470 UNALLOCA(JinvM);
1471 UNALLOCA(J);
1472 UNALLOCA(findex);
1473 UNALLOCA(hi);
1474 UNALLOCA(lo);
1475 UNALLOCA(cfm);
1476 UNALLOCA(c);
1477 UNALLOCA(cforce);
1478 UNALLOCA(ofs);
1479 UNALLOCA(info);
1480 UNALLOCA(invI);
1481 UNALLOCA(I);
1482 UNALLOCA(joint);
1483 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1484 return;
1486 #endif
1487 //dSetZero (rhs,m);
1488 for (i=0; i<nj; i++) {
1489 dReal *JJ = J + 2*8*ofs[i];
1490 Multiply0_p81 (rhs+ofs[i],JJ,
1491 tmp1 + 8*joint[i]->node[0].body->tag, info[i].m);
1492 if (joint[i]->node[1].body) {
1493 MultiplyAdd0_p81 (rhs+ofs[i],JJ + 8*info[i].m,
1494 tmp1 + 8*joint[i]->node[1].body->tag, info[i].m);
1497 // complete rhs
1498 for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i];
1500 # ifdef COMPARE_METHODS
1501 comparator.nextMatrix (c,m,1,0,"c");
1502 comparator.nextMatrix (rhs,m,1,0,"rhs");
1503 # endif
1505 // solve the LCP problem and get lambda.
1506 // this will destroy A but that's okay
1507 # ifdef TIMING
1508 dTimerNow ("solving LCP problem");
1509 # endif
1510 ALLOCA(dReal,lambda,m*sizeof(dReal));
1511 #ifdef dUSE_MALLOC_FOR_ALLOCA
1512 if (lambda == NULL) {
1513 UNALLOCA(rhs);
1514 UNALLOCA(tmp1);
1515 UNALLOCA(A);
1516 UNALLOCA(JinvM);
1517 UNALLOCA(J);
1518 UNALLOCA(findex);
1519 UNALLOCA(hi);
1520 UNALLOCA(lo);
1521 UNALLOCA(cfm);
1522 UNALLOCA(c);
1523 UNALLOCA(cforce);
1524 UNALLOCA(ofs);
1525 UNALLOCA(info);
1526 UNALLOCA(invI);
1527 UNALLOCA(I);
1528 UNALLOCA(joint);
1529 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1530 return;
1532 #endif
1533 ALLOCA(dReal,residual,m*sizeof(dReal));
1534 #ifdef dUSE_MALLOC_FOR_ALLOCA
1535 if (residual == NULL) {
1536 UNALLOCA(lambda);
1537 UNALLOCA(rhs);
1538 UNALLOCA(tmp1);
1539 UNALLOCA(A);
1540 UNALLOCA(JinvM);
1541 UNALLOCA(J);
1542 UNALLOCA(findex);
1543 UNALLOCA(hi);
1544 UNALLOCA(lo);
1545 UNALLOCA(cfm);
1546 UNALLOCA(c);
1547 UNALLOCA(cforce);
1548 UNALLOCA(ofs);
1549 UNALLOCA(info);
1550 UNALLOCA(invI);
1551 UNALLOCA(I);
1552 UNALLOCA(joint);
1553 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1554 return;
1556 #endif
1557 dSolveLCP (m,A,lambda,rhs,residual,nub,lo,hi,findex);
1559 #ifdef dUSE_MALLOC_FOR_ALLOCA
1560 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY)
1561 return;
1562 #endif
1565 // OLD WAY - direct factor and solve
1567 // // factorize A (L*L'=A)
1568 //# ifdef TIMING
1569 // dTimerNow ("factorize A");
1570 //# endif
1571 // dReal *L = (dReal*) ALLOCA (m*mskip*sizeof(dReal));
1572 // memcpy (L,A,m*mskip*sizeof(dReal));
1573 //# ifdef FAST_FACTOR
1574 // dFastFactorCholesky (L,m); // does not report non positive definiteness
1575 //# else
1576 // if (dFactorCholesky (L,m)==0) dDebug (0,"A is not positive definite");
1577 //# endif
1579 // // compute lambda
1580 //# ifdef TIMING
1581 // dTimerNow ("compute lambda");
1582 //# endif
1583 // dReal *lambda = (dReal*) ALLOCA (m * sizeof(dReal));
1584 // memcpy (lambda,rhs,m * sizeof(dReal));
1585 // dSolveCholesky (L,lambda,m);
1587 # ifdef COMPARE_METHODS
1588 comparator.nextMatrix (lambda,m,1,0,"lambda");
1589 # endif
1591 // compute the constraint force `cforce'
1592 # ifdef TIMING
1593 dTimerNow ("compute constraint force");
1594 # endif
1595 // compute cforce = J'*lambda
1596 for (i=0; i<nj; i++) {
1597 dReal *JJ = J + 2*8*ofs[i];
1598 dxBody* b1 = joint[i]->node[0].body;
1599 dxBody* b2 = joint[i]->node[1].body;
1600 dJointFeedback *fb = joint[i]->feedback;
1602 if (fb) {
1603 // the user has requested feedback on the amount of force that this
1604 // joint is applying to the bodies. we use a slightly slower
1605 // computation that splits out the force components and puts them
1606 // in the feedback structure.
1607 dReal data[8];
1609 Multiply1_8q1 (data, JJ, lambda+ofs[i], info[i].m);
1610 dReal *cf1 = cforce + 8*b1->tag;
1611 cf1[0] += (fb->f1[0] = data[0]);
1612 cf1[1] += (fb->f1[1] = data[1]);
1613 cf1[2] += (fb->f1[2] = data[2]);
1614 cf1[4] += (fb->t1[0] = data[4]);
1615 cf1[5] += (fb->t1[1] = data[5]);
1616 cf1[6] += (fb->t1[2] = data[6]);
1617 if (b2){
1618 Multiply1_8q1 (data, JJ + 8*info[i].m, lambda+ofs[i], info[i].m);
1619 dReal *cf2 = cforce + 8*b2->tag;
1620 cf2[0] += (fb->f2[0] = data[0]);
1621 cf2[1] += (fb->f2[1] = data[1]);
1622 cf2[2] += (fb->f2[2] = data[2]);
1623 cf2[4] += (fb->t2[0] = data[4]);
1624 cf2[5] += (fb->t2[1] = data[5]);
1625 cf2[6] += (fb->t2[2] = data[6]);
1628 else {
1629 // no feedback is required, let's compute cforce the faster way
1630 MultiplyAdd1_8q1 (cforce + 8*b1->tag,JJ, lambda+ofs[i], info[i].m);
1631 if (b2) {
1632 MultiplyAdd1_8q1 (cforce + 8*b2->tag,
1633 JJ + 8*info[i].m, lambda+ofs[i], info[i].m);
1637 UNALLOCA(c);
1638 UNALLOCA(cfm);
1639 UNALLOCA(lo);
1640 UNALLOCA(hi);
1641 UNALLOCA(findex);
1642 UNALLOCA(J);
1643 UNALLOCA(JinvM);
1644 UNALLOCA(A);
1645 UNALLOCA(tmp1);
1646 UNALLOCA(rhs);
1647 UNALLOCA(lambda);
1648 UNALLOCA(residual);
1651 // compute the velocity update
1652 #ifdef TIMING
1653 dTimerNow ("compute velocity update");
1654 #endif
1656 // add fe to cforce
1657 for (i=0; i<nb; i++) {
1658 for (j=0; j<3; j++) cforce[i*8+j] += body[i]->facc[j];
1659 for (j=0; j<3; j++) cforce[i*8+4+j] += body[i]->tacc[j];
1661 // multiply cforce by stepsize
1662 for (i=0; i < nb*8; i++) cforce[i] *= stepsize;
1663 // add invM * cforce to the body velocity
1664 for (i=0; i<nb; i++) {
1665 dReal body_invMass = body[i]->invMass;
1666 dReal *body_invI = invI + i*12;
1667 for (j=0; j<3; j++) body[i]->lvel[j] += body_invMass * cforce[i*8+j];
1668 dMULTIPLYADD0_331 (body[i]->avel,body_invI,cforce+i*8+4);
1671 // update the position and orientation from the new linear/angular velocity
1672 // (over the given timestep)
1673 # ifdef TIMING
1674 dTimerNow ("update position");
1675 # endif
1676 for (i=0; i<nb; i++) dxStepBody (body[i],stepsize);
1678 #ifdef COMPARE_METHODS
1679 ALLOCA(dReal,tmp, ALLOCA (nb*6*sizeof(dReal));
1680 #ifdef dUSE_MALLOC_FOR_ALLOCA
1681 if (tmp == NULL) {
1682 UNALLOCA(cforce);
1683 UNALLOCA(ofs);
1684 UNALLOCA(info);
1685 UNALLOCA(invI);
1686 UNALLOCA(I);
1687 UNALLOCA(joint);
1688 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1689 return;
1691 #endif
1692 for (i=0; i<nb; i++) {
1693 for (j=0; j<3; j++) tmp_vnew[i*6+j] = body[i]->lvel[j];
1694 for (j=0; j<3; j++) tmp_vnew[i*6+3+j] = body[i]->avel[j];
1696 comparator.nextMatrix (tmp_vnew,nb*6,1,0,"vnew");
1697 UNALLOCA(tmp);
1698 #endif
1700 #ifdef TIMING
1701 dTimerNow ("tidy up");
1702 #endif
1704 // zero all force accumulators
1705 for (i=0; i<nb; i++) {
1706 body[i]->facc[0] = 0;
1707 body[i]->facc[1] = 0;
1708 body[i]->facc[2] = 0;
1709 body[i]->facc[3] = 0;
1710 body[i]->tacc[0] = 0;
1711 body[i]->tacc[1] = 0;
1712 body[i]->tacc[2] = 0;
1713 body[i]->tacc[3] = 0;
1716 #ifdef TIMING
1717 dTimerEnd();
1718 if (m > 0) dTimerReport (stdout,1);
1719 #endif
1721 UNALLOCA(joint);
1722 UNALLOCA(I);
1723 UNALLOCA(invI);
1724 UNALLOCA(info);
1725 UNALLOCA(ofs);
1726 UNALLOCA(cforce);
1729 //****************************************************************************
1731 void dInternalStepIsland (dxWorld *world, dxBody * const *body, int nb,
1732 dxJoint * const *joint, int nj, dReal stepsize)
1735 #ifdef dUSE_MALLOC_FOR_ALLOCA
1736 dMemoryFlag = d_MEMORY_OK;
1737 #endif
1739 #ifndef COMPARE_METHODS
1740 dInternalStepIsland_x2 (world,body,nb,joint,nj,stepsize);
1742 #ifdef dUSE_MALLOC_FOR_ALLOCA
1743 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1744 REPORT_OUT_OF_MEMORY;
1745 return;
1747 #endif
1749 #endif
1751 #ifdef COMPARE_METHODS
1752 int i;
1754 // save body state
1755 ALLOCA(dxBody,state,nb*sizeof(dxBody));
1756 #ifdef dUSE_MALLOC_FOR_ALLOCA
1757 if (state == NULL) {
1758 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1759 REPORT_OUT_OF_MEMORY;
1760 return;
1762 #endif
1763 for (i=0; i<nb; i++) memcpy (state+i,body[i],sizeof(dxBody));
1765 // take slow step
1766 comparator.reset();
1767 dInternalStepIsland_x1 (world,body,nb,joint,nj,stepsize);
1768 comparator.end();
1769 #ifdef dUSE_MALLOC_FOR_ALLOCA
1770 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1771 UNALLOCA(state);
1772 REPORT_OUT_OF_MEMORY;
1773 return;
1775 #endif
1777 // restore state
1778 for (i=0; i<nb; i++) memcpy (body[i],state+i,sizeof(dxBody));
1780 // take fast step
1781 dInternalStepIsland_x2 (world,body,nb,joint,nj,stepsize);
1782 comparator.end();
1783 #ifdef dUSE_MALLOC_FOR_ALLOCA
1784 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1785 UNALLOCA(state);
1786 REPORT_OUT_OF_MEMORY;
1787 return;
1789 #endif
1791 //comparator.dump();
1792 //_exit (1);
1793 UNALLOCA(state);
1794 #endif