ode/src/stepfast.cpp

   1 /*************************************************************************
   2  *                                                                       *
   3  * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
   4  * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
   5  *                                                                       *
   6  * Fast iterative solver, David Whittaker. Email: david@csworkbench.com  *
   7  *                                                                       *
   8  * This library is free software; you can redistribute it and/or         *
   9  * modify it under the terms of EITHER:                                  *
  10  *   (1) The GNU Lesser General Public License as published by the Free  *
  11  *       Software Foundation; either version 2.1 of the License, or (at  *
  12  *       your option) any later version. The text of the GNU Lesser      *
  13  *       General Public License is included with this library in the     *
  14  *       file LICENSE.TXT.                                               *
  15  *   (2) The BSD-style license that is included with this library in     *
  16  *       the file LICENSE-BSD.TXT.                                       *
  17  *                                                                       *
  18  * This library is distributed in the hope that it will be useful,       *
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
  21  * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
  22  *                                                                       *
  23  *************************************************************************/
  24
  25 // This is the StepFast code by David Whittaker. This code is faster, but
  26 // sometimes less stable than, the original "big matrix" code.
  27 // Refer to the user's manual for more information.
  28 // Note that this source file duplicates a lot of stuff from step.cpp,
  29 // eventually we should move the common code to a third file.
  30
  31 #include "objects.h"
  32 #include "joint.h"
  33 #include <ode/odeconfig.h>
  34 #include "config.h"
  35 #include <ode/objects.h>
  36 #include <ode/odemath.h>
  37 #include <ode/rotation.h>
  38 #include <ode/timer.h>
  39 #include <ode/error.h>
  40 #include <ode/matrix.h>
  41 #include <ode/misc.h>
  42 #include "lcp.h"
  43 #include "step.h"
  44 #include "util.h"
  45
  46
  47 // misc defines
  48
  49 #define ALLOCA dALLOCA16
  50
  51 #define RANDOM_JOINT_ORDER
  52 //#define FAST_FACTOR   //use a factorization approximation to the LCP solver (fast, theoretically less accurate)
  53 #define SLOW_LCP      //use the old LCP solver
  54 //#define NO_ISLANDS    //does not perform island creation code (3~4% of simulation time), body disabling doesn't work
  55 //#define TIMING
  56
  57
  58 static int autoEnableDepth = 2;
  59
  60 void dWorldSetAutoEnableDepthSF1 (dxWorld *world, int autodepth)
  61 {
  62         if (autodepth > 0)
  63                 autoEnableDepth = autodepth;
  64         else
  65                 autoEnableDepth = 0;
  66 }
  67
  68 int dWorldGetAutoEnableDepthSF1 (dxWorld *world)
  69 {
  70         return autoEnableDepth;
  71 }
  72
  73 //little bit of math.... the _sym_ functions assume the return matrix will be symmetric
  74 static void
  75 Multiply2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
  76 {
  77         int i, j;
  78         dReal sum, *aa, *ad, *bb, *cc;
  79         dIASSERT (p > 0 && A && B && C);
  80         bb = B;
  81         for (i = 0; i < p; i++)
  82         {
  83                 //aa is going accross the matrix, ad down
  84                 aa = ad = A;
  85                 cc = C;
  86                 for (j = i; j < p; j++)
  87                 {
  88                         sum = bb[0] * cc[0];
  89                         sum += bb[1] * cc[1];
  90                         sum += bb[2] * cc[2];
  91                         sum += bb[4] * cc[4];
  92                         sum += bb[5] * cc[5];
  93                         sum += bb[6] * cc[6];
  94                         *(aa++) = *ad = sum;
  95                         ad += Askip;
  96                         cc += 8;
  97                 }
  98                 bb += 8;
  99                 A += Askip + 1;
 100                 C += 8;
 101         }
 102 }
 103
 104 static void
 105 MultiplyAdd2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
 106 {
 107         int i, j;
 108         dReal sum, *aa, *ad, *bb, *cc;
 109         dIASSERT (p > 0 && A && B && C);
 110         bb = B;
 111         for (i = 0; i < p; i++)
 112         {
 113                 //aa is going accross the matrix, ad down
 114                 aa = ad = A;
 115                 cc = C;
 116                 for (j = i; j < p; j++)
 117                 {
 118                         sum = bb[0] * cc[0];
 119                         sum += bb[1] * cc[1];
 120                         sum += bb[2] * cc[2];
 121                         sum += bb[4] * cc[4];
 122                         sum += bb[5] * cc[5];
 123                         sum += bb[6] * cc[6];
 124                         *(aa++) += sum;
 125                         *ad += sum;
 126                         ad += Askip;
 127                         cc += 8;
 128                 }
 129                 bb += 8;
 130                 A += Askip + 1;
 131                 C += 8;
 132         }
 133 }
 134
 135
 136 // this assumes the 4th and 8th rows of B are zero.
 137
 138 static void
 139 Multiply0_p81 (dReal * A, dReal * B, dReal * C, int p)
 140 {
 141         int i;
 142         dIASSERT (p > 0 && A && B && C);
 143         dReal sum;
 144         for (i = p; i; i--)
 145         {
 146                 sum = B[0] * C[0];
 147                 sum += B[1] * C[1];
 148                 sum += B[2] * C[2];
 149                 sum += B[4] * C[4];
 150                 sum += B[5] * C[5];
 151                 sum += B[6] * C[6];
 152                 *(A++) = sum;
 153                 B += 8;
 154         }
 155 }
 156
 157
 158 // this assumes the 4th and 8th rows of B are zero.
 159
 160 static void
 161 MultiplyAdd0_p81 (dReal * A, dReal * B, dReal * C, int p)
 162 {
 163         int i;
 164         dIASSERT (p > 0 && A && B && C);
 165         dReal sum;
 166         for (i = p; i; i--)
 167         {
 168                 sum = B[0] * C[0];
 169                 sum += B[1] * C[1];
 170                 sum += B[2] * C[2];
 171                 sum += B[4] * C[4];
 172                 sum += B[5] * C[5];
 173                 sum += B[6] * C[6];
 174                 *(A++) += sum;
 175                 B += 8;
 176         }
 177 }
 178
 179
 180 // this assumes the 4th and 8th rows of B are zero.
 181
 182 static void
 183 Multiply1_8q1 (dReal * A, dReal * B, dReal * C, int q)
 184 {
 185         int k;
 186         dReal sum;
 187         dIASSERT (q > 0 && A && B && C);
 188         sum = 0;
 189         for (k = 0; k < q; k++)
 190                 sum += B[k * 8] * C[k];
 191         A[0] = sum;
 192         sum = 0;
 193         for (k = 0; k < q; k++)
 194                 sum += B[1 + k * 8] * C[k];
 195         A[1] = sum;
 196         sum = 0;
 197         for (k = 0; k < q; k++)
 198                 sum += B[2 + k * 8] * C[k];
 199         A[2] = sum;
 200         sum = 0;
 201         for (k = 0; k < q; k++)
 202                 sum += B[4 + k * 8] * C[k];
 203         A[4] = sum;
 204         sum = 0;
 205         for (k = 0; k < q; k++)
 206                 sum += B[5 + k * 8] * C[k];
 207         A[5] = sum;
 208         sum = 0;
 209         for (k = 0; k < q; k++)
 210                 sum += B[6 + k * 8] * C[k];
 211         A[6] = sum;
 212 }
 213
 214 //****************************************************************************
 215 // body rotation
 216
 217 // return sin(x)/x. this has a singularity at 0 so special handling is needed
 218 // for small arguments.
 219
 220 static inline dReal
 221 sinc (dReal x)
 222 {
 223         // if |x| < 1e-4 then use a taylor series expansion. this two term expansion
 224         // is actually accurate to one LS bit within this range if double precision
 225         // is being used - so don't worry!
 226         if (dFabs (x) < 1.0e-4)
 227                 return REAL (1.0) - x * x * REAL (0.166666666666666666667);
 228         else
 229                 return dSin (x) / x;
 230 }
 231
 232 #if 0 // this is just dxStepBody()
 233 // given a body b, apply its linear and angular rotation over the time
 234 // interval h, thereby adjusting its position and orientation.
 235
 236 static inline void
 237 moveAndRotateBody (dxBody * b, dReal h)
 238 {
 239         int j;
 240
 241         // handle linear velocity
 242         for (j = 0; j < 3; j++)
 243                 b->posr.pos[j] += h * b->lvel[j];
 244
 245         if (b->flags & dxBodyFlagFiniteRotation)
 246         {
 247                 dVector3 irv;                   // infitesimal rotation vector
 248                 dQuaternion q;                  // quaternion for finite rotation
 249
 250                 if (b->flags & dxBodyFlagFiniteRotationAxis)
 251                 {
 252                         // split the angular velocity vector into a component along the finite
 253                         // rotation axis, and a component orthogonal to it.
 254                         dVector3 frv, irv;      // finite rotation vector
 255                         dReal k = dDOT (b->finite_rot_axis, b->avel);
 256                         frv[0] = b->finite_rot_axis[0] * k;
 257                         frv[1] = b->finite_rot_axis[1] * k;
 258                         frv[2] = b->finite_rot_axis[2] * k;
 259                         irv[0] = b->avel[0] - frv[0];
 260                         irv[1] = b->avel[1] - frv[1];
 261                         irv[2] = b->avel[2] - frv[2];
 262
 263                         // make a rotation quaternion q that corresponds to frv * h.
 264                         // compare this with the full-finite-rotation case below.
 265                         h *= REAL (0.5);
 266                         dReal theta = k * h;
 267                         q[0] = dCos (theta);
 268                         dReal s = sinc (theta) * h;
 269                         q[1] = frv[0] * s;
 270                         q[2] = frv[1] * s;
 271                         q[3] = frv[2] * s;
 272                 }
 273                 else
 274                 {
 275                         // make a rotation quaternion q that corresponds to w * h
 276                         dReal wlen = dSqrt (b->avel[0] * b->avel[0] + b->avel[1] * b->avel[1] + b->avel[2] * b->avel[2]);
 277                         h *= REAL (0.5);
 278                         dReal theta = wlen * h;
 279                         q[0] = dCos (theta);
 280                         dReal s = sinc (theta) * h;
 281                         q[1] = b->avel[0] * s;
 282                         q[2] = b->avel[1] * s;
 283                         q[3] = b->avel[2] * s;
 284                 }
 285
 286                 // do the finite rotation
 287                 dQuaternion q2;
 288                 dQMultiply0 (q2, q, b->q);
 289                 for (j = 0; j < 4; j++)
 290                         b->q[j] = q2[j];
 291
 292                 // do the infitesimal rotation if required
 293                 if (b->flags & dxBodyFlagFiniteRotationAxis)
 294                 {
 295                         dReal dq[4];
 296                         dWtoDQ (irv, b->q, dq);
 297                         for (j = 0; j < 4; j++)
 298                                 b->q[j] += h * dq[j];
 299                 }
 300         }
 301         else
 302         {
 303                 // the normal way - do an infitesimal rotation
 304                 dReal dq[4];
 305                 dWtoDQ (b->avel, b->q, dq);
 306                 for (j = 0; j < 4; j++)
 307                         b->q[j] += h * dq[j];
 308         }
 309
 310         // normalize the quaternion and convert it to a rotation matrix
 311         dNormalize4 (b->q);
 312         dQtoR (b->q, b->posr.R);
 313
 314         // notify all attached geoms that this body has moved
 315         for (dxGeom * geom = b->geom; geom; geom = dGeomGetBodyNext (geom))
 316                 dGeomMoved (geom);
 317 }
 318 #endif
 319
 320 //****************************************************************************
 321 //This is an implementation of the iterated/relaxation algorithm.
 322 //Here is a quick overview of the algorithm per Sergi Valverde's posts to the
 323 //mailing list:
 324 //
 325 //  for i=0..N-1 do
 326 //      for c = 0..C-1 do
 327 //          Solve constraint c-th
 328 //          Apply forces to constraint bodies
 329 //      next
 330 //  next
 331 //  Integrate bodies
 332
 333 void
 334 dInternalStepFast (dxWorld * world, dxBody * body[2], dReal * GI[2], dReal * GinvI[2], dxJoint * joint, dxJoint::Info1 info, dxJoint::Info2 Jinfo, dReal stepsize)
 335 {
 336         int i, j, k;
 337 # ifdef TIMING
 338         dTimerNow ("constraint preprocessing");
 339 # endif
 340
 341         dReal stepsize1 = dRecip (stepsize);
 342
 343         int m = info.m;
 344         // nothing to do if no constraints.
 345         if (m <= 0)
 346                 return;
 347
 348         int nub = 0;
 349         if (info.nub == info.m)
 350                 nub = m;
 351
 352         // compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
 353         // format as J so we just go through the constraints in J multiplying by
 354         // the appropriate scalars and matrices.
 355 #   ifdef TIMING
 356         dTimerNow ("compute A");
 357 #   endif
 358         dReal JinvM[2 * 6 * 8];
 359         //dSetZero (JinvM, 2 * m * 8);
 360
 361         dReal *Jsrc = Jinfo.J1l;
 362         dReal *Jdst = JinvM;
 363         if (body[0])
 364         {
 365                 for (j = m - 1; j >= 0; j--)
 366                 {
 367                         for (k = 0; k < 3; k++)
 368                                 Jdst[k] = Jsrc[k] * body[0]->invMass;
 369                         dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[0]);
 370                         Jsrc += 8;
 371                         Jdst += 8;
 372                 }
 373         }
 374         if (body[1])
 375         {
 376                 Jsrc = Jinfo.J2l;
 377                 Jdst = JinvM + 8 * m;
 378                 for (j = m - 1; j >= 0; j--)
 379                 {
 380                         for (k = 0; k < 3; k++)
 381                                 Jdst[k] = Jsrc[k] * body[1]->invMass;
 382                         dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[1]);
 383                         Jsrc += 8;
 384                         Jdst += 8;
 385                 }
 386         }
 387
 388
 389         // now compute A = JinvM * J'.
 390         int mskip = dPAD (m);
 391         dReal A[6 * 8];
 392         //dSetZero (A, 6 * 8);
 393
 394         if (body[0]) {
 395                 Multiply2_sym_p8p (A, JinvM, Jinfo.J1l, m, mskip);
 396                 if (body[1])
 397                         MultiplyAdd2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l,
 398                                               m, mskip);
 399         } else {
 400                 if (body[1])
 401                         Multiply2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l,
 402                                            m, mskip);
 403         }
 404
 405         // add cfm to the diagonal of A
 406         for (i = 0; i < m; i++)
 407                 A[i * mskip + i] += Jinfo.cfm[i] * stepsize1;
 408
 409         // compute the right hand side `rhs'
 410 #   ifdef TIMING
 411         dTimerNow ("compute rhs");
 412 #   endif
 413         dReal tmp1[16];
 414         //dSetZero (tmp1, 16);
 415         // put v/h + invM*fe into tmp1
 416         for (i = 0; i < 2; i++)
 417         {
 418                 if (!body[i])
 419                         continue;
 420                 for (j = 0; j < 3; j++)
 421                         tmp1[i * 8 + j] = body[i]->facc[j] * body[i]->invMass + body[i]->lvel[j] * stepsize1;
 422                 dMULTIPLY0_331 (tmp1 + i * 8 + 4, GinvI[i], body[i]->tacc);
 423                 for (j = 0; j < 3; j++)
 424                         tmp1[i * 8 + 4 + j] += body[i]->avel[j] * stepsize1;
 425         }
 426         // put J*tmp1 into rhs
 427         dReal rhs[6];
 428         //dSetZero (rhs, 6);
 429
 430         if (body[0]) {
 431                 Multiply0_p81 (rhs, Jinfo.J1l, tmp1, m);
 432                 if (body[1])
 433                         MultiplyAdd0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
 434         } else {
 435                 if (body[1])
 436                         Multiply0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
 437         }
 438
 439         // complete rhs
 440         for (i = 0; i < m; i++)
 441                 rhs[i] = Jinfo.c[i] * stepsize1 - rhs[i];
 442
 443 #ifdef SLOW_LCP
 444         // solve the LCP problem and get lambda.
 445         // this will destroy A but that's okay
 446 #       ifdef TIMING
 447         dTimerNow ("solving LCP problem");
 448 #       endif
 449         dReal *lambda = (dReal *) ALLOCA (m * sizeof (dReal));
 450         dReal *residual = (dReal *) ALLOCA (m * sizeof (dReal));
 451         dReal lo[6], hi[6];
 452         memcpy (lo, Jinfo.lo, m * sizeof (dReal));
 453         memcpy (hi, Jinfo.hi, m * sizeof (dReal));
 454         dSolveLCP (m, A, lambda, rhs, residual, nub, lo, hi, Jinfo.findex);
 455 #endif
 456
 457         // LCP Solver replacement:
 458         // This algorithm goes like this:
 459         // Do a straightforward LDLT factorization of the matrix A, solving for
 460         // A*x = rhs
 461         // For each x[i] that is outside of the bounds of lo[i] and hi[i],
 462         //    clamp x[i] into that range.
 463         //    Substitute into A the now known x's
 464         //    subtract the residual away from the rhs.
 465         //    Remove row and column i from L, updating the factorization
 466         //    place the known x's at the end of the array, keeping up with location in p
 467         // Repeat until all constraints have been clamped or all are within bounds
 468         //
 469         // This is probably only faster in the single joint case where only one repeat is
 470         // the norm.
 471
 472 #ifdef FAST_FACTOR
 473         // factorize A (L*D*L'=A)
 474 #       ifdef TIMING
 475         dTimerNow ("factorize A");
 476 #       endif
 477         dReal d[6];
 478         dReal L[6 * 8];
 479         memcpy (L, A, m * mskip * sizeof (dReal));
 480         dFactorLDLT (L, d, m, mskip);
 481
 482         // compute lambda
 483 #       ifdef TIMING
 484         dTimerNow ("compute lambda");
 485 #       endif
 486
 487         int left = m;                           //constraints left to solve.
 488         int remove[6];
 489         dReal lambda[6];
 490         dReal x[6];
 491         int p[6];
 492         for (i = 0; i < 6; i++)
 493                 p[i] = i;
 494         while (true)
 495         {
 496                 memcpy (x, rhs, left * sizeof (dReal));
 497                 dSolveLDLT (L, d, x, left, mskip);
 498
 499                 int fixed = 0;
 500                 for (i = 0; i < left; i++)
 501                 {
 502                         j = p[i];
 503                         remove[i] = false;
 504                         // This isn't the exact same use of findex as dSolveLCP.... since x[findex]
 505                         // may change after I've already clamped x[i], but it should be close
 506                         if (Jinfo.findex[j] > -1)
 507                         {
 508                                 dReal f = fabs (Jinfo.hi[j] * x[p[Jinfo.findex[j]]]);
 509                                 if (x[i] > f)
 510                                         x[i] = f;
 511                                 else if (x[i] < -f)
 512                                         x[i] = -f;
 513                                 else
 514                                         continue;
 515                         }
 516                         else
 517                         {
 518                                 if (x[i] > Jinfo.hi[j])
 519                                         x[i] = Jinfo.hi[j];
 520                                 else if (x[i] < Jinfo.lo[j])
 521                                         x[i] = Jinfo.lo[j];
 522                                 else
 523                                         continue;
 524                         }
 525                         remove[i] = true;
 526                         fixed++;
 527                 }
 528                 if (fixed == 0 || fixed == left)        //no change or all constraints solved
 529                         break;
 530
 531                 for (i = 0; i < left; i++)      //sub in to right hand side.
 532                         if (remove[i])
 533                                 for (j = 0; j < left; j++)
 534                                         if (!remove[j])
 535                                                 rhs[j] -= A[j * mskip + i] * x[i];
 536
 537                 for (int r = left - 1; r >= 0; r--)     //eliminate row/col for fixed variables
 538                 {
 539                         if (remove[r])
 540                         {
 541                                 //dRemoveLDLT adapted for use without row pointers.
 542                                 if (r == left - 1)
 543                                 {
 544                                         left--;
 545                                         continue;       // deleting last row/col is easy
 546                                 }
 547                                 else if (r == 0)
 548                                 {
 549                                         dReal a[6];
 550                                         for (i = 0; i < left; i++)
 551                                                 a[i] = -A[i * mskip];
 552                                         a[0] += REAL (1.0);
 553                                         dLDLTAddTL (L, d, a, left, mskip);
 554                                 }
 555                                 else
 556                                 {
 557                                         dReal t[6];
 558                                         dReal a[6];
 559                                         for (i = 0; i < r; i++)
 560                                                 t[i] = L[r * mskip + i] / d[i];
 561                                         for (i = 0; i < left - r; i++)
 562                                                 a[i] = dDot (L + (r + i) * mskip, t, r) - A[(r + i) * mskip + r];
 563                                         a[0] += REAL (1.0);
 564                                         dLDLTAddTL (L + r * mskip + r, d + r, a, left - r, mskip);
 565                                 }
 566
 567                                 dRemoveRowCol (L, left, mskip, r);
 568                                 //end dRemoveLDLT
 569
 570                                 left--;
 571                                 if (r < (left - 1))
 572                                 {
 573                                         dReal tx = x[r];
 574                                         memmove (d + r, d + r + 1, (left - r) * sizeof (dReal));
 575                                         memmove (rhs + r, rhs + r + 1, (left - r) * sizeof (dReal));
 576                                         //x will get written over by rhs anyway, no need to move it around
 577                                         //just store the fixed value we just discovered in it.
 578                                         x[left] = tx;
 579                                         for (i = 0; i < m; i++)
 580                                                 if (p[i] > r && p[i] <= left)
 581                                                         p[i]--;
 582                                         p[r] = left;
 583                                 }
 584                         }
 585                 }
 586         }
 587
 588         for (i = 0; i < m; i++)
 589                 lambda[i] = x[p[i]];
 590 #       endif
 591         // compute the constraint force `cforce'
 592 #       ifdef TIMING
 593         dTimerNow ("compute constraint force");
 594 #endif
 595
 596         // compute cforce = J'*lambda
 597         dJointFeedback *fb = joint->feedback;
 598         dReal cforce[16];
 599         //dSetZero (cforce, 16);
 600
 601         if (fb)
 602         {
 603                 // the user has requested feedback on the amount of force that this
 604                 // joint is applying to the bodies. we use a slightly slower
 605                 // computation that splits out the force components and puts them
 606                 // in the feedback structure.
 607                 dReal data1[8], data2[8];
 608                 if (body[0])
 609                 {
 610                         Multiply1_8q1 (data1, Jinfo.J1l, lambda, m);
 611                         dReal *cf1 = cforce;
 612                         cf1[0] = (fb->f1[0] = data1[0]);
 613                         cf1[1] = (fb->f1[1] = data1[1]);
 614                         cf1[2] = (fb->f1[2] = data1[2]);
 615                         cf1[4] = (fb->t1[0] = data1[4]);
 616                         cf1[5] = (fb->t1[1] = data1[5]);
 617                         cf1[6] = (fb->t1[2] = data1[6]);
 618                 }
 619                 if (body[1])
 620                 {
 621                         Multiply1_8q1 (data2, Jinfo.J2l, lambda, m);
 622                         dReal *cf2 = cforce + 8;
 623                         cf2[0] = (fb->f2[0] = data2[0]);
 624                         cf2[1] = (fb->f2[1] = data2[1]);
 625                         cf2[2] = (fb->f2[2] = data2[2]);
 626                         cf2[4] = (fb->t2[0] = data2[4]);
 627                         cf2[5] = (fb->t2[1] = data2[5]);
 628                         cf2[6] = (fb->t2[2] = data2[6]);
 629                 }
 630         }
 631         else
 632         {
 633                 // no feedback is required, let's compute cforce the faster way
 634                 if (body[0])
 635                         Multiply1_8q1 (cforce, Jinfo.J1l, lambda, m);
 636                 if (body[1])
 637                         Multiply1_8q1 (cforce + 8, Jinfo.J2l, lambda, m);
 638         }
 639
 640         for (i = 0; i < 2; i++)
 641         {
 642                 if (!body[i])
 643                         continue;
 644                 for (j = 0; j < 3; j++)
 645                 {
 646                         body[i]->facc[j] += cforce[i * 8 + j];
 647                         body[i]->tacc[j] += cforce[i * 8 + 4 + j];
 648                 }
 649         }
 650 }
 651
 652 void
 653 dInternalStepIslandFast (dxWorld * world, dxBody * const *bodies, int nb, dxJoint * const *_joints, int nj, dReal stepsize, int maxiterations)
 654 {
 655 #   ifdef TIMING
 656         dTimerNow ("preprocessing");
 657 #   endif
 658         dxBody *bodyPair[2], *body;
 659         dReal *GIPair[2], *GinvIPair[2];
 660         dxJoint *joint;
 661         int iter, b, j, i;
 662         dReal ministep = stepsize / maxiterations;
 663
 664         // make a local copy of the joint array, because we might want to modify it.
 665         // (the "dxJoint *const*" declaration says we're allowed to modify the joints
 666         // but not the joint array, because the caller might need it unchanged).
 667         dxJoint **joints = (dxJoint **) ALLOCA (nj * sizeof (dxJoint *));
 668         memcpy (joints, _joints, nj * sizeof (dxJoint *));
 669
 670         // get m = total constraint dimension, nub = number of unbounded variables.
 671         // create constraint offset array and number-of-rows array for all joints.
 672         // the constraints are re-ordered as follows: the purely unbounded
 673         // constraints, the mixed unbounded + LCP constraints, and last the purely
 674         // LCP constraints. this assists the LCP solver to put all unbounded
 675         // variables at the start for a quick factorization.
 676         //
 677         // joints with m=0 are inactive and are removed from the joints array
 678         // entirely, so that the code that follows does not consider them.
 679         // also number all active joints in the joint list (set their tag values).
 680         // inactive joints receive a tag value of -1.
 681
 682         int m = 0;
 683         dxJoint::Info1 * info = (dxJoint::Info1 *) ALLOCA (nj * sizeof (dxJoint::Info1));
 684         int *ofs = (int *) ALLOCA (nj * sizeof (int));
 685         for (i = 0, j = 0; j < nj; j++)
 686         {       // i=dest, j=src
 687                 joints[j]->vtable->getInfo1 (joints[j], info + i);
 688                 dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m);
 689                 if (info[i].m > 0)
 690                 {
 691                         joints[i] = joints[j];
 692                         joints[i]->tag = i;
 693                         i++;
 694                 }
 695                 else
 696                 {
 697                         joints[j]->tag = -1;
 698                 }
 699         }
 700         nj = i;
 701
 702         // the purely unbounded constraints
 703         for (i = 0; i < nj; i++)
 704         {
 705                 ofs[i] = m;
 706                 m += info[i].m;
 707         }
 708         dReal *c = NULL;
 709         dReal *cfm = NULL;
 710         dReal *lo = NULL;
 711         dReal *hi = NULL;
 712         int *findex = NULL;
 713
 714         dReal *J = NULL;
 715         dxJoint::Info2 * Jinfo = NULL;
 716
 717         if (m)
 718         {
 719         // create a constraint equation right hand side vector `c', a constraint
 720         // force mixing vector `cfm', and LCP low and high bound vectors, and an
 721         // 'findex' vector.
 722                 c = (dReal *) ALLOCA (m * sizeof (dReal));
 723                 cfm = (dReal *) ALLOCA (m * sizeof (dReal));
 724                 lo = (dReal *) ALLOCA (m * sizeof (dReal));
 725                 hi = (dReal *) ALLOCA (m * sizeof (dReal));
 726                 findex = (int *) ALLOCA (m * sizeof (int));
 727         dSetZero (c, m);
 728         dSetValue (cfm, m, world->global_cfm);
 729         dSetValue (lo, m, -dInfinity);
 730         dSetValue (hi, m, dInfinity);
 731         for (i = 0; i < m; i++)
 732                 findex[i] = -1;
 733
 734         // get jacobian data from constraints. a (2*m)x8 matrix will be created
 735         // to store the two jacobian blocks from each constraint. it has this
 736         // format:
 737         //
 738         //   l l l 0 a a a 0  \    .
 739         //   l l l 0 a a a 0   }-- jacobian body 1 block for joint 0 (3 rows)
 740         //   l l l 0 a a a 0  /
 741         //   l l l 0 a a a 0  \    .
 742         //   l l l 0 a a a 0   }-- jacobian body 2 block for joint 0 (3 rows)
 743         //   l l l 0 a a a 0  /
 744         //   l l l 0 a a a 0  }--- jacobian body 1 block for joint 1 (1 row)
 745         //   l l l 0 a a a 0  }--- jacobian body 2 block for joint 1 (1 row)
 746         //   etc...
 747         //
 748         //   (lll) = linear jacobian data
 749         //   (aaa) = angular jacobian data
 750         //
 751 #   ifdef TIMING
 752         dTimerNow ("create J");
 753 #   endif
 754                 J = (dReal *) ALLOCA (2 * m * 8 * sizeof (dReal));
 755                 dSetZero (J, 2 * m * 8);
 756                 Jinfo = (dxJoint::Info2 *) ALLOCA (nj * sizeof (dxJoint::Info2));
 757         for (i = 0; i < nj; i++)
 758         {
 759                 Jinfo[i].rowskip = 8;
 760                 Jinfo[i].fps = dRecip (stepsize);
 761                 Jinfo[i].erp = world->global_erp;
 762                 Jinfo[i].J1l = J + 2 * 8 * ofs[i];
 763                 Jinfo[i].J1a = Jinfo[i].J1l + 4;
 764                 Jinfo[i].J2l = Jinfo[i].J1l + 8 * info[i].m;
 765                 Jinfo[i].J2a = Jinfo[i].J2l + 4;
 766                 Jinfo[i].c = c + ofs[i];
 767                 Jinfo[i].cfm = cfm + ofs[i];
 768                 Jinfo[i].lo = lo + ofs[i];
 769                 Jinfo[i].hi = hi + ofs[i];
 770                 Jinfo[i].findex = findex + ofs[i];
 771                 //joints[i]->vtable->getInfo2 (joints[i], Jinfo+i);
 772         }
 773
 774         }
 775
 776         dReal *saveFacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
 777         dReal *saveTacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
 778         dReal *globalI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
 779         dReal *globalInvI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
 780         for (b = 0; b < nb; b++)
 781         {
 782                 for (i = 0; i < 4; i++)
 783                 {
 784                         saveFacc[b * 4 + i] = bodies[b]->facc[i];
 785                         saveTacc[b * 4 + i] = bodies[b]->tacc[i];
 786                 }
 787                 bodies[b]->tag = b;
 788         }
 789
 790         for (iter = 0; iter < maxiterations; iter++)
 791         {
 792 #       ifdef TIMING
 793                 dTimerNow ("applying inertia and gravity");
 794 #       endif
 795                 dReal tmp[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 796
 797                 for (b = 0; b < nb; b++)
 798                 {
 799                         body = bodies[b];
 800
 801                         // for all bodies, compute the inertia tensor and its inverse in the global
 802                         // frame, and compute the rotational force and add it to the torque
 803                         // accumulator. I and invI are vertically stacked 3x4 matrices, one per body.
 804                         // @@@ check computation of rotational force.
 805
 806                         // compute inertia tensor in global frame
 807                         dMULTIPLY2_333 (tmp, body->mass.I, body->posr.R);
 808                         dMULTIPLY0_333 (globalI + b * 12, body->posr.R, tmp);
 809                         // compute inverse inertia tensor in global frame
 810                         dMULTIPLY2_333 (tmp, body->invI, body->posr.R);
 811                         dMULTIPLY0_333 (globalInvI + b * 12, body->posr.R, tmp);
 812
 813                         for (i = 0; i < 4; i++)
 814                                 body->tacc[i] = saveTacc[b * 4 + i];
 815 #ifdef dGYROSCOPIC
 816                         // compute rotational force
 817                         dMULTIPLY0_331 (tmp, globalI + b * 12, body->avel);
 818                         dCROSS (body->tacc, -=, body->avel, tmp);
 819 #endif
 820
 821                         // add the gravity force to all bodies
 822                         if ((body->flags & dxBodyNoGravity) == 0)
 823                         {
 824                                 body->facc[0] = saveFacc[b * 4 + 0] + body->mass.mass * world->gravity[0];
 825                                 body->facc[1] = saveFacc[b * 4 + 1] + body->mass.mass * world->gravity[1];
 826                                 body->facc[2] = saveFacc[b * 4 + 2] + body->mass.mass * world->gravity[2];
 827                                 body->facc[3] = 0;
 828                         } else {
 829                                 body->facc[0] = saveFacc[b * 4 + 0];
 830                                 body->facc[1] = saveFacc[b * 4 + 1];
 831                                 body->facc[2] = saveFacc[b * 4 + 2];
 832                                 body->facc[3] = 0;
 833                         }
 834
 835                 }
 836
 837 #ifdef RANDOM_JOINT_ORDER
 838 #ifdef TIMING
 839                 dTimerNow ("randomizing joint order");
 840 #endif
 841                 //randomize the order of the joints by looping through the array
 842                 //and swapping the current joint pointer with a random one before it.
 843                 for (j = 0; j < nj; j++)
 844                 {
 845                         joint = joints[j];
 846                         dxJoint::Info1 i1 = info[j];
 847                         dxJoint::Info2 i2 = Jinfo[j];
 848                         const int r = dRandInt(j+1);
 849                         dIASSERT (r < nj);
 850                         joints[j] = joints[r];
 851                         info[j] = info[r];
 852                         Jinfo[j] = Jinfo[r];
 853                         joints[r] = joint;
 854                         info[r] = i1;
 855                         Jinfo[r] = i2;
 856                 }
 857 #endif
 858
 859                 //now iterate through the random ordered joint array we created.
 860                 for (j = 0; j < nj; j++)
 861                 {
 862 #ifdef TIMING
 863                         dTimerNow ("setting up joint");
 864 #endif
 865                         joint = joints[j];
 866                         bodyPair[0] = joint->node[0].body;
 867                         bodyPair[1] = joint->node[1].body;
 868
 869                         if (bodyPair[0] && (bodyPair[0]->flags & dxBodyDisabled))
 870                                 bodyPair[0] = 0;
 871                         if (bodyPair[1] && (bodyPair[1]->flags & dxBodyDisabled))
 872                                 bodyPair[1] = 0;
 873
 874                         //if this joint is not connected to any enabled bodies, skip it.
 875                         if (!bodyPair[0] && !bodyPair[1])
 876                                 continue;
 877
 878                         if (bodyPair[0])
 879                         {
 880                                 GIPair[0] = globalI + bodyPair[0]->tag * 12;
 881                                 GinvIPair[0] = globalInvI + bodyPair[0]->tag * 12;
 882                         }
 883                         if (bodyPair[1])
 884                         {
 885                                 GIPair[1] = globalI + bodyPair[1]->tag * 12;
 886                                 GinvIPair[1] = globalInvI + bodyPair[1]->tag * 12;
 887                         }
 888
 889                         joints[j]->vtable->getInfo2 (joints[j], Jinfo + j);
 890
 891                         //dInternalStepIslandFast is an exact copy of the old routine with one
 892                         //modification: the calculated forces are added back to the facc and tacc
 893                         //vectors instead of applying them to the bodies and moving them.
 894                         if (info[j].m > 0)
 895                         {
 896                         dInternalStepFast (world, bodyPair, GIPair, GinvIPair, joint, info[j], Jinfo[j], ministep);
 897                         }
 898                 }
 899                 //  }
 900 #       ifdef TIMING
 901                 dTimerNow ("moving bodies");
 902 #       endif
 903                 //Now we can simulate all the free floating bodies, and move them.
 904                 for (b = 0; b < nb; b++)
 905                 {
 906                         body = bodies[b];
 907
 908                         for (i = 0; i < 4; i++)
 909                         {
 910                                 body->facc[i] *= ministep;
 911                                 body->tacc[i] *= ministep;
 912                         }
 913
 914                         //apply torque
 915                         dMULTIPLYADD0_331 (body->avel, globalInvI + b * 12, body->tacc);
 916
 917                         //apply force
 918                         for (i = 0; i < 3; i++)
 919                                 body->lvel[i] += body->invMass * body->facc[i];
 920
 921                         //move It!
 922                         dxStepBody (body, ministep);
 923                 }
 924         }
 925         for (b = 0; b < nb; b++)
 926                 for (j = 0; j < 4; j++)
 927                         bodies[b]->facc[j] = bodies[b]->tacc[j] = 0;
 928 }
 929
 930
 931 #ifdef NO_ISLANDS
 932
 933 // Since the iterative algorithm doesn't care about islands of bodies, this is a
 934 // faster algorithm that just sends it all the joints and bodies in one array.
 935 // It's downfall is it's inability to handle disabled bodies as well as the old one.
 936 static void
 937 processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
 938 {
 939         // nothing to do if no bodies
 940         if (world->nb <= 0)
 941                 return;
 942
 943         dInternalHandleAutoDisabling (world,stepsize);
 944
 945 #       ifdef TIMING
 946         dTimerStart ("creating joint and body arrays");
 947 #       endif
 948         dxBody **bodies, *body;
 949         dxJoint **joints, *joint;
 950         joints = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
 951         bodies = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
 952
 953         int nj = 0;
 954         for (joint = world->firstjoint; joint; joint = (dxJoint *) joint->next)
 955                 joints[nj++] = joint;
 956
 957         int nb = 0;
 958         for (body = world->firstbody; body; body = (dxBody *) body->next)
 959                 bodies[nb++] = body;
 960
 961         dInternalStepIslandFast (world, bodies, nb, joints, nj, stepsize, maxiterations);
 962 #       ifdef TIMING
 963         dTimerEnd ();
 964         dTimerReport (stdout, 1);
 965 #       endif
 966 }
 967
 968 #else
 969
 970 //****************************************************************************
 971 // island processing
 972
 973 // this groups all joints and bodies in a world into islands. all objects
 974 // in an island are reachable by going through connected bodies and joints.
 975 // each island can be simulated separately.
 976 // note that joints that are not attached to anything will not be included
 977 // in any island, an so they do not affect the simulation.
 978 //
 979 // this function starts new island from unvisited bodies. however, it will
 980 // never start a new islands from a disabled body. thus islands of disabled
 981 // bodies will not be included in the simulation. disabled bodies are
 982 // re-enabled if they are found to be part of an active island.
 983
 984 static void
 985 processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
 986 {
 987 #ifdef TIMING
 988         dTimerStart ("Island Setup");
 989 #endif
 990         dxBody *b, *bb, **body;
 991         dxJoint *j, **joint;
 992
 993         // nothing to do if no bodies
 994         if (world->nb <= 0)
 995                 return;
 996
 997         dInternalHandleAutoDisabling (world,stepsize);
 998
 999         // make arrays for body and joint lists (for a single island) to go into
1000         body = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
1001         joint = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
1002         int bcount = 0;                         // number of bodies in `body'
1003         int jcount = 0;                         // number of joints in `joint'
1004         int tbcount = 0;
1005         int tjcount = 0;
1006
1007         // set all body/joint tags to 0
1008         for (b = world->firstbody; b; b = (dxBody *) b->next)
1009                 b->tag = 0;
1010         for (j = world->firstjoint; j; j = (dxJoint *) j->next)
1011                 j->tag = 0;
1012
1013         // allocate a stack of unvisited bodies in the island. the maximum size of
1014         // the stack can be the lesser of the number of bodies or joints, because
1015         // new bodies are only ever added to the stack by going through untagged
1016         // joints. all the bodies in the stack must be tagged!
1017         int stackalloc = (world->nj < world->nb) ? world->nj : world->nb;
1018         dxBody **stack = (dxBody **) ALLOCA (stackalloc * sizeof (dxBody *));
1019         int *autostack = (int *) ALLOCA (stackalloc * sizeof (int));
1020
1021         for (bb = world->firstbody; bb; bb = (dxBody *) bb->next)
1022         {
1023 #ifdef TIMING
1024                 dTimerNow ("Island Processing");
1025 #endif
1026                 // get bb = the next enabled, untagged body, and tag it
1027                 if (bb->tag || (bb->flags & dxBodyDisabled))
1028                         continue;
1029                 bb->tag = 1;
1030
1031                 // tag all bodies and joints starting from bb.
1032                 int stacksize = 0;
1033                 int autoDepth = autoEnableDepth;
1034                 b = bb;
1035                 body[0] = bb;
1036                 bcount = 1;
1037                 jcount = 0;
1038                 goto quickstart;
1039                 while (stacksize > 0)
1040                 {
1041                         b = stack[--stacksize]; // pop body off stack
1042                         autoDepth = autostack[stacksize];
1043                         body[bcount++] = b;     // put body on body list
1044                   quickstart:
1045
1046                         // traverse and tag all body's joints, add untagged connected bodies
1047                         // to stack
1048                         for (dxJointNode * n = b->firstjoint; n; n = n->next)
1049                         {
1050                                 if (!n->joint->tag)
1051                                 {
1052                                         int thisDepth = autoEnableDepth;
1053                                         n->joint->tag = 1;
1054                                         joint[jcount++] = n->joint;
1055                                         if (n->body && !n->body->tag)
1056                                         {
1057                                                 if (n->body->flags & dxBodyDisabled)
1058                                                         thisDepth = autoDepth - 1;
1059                                                 if (thisDepth < 0)
1060                                                         continue;
1061                                                 n->body->flags &= ~dxBodyDisabled;
1062                                                 n->body->tag = 1;
1063                                                 autostack[stacksize] = thisDepth;
1064                                                 stack[stacksize++] = n->body;
1065                                         }
1066                                 }
1067                         }
1068                         dIASSERT (stacksize <= world->nb);
1069                         dIASSERT (stacksize <= world->nj);
1070                 }
1071
1072                 // now do something with body and joint lists
1073                 dInternalStepIslandFast (world, body, bcount, joint, jcount, stepsize, maxiterations);
1074
1075                 // what we've just done may have altered the body/joint tag values.
1076                 // we must make sure that these tags are nonzero.
1077                 // also make sure all bodies are in the enabled state.
1078                 int i;
1079                 for (i = 0; i < bcount; i++)
1080                 {
1081                         body[i]->tag = 1;
1082                         body[i]->flags &= ~dxBodyDisabled;
1083                 }
1084                 for (i = 0; i < jcount; i++)
1085                         joint[i]->tag = 1;
1086
1087                 tbcount += bcount;
1088                 tjcount += jcount;
1089         }
1090
1091 #ifdef TIMING
1092         dMessage(0, "Total joints processed: %i, bodies: %i", tjcount, tbcount);
1093 #endif
1094
1095         // if debugging, check that all objects (except for disabled bodies,
1096         // unconnected joints, and joints that are connected to disabled bodies)
1097         // were tagged.
1098 # ifndef dNODEBUG
1099         for (b = world->firstbody; b; b = (dxBody *) b->next)
1100         {
1101                 if (b->flags & dxBodyDisabled)
1102                 {
1103                         if (b->tag)
1104                                 dDebug (0, "disabled body tagged");
1105                 }
1106                 else
1107                 {
1108                         if (!b->tag)
1109                                 dDebug (0, "enabled body not tagged");
1110                 }
1111         }
1112         for (j = world->firstjoint; j; j = (dxJoint *) j->next)
1113         {
1114                 if ((j->node[0].body && (j->node[0].body->flags & dxBodyDisabled) == 0) || (j->node[1].body && (j->node[1].body->flags & dxBodyDisabled) == 0))
1115                 {
1116                         if (!j->tag)
1117                                 dDebug (0, "attached enabled joint not tagged");
1118                 }
1119                 else
1120                 {
1121                         if (j->tag)
1122                                 dDebug (0, "unattached or disabled joint tagged");
1123                 }
1124         }
1125 # endif
1126
1127 #       ifdef TIMING
1128         dTimerEnd ();
1129         dTimerReport (stdout, 1);
1130 #       endif
1131 }
1132
1133 #endif
1134
1135
1136 void dWorldStepFast1 (dWorldID w, dReal stepsize, int maxiterations)
1137 {
1138         dUASSERT (w, "bad world argument");
1139         dUASSERT (stepsize > 0, "stepsize must be > 0");
1140         processIslandsFast (w, stepsize, maxiterations);
1141 }