Cosmetic: Cosmetic optimizations and identifier name corrections to avoid conflicts...
[ode.git] / ode / src / lcp.cpp
blob550c9e2d6b46fe038923f22ca159241eb0e2ad65
1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
26 THE ALGORITHM
27 -------------
29 solve A*x = b+w, with x and w subject to certain LCP conditions.
30 each x(i),w(i) must lie on one of the three line segments in the following
31 diagram. each line segment corresponds to one index set :
33 w(i)
34 /|\ | :
35 | | :
36 | |i in N :
37 w>0 | |state[i]=0 :
38 | | :
39 | | : i in C
40 w=0 + +-----------------------+
41 | : |
42 | : |
43 w<0 | : |i in N
44 | : |state[i]=1
45 | : |
46 | : |
47 +-------|-----------|-----------|----------> x(i)
48 lo 0 hi
50 the Dantzig algorithm proceeds as follows:
51 for i=1:n
52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53 negative towards the line. as this is done, the other (x(j),w(j))
54 for j<i are constrained to be on the line. if any (x,w) reaches the
55 end of a line segment then it is switched between index sets.
56 * i is added to the appropriate index set depending on what line segment
57 it hits.
59 we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60 simpler, because the starting point for x(i),w(i) is always on the dotted
61 line x=0 and x will only ever increase in one direction, so it can only hit
62 two out of the three line segments.
65 NOTES
66 -----
68 this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69 the implementation is split into an LCP problem object (dLCP) and an LCP
70 driver function. most optimization occurs in the dLCP object.
72 a naive implementation of the algorithm requires either a lot of data motion
73 or a lot of permutation-array lookup, because we are constantly re-ordering
74 rows and columns. to avoid this and make a more optimized algorithm, a
75 non-trivial data structure is used to represent the matrix A (this is
76 implemented in the fast version of the dLCP object).
78 during execution of this algorithm, some indexes in A are clamped (set C),
79 some are non-clamped (set N), and some are "don't care" (where x=0).
80 A,x,b,w (and other problem vectors) are permuted such that the clamped
81 indexes are first, the unclamped indexes are next, and the don't-care
82 indexes are last. this permutation is recorded in the array `p'.
83 initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84 the corresponding elements of p are swapped.
86 because the C and N elements are grouped together in the rows of A, we can do
87 lots of work with a fast dot product function. if A,x,etc were not permuted
88 and we only had a permutation array, then those dot products would be much
89 slower as we would have a permutation array lookup in some inner loops.
91 A is accessed through an array of row pointers, so that element (i,j) of the
92 permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93 we still have to actually move the data.
95 during execution of this algorithm we maintain an L*D*L' factorization of
96 the clamped submatrix of A (call it `AC') which is the top left nC*nC
97 submatrix of A. there are two ways we could arrange the rows/columns in AC.
99 (1) AC is always permuted such that L*D*L' = AC. this causes a problem
100 when a row/column is removed from C, because then all the rows/columns of A
101 between the deleted index and the end of C need to be rotated downward.
102 this results in a lot of data motion and slows things down.
103 (2) L*D*L' is actually a factorization of a *permutation* of AC (which is
104 itself a permutation of the underlying A). this is what we do - the
105 permutation is recorded in the vector C. call this permutation A[C,C].
106 when a row/column is removed from C, all we have to do is swap two
107 rows/columns and manipulate C.
111 #include <ode/common.h>
112 #include <ode/misc.h>
113 #include <ode/timer.h> // for testing
114 #include "config.h"
115 #include "lcp.h"
116 #include "util.h"
117 #include "matrix.h"
118 #include "mat.h" // for testing
119 #include "threaded_solver_ldlt.h"
121 #include "fastdot_impl.h"
122 #include "fastldltfactor_impl.h"
123 #include "fastldltsolve_impl.h"
126 //***************************************************************************
127 // code generation parameters
129 // LCP debugging (mostly for fast dLCP) - this slows things down a lot
130 //#define DEBUG_LCP
132 #define dLCP_FAST // use fast dLCP object
134 #define NUB_OPTIMIZATIONS // use NUB optimizations
137 // option 1 : matrix row pointers (less data copying)
138 #define ROWPTRS
139 #define ATYPE dReal **
140 #define AROW(i) (m_A[i])
142 // option 2 : no matrix row pointers (slightly faster inner loops)
143 //#define NOROWPTRS
144 //#define ATYPE dReal *
145 //#define AROW(i) (m_A+(i)*m_nskip)
148 //***************************************************************************
150 #define dMIN(A,B) ((A)>(B) ? (B) : (A))
151 #define dMAX(A,B) ((B)>(A) ? (B) : (A))
154 #define LMATRIX_ALIGNMENT dMAX(64, EFFICIENT_ALIGNMENT)
156 //***************************************************************************
159 // transfer b-values to x-values
160 template<bool zero_b>
161 inline
162 void transfer_b_to_x(dReal pairsbx[PBX__MAX], unsigned n)
164 dReal *const endbx = pairsbx + (sizeint)n * PBX__MAX;
165 for (dReal *currbx = pairsbx; currbx != endbx; currbx += PBX__MAX) {
166 currbx[PBX_X] = currbx[PBX_B];
167 if (zero_b) {
168 currbx[PBX_B] = REAL(0.0);
173 // swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
174 // A is nskip. this only references and swaps the lower triangle.
175 // if `do_fast_row_swaps' is nonzero and row pointers are being used, then
176 // rows will be swapped by exchanging row pointers. otherwise the data will
177 // be copied.
179 static
180 void swapRowsAndCols (ATYPE A, unsigned n, unsigned i1, unsigned i2, unsigned nskip,
181 int do_fast_row_swaps)
183 dAASSERT (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
184 nskip >= n && i1 < i2);
186 # ifdef ROWPTRS
187 dReal *A_i1 = A[i1];
188 dReal *A_i2 = A[i2];
189 for (unsigned i=i1+1; i<i2; ++i) {
190 dReal *A_i_i1 = A[i] + i1;
191 A_i1[i] = *A_i_i1;
192 *A_i_i1 = A_i2[i];
194 A_i1[i2] = A_i1[i1];
195 A_i1[i1] = A_i2[i1];
196 A_i2[i1] = A_i2[i2];
197 // swap rows, by swapping row pointers
198 if (do_fast_row_swaps) {
199 A[i1] = A_i2;
200 A[i2] = A_i1;
202 else {
203 // Only swap till i2 column to match A plain storage variant.
204 for (unsigned k = 0; k <= i2; ++k) {
205 dxSwap(A_i1[k], A_i2[k]);
208 // swap columns the hard way
209 for (unsigned j = i2 + 1; j < n; ++j) {
210 dReal *A_j = A[j];
211 dxSwap(A_j[i1], A_j[i2]);
213 # else
214 dReal *A_i1 = A + (sizeint)nskip * i1;
215 dReal *A_i2 = A + (sizeint)nskip * i2;
217 for (unsigned k = 0; k < i1; ++k) {
218 dxSwap(A_i1[k], A_i2[k]);
221 dReal *A_i = A_i1 + nskip;
222 for (unsigned i= i1 + 1; i < i2; A_i += nskip, ++i) {
223 dxSwap(A_i2[i], A_i[i1]);
226 dxSwap(A_i1[i1], A_i2[i2]);
228 dReal *A_j = A_i2 + nskip;
229 for (unsigned j = i2 + 1; j < n; A_j += nskip, ++j) {
230 dxSwap(A_j[i1], A_j[i2]);
232 # endif
236 // swap two indexes in the n*n LCP problem. i1 must be <= i2.
238 static
239 void swapProblem (ATYPE A, dReal pairsbx[PBX__MAX], dReal *w, dReal pairslh[PLH__MAX],
240 unsigned *p, bool *state, int *findex,
241 unsigned n, unsigned i1, unsigned i2, unsigned nskip,
242 int do_fast_row_swaps)
244 dIASSERT (n>0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
246 if (i1 != i2) {
247 swapRowsAndCols (A, n, i1, i2, nskip, do_fast_row_swaps);
249 dxSwap((pairsbx + (sizeint)i1 * PBX__MAX)[PBX_B], (pairsbx + (sizeint)i2 * PBX__MAX)[PBX_B]);
250 dxSwap((pairsbx + (sizeint)i1 * PBX__MAX)[PBX_X], (pairsbx + (sizeint)i2 * PBX__MAX)[PBX_X]);
251 dSASSERT(PBX__MAX == 2);
253 dxSwap(w[i1], w[i2]);
255 dxSwap((pairslh + (sizeint)i1 * PLH__MAX)[PLH_LO], (pairslh + (sizeint)i2 * PLH__MAX)[PLH_LO]);
256 dxSwap((pairslh + (sizeint)i1 * PLH__MAX)[PLH_HI], (pairslh + (sizeint)i2 * PLH__MAX)[PLH_HI]);
257 dSASSERT(PLH__MAX == 2);
259 dxSwap(p[i1], p[i2]);
260 dxSwap(state[i1], state[i2]);
262 if (findex != NULL) {
263 dxSwap(findex[i1], findex[i2]);
269 // for debugging - check that L,d is the factorization of A[C,C].
270 // A[C,C] has size nC*nC and leading dimension nskip.
271 // L has size nC*nC and leading dimension nskip.
272 // d has size nC.
274 #ifdef DEBUG_LCP
276 static
277 void checkFactorization (ATYPE A, dReal *_L, dReal *_d,
278 unsigned nC, unsigned *C, unsigned nskip)
280 unsigned i, j;
281 if (nC == 0) return;
283 // get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C]
284 dMatrix A1 (nC, nC);
285 for (i=0; i < nC; i++) {
286 for (j = 0; j <= i; j++) A1(i, j) = A1(j, i) = AROW(i)[j];
288 dMatrix A2 = A1.select (nC, C, nC, C);
290 // printf ("A1=\n"); A1.print(); printf ("\n");
291 // printf ("A2=\n"); A2.print(); printf ("\n");
293 // compute A3 = L*D*L'
294 dMatrix L (nC, nC, _L, nskip, 1);
295 dMatrix D (nC, nC);
296 for (i = 0; i < nC; i++) D(i, i) = 1.0 / _d[i];
297 L.clearUpperTriangle();
298 for (i = 0; i < nC; i++) L(i, i) = 1;
299 dMatrix A3 = L * D * L.transpose();
301 // printf ("L=\n"); L.print(); printf ("\n");
302 // printf ("D=\n"); D.print(); printf ("\n");
303 // printf ("A3=\n"); A2.print(); printf ("\n");
305 // compare A2 and A3
306 dReal diff = A2.maxDifference (A3);
307 if (diff > 1e-8)
308 dDebug (0, "L*D*L' check, maximum difference = %.6e\n", diff);
311 #endif
314 // for debugging
316 #ifdef DEBUG_LCP
318 static
319 void checkPermutations (unsigned i, unsigned n, unsigned nC, unsigned nN, unsigned *p, unsigned *C)
321 unsigned j,k;
322 dIASSERT (/*nC >= 0 && nN >= 0 && */(nC + nN) == i && i < n);
323 for (k=0; k<i; k++) dIASSERT (p[k] >= 0 && p[k] < i);
324 for (k=i; k<n; k++) dIASSERT (p[k] == k);
325 for (j=0; j<nC; j++) {
326 int C_is_bad = 1;
327 for (k=0; k<nC; k++) if (C[k]==j) C_is_bad = 0;
328 dIASSERT (C_is_bad==0);
332 #endif
334 //***************************************************************************
335 // dLCP manipulator object. this represents an n*n LCP problem.
337 // two index sets C and N are kept. each set holds a subset of
338 // the variable indexes 0..n-1. an index can only be in one set.
339 // initially both sets are empty.
341 // the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
343 //***************************************************************************
344 // fast implementation of dLCP. see the above definition of dLCP for
345 // interface comments.
347 // `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
348 // permuted as the other vectors/matrices are permuted.
350 // A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
351 // contiguous indexes. the don't-care indexes follow N.
353 // an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
354 // added or removed from the set C the factorization is updated.
355 // thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
356 // the leading dimension of the matrix L is always `nskip'.
358 // at the start there may be other indexes that are unbounded but are not
359 // included in `nub'. dLCP will permute the matrix so that absolutely all
360 // unbounded vectors are at the start. thus there may be some initial
361 // permutation.
363 // the algorithms here assume certain patterns, particularly with respect to
364 // index transfer.
366 #ifdef dLCP_FAST
368 struct dLCP {
369 const unsigned m_n;
370 const unsigned m_nskip;
371 unsigned m_nub;
372 unsigned m_nC, m_nN; // size of each index set
373 ATYPE const m_A; // A rows
374 dReal *const m_pairsbx, *const m_w, *const m_pairslh; // permuted LCP problem data
375 dReal *const m_L, *const m_d; // L*D*L' factorization of set C
376 dReal *const m_Dell, *const m_ell, *const m_tmp;
377 bool *const m_state;
378 int *const m_findex;
379 unsigned *const m_p, *const m_C;
381 dLCP (unsigned n, unsigned nskip, unsigned nub, dReal *Adata, dReal *pairsbx, dReal *w,
382 dReal *pairslh, dReal *L, dReal *d,
383 dReal *Dell, dReal *ell, dReal *tmp,
384 bool *state, int *findex, unsigned *p, unsigned *C, dReal **Arows);
385 unsigned getNub() const { return m_nub; }
386 void transfer_i_to_C (unsigned i);
387 void transfer_i_to_N (unsigned /*i*/) { m_nN++; } // because we can assume C and N span 1:i-1
388 void transfer_i_from_N_to_C (unsigned i);
389 void transfer_i_from_C_to_N (unsigned i, void *tmpbuf);
390 static sizeint estimate_transfer_i_from_C_to_N_mem_req(unsigned nC, unsigned nskip) { return dEstimateLDLTRemoveTmpbufSize(nC, nskip); }
391 unsigned numC() const { return m_nC; }
392 unsigned numN() const { return m_nN; }
393 unsigned indexC (unsigned i) const { return i; }
394 unsigned indexN (unsigned i) const { return i+m_nC; }
395 dReal Aii (unsigned i) const { return AROW(i)[i]; }
396 template<unsigned q_stride>
397 dReal AiC_times_qC (unsigned i, dReal *q) const { return calculateLargeVectorDot<q_stride> (AROW(i), q, m_nC); }
398 template<unsigned q_stride>
399 dReal AiN_times_qN (unsigned i, dReal *q) const { return calculateLargeVectorDot<q_stride> (AROW(i) + m_nC, q + (sizeint)m_nC * q_stride, m_nN); }
400 void pN_equals_ANC_times_qC (dReal *p, dReal *q);
401 void pN_plusequals_ANi (dReal *p, unsigned i, bool dir_positive);
402 template<unsigned p_stride>
403 void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q);
404 void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q);
405 void solve1 (dReal *a, unsigned i, bool dir_positive, int only_transfer=0);
406 void unpermute_X();
407 void unpermute_W();
411 dLCP::dLCP (unsigned n, unsigned nskip, unsigned nub, dReal *Adata, dReal *pairsbx, dReal *w,
412 dReal *pairslh, dReal *L, dReal *d,
413 dReal *Dell, dReal *ell, dReal *tmp,
414 bool *state, int *findex, unsigned *p, unsigned *C, dReal **Arows):
415 m_n(n), m_nskip(nskip), m_nub(nub), m_nC(0), m_nN(0),
416 # ifdef ROWPTRS
417 m_A(Arows),
418 #else
419 m_A(Adata),
420 #endif
421 m_pairsbx(pairsbx), m_w(w), m_pairslh(pairslh),
422 m_L(L), m_d(d), m_Dell(Dell), m_ell(ell), m_tmp(tmp),
423 m_state(state), m_findex(findex), m_p(p), m_C(C)
425 dxtSetZero<PBX__MAX>(pairsbx + PBX_X, n);
428 # ifdef ROWPTRS
429 // make matrix row pointers
430 dReal *aptr = Adata;
431 ATYPE A = m_A;
432 for (unsigned k=0; k<n; aptr+=nskip, ++k) A[k] = aptr;
433 # endif
437 for (unsigned k=0; k != n; ++k) p[k] = k; // initially unpermutted
441 // for testing, we can do some random swaps in the area i > nub
443 if (nub < n) {
444 for (unsigned k=0; k<100; k++) {
445 unsigned i1,i2;
446 do {
447 i1 = dRandInt(n-nub)+nub;
448 i2 = dRandInt(n-nub)+nub;
450 while (i1 > i2);
451 //printf ("--> %d %d\n",i1,i2);
452 swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, n, i1, i2, m_nskip, 0);
457 // permute the problem so that *all* the unbounded variables are at the
458 // start, i.e. look for unbounded variables not included in `nub'. we can
459 // potentially push up `nub' this way and get a bigger initial factorization.
460 // note that when we swap rows/cols here we must not just swap row pointers,
461 // as the initial factorization relies on the data being all in one chunk.
462 // variables that have findex >= 0 are *not* considered to be unbounded even
463 // if lo=-inf and hi=inf - this is because these limits may change during the
464 // solution process.
466 unsigned currNub = nub;
468 for (unsigned k = currNub; k < n; ++k) {
469 if (findex && findex[k] >= 0) continue;
470 if ((pairslh + (sizeint)k * PLH__MAX)[PLH_LO] == -dInfinity && (pairslh + (sizeint)k * PLH__MAX)[PLH_HI] == dInfinity) {
471 swapProblem (m_A, m_pairsbx, m_w, pairslh, m_p, m_state, findex, n, currNub, k, nskip, 0);
472 m_nub = ++currNub;
477 // if there are unbounded variables at the start, factorize A up to that
478 // point and solve for x. this puts all indexes 0..currNub-1 into C.
479 if (currNub > 0) {
481 dReal *Lrow = m_L;
482 for (unsigned j = 0; j < currNub; Lrow += nskip, ++j) memcpy(Lrow, AROW(j), (j + 1) * sizeof(dReal));
484 transfer_b_to_x<false> (m_pairsbx, currNub);
485 factorMatrixAsLDLT<1> (m_L, m_d, currNub, nskip);
486 solveEquationSystemWithLDLT<1, PBX__MAX> (m_L, m_d, m_pairsbx + PBX_X, currNub, nskip);
487 dSetZero (m_w, currNub);
489 unsigned *C = m_C;
490 for (unsigned k = 0; k < currNub; ++k) C[k] = k;
492 m_nC = currNub;
495 // permute the indexes > currNub such that all findex variables are at the end
496 if (findex) {
497 unsigned num_at_end = 0;
498 for (unsigned k = m_n; k > currNub; ) {
499 --k;
500 if (findex[k] >= 0) {
501 swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, findex, m_n, k, m_n - 1 - num_at_end, nskip, 1);
502 num_at_end++;
507 // print info about indexes
510 for (unsigned k=0; k<n; k++) {
511 if (k<currNub) printf ("C");
512 else if ((m_pairslh + (sizeint)k * PLH__MAX)[PLH_LO] == -dInfinity && (m_pairslh + (sizeint)k * PLH__MAX)[PLH_HI] == dInfinity) printf ("c");
513 else printf (".");
515 printf ("\n");
521 void dLCP::transfer_i_to_C (unsigned i)
524 const unsigned nC = m_nC;
526 if (nC > 0) {
527 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
528 dReal *const Ltgt = m_L + (sizeint)m_nskip * nC, *ell = m_ell;
529 memcpy(Ltgt, ell, nC * sizeof(dReal));
531 dReal ell_Dell_dot = dxDot(m_ell, m_Dell, nC);
532 dReal AROW_i_i = AROW(i)[i] != ell_Dell_dot ? AROW(i)[i] : dNextAfter(AROW(i)[i], dInfinity); // A hack to avoid getting a zero in the denominator
533 m_d[nC] = dRecip (AROW_i_i - ell_Dell_dot);
535 else {
536 m_d[0] = dRecip (AROW(i)[i]);
539 swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, nC, i, m_nskip, 1);
541 m_C[nC] = nC;
542 m_nC = nC + 1; // nC value is outdated after this line
545 # ifdef DEBUG_LCP
546 checkFactorization (m_A, m_L, m_d, m_nC, m_C, m_nskip);
547 if (i < (m_n-1)) checkPermutations (i+1, m_n, m_nC, m_nN, m_p, m_C);
548 # endif
552 void dLCP::transfer_i_from_N_to_C (unsigned i)
555 const unsigned nC = m_nC;
556 if (nC > 0) {
558 dReal *const aptr = AROW(i);
559 dReal *Dell = m_Dell;
560 const unsigned *C = m_C;
561 # ifdef NUB_OPTIMIZATIONS
562 // if nub>0, initial part of aptr unpermuted
563 const unsigned nub = m_nub;
564 unsigned j=0;
565 for ( ; j<nub; ++j) Dell[j] = aptr[j];
566 for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
567 # else
568 for (unsigned j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
569 # endif
571 solveL1Straight<1>(m_L, m_Dell, nC, m_nskip);
573 dReal ell_Dell_dot = REAL(0.0);
574 dReal *const Ltgt = m_L + (sizeint)m_nskip * nC;
575 dReal *ell = m_ell, *Dell = m_Dell, *d = m_d;
576 for (unsigned j = 0; j < nC; ++j) {
577 dReal ell_j, Dell_j = Dell[j];
578 Ltgt[j] = ell[j] = ell_j = Dell_j * d[j];
579 ell_Dell_dot += ell_j * Dell_j;
582 dReal AROW_i_i = AROW(i)[i] != ell_Dell_dot ? AROW(i)[i] : dNextAfter(AROW(i)[i], dInfinity); // A hack to avoid getting a zero in the denominator
583 m_d[nC] = dRecip (AROW_i_i - ell_Dell_dot);
585 else {
586 m_d[0] = dRecip (AROW(i)[i]);
589 swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, nC, i, m_nskip, 1);
591 m_C[nC] = nC;
592 m_nN--;
593 m_nC = nC + 1; // nC value is outdated after this line
596 // @@@ TO DO LATER
597 // if we just finish here then we'll go back and re-solve for
598 // delta_x. but actually we can be more efficient and incrementally
599 // update delta_x here. but if we do this, we wont have ell and Dell
600 // to use in updating the factorization later.
602 # ifdef DEBUG_LCP
603 checkFactorization (m_A,m_L,m_d,m_nC,m_C,m_nskip);
604 # endif
608 void dLCP::transfer_i_from_C_to_N (unsigned i, void *tmpbuf)
611 unsigned *C = m_C;
612 // remove a row/column from the factorization, and adjust the
613 // indexes (black magic!)
614 int last_idx = -1;
615 const unsigned nC = m_nC;
616 unsigned j = 0;
617 for ( ; j < nC; ++j) {
618 if (C[j] == nC - 1) {
619 last_idx = j;
621 if (C[j] == i) {
622 dxLDLTRemove (m_A, C, m_L, m_d, m_n, nC, j, m_nskip, tmpbuf);
623 unsigned k;
624 if (last_idx == -1) {
625 for (k = j + 1 ; k < nC; ++k) {
626 if (C[k] == nC - 1) {
627 break;
630 dIASSERT (k < nC);
632 else {
633 k = last_idx;
635 C[k] = C[j];
636 if (j != (nC - 1)) memmove (C + j, C + j + 1, (nC - j - 1) * sizeof(C[0]));
637 break;
640 dIASSERT (j < nC);
642 swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, i, nC - 1, m_nskip, 1);
644 m_nN++;
645 m_nC = nC - 1; // nC value is outdated after this line
648 # ifdef DEBUG_LCP
649 checkFactorization (m_A, m_L, m_d, m_nC, m_C, m_nskip);
650 # endif
654 void dLCP::pN_equals_ANC_times_qC (dReal *p, dReal *q)
656 // we could try to make this matrix-vector multiplication faster using
657 // outer product matrix tricks, e.g. with the dMultidotX() functions.
658 // but i tried it and it actually made things slower on random 100x100
659 // problems because of the overhead involved. so we'll stick with the
660 // simple method for now.
661 const unsigned nC = m_nC;
662 dReal *ptgt = p + nC;
663 const unsigned nN = m_nN;
664 for (unsigned i = 0; i < nN; ++i) {
665 ptgt[i] = dxDot (AROW(i + nC), q, nC);
670 void dLCP::pN_plusequals_ANi (dReal *p, unsigned i, bool dir_positive)
672 const unsigned nC = m_nC;
673 dReal *aptr = AROW(i) + nC;
674 dReal *ptgt = p + nC;
675 if (dir_positive) {
676 const unsigned nN = m_nN;
677 for (unsigned j=0; j < nN; ++j) ptgt[j] += aptr[j];
679 else {
680 const unsigned nN = m_nN;
681 for (unsigned j=0; j < nN; ++j) ptgt[j] -= aptr[j];
685 template<unsigned p_stride>
686 void dLCP::pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q)
688 const unsigned nC = m_nC;
689 dReal *q_end = q + nC;
690 for (; q != q_end; p += p_stride, ++q) {
691 *p += s * (*q);
695 void dLCP::pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q)
697 const unsigned nC = m_nC;
698 dReal *ptgt = p + nC, *qsrc = q + nC;
699 const unsigned nN = m_nN;
700 for (unsigned i = 0; i < nN; ++i) {
701 ptgt[i] += s * qsrc[i];
705 void dLCP::solve1 (dReal *a, unsigned i, bool dir_positive, int only_transfer)
707 // the `Dell' and `ell' that are computed here are saved. if index i is
708 // later added to the factorization then they can be reused.
710 // @@@ question: do we need to solve for entire delta_x??? yes, but
711 // only if an x goes below 0 during the step.
713 const unsigned nC = m_nC;
714 if (nC > 0) {
716 dReal *Dell = m_Dell;
717 unsigned *C = m_C;
718 dReal *aptr = AROW(i);
719 # ifdef NUB_OPTIMIZATIONS
720 // if nub>0, initial part of aptr[] is guaranteed unpermuted
721 const unsigned nub = m_nub;
722 unsigned j = 0;
723 for ( ; j < nub; ++j) Dell[j] = aptr[j];
724 for ( ; j < nC; ++j) Dell[j] = aptr[C[j]];
725 # else
726 for (unsigned j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
727 # endif
729 solveL1Straight<1>(m_L, m_Dell, nC, m_nskip);
731 dReal *ell = m_ell, *Dell = m_Dell, *d = m_d;
732 for (unsigned j = 0; j < nC; ++j) ell[j] = Dell[j] * d[j];
735 if (!only_transfer) {
736 dReal *tmp = m_tmp, *ell = m_ell;
738 for (unsigned j = 0; j < nC; ++j) tmp[j] = ell[j];
740 solveL1Transposed<1>(m_L, tmp, nC, m_nskip);
741 if (dir_positive) {
742 unsigned *C = m_C;
743 dReal *tmp = m_tmp;
744 for (unsigned j = 0; j < nC; ++j) a[C[j]] = -tmp[j];
745 } else {
746 unsigned *C = m_C;
747 dReal *tmp = m_tmp;
748 for (unsigned j = 0; j < nC; ++j) a[C[j]] = tmp[j];
755 void dLCP::unpermute_X()
757 unsigned *p = m_p;
758 dReal *pairsbx = m_pairsbx;
759 const unsigned n = m_n;
760 for (unsigned j = 0; j < n; ++j) {
761 unsigned k = p[j];
762 if (k != j) {
763 // p[j] = j; -- not going to be checked anymore anyway
764 dReal x_j = (pairsbx + (sizeint)j * PBX__MAX)[PBX_X];
765 for (;;) {
766 dxSwap(x_j, (pairsbx + (sizeint)k * PBX__MAX)[PBX_X]);
768 unsigned orig_k = p[k];
769 p[k] = k;
770 if (orig_k == j) {
771 break;
773 k = orig_k;
775 (pairsbx + (sizeint)j * PBX__MAX)[PBX_X] = x_j;
780 void dLCP::unpermute_W()
782 memcpy (m_tmp, m_w, m_n * sizeof(dReal));
784 const unsigned *p = m_p;
785 dReal *w = m_w, *tmp = m_tmp;
786 const unsigned n = m_n;
787 for (unsigned j = 0; j < n; ++j) {
788 unsigned k = p[j];
789 w[k] = tmp[j];
793 #endif // dLCP_FAST
796 static void dxSolveLCP_AllUnbounded (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX]);
797 static void dxSolveLCP_Generic (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX],
798 dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex);
800 /*extern */
801 void dxSolveLCP (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX],
802 dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex)
804 if (nub >= n)
806 dxSolveLCP_AllUnbounded (memarena, n, A, pairsbx);
808 else
810 dxSolveLCP_Generic (memarena, n, A, pairsbx, outer_w, nub, pairslh, findex);
814 //***************************************************************************
815 // if all the variables are unbounded then we can just factor, solve, and return
817 static
818 void dxSolveLCP_AllUnbounded (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX])
820 dAASSERT(A != NULL);
821 dAASSERT(pairsbx != NULL);
822 dAASSERT(n != 0);
824 transfer_b_to_x<true>(pairsbx, n);
826 unsigned nskip = dPAD(n);
827 factorMatrixAsLDLT<PBX__MAX> (A, pairsbx + PBX_B, n, nskip);
828 solveEquationSystemWithLDLT<PBX__MAX, PBX__MAX> (A, pairsbx + PBX_B, pairsbx + PBX_X, n, nskip);
831 //***************************************************************************
832 // an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
834 static
835 void dxSolveLCP_Generic (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX],
836 dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex)
838 dAASSERT (n > 0 && A && pairsbx && pairslh && nub >= 0 && nub < n);
839 # ifndef dNODEBUG
841 // check restrictions on lo and hi
842 dReal *endlh = pairslh + (sizeint)n * PLH__MAX;
843 for (dReal *currlh = pairslh; currlh != endlh; currlh += PLH__MAX) dIASSERT (currlh[PLH_LO] <= 0 && currlh[PLH_HI] >= 0);
845 # endif
847 const unsigned nskip = dPAD(n);
848 dReal *L = memarena->AllocateOveralignedArray<dReal> ((sizeint)nskip * n, LMATRIX_ALIGNMENT);
849 dReal *d = memarena->AllocateArray<dReal> (n);
850 dReal *w = outer_w != NULL ? outer_w : memarena->AllocateArray<dReal> (n);
851 dReal *delta_w = memarena->AllocateArray<dReal> (n);
852 dReal *delta_x = memarena->AllocateArray<dReal> (n);
853 dReal *Dell = memarena->AllocateArray<dReal> (n);
854 dReal *ell = memarena->AllocateArray<dReal> (n);
855 #ifdef ROWPTRS
856 dReal **Arows = memarena->AllocateArray<dReal *> (n);
857 #else
858 dReal **Arows = NULL;
859 #endif
860 unsigned *p = memarena->AllocateArray<unsigned> (n);
861 unsigned *C = memarena->AllocateArray<unsigned> (n);
863 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
864 bool *state = memarena->AllocateArray<bool> (n);
866 // create LCP object. note that tmp is set to delta_w to save space, this
867 // optimization relies on knowledge of how tmp is used, so be careful!
868 dLCP lcp(n, nskip, nub, A, pairsbx, w, pairslh, L, d, Dell, ell, delta_w, state, findex, p, C, Arows);
869 unsigned adj_nub = lcp.getNub();
871 // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
872 // LCP conditions then i is added to the appropriate index set. otherwise
873 // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
874 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
875 // while driving x(i) we maintain the LCP conditions on the other variables
876 // 0..i-1. we do this by watching out for other x(i),w(i) values going
877 // outside the valid region, and then switching them between index sets
878 // when that happens.
880 bool hit_first_friction_index = false;
881 for (unsigned i = adj_nub; i < n; ++i) {
882 bool s_error = false;
883 // the index i is the driving index and indexes i+1..n-1 are "dont care",
884 // i.e. when we make changes to the system those x's will be zero and we
885 // don't care what happens to those w's. in other words, we only consider
886 // an (i+1)*(i+1) sub-problem of A*x=b+w.
888 // if we've hit the first friction index, we have to compute the lo and
889 // hi values based on the values of x already computed. we have been
890 // permuting the indexes, so the values stored in the findex vector are
891 // no longer valid. thus we have to temporarily unpermute the x vector.
892 // for the purposes of this computation, 0*infinity = 0 ... so if the
893 // contact constraint's normal force is 0, there should be no tangential
894 // force applied.
896 if (!hit_first_friction_index && findex && findex[i] >= 0) {
897 // un-permute x into delta_w, which is not being used at the moment
898 for (unsigned j = 0; j < n; ++j) delta_w[p[j]] = (pairsbx + (sizeint)j * PBX__MAX)[PBX_X];
900 // set lo and hi values
901 for (unsigned k = i; k < n; ++k) {
902 dReal *currlh = pairslh + (sizeint)k * PLH__MAX;
903 dReal wfk = delta_w[findex[k]];
904 if (wfk == 0) {
905 currlh[PLH_HI] = 0;
906 currlh[PLH_LO] = 0;
908 else {
909 currlh[PLH_HI] = dFabs (currlh[PLH_HI] * wfk);
910 currlh[PLH_LO] = -currlh[PLH_HI];
913 hit_first_friction_index = true;
916 // thus far we have not even been computing the w values for indexes
917 // greater than i, so compute w[i] now.
918 dReal wPrep = lcp.AiC_times_qC<PBX__MAX> (i, pairsbx + PBX_X) + lcp.AiN_times_qN<PBX__MAX> (i, pairsbx + PBX_X);
920 dReal *currbx = pairsbx + (sizeint)i * PBX__MAX;
922 w[i] = wPrep - currbx[PBX_B];
924 // if lo=hi=0 (which can happen for tangential friction when normals are
925 // 0) then the index will be assigned to set N with some state. however,
926 // set C's line has zero size, so the index will always remain in set N.
927 // with the "normal" switching logic, if w changed sign then the index
928 // would have to switch to set C and then back to set N with an inverted
929 // state. this is pointless, and also computationally expensive. to
930 // prevent this from happening, we use the rule that indexes with lo=hi=0
931 // will never be checked for set changes. this means that the state for
932 // these indexes may be incorrect, but that doesn't matter.
934 dReal *currlh = pairslh + (sizeint)i * PLH__MAX;
936 // see if x(i),w(i) is in a valid region
937 if (currlh[PLH_LO] == 0 && w[i] >= 0) {
938 lcp.transfer_i_to_N (i);
939 state[i] = false;
941 else if (currlh[PLH_HI] == 0 && w[i] <= 0) {
942 lcp.transfer_i_to_N (i);
943 state[i] = true;
945 else if (w[i] == 0) {
946 // this is a degenerate case. by the time we get to this test we know
947 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
948 // and similarly that hi > 0. this means that the line segment
949 // corresponding to set C is at least finite in extent, and we are on it.
950 // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
951 lcp.solve1 (delta_x, i, false, 1);
953 lcp.transfer_i_to_C (i);
955 else {
956 // we must push x(i) and w(i)
957 for (;;) {
958 // find direction to push on x(i)
959 bool dir_positive = (w[i] <= 0);
961 // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
962 lcp.solve1 (delta_x, i, dir_positive);
964 // note that delta_x[i] = (dir_positive ? 1 : -1), but we wont bother to set it
966 // compute: delta_w = A*delta_x ... note we only care about
967 // delta_w(N) and delta_w(i), the rest is ignored
968 lcp.pN_equals_ANC_times_qC (delta_w, delta_x);
969 lcp.pN_plusequals_ANi (delta_w, i, dir_positive);
970 delta_w[i] = dir_positive
971 ? lcp.AiC_times_qC<1> (i, delta_x) + lcp.Aii(i)
972 : lcp.AiC_times_qC<1> (i, delta_x) - lcp.Aii(i);
974 // find largest step we can take (size=s), either to drive x(i),w(i)
975 // to the valid LCP region or to drive an already-valid variable
976 // outside the valid region.
978 int cmd = 1; // index switching command
979 unsigned si = 0; // si = index to switch if cmd>3
981 dReal s = delta_w[i] != REAL(0.0)
982 ? -w[i] / delta_w[i]
983 : (w[i] != REAL(0.0) ? dCopySign(dInfinity, -w[i]) : REAL(0.0));
985 if (dir_positive) {
986 if (currlh[PLH_HI] < dInfinity) {
987 dReal s2 = (currlh[PLH_HI] - currbx[PBX_X]); // was (hi[i]-x[i])/dirf // step to x(i)=hi(i)
988 if (s2 < s) {
989 s = s2;
990 cmd = 3;
994 else {
995 if (currlh[PLH_LO] > -dInfinity) {
996 dReal s2 = (currbx[PBX_X] - currlh[PLH_LO]); // was (lo[i]-x[i])/dirf // step to x(i)=lo(i)
997 if (s2 < s) {
998 s = s2;
999 cmd = 2;
1005 const unsigned numN = lcp.numN();
1006 for (unsigned k = 0; k < numN; ++k) {
1007 const unsigned indexN_k = lcp.indexN(k);
1008 if (!state[indexN_k] ? delta_w[indexN_k] < 0 : delta_w[indexN_k] > 0) {
1009 // don't bother checking if lo=hi=0
1010 dReal *indexlh = pairslh + (sizeint)indexN_k * PLH__MAX;
1011 if (indexlh[PLH_LO] == 0 && indexlh[PLH_HI] == 0) continue;
1012 dReal s2 = -w[indexN_k] / delta_w[indexN_k];
1013 if (s2 < s) {
1014 s = s2;
1015 cmd = 4;
1016 si = indexN_k;
1023 const unsigned numC = lcp.numC();
1024 for (unsigned k = adj_nub; k < numC; ++k) {
1025 const unsigned indexC_k = lcp.indexC(k);
1026 dReal *indexlh = pairslh + (sizeint)indexC_k * PLH__MAX;
1027 if (delta_x[indexC_k] < 0 && indexlh[PLH_LO] > -dInfinity) {
1028 dReal s2 = (indexlh[PLH_LO] - (pairsbx + (sizeint)indexC_k * PBX__MAX)[PBX_X]) / delta_x[indexC_k];
1029 if (s2 < s) {
1030 s = s2;
1031 cmd = 5;
1032 si = indexC_k;
1035 if (delta_x[indexC_k] > 0 && indexlh[PLH_HI] < dInfinity) {
1036 dReal s2 = (indexlh[PLH_HI] - (pairsbx + (sizeint)indexC_k * PBX__MAX)[PBX_X]) / delta_x[indexC_k];
1037 if (s2 < s) {
1038 s = s2;
1039 cmd = 6;
1040 si = indexC_k;
1046 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
1047 // "C->NL","C->NH"};
1048 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
1050 // if s <= 0 then we've got a problem. if we just keep going then
1051 // we're going to get stuck in an infinite loop. instead, just cross
1052 // our fingers and exit with the current solution.
1053 if (s <= REAL(0.0)) {
1054 dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",(double)s);
1055 if (i < n) {
1056 dxtSetZero<PBX__MAX>(currbx + PBX_X, n - i);
1057 dxSetZero (w + i, n - i);
1059 s_error = true;
1060 break;
1063 // apply x = x + s * delta_x
1064 lcp.pC_plusequals_s_times_qC<PBX__MAX> (pairsbx + PBX_X, s, delta_x);
1065 currbx[PBX_X] = dir_positive
1066 ? currbx[PBX_X] + s
1067 : currbx[PBX_X] - s;
1069 // apply w = w + s * delta_w
1070 lcp.pN_plusequals_s_times_qN (w, s, delta_w);
1071 w[i] += s * delta_w[i];
1073 void *tmpbuf;
1074 // switch indexes between sets if necessary
1075 switch (cmd) {
1076 case 1: // done
1077 w[i] = 0;
1078 lcp.transfer_i_to_C (i);
1079 break;
1080 case 2: // done
1081 currbx[PBX_X] = currlh[PLH_LO];
1082 state[i] = false;
1083 lcp.transfer_i_to_N (i);
1084 break;
1085 case 3: // done
1086 currbx[PBX_X] = currlh[PLH_HI];
1087 state[i] = true;
1088 lcp.transfer_i_to_N (i);
1089 break;
1090 case 4: // keep going
1091 w[si] = 0;
1092 lcp.transfer_i_from_N_to_C (si);
1093 break;
1094 case 5: // keep going
1095 (pairsbx + (sizeint)si * PBX__MAX)[PBX_X] = (pairslh + (sizeint)si * PLH__MAX)[PLH_LO];
1096 state[si] = false;
1097 tmpbuf = memarena->PeekBufferRemainder();
1098 lcp.transfer_i_from_C_to_N (si, tmpbuf);
1099 break;
1100 case 6: // keep going
1101 (pairsbx + (sizeint)si * PBX__MAX)[PBX_X] = (pairslh + (sizeint)si * PLH__MAX)[PLH_HI];
1102 state[si] = true;
1103 tmpbuf = memarena->PeekBufferRemainder();
1104 lcp.transfer_i_from_C_to_N (si, tmpbuf);
1105 break;
1108 if (cmd <= 3) break;
1109 } // for (;;)
1110 } // else
1112 if (s_error) {
1113 break;
1115 } // for (unsigned i = adj_nub; i < n; ++i)
1117 // now we have to un-permute x and w
1118 if (outer_w != NULL) {
1119 lcp.unpermute_W();
1121 lcp.unpermute_X(); // This destroys p[] and must be done last
1124 sizeint dxEstimateSolveLCPMemoryReq(unsigned n, bool outer_w_avail)
1126 const unsigned nskip = dPAD(n);
1128 sizeint res = 0;
1130 res += dOVERALIGNED_SIZE(sizeof(dReal) * ((sizeint)n * nskip), LMATRIX_ALIGNMENT); // for L
1131 res += 5 * dEFFICIENT_SIZE(sizeof(dReal) * n); // for d, delta_w, delta_x, Dell, ell
1132 if (!outer_w_avail) {
1133 res += dEFFICIENT_SIZE(sizeof(dReal) * n); // for w
1135 #ifdef ROWPTRS
1136 res += dEFFICIENT_SIZE(sizeof(dReal *) * n); // for Arows
1137 #endif
1138 res += 2 * dEFFICIENT_SIZE(sizeof(unsigned) * n); // for p, C
1139 res += dEFFICIENT_SIZE(sizeof(bool) * n); // for state
1141 // Use n instead of nC as nC varies at runtime while n is greater or equal to nC
1142 sizeint lcp_transfer_req = dLCP::estimate_transfer_i_from_C_to_N_mem_req(n, nskip);
1143 res += dEFFICIENT_SIZE(lcp_transfer_req); // for dLCP::transfer_i_from_C_to_N
1145 return res;
1149 //***************************************************************************
1150 // accuracy and timing test
1152 static sizeint EstimateTestSolveLCPMemoryReq(unsigned n)
1154 const unsigned nskip = dPAD(n);
1156 sizeint res = 0;
1158 res += 2 * dEFFICIENT_SIZE(sizeof(dReal) * ((sizeint)n * nskip)); // for A, A2
1159 res += 7 * dEFFICIENT_SIZE(sizeof(dReal) * n); // for x, b, w, lo, hi, tmp1, tmp2
1160 res += dEFFICIENT_SIZE(sizeof(dReal) * PBX__MAX * n); // for pairsbx,
1161 res += dEFFICIENT_SIZE(sizeof(dReal) * PLH__MAX * n); // for pairslh
1163 res += dxEstimateSolveLCPMemoryReq(n, true);
1165 return res;
1168 extern "C" ODE_API int dTestSolveLCP()
1170 const unsigned n = 100;
1172 sizeint memreq = EstimateTestSolveLCPMemoryReq(n);
1173 dxWorldProcessMemArena *arena = dxAllocateTemporaryWorldProcessMemArena(memreq, NULL, NULL);
1174 if (arena == NULL) {
1175 return 0;
1177 arena->ResetState();
1179 unsigned i,nskip = dPAD(n);
1180 #ifdef dDOUBLE
1181 const dReal tol = REAL(1e-9);
1182 #endif
1183 #ifdef dSINGLE
1184 const dReal tol = REAL(1e-4);
1185 #endif
1186 printf ("dTestSolveLCP()\n");
1188 dReal *A = arena->AllocateArray<dReal> (n*nskip);
1189 dReal *x = arena->AllocateArray<dReal> (n);
1190 dReal *b = arena->AllocateArray<dReal> (n);
1191 dReal *w = arena->AllocateArray<dReal> (n);
1192 dReal *lo = arena->AllocateArray<dReal> (n);
1193 dReal *hi = arena->AllocateArray<dReal> (n);
1195 dReal *A2 = arena->AllocateArray<dReal> (n*nskip);
1196 dReal *pairsbx = arena->AllocateArray<dReal> (n * PBX__MAX);
1197 dReal *pairslh = arena->AllocateArray<dReal> (n * PLH__MAX);
1199 dReal *tmp1 = arena->AllocateArray<dReal> (n);
1200 dReal *tmp2 = arena->AllocateArray<dReal> (n);
1202 double total_time = 0;
1203 for (unsigned count=0; count < 1000; count++) {
1204 BEGIN_STATE_SAVE(arena, saveInner) {
1206 // form (A,b) = a random positive definite LCP problem
1207 dMakeRandomMatrix (A2,n,n,1.0);
1208 dMultiply2 (A,A2,A2,n,n,n);
1209 dMakeRandomMatrix (x,n,1,1.0);
1210 dMultiply0 (b,A,x,n,n,1);
1211 for (i=0; i<n; i++) b[i] += (dRandReal()*REAL(0.2))-REAL(0.1);
1213 // choose `nub' in the range 0..n-1
1214 unsigned nub = 50; //dRandInt (n);
1216 // make limits
1217 for (i=0; i<nub; i++) lo[i] = -dInfinity;
1218 for (i=0; i<nub; i++) hi[i] = dInfinity;
1219 //for (i=nub; i<n; i++) lo[i] = 0;
1220 //for (i=nub; i<n; i++) hi[i] = dInfinity;
1221 //for (i=nub; i<n; i++) lo[i] = -dInfinity;
1222 //for (i=nub; i<n; i++) hi[i] = 0;
1223 for (i=nub; i<n; i++) lo[i] = -(dRandReal()*REAL(1.0))-REAL(0.01);
1224 for (i=nub; i<n; i++) hi[i] = (dRandReal()*REAL(1.0))+REAL(0.01);
1226 // set a few limits to lo=hi=0
1228 for (i=0; i<10; i++) {
1229 unsigned j = dRandInt (n-nub) + nub;
1230 lo[j] = 0;
1231 hi[j] = 0;
1235 // solve the LCP. we must make copy of A,b,lo,hi (A2,b2,lo2,hi2) for
1236 // SolveLCP() to permute. also, we'll clear the upper triangle of A2 to
1237 // ensure that it doesn't get referenced (if it does, the answer will be
1238 // wrong).
1240 memcpy (A2, A, n * nskip * sizeof(dReal));
1241 dClearUpperTriangle (A2, n);
1242 for (i = 0; i != n; ++i) {
1243 dReal *currbx = pairsbx + i * PBX__MAX;
1244 currbx[PBX_B] = b[i];
1245 currbx[PBX_X] = 0;
1247 for (i = 0; i != n; ++i) {
1248 dReal *currlh = pairslh + i * PLH__MAX;
1249 currlh[PLH_LO] = lo[i];
1250 currlh[PLH_HI] = hi[i];
1252 dSetZero (w,n);
1254 dStopwatch sw;
1255 dStopwatchReset (&sw);
1256 dStopwatchStart (&sw);
1258 dxSolveLCP (arena,n,A2,pairsbx,w,nub,pairslh,0);
1260 dStopwatchStop (&sw);
1261 double time = dStopwatchTime(&sw);
1262 total_time += time;
1263 double average = total_time / double(count+1) * 1000.0;
1265 for (i = 0; i != n; ++i) {
1266 const dReal *currbx = pairsbx + i * PBX__MAX;
1267 x[i] = currbx[PBX_X];
1270 // check the solution
1272 dMultiply0 (tmp1,A,x,n,n,1);
1273 for (i=0; i<n; i++) tmp2[i] = b[i] + w[i];
1274 dReal diff = dMaxDifference (tmp1,tmp2,n,1);
1275 // printf ("\tA*x = b+w, maximum difference = %.6e - %s (1)\n",diff,
1276 // diff > tol ? "FAILED" : "passed");
1277 if (diff > tol) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff);
1278 unsigned n1=0,n2=0,n3=0;
1279 for (i=0; i<n; i++) {
1280 if (x[i]==lo[i] && w[i] >= 0) {
1281 n1++; // ok
1283 else if (x[i]==hi[i] && w[i] <= 0) {
1284 n2++; // ok
1286 else if (x[i] >= lo[i] && x[i] <= hi[i] && w[i] == 0) {
1287 n3++; // ok
1289 else {
1290 dDebug (0,"FAILED: i=%d x=%.4e w=%.4e lo=%.4e hi=%.4e",i,
1291 x[i],w[i],lo[i],hi[i]);
1295 // pacifier
1296 printf ("passed: NL=%3d NH=%3d C=%3d ",n1,n2,n3);
1297 printf ("time=%10.3f ms avg=%10.4f\n",time * 1000.0,average);
1299 } END_STATE_SAVE(arena, saveInner);
1302 dxFreeTemporaryWorldProcessMemArena(arena);
1303 return 1;