1 /*************************************************************************
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
21 *************************************************************************/
29 solve A*x = b+w, with x and w subject to certain LCP conditions.
30 each x(i),w(i) must lie on one of the three line segments in the following
31 diagram. each line segment corresponds to one index set :
40 w=0 + +-----------------------+
47 +-------|-----------|-----------|----------> x(i)
50 the Dantzig algorithm proceeds as follows:
52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53 negative towards the line. as this is done, the other (x(j),w(j))
54 for j<i are constrained to be on the line. if any (x,w) reaches the
55 end of a line segment then it is switched between index sets.
56 * i is added to the appropriate index set depending on what line segment
59 we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60 simpler, because the starting point for x(i),w(i) is always on the dotted
61 line x=0 and x will only ever increase in one direction, so it can only hit
62 two out of the three line segments.
68 this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69 the implementation is split into an LCP problem object (dLCP) and an LCP
70 driver function. most optimization occurs in the dLCP object.
72 a naive implementation of the algorithm requires either a lot of data motion
73 or a lot of permutation-array lookup, because we are constantly re-ordering
74 rows and columns. to avoid this and make a more optimized algorithm, a
75 non-trivial data structure is used to represent the matrix A (this is
76 implemented in the fast version of the dLCP object).
78 during execution of this algorithm, some indexes in A are clamped (set C),
79 some are non-clamped (set N), and some are "don't care" (where x=0).
80 A,x,b,w (and other problem vectors) are permuted such that the clamped
81 indexes are first, the unclamped indexes are next, and the don't-care
82 indexes are last. this permutation is recorded in the array `p'.
83 initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84 the corresponding elements of p are swapped.
86 because the C and N elements are grouped together in the rows of A, we can do
87 lots of work with a fast dot product function. if A,x,etc were not permuted
88 and we only had a permutation array, then those dot products would be much
89 slower as we would have a permutation array lookup in some inner loops.
91 A is accessed through an array of row pointers, so that element (i,j) of the
92 permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93 we still have to actually move the data.
95 during execution of this algorithm we maintain an L*D*L' factorization of
96 the clamped submatrix of A (call it `AC') which is the top left nC*nC
97 submatrix of A. there are two ways we could arrange the rows/columns in AC.
99 (1) AC is always permuted such that L*D*L' = AC. this causes a problem
100 when a row/column is removed from C, because then all the rows/columns of A
101 between the deleted index and the end of C need to be rotated downward.
102 this results in a lot of data motion and slows things down.
103 (2) L*D*L' is actually a factorization of a *permutation* of AC (which is
104 itself a permutation of the underlying A). this is what we do - the
105 permutation is recorded in the vector C. call this permutation A[C,C].
106 when a row/column is removed from C, all we have to do is swap two
107 rows/columns and manipulate C.
111 #include <ode/common.h>
112 #include <ode/misc.h>
113 #include <ode/timer.h> // for testing
118 #include "mat.h" // for testing
119 #include "threaded_solver_ldlt.h"
121 #include "fastdot_impl.h"
122 #include "fastldltfactor_impl.h"
123 #include "fastldltsolve_impl.h"
126 //***************************************************************************
127 // code generation parameters
129 // LCP debugging (mostly for fast dLCP) - this slows things down a lot
132 #define dLCP_FAST // use fast dLCP object
134 #define NUB_OPTIMIZATIONS // use NUB optimizations
137 // option 1 : matrix row pointers (less data copying)
139 #define ATYPE dReal **
140 #define AROW(i) (m_A[i])
142 // option 2 : no matrix row pointers (slightly faster inner loops)
144 //#define ATYPE dReal *
145 //#define AROW(i) (m_A+(i)*m_nskip)
148 //***************************************************************************
150 #define dMIN(A,B) ((A)>(B) ? (B) : (A))
151 #define dMAX(A,B) ((B)>(A) ? (B) : (A))
154 #define LMATRIX_ALIGNMENT dMAX(64, EFFICIENT_ALIGNMENT)
156 //***************************************************************************
159 // transfer b-values to x-values
160 template<bool zero_b
>
162 void transfer_b_to_x(dReal pairsbx
[PBX__MAX
], unsigned n
)
164 dReal
*const endbx
= pairsbx
+ (sizeint
)n
* PBX__MAX
;
165 for (dReal
*currbx
= pairsbx
; currbx
!= endbx
; currbx
+= PBX__MAX
) {
166 currbx
[PBX_X
] = currbx
[PBX_B
];
168 currbx
[PBX_B
] = REAL(0.0);
173 // swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
174 // A is nskip. this only references and swaps the lower triangle.
175 // if `do_fast_row_swaps' is nonzero and row pointers are being used, then
176 // rows will be swapped by exchanging row pointers. otherwise the data will
180 void swapRowsAndCols (ATYPE A
, unsigned n
, unsigned i1
, unsigned i2
, unsigned nskip
,
181 int do_fast_row_swaps
)
183 dAASSERT (A
&& n
> 0 && i1
>= 0 && i2
>= 0 && i1
< n
&& i2
< n
&&
184 nskip
>= n
&& i1
< i2
);
189 for (unsigned i
=i1
+1; i
<i2
; ++i
) {
190 dReal
*A_i_i1
= A
[i
] + i1
;
197 // swap rows, by swapping row pointers
198 if (do_fast_row_swaps
) {
203 // Only swap till i2 column to match A plain storage variant.
204 for (unsigned k
= 0; k
<= i2
; ++k
) {
205 dxSwap(A_i1
[k
], A_i2
[k
]);
208 // swap columns the hard way
209 for (unsigned j
= i2
+ 1; j
< n
; ++j
) {
211 dxSwap(A_j
[i1
], A_j
[i2
]);
214 dReal
*A_i1
= A
+ (sizeint
)nskip
* i1
;
215 dReal
*A_i2
= A
+ (sizeint
)nskip
* i2
;
217 for (unsigned k
= 0; k
< i1
; ++k
) {
218 dxSwap(A_i1
[k
], A_i2
[k
]);
221 dReal
*A_i
= A_i1
+ nskip
;
222 for (unsigned i
= i1
+ 1; i
< i2
; A_i
+= nskip
, ++i
) {
223 dxSwap(A_i2
[i
], A_i
[i1
]);
226 dxSwap(A_i1
[i1
], A_i2
[i2
]);
228 dReal
*A_j
= A_i2
+ nskip
;
229 for (unsigned j
= i2
+ 1; j
< n
; A_j
+= nskip
, ++j
) {
230 dxSwap(A_j
[i1
], A_j
[i2
]);
236 // swap two indexes in the n*n LCP problem. i1 must be <= i2.
239 void swapProblem (ATYPE A
, dReal pairsbx
[PBX__MAX
], dReal
*w
, dReal pairslh
[PLH__MAX
],
240 unsigned *p
, bool *state
, int *findex
,
241 unsigned n
, unsigned i1
, unsigned i2
, unsigned nskip
,
242 int do_fast_row_swaps
)
244 dIASSERT (n
>0 && i1
< n
&& i2
< n
&& nskip
>= n
&& i1
<= i2
);
247 swapRowsAndCols (A
, n
, i1
, i2
, nskip
, do_fast_row_swaps
);
249 dxSwap((pairsbx
+ (sizeint
)i1
* PBX__MAX
)[PBX_B
], (pairsbx
+ (sizeint
)i2
* PBX__MAX
)[PBX_B
]);
250 dxSwap((pairsbx
+ (sizeint
)i1
* PBX__MAX
)[PBX_X
], (pairsbx
+ (sizeint
)i2
* PBX__MAX
)[PBX_X
]);
251 dSASSERT(PBX__MAX
== 2);
253 dxSwap(w
[i1
], w
[i2
]);
255 dxSwap((pairslh
+ (sizeint
)i1
* PLH__MAX
)[PLH_LO
], (pairslh
+ (sizeint
)i2
* PLH__MAX
)[PLH_LO
]);
256 dxSwap((pairslh
+ (sizeint
)i1
* PLH__MAX
)[PLH_HI
], (pairslh
+ (sizeint
)i2
* PLH__MAX
)[PLH_HI
]);
257 dSASSERT(PLH__MAX
== 2);
259 dxSwap(p
[i1
], p
[i2
]);
260 dxSwap(state
[i1
], state
[i2
]);
262 if (findex
!= NULL
) {
263 dxSwap(findex
[i1
], findex
[i2
]);
269 // for debugging - check that L,d is the factorization of A[C,C].
270 // A[C,C] has size nC*nC and leading dimension nskip.
271 // L has size nC*nC and leading dimension nskip.
277 void checkFactorization (ATYPE A
, dReal
*_L
, dReal
*_d
,
278 unsigned nC
, unsigned *C
, unsigned nskip
)
283 // get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C]
285 for (i
=0; i
< nC
; i
++) {
286 for (j
= 0; j
<= i
; j
++) A1(i
, j
) = A1(j
, i
) = AROW(i
)[j
];
288 dMatrix A2
= A1
.select (nC
, C
, nC
, C
);
290 // printf ("A1=\n"); A1.print(); printf ("\n");
291 // printf ("A2=\n"); A2.print(); printf ("\n");
293 // compute A3 = L*D*L'
294 dMatrix
L (nC
, nC
, _L
, nskip
, 1);
296 for (i
= 0; i
< nC
; i
++) D(i
, i
) = 1.0 / _d
[i
];
297 L
.clearUpperTriangle();
298 for (i
= 0; i
< nC
; i
++) L(i
, i
) = 1;
299 dMatrix A3
= L
* D
* L
.transpose();
301 // printf ("L=\n"); L.print(); printf ("\n");
302 // printf ("D=\n"); D.print(); printf ("\n");
303 // printf ("A3=\n"); A2.print(); printf ("\n");
306 dReal diff
= A2
.maxDifference (A3
);
308 dDebug (0, "L*D*L' check, maximum difference = %.6e\n", diff
);
319 void checkPermutations (unsigned i
, unsigned n
, unsigned nC
, unsigned nN
, unsigned *p
, unsigned *C
)
322 dIASSERT (/*nC >= 0 && nN >= 0 && */(nC
+ nN
) == i
&& i
< n
);
323 for (k
=0; k
<i
; k
++) dIASSERT (p
[k
] >= 0 && p
[k
] < i
);
324 for (k
=i
; k
<n
; k
++) dIASSERT (p
[k
] == k
);
325 for (j
=0; j
<nC
; j
++) {
327 for (k
=0; k
<nC
; k
++) if (C
[k
]==j
) C_is_bad
= 0;
328 dIASSERT (C_is_bad
==0);
334 //***************************************************************************
335 // dLCP manipulator object. this represents an n*n LCP problem.
337 // two index sets C and N are kept. each set holds a subset of
338 // the variable indexes 0..n-1. an index can only be in one set.
339 // initially both sets are empty.
341 // the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
343 //***************************************************************************
344 // fast implementation of dLCP. see the above definition of dLCP for
345 // interface comments.
347 // `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
348 // permuted as the other vectors/matrices are permuted.
350 // A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
351 // contiguous indexes. the don't-care indexes follow N.
353 // an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
354 // added or removed from the set C the factorization is updated.
355 // thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
356 // the leading dimension of the matrix L is always `nskip'.
358 // at the start there may be other indexes that are unbounded but are not
359 // included in `nub'. dLCP will permute the matrix so that absolutely all
360 // unbounded vectors are at the start. thus there may be some initial
363 // the algorithms here assume certain patterns, particularly with respect to
370 const unsigned m_nskip
;
372 unsigned m_nC
, m_nN
; // size of each index set
373 ATYPE
const m_A
; // A rows
374 dReal
*const m_pairsbx
, *const m_w
, *const m_pairslh
; // permuted LCP problem data
375 dReal
*const m_L
, *const m_d
; // L*D*L' factorization of set C
376 dReal
*const m_Dell
, *const m_ell
, *const m_tmp
;
379 unsigned *const m_p
, *const m_C
;
381 dLCP (unsigned n
, unsigned nskip
, unsigned nub
, dReal
*Adata
, dReal
*pairsbx
, dReal
*w
,
382 dReal
*pairslh
, dReal
*L
, dReal
*d
,
383 dReal
*Dell
, dReal
*ell
, dReal
*tmp
,
384 bool *state
, int *findex
, unsigned *p
, unsigned *C
, dReal
**Arows
);
385 unsigned getNub() const { return m_nub
; }
386 void transfer_i_to_C (unsigned i
);
387 void transfer_i_to_N (unsigned /*i*/) { m_nN
++; } // because we can assume C and N span 1:i-1
388 void transfer_i_from_N_to_C (unsigned i
);
389 void transfer_i_from_C_to_N (unsigned i
, void *tmpbuf
);
390 static sizeint
estimate_transfer_i_from_C_to_N_mem_req(unsigned nC
, unsigned nskip
) { return dEstimateLDLTRemoveTmpbufSize(nC
, nskip
); }
391 unsigned numC() const { return m_nC
; }
392 unsigned numN() const { return m_nN
; }
393 unsigned indexC (unsigned i
) const { return i
; }
394 unsigned indexN (unsigned i
) const { return i
+m_nC
; }
395 dReal
Aii (unsigned i
) const { return AROW(i
)[i
]; }
396 template<unsigned q_stride
>
397 dReal
AiC_times_qC (unsigned i
, dReal
*q
) const { return calculateLargeVectorDot
<q_stride
> (AROW(i
), q
, m_nC
); }
398 template<unsigned q_stride
>
399 dReal
AiN_times_qN (unsigned i
, dReal
*q
) const { return calculateLargeVectorDot
<q_stride
> (AROW(i
) + m_nC
, q
+ (sizeint
)m_nC
* q_stride
, m_nN
); }
400 void pN_equals_ANC_times_qC (dReal
*p
, dReal
*q
);
401 void pN_plusequals_ANi (dReal
*p
, unsigned i
, bool dir_positive
);
402 template<unsigned p_stride
>
403 void pC_plusequals_s_times_qC (dReal
*p
, dReal s
, dReal
*q
);
404 void pN_plusequals_s_times_qN (dReal
*p
, dReal s
, dReal
*q
);
405 void solve1 (dReal
*a
, unsigned i
, bool dir_positive
, int only_transfer
=0);
411 dLCP::dLCP (unsigned n
, unsigned nskip
, unsigned nub
, dReal
*Adata
, dReal
*pairsbx
, dReal
*w
,
412 dReal
*pairslh
, dReal
*L
, dReal
*d
,
413 dReal
*Dell
, dReal
*ell
, dReal
*tmp
,
414 bool *state
, int *findex
, unsigned *p
, unsigned *C
, dReal
**Arows
):
415 m_n(n
), m_nskip(nskip
), m_nub(nub
), m_nC(0), m_nN(0),
421 m_pairsbx(pairsbx
), m_w(w
), m_pairslh(pairslh
),
422 m_L(L
), m_d(d
), m_Dell(Dell
), m_ell(ell
), m_tmp(tmp
),
423 m_state(state
), m_findex(findex
), m_p(p
), m_C(C
)
425 dxtSetZero
<PBX__MAX
>(pairsbx
+ PBX_X
, n
);
429 // make matrix row pointers
432 for (unsigned k
=0; k
<n
; aptr
+=nskip
, ++k
) A
[k
] = aptr
;
437 for (unsigned k
=0; k
!= n
; ++k
) p
[k
] = k
; // initially unpermutted
441 // for testing, we can do some random swaps in the area i > nub
444 for (unsigned k=0; k<100; k++) {
447 i1 = dRandInt(n-nub)+nub;
448 i2 = dRandInt(n-nub)+nub;
451 //printf ("--> %d %d\n",i1,i2);
452 swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, n, i1, i2, m_nskip, 0);
457 // permute the problem so that *all* the unbounded variables are at the
458 // start, i.e. look for unbounded variables not included in `nub'. we can
459 // potentially push up `nub' this way and get a bigger initial factorization.
460 // note that when we swap rows/cols here we must not just swap row pointers,
461 // as the initial factorization relies on the data being all in one chunk.
462 // variables that have findex >= 0 are *not* considered to be unbounded even
463 // if lo=-inf and hi=inf - this is because these limits may change during the
466 unsigned currNub
= nub
;
468 for (unsigned k
= currNub
; k
< n
; ++k
) {
469 if (findex
&& findex
[k
] >= 0) continue;
470 if ((pairslh
+ (sizeint
)k
* PLH__MAX
)[PLH_LO
] == -dInfinity
&& (pairslh
+ (sizeint
)k
* PLH__MAX
)[PLH_HI
] == dInfinity
) {
471 swapProblem (m_A
, m_pairsbx
, m_w
, pairslh
, m_p
, m_state
, findex
, n
, currNub
, k
, nskip
, 0);
477 // if there are unbounded variables at the start, factorize A up to that
478 // point and solve for x. this puts all indexes 0..currNub-1 into C.
482 for (unsigned j
= 0; j
< currNub
; Lrow
+= nskip
, ++j
) memcpy(Lrow
, AROW(j
), (j
+ 1) * sizeof(dReal
));
484 transfer_b_to_x
<false> (m_pairsbx
, currNub
);
485 factorMatrixAsLDLT
<1> (m_L
, m_d
, currNub
, nskip
);
486 solveEquationSystemWithLDLT
<1, PBX__MAX
> (m_L
, m_d
, m_pairsbx
+ PBX_X
, currNub
, nskip
);
487 dSetZero (m_w
, currNub
);
490 for (unsigned k
= 0; k
< currNub
; ++k
) C
[k
] = k
;
495 // permute the indexes > currNub such that all findex variables are at the end
497 unsigned num_at_end
= 0;
498 for (unsigned k
= m_n
; k
> currNub
; ) {
500 if (findex
[k
] >= 0) {
501 swapProblem (m_A
, m_pairsbx
, m_w
, m_pairslh
, m_p
, m_state
, findex
, m_n
, k
, m_n
- 1 - num_at_end
, nskip
, 1);
507 // print info about indexes
510 for (unsigned k=0; k<n; k++) {
511 if (k<currNub) printf ("C");
512 else if ((m_pairslh + (sizeint)k * PLH__MAX)[PLH_LO] == -dInfinity && (m_pairslh + (sizeint)k * PLH__MAX)[PLH_HI] == dInfinity) printf ("c");
521 void dLCP::transfer_i_to_C (unsigned i
)
524 const unsigned nC
= m_nC
;
527 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
528 dReal
*const Ltgt
= m_L
+ (sizeint
)m_nskip
* nC
, *ell
= m_ell
;
529 memcpy(Ltgt
, ell
, nC
* sizeof(dReal
));
531 dReal ell_Dell_dot
= dxDot(m_ell
, m_Dell
, nC
);
532 dReal AROW_i_i
= AROW(i
)[i
] != ell_Dell_dot
? AROW(i
)[i
] : dNextAfter(AROW(i
)[i
], dInfinity
); // A hack to avoid getting a zero in the denominator
533 m_d
[nC
] = dRecip (AROW_i_i
- ell_Dell_dot
);
536 m_d
[0] = dRecip (AROW(i
)[i
]);
539 swapProblem (m_A
, m_pairsbx
, m_w
, m_pairslh
, m_p
, m_state
, m_findex
, m_n
, nC
, i
, m_nskip
, 1);
542 m_nC
= nC
+ 1; // nC value is outdated after this line
546 checkFactorization (m_A
, m_L
, m_d
, m_nC
, m_C
, m_nskip
);
547 if (i
< (m_n
-1)) checkPermutations (i
+1, m_n
, m_nC
, m_nN
, m_p
, m_C
);
552 void dLCP::transfer_i_from_N_to_C (unsigned i
)
555 const unsigned nC
= m_nC
;
558 dReal
*const aptr
= AROW(i
);
559 dReal
*Dell
= m_Dell
;
560 const unsigned *C
= m_C
;
561 # ifdef NUB_OPTIMIZATIONS
562 // if nub>0, initial part of aptr unpermuted
563 const unsigned nub
= m_nub
;
565 for ( ; j
<nub
; ++j
) Dell
[j
] = aptr
[j
];
566 for ( ; j
<nC
; ++j
) Dell
[j
] = aptr
[C
[j
]];
568 for (unsigned j
=0; j
<nC
; ++j
) Dell
[j
] = aptr
[C
[j
]];
571 solveL1Straight
<1>(m_L
, m_Dell
, nC
, m_nskip
);
573 dReal ell_Dell_dot
= REAL(0.0);
574 dReal
*const Ltgt
= m_L
+ (sizeint
)m_nskip
* nC
;
575 dReal
*ell
= m_ell
, *Dell
= m_Dell
, *d
= m_d
;
576 for (unsigned j
= 0; j
< nC
; ++j
) {
577 dReal ell_j
, Dell_j
= Dell
[j
];
578 Ltgt
[j
] = ell
[j
] = ell_j
= Dell_j
* d
[j
];
579 ell_Dell_dot
+= ell_j
* Dell_j
;
582 dReal AROW_i_i
= AROW(i
)[i
] != ell_Dell_dot
? AROW(i
)[i
] : dNextAfter(AROW(i
)[i
], dInfinity
); // A hack to avoid getting a zero in the denominator
583 m_d
[nC
] = dRecip (AROW_i_i
- ell_Dell_dot
);
586 m_d
[0] = dRecip (AROW(i
)[i
]);
589 swapProblem (m_A
, m_pairsbx
, m_w
, m_pairslh
, m_p
, m_state
, m_findex
, m_n
, nC
, i
, m_nskip
, 1);
593 m_nC
= nC
+ 1; // nC value is outdated after this line
597 // if we just finish here then we'll go back and re-solve for
598 // delta_x. but actually we can be more efficient and incrementally
599 // update delta_x here. but if we do this, we wont have ell and Dell
600 // to use in updating the factorization later.
603 checkFactorization (m_A
,m_L
,m_d
,m_nC
,m_C
,m_nskip
);
608 void dLCP::transfer_i_from_C_to_N (unsigned i
, void *tmpbuf
)
612 // remove a row/column from the factorization, and adjust the
613 // indexes (black magic!)
615 const unsigned nC
= m_nC
;
617 for ( ; j
< nC
; ++j
) {
618 if (C
[j
] == nC
- 1) {
622 dxLDLTRemove (m_A
, C
, m_L
, m_d
, m_n
, nC
, j
, m_nskip
, tmpbuf
);
624 if (last_idx
== -1) {
625 for (k
= j
+ 1 ; k
< nC
; ++k
) {
626 if (C
[k
] == nC
- 1) {
636 if (j
!= (nC
- 1)) memmove (C
+ j
, C
+ j
+ 1, (nC
- j
- 1) * sizeof(C
[0]));
642 swapProblem (m_A
, m_pairsbx
, m_w
, m_pairslh
, m_p
, m_state
, m_findex
, m_n
, i
, nC
- 1, m_nskip
, 1);
645 m_nC
= nC
- 1; // nC value is outdated after this line
649 checkFactorization (m_A
, m_L
, m_d
, m_nC
, m_C
, m_nskip
);
654 void dLCP::pN_equals_ANC_times_qC (dReal
*p
, dReal
*q
)
656 // we could try to make this matrix-vector multiplication faster using
657 // outer product matrix tricks, e.g. with the dMultidotX() functions.
658 // but i tried it and it actually made things slower on random 100x100
659 // problems because of the overhead involved. so we'll stick with the
660 // simple method for now.
661 const unsigned nC
= m_nC
;
662 dReal
*ptgt
= p
+ nC
;
663 const unsigned nN
= m_nN
;
664 for (unsigned i
= 0; i
< nN
; ++i
) {
665 ptgt
[i
] = dxDot (AROW(i
+ nC
), q
, nC
);
670 void dLCP::pN_plusequals_ANi (dReal
*p
, unsigned i
, bool dir_positive
)
672 const unsigned nC
= m_nC
;
673 dReal
*aptr
= AROW(i
) + nC
;
674 dReal
*ptgt
= p
+ nC
;
676 const unsigned nN
= m_nN
;
677 for (unsigned j
=0; j
< nN
; ++j
) ptgt
[j
] += aptr
[j
];
680 const unsigned nN
= m_nN
;
681 for (unsigned j
=0; j
< nN
; ++j
) ptgt
[j
] -= aptr
[j
];
685 template<unsigned p_stride
>
686 void dLCP::pC_plusequals_s_times_qC (dReal
*p
, dReal s
, dReal
*q
)
688 const unsigned nC
= m_nC
;
689 dReal
*q_end
= q
+ nC
;
690 for (; q
!= q_end
; p
+= p_stride
, ++q
) {
695 void dLCP::pN_plusequals_s_times_qN (dReal
*p
, dReal s
, dReal
*q
)
697 const unsigned nC
= m_nC
;
698 dReal
*ptgt
= p
+ nC
, *qsrc
= q
+ nC
;
699 const unsigned nN
= m_nN
;
700 for (unsigned i
= 0; i
< nN
; ++i
) {
701 ptgt
[i
] += s
* qsrc
[i
];
705 void dLCP::solve1 (dReal
*a
, unsigned i
, bool dir_positive
, int only_transfer
)
707 // the `Dell' and `ell' that are computed here are saved. if index i is
708 // later added to the factorization then they can be reused.
710 // @@@ question: do we need to solve for entire delta_x??? yes, but
711 // only if an x goes below 0 during the step.
713 const unsigned nC
= m_nC
;
716 dReal
*Dell
= m_Dell
;
718 dReal
*aptr
= AROW(i
);
719 # ifdef NUB_OPTIMIZATIONS
720 // if nub>0, initial part of aptr[] is guaranteed unpermuted
721 const unsigned nub
= m_nub
;
723 for ( ; j
< nub
; ++j
) Dell
[j
] = aptr
[j
];
724 for ( ; j
< nC
; ++j
) Dell
[j
] = aptr
[C
[j
]];
726 for (unsigned j
= 0; j
< nC
; ++j
) Dell
[j
] = aptr
[C
[j
]];
729 solveL1Straight
<1>(m_L
, m_Dell
, nC
, m_nskip
);
731 dReal
*ell
= m_ell
, *Dell
= m_Dell
, *d
= m_d
;
732 for (unsigned j
= 0; j
< nC
; ++j
) ell
[j
] = Dell
[j
] * d
[j
];
735 if (!only_transfer
) {
736 dReal
*tmp
= m_tmp
, *ell
= m_ell
;
738 for (unsigned j
= 0; j
< nC
; ++j
) tmp
[j
] = ell
[j
];
740 solveL1Transposed
<1>(m_L
, tmp
, nC
, m_nskip
);
744 for (unsigned j
= 0; j
< nC
; ++j
) a
[C
[j
]] = -tmp
[j
];
748 for (unsigned j
= 0; j
< nC
; ++j
) a
[C
[j
]] = tmp
[j
];
755 void dLCP::unpermute_X()
758 dReal
*pairsbx
= m_pairsbx
;
759 const unsigned n
= m_n
;
760 for (unsigned j
= 0; j
< n
; ++j
) {
763 // p[j] = j; -- not going to be checked anymore anyway
764 dReal x_j
= (pairsbx
+ (sizeint
)j
* PBX__MAX
)[PBX_X
];
766 dxSwap(x_j
, (pairsbx
+ (sizeint
)k
* PBX__MAX
)[PBX_X
]);
768 unsigned orig_k
= p
[k
];
775 (pairsbx
+ (sizeint
)j
* PBX__MAX
)[PBX_X
] = x_j
;
780 void dLCP::unpermute_W()
782 memcpy (m_tmp
, m_w
, m_n
* sizeof(dReal
));
784 const unsigned *p
= m_p
;
785 dReal
*w
= m_w
, *tmp
= m_tmp
;
786 const unsigned n
= m_n
;
787 for (unsigned j
= 0; j
< n
; ++j
) {
796 static void dxSolveLCP_AllUnbounded (dxWorldProcessMemArena
*memarena
, unsigned n
, dReal
*A
, dReal pairsbx
[PBX__MAX
]);
797 static void dxSolveLCP_Generic (dxWorldProcessMemArena
*memarena
, unsigned n
, dReal
*A
, dReal pairsbx
[PBX__MAX
],
798 dReal
*outer_w
/*=NULL*/, unsigned nub
, dReal pairslh
[PLH__MAX
], int *findex
);
801 void dxSolveLCP (dxWorldProcessMemArena
*memarena
, unsigned n
, dReal
*A
, dReal pairsbx
[PBX__MAX
],
802 dReal
*outer_w
/*=NULL*/, unsigned nub
, dReal pairslh
[PLH__MAX
], int *findex
)
806 dxSolveLCP_AllUnbounded (memarena
, n
, A
, pairsbx
);
810 dxSolveLCP_Generic (memarena
, n
, A
, pairsbx
, outer_w
, nub
, pairslh
, findex
);
814 //***************************************************************************
815 // if all the variables are unbounded then we can just factor, solve, and return
818 void dxSolveLCP_AllUnbounded (dxWorldProcessMemArena
*memarena
, unsigned n
, dReal
*A
, dReal pairsbx
[PBX__MAX
])
821 dAASSERT(pairsbx
!= NULL
);
824 transfer_b_to_x
<true>(pairsbx
, n
);
826 unsigned nskip
= dPAD(n
);
827 factorMatrixAsLDLT
<PBX__MAX
> (A
, pairsbx
+ PBX_B
, n
, nskip
);
828 solveEquationSystemWithLDLT
<PBX__MAX
, PBX__MAX
> (A
, pairsbx
+ PBX_B
, pairsbx
+ PBX_X
, n
, nskip
);
831 //***************************************************************************
832 // an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
835 void dxSolveLCP_Generic (dxWorldProcessMemArena
*memarena
, unsigned n
, dReal
*A
, dReal pairsbx
[PBX__MAX
],
836 dReal
*outer_w
/*=NULL*/, unsigned nub
, dReal pairslh
[PLH__MAX
], int *findex
)
838 dAASSERT (n
> 0 && A
&& pairsbx
&& pairslh
&& nub
>= 0 && nub
< n
);
841 // check restrictions on lo and hi
842 dReal
*endlh
= pairslh
+ (sizeint
)n
* PLH__MAX
;
843 for (dReal
*currlh
= pairslh
; currlh
!= endlh
; currlh
+= PLH__MAX
) dIASSERT (currlh
[PLH_LO
] <= 0 && currlh
[PLH_HI
] >= 0);
847 const unsigned nskip
= dPAD(n
);
848 dReal
*L
= memarena
->AllocateOveralignedArray
<dReal
> ((sizeint
)nskip
* n
, LMATRIX_ALIGNMENT
);
849 dReal
*d
= memarena
->AllocateArray
<dReal
> (n
);
850 dReal
*w
= outer_w
!= NULL
? outer_w
: memarena
->AllocateArray
<dReal
> (n
);
851 dReal
*delta_w
= memarena
->AllocateArray
<dReal
> (n
);
852 dReal
*delta_x
= memarena
->AllocateArray
<dReal
> (n
);
853 dReal
*Dell
= memarena
->AllocateArray
<dReal
> (n
);
854 dReal
*ell
= memarena
->AllocateArray
<dReal
> (n
);
856 dReal
**Arows
= memarena
->AllocateArray
<dReal
*> (n
);
858 dReal
**Arows
= NULL
;
860 unsigned *p
= memarena
->AllocateArray
<unsigned> (n
);
861 unsigned *C
= memarena
->AllocateArray
<unsigned> (n
);
863 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
864 bool *state
= memarena
->AllocateArray
<bool> (n
);
866 // create LCP object. note that tmp is set to delta_w to save space, this
867 // optimization relies on knowledge of how tmp is used, so be careful!
868 dLCP
lcp(n
, nskip
, nub
, A
, pairsbx
, w
, pairslh
, L
, d
, Dell
, ell
, delta_w
, state
, findex
, p
, C
, Arows
);
869 unsigned adj_nub
= lcp
.getNub();
871 // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
872 // LCP conditions then i is added to the appropriate index set. otherwise
873 // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
874 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
875 // while driving x(i) we maintain the LCP conditions on the other variables
876 // 0..i-1. we do this by watching out for other x(i),w(i) values going
877 // outside the valid region, and then switching them between index sets
878 // when that happens.
880 bool hit_first_friction_index
= false;
881 for (unsigned i
= adj_nub
; i
< n
; ++i
) {
882 bool s_error
= false;
883 // the index i is the driving index and indexes i+1..n-1 are "dont care",
884 // i.e. when we make changes to the system those x's will be zero and we
885 // don't care what happens to those w's. in other words, we only consider
886 // an (i+1)*(i+1) sub-problem of A*x=b+w.
888 // if we've hit the first friction index, we have to compute the lo and
889 // hi values based on the values of x already computed. we have been
890 // permuting the indexes, so the values stored in the findex vector are
891 // no longer valid. thus we have to temporarily unpermute the x vector.
892 // for the purposes of this computation, 0*infinity = 0 ... so if the
893 // contact constraint's normal force is 0, there should be no tangential
896 if (!hit_first_friction_index
&& findex
&& findex
[i
] >= 0) {
897 // un-permute x into delta_w, which is not being used at the moment
898 for (unsigned j
= 0; j
< n
; ++j
) delta_w
[p
[j
]] = (pairsbx
+ (sizeint
)j
* PBX__MAX
)[PBX_X
];
900 // set lo and hi values
901 for (unsigned k
= i
; k
< n
; ++k
) {
902 dReal
*currlh
= pairslh
+ (sizeint
)k
* PLH__MAX
;
903 dReal wfk
= delta_w
[findex
[k
]];
909 currlh
[PLH_HI
] = dFabs (currlh
[PLH_HI
] * wfk
);
910 currlh
[PLH_LO
] = -currlh
[PLH_HI
];
913 hit_first_friction_index
= true;
916 // thus far we have not even been computing the w values for indexes
917 // greater than i, so compute w[i] now.
918 dReal wPrep
= lcp
.AiC_times_qC
<PBX__MAX
> (i
, pairsbx
+ PBX_X
) + lcp
.AiN_times_qN
<PBX__MAX
> (i
, pairsbx
+ PBX_X
);
920 dReal
*currbx
= pairsbx
+ (sizeint
)i
* PBX__MAX
;
922 w
[i
] = wPrep
- currbx
[PBX_B
];
924 // if lo=hi=0 (which can happen for tangential friction when normals are
925 // 0) then the index will be assigned to set N with some state. however,
926 // set C's line has zero size, so the index will always remain in set N.
927 // with the "normal" switching logic, if w changed sign then the index
928 // would have to switch to set C and then back to set N with an inverted
929 // state. this is pointless, and also computationally expensive. to
930 // prevent this from happening, we use the rule that indexes with lo=hi=0
931 // will never be checked for set changes. this means that the state for
932 // these indexes may be incorrect, but that doesn't matter.
934 dReal
*currlh
= pairslh
+ (sizeint
)i
* PLH__MAX
;
936 // see if x(i),w(i) is in a valid region
937 if (currlh
[PLH_LO
] == 0 && w
[i
] >= 0) {
938 lcp
.transfer_i_to_N (i
);
941 else if (currlh
[PLH_HI
] == 0 && w
[i
] <= 0) {
942 lcp
.transfer_i_to_N (i
);
945 else if (w
[i
] == 0) {
946 // this is a degenerate case. by the time we get to this test we know
947 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
948 // and similarly that hi > 0. this means that the line segment
949 // corresponding to set C is at least finite in extent, and we are on it.
950 // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
951 lcp
.solve1 (delta_x
, i
, false, 1);
953 lcp
.transfer_i_to_C (i
);
956 // we must push x(i) and w(i)
958 // find direction to push on x(i)
959 bool dir_positive
= (w
[i
] <= 0);
961 // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
962 lcp
.solve1 (delta_x
, i
, dir_positive
);
964 // note that delta_x[i] = (dir_positive ? 1 : -1), but we wont bother to set it
966 // compute: delta_w = A*delta_x ... note we only care about
967 // delta_w(N) and delta_w(i), the rest is ignored
968 lcp
.pN_equals_ANC_times_qC (delta_w
, delta_x
);
969 lcp
.pN_plusequals_ANi (delta_w
, i
, dir_positive
);
970 delta_w
[i
] = dir_positive
971 ? lcp
.AiC_times_qC
<1> (i
, delta_x
) + lcp
.Aii(i
)
972 : lcp
.AiC_times_qC
<1> (i
, delta_x
) - lcp
.Aii(i
);
974 // find largest step we can take (size=s), either to drive x(i),w(i)
975 // to the valid LCP region or to drive an already-valid variable
976 // outside the valid region.
978 int cmd
= 1; // index switching command
979 unsigned si
= 0; // si = index to switch if cmd>3
981 dReal s
= delta_w
[i
] != REAL(0.0)
983 : (w
[i
] != REAL(0.0) ? dCopySign(dInfinity
, -w
[i
]) : REAL(0.0));
986 if (currlh
[PLH_HI
] < dInfinity
) {
987 dReal s2
= (currlh
[PLH_HI
] - currbx
[PBX_X
]); // was (hi[i]-x[i])/dirf // step to x(i)=hi(i)
995 if (currlh
[PLH_LO
] > -dInfinity
) {
996 dReal s2
= (currbx
[PBX_X
] - currlh
[PLH_LO
]); // was (lo[i]-x[i])/dirf // step to x(i)=lo(i)
1005 const unsigned numN
= lcp
.numN();
1006 for (unsigned k
= 0; k
< numN
; ++k
) {
1007 const unsigned indexN_k
= lcp
.indexN(k
);
1008 if (!state
[indexN_k
] ? delta_w
[indexN_k
] < 0 : delta_w
[indexN_k
] > 0) {
1009 // don't bother checking if lo=hi=0
1010 dReal
*indexlh
= pairslh
+ (sizeint
)indexN_k
* PLH__MAX
;
1011 if (indexlh
[PLH_LO
] == 0 && indexlh
[PLH_HI
] == 0) continue;
1012 dReal s2
= -w
[indexN_k
] / delta_w
[indexN_k
];
1023 const unsigned numC
= lcp
.numC();
1024 for (unsigned k
= adj_nub
; k
< numC
; ++k
) {
1025 const unsigned indexC_k
= lcp
.indexC(k
);
1026 dReal
*indexlh
= pairslh
+ (sizeint
)indexC_k
* PLH__MAX
;
1027 if (delta_x
[indexC_k
] < 0 && indexlh
[PLH_LO
] > -dInfinity
) {
1028 dReal s2
= (indexlh
[PLH_LO
] - (pairsbx
+ (sizeint
)indexC_k
* PBX__MAX
)[PBX_X
]) / delta_x
[indexC_k
];
1035 if (delta_x
[indexC_k
] > 0 && indexlh
[PLH_HI
] < dInfinity
) {
1036 dReal s2
= (indexlh
[PLH_HI
] - (pairsbx
+ (sizeint
)indexC_k
* PBX__MAX
)[PBX_X
]) / delta_x
[indexC_k
];
1046 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
1047 // "C->NL","C->NH"};
1048 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
1050 // if s <= 0 then we've got a problem. if we just keep going then
1051 // we're going to get stuck in an infinite loop. instead, just cross
1052 // our fingers and exit with the current solution.
1053 if (s
<= REAL(0.0)) {
1054 dMessage (d_ERR_LCP
, "LCP internal error, s <= 0 (s=%.4e)",(double)s
);
1056 dxtSetZero
<PBX__MAX
>(currbx
+ PBX_X
, n
- i
);
1057 dxSetZero (w
+ i
, n
- i
);
1063 // apply x = x + s * delta_x
1064 lcp
.pC_plusequals_s_times_qC
<PBX__MAX
> (pairsbx
+ PBX_X
, s
, delta_x
);
1065 currbx
[PBX_X
] = dir_positive
1067 : currbx
[PBX_X
] - s
;
1069 // apply w = w + s * delta_w
1070 lcp
.pN_plusequals_s_times_qN (w
, s
, delta_w
);
1071 w
[i
] += s
* delta_w
[i
];
1074 // switch indexes between sets if necessary
1078 lcp
.transfer_i_to_C (i
);
1081 currbx
[PBX_X
] = currlh
[PLH_LO
];
1083 lcp
.transfer_i_to_N (i
);
1086 currbx
[PBX_X
] = currlh
[PLH_HI
];
1088 lcp
.transfer_i_to_N (i
);
1090 case 4: // keep going
1092 lcp
.transfer_i_from_N_to_C (si
);
1094 case 5: // keep going
1095 (pairsbx
+ (sizeint
)si
* PBX__MAX
)[PBX_X
] = (pairslh
+ (sizeint
)si
* PLH__MAX
)[PLH_LO
];
1097 tmpbuf
= memarena
->PeekBufferRemainder();
1098 lcp
.transfer_i_from_C_to_N (si
, tmpbuf
);
1100 case 6: // keep going
1101 (pairsbx
+ (sizeint
)si
* PBX__MAX
)[PBX_X
] = (pairslh
+ (sizeint
)si
* PLH__MAX
)[PLH_HI
];
1103 tmpbuf
= memarena
->PeekBufferRemainder();
1104 lcp
.transfer_i_from_C_to_N (si
, tmpbuf
);
1108 if (cmd
<= 3) break;
1115 } // for (unsigned i = adj_nub; i < n; ++i)
1117 // now we have to un-permute x and w
1118 if (outer_w
!= NULL
) {
1121 lcp
.unpermute_X(); // This destroys p[] and must be done last
1124 sizeint
dxEstimateSolveLCPMemoryReq(unsigned n
, bool outer_w_avail
)
1126 const unsigned nskip
= dPAD(n
);
1130 res
+= dOVERALIGNED_SIZE(sizeof(dReal
) * ((sizeint
)n
* nskip
), LMATRIX_ALIGNMENT
); // for L
1131 res
+= 5 * dEFFICIENT_SIZE(sizeof(dReal
) * n
); // for d, delta_w, delta_x, Dell, ell
1132 if (!outer_w_avail
) {
1133 res
+= dEFFICIENT_SIZE(sizeof(dReal
) * n
); // for w
1136 res
+= dEFFICIENT_SIZE(sizeof(dReal
*) * n
); // for Arows
1138 res
+= 2 * dEFFICIENT_SIZE(sizeof(unsigned) * n
); // for p, C
1139 res
+= dEFFICIENT_SIZE(sizeof(bool) * n
); // for state
1141 // Use n instead of nC as nC varies at runtime while n is greater or equal to nC
1142 sizeint lcp_transfer_req
= dLCP::estimate_transfer_i_from_C_to_N_mem_req(n
, nskip
);
1143 res
+= dEFFICIENT_SIZE(lcp_transfer_req
); // for dLCP::transfer_i_from_C_to_N
1149 //***************************************************************************
1150 // accuracy and timing test
1152 static sizeint
EstimateTestSolveLCPMemoryReq(unsigned n
)
1154 const unsigned nskip
= dPAD(n
);
1158 res
+= 2 * dEFFICIENT_SIZE(sizeof(dReal
) * ((sizeint
)n
* nskip
)); // for A, A2
1159 res
+= 7 * dEFFICIENT_SIZE(sizeof(dReal
) * n
); // for x, b, w, lo, hi, tmp1, tmp2
1160 res
+= dEFFICIENT_SIZE(sizeof(dReal
) * PBX__MAX
* n
); // for pairsbx,
1161 res
+= dEFFICIENT_SIZE(sizeof(dReal
) * PLH__MAX
* n
); // for pairslh
1163 res
+= dxEstimateSolveLCPMemoryReq(n
, true);
1168 extern "C" ODE_API
int dTestSolveLCP()
1170 const unsigned n
= 100;
1172 sizeint memreq
= EstimateTestSolveLCPMemoryReq(n
);
1173 dxWorldProcessMemArena
*arena
= dxAllocateTemporaryWorldProcessMemArena(memreq
, NULL
, NULL
);
1174 if (arena
== NULL
) {
1177 arena
->ResetState();
1179 unsigned i
,nskip
= dPAD(n
);
1181 const dReal tol
= REAL(1e-9);
1184 const dReal tol
= REAL(1e-4);
1186 printf ("dTestSolveLCP()\n");
1188 dReal
*A
= arena
->AllocateArray
<dReal
> (n
*nskip
);
1189 dReal
*x
= arena
->AllocateArray
<dReal
> (n
);
1190 dReal
*b
= arena
->AllocateArray
<dReal
> (n
);
1191 dReal
*w
= arena
->AllocateArray
<dReal
> (n
);
1192 dReal
*lo
= arena
->AllocateArray
<dReal
> (n
);
1193 dReal
*hi
= arena
->AllocateArray
<dReal
> (n
);
1195 dReal
*A2
= arena
->AllocateArray
<dReal
> (n
*nskip
);
1196 dReal
*pairsbx
= arena
->AllocateArray
<dReal
> (n
* PBX__MAX
);
1197 dReal
*pairslh
= arena
->AllocateArray
<dReal
> (n
* PLH__MAX
);
1199 dReal
*tmp1
= arena
->AllocateArray
<dReal
> (n
);
1200 dReal
*tmp2
= arena
->AllocateArray
<dReal
> (n
);
1202 double total_time
= 0;
1203 for (unsigned count
=0; count
< 1000; count
++) {
1204 BEGIN_STATE_SAVE(arena
, saveInner
) {
1206 // form (A,b) = a random positive definite LCP problem
1207 dMakeRandomMatrix (A2
,n
,n
,1.0);
1208 dMultiply2 (A
,A2
,A2
,n
,n
,n
);
1209 dMakeRandomMatrix (x
,n
,1,1.0);
1210 dMultiply0 (b
,A
,x
,n
,n
,1);
1211 for (i
=0; i
<n
; i
++) b
[i
] += (dRandReal()*REAL(0.2))-REAL(0.1);
1213 // choose `nub' in the range 0..n-1
1214 unsigned nub
= 50; //dRandInt (n);
1217 for (i
=0; i
<nub
; i
++) lo
[i
] = -dInfinity
;
1218 for (i
=0; i
<nub
; i
++) hi
[i
] = dInfinity
;
1219 //for (i=nub; i<n; i++) lo[i] = 0;
1220 //for (i=nub; i<n; i++) hi[i] = dInfinity;
1221 //for (i=nub; i<n; i++) lo[i] = -dInfinity;
1222 //for (i=nub; i<n; i++) hi[i] = 0;
1223 for (i
=nub
; i
<n
; i
++) lo
[i
] = -(dRandReal()*REAL(1.0))-REAL(0.01);
1224 for (i
=nub
; i
<n
; i
++) hi
[i
] = (dRandReal()*REAL(1.0))+REAL(0.01);
1226 // set a few limits to lo=hi=0
1228 for (i=0; i<10; i++) {
1229 unsigned j = dRandInt (n-nub) + nub;
1235 // solve the LCP. we must make copy of A,b,lo,hi (A2,b2,lo2,hi2) for
1236 // SolveLCP() to permute. also, we'll clear the upper triangle of A2 to
1237 // ensure that it doesn't get referenced (if it does, the answer will be
1240 memcpy (A2
, A
, n
* nskip
* sizeof(dReal
));
1241 dClearUpperTriangle (A2
, n
);
1242 for (i
= 0; i
!= n
; ++i
) {
1243 dReal
*currbx
= pairsbx
+ i
* PBX__MAX
;
1244 currbx
[PBX_B
] = b
[i
];
1247 for (i
= 0; i
!= n
; ++i
) {
1248 dReal
*currlh
= pairslh
+ i
* PLH__MAX
;
1249 currlh
[PLH_LO
] = lo
[i
];
1250 currlh
[PLH_HI
] = hi
[i
];
1255 dStopwatchReset (&sw
);
1256 dStopwatchStart (&sw
);
1258 dxSolveLCP (arena
,n
,A2
,pairsbx
,w
,nub
,pairslh
,0);
1260 dStopwatchStop (&sw
);
1261 double time
= dStopwatchTime(&sw
);
1263 double average
= total_time
/ double(count
+1) * 1000.0;
1265 for (i
= 0; i
!= n
; ++i
) {
1266 const dReal
*currbx
= pairsbx
+ i
* PBX__MAX
;
1267 x
[i
] = currbx
[PBX_X
];
1270 // check the solution
1272 dMultiply0 (tmp1
,A
,x
,n
,n
,1);
1273 for (i
=0; i
<n
; i
++) tmp2
[i
] = b
[i
] + w
[i
];
1274 dReal diff
= dMaxDifference (tmp1
,tmp2
,n
,1);
1275 // printf ("\tA*x = b+w, maximum difference = %.6e - %s (1)\n",diff,
1276 // diff > tol ? "FAILED" : "passed");
1277 if (diff
> tol
) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff
);
1278 unsigned n1
=0,n2
=0,n3
=0;
1279 for (i
=0; i
<n
; i
++) {
1280 if (x
[i
]==lo
[i
] && w
[i
] >= 0) {
1283 else if (x
[i
]==hi
[i
] && w
[i
] <= 0) {
1286 else if (x
[i
] >= lo
[i
] && x
[i
] <= hi
[i
] && w
[i
] == 0) {
1290 dDebug (0,"FAILED: i=%d x=%.4e w=%.4e lo=%.4e hi=%.4e",i
,
1291 x
[i
],w
[i
],lo
[i
],hi
[i
]);
1296 printf ("passed: NL=%3d NH=%3d C=%3d ",n1
,n2
,n3
);
1297 printf ("time=%10.3f ms avg=%10.4f\n",time
* 1000.0,average
);
1299 } END_STATE_SAVE(arena
, saveInner
);
1302 dxFreeTemporaryWorldProcessMemArena(arena
);