2 * KLU: a sparse LU factorization algorithm.
3 * Copyright (C) 2004-2009, Timothy A. Davis.
4 * Copyright (C) 2011-2012, Richard W. Lincoln.
5 * http://www.cise.ufl.edu/research/sparse/klu
7 * -------------------------------------------------------------------------
9 * KLU is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * KLU is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this Module; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 package edu
.ufl
.cise
.klu
.tdouble
;
27 import edu
.ufl
.cise
.klu
.common
.KLU_common
;
28 import edu
.ufl
.cise
.klu
.common
.KLU_numeric
;
29 import edu
.ufl
.cise
.klu
.common
.KLU_symbolic
;
31 import static edu
.ufl
.cise
.klu
.tdouble
.Dklu_dump
.klu_valid
;
34 * Solve Ax=b using the symbolic and numeric objects from KLU_analyze
35 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
36 * performed. Uses Numeric.Xwork as workspace (undefined on input and output),
37 * of size 4n double's (note that columns 2 to 4 of Xwork overlap with
40 public class Dklu_solve
extends Dklu_internal
{
46 * @param d leading dimension of B
47 * @param nrhs number of right-hand-sides
48 * @param B right-hand-side on input, overwritten with solution to Ax=b on
49 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
53 public static int klu_solve(KLU_symbolic Symbolic
, KLU_numeric Numeric
,
54 int d
, int nrhs
, double[] B
, KLU_common Common
)
57 double[] x
= new double[4] ;
59 double[] Offx
, X
, Bz
, Udiag
, Rs
;
60 int[] Q
, R
, Pnum
, Offp
, Offi
, Lip
, Uip
, Llen
, Ulen
;
62 int k1
, k2
, nk
, k
, block
, pend
, n
, p
, nblocks
, chunk
, nr
, i
;
64 /* ---------------------------------------------------------------------- */
66 /* ---------------------------------------------------------------------- */
72 if (Numeric
== null || Symbolic
== null || d
< Symbolic
.n
|| nrhs
< 0 ||
75 Common
.status
= KLU_INVALID
;
78 Common
.status
= KLU_OK
;
80 /* ---------------------------------------------------------------------- */
81 /* get the contents of the Symbolic object */
82 /* ---------------------------------------------------------------------- */
86 nblocks
= Symbolic
.nblocks
;
90 /* ---------------------------------------------------------------------- */
91 /* get the contents of the Numeric object */
92 /* ---------------------------------------------------------------------- */
94 ASSERT (nblocks
== Numeric
.nblocks
) ;
101 Llen
= Numeric
.Llen
;
103 Ulen
= Numeric
.Ulen
;
104 LUbx
= Numeric
.LUbx
;
105 Udiag
= Numeric
.Udiag
;
110 ASSERT (klu_valid (n
, Offp
, Offi
, Offx
)) ;
112 /* ---------------------------------------------------------------------- */
113 /* solve in chunks of 4 columns at a time */
114 /* ---------------------------------------------------------------------- */
116 for (chunk
= 0 ; chunk
< nrhs
; chunk
+= 4)
119 /* ------------------------------------------------------------------ */
120 /* get the size of the current chunk */
121 /* ------------------------------------------------------------------ */
123 nr
= MIN (nrhs
- chunk
, 4) ;
125 /* ------------------------------------------------------------------ */
126 /* scale and permute the right hand side, X = P*(R\B) */
127 /* ------------------------------------------------------------------ */
138 for (k
= 0 ; k
< n
; k
++)
140 X
[k
] = Bz
[Pnum
[k
]] ;
146 for (k
= 0 ; k
< n
; k
++)
150 X
[2*k
+ 1] = Bz
[i
+ d
] ;
156 for (k
= 0 ; k
< n
; k
++)
160 X
[3*k
+ 1] = Bz
[i
+ d
] ;
161 X
[3*k
+ 2] = Bz
[i
+ d
*2] ;
167 for (k
= 0 ; k
< n
; k
++)
171 X
[4*k
+ 1] = Bz
[i
+ d
] ;
172 X
[4*k
+ 2] = Bz
[i
+ d
*2] ;
173 X
[4*k
+ 3] = Bz
[i
+ d
*3] ;
187 for (k
= 0 ; k
< n
; k
++)
189 X
[k
] = Bz
[Pnum
[k
]] / Rs
[k
] ;
190 //SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
196 for (k
= 0 ; k
< n
; k
++)
200 X
[2*k
] = Bz
[i
] / rs
;
201 //SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
202 X
[2*k
+ 1] = Bz
[i
+ d
] / rs
;
203 //SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
209 for (k
= 0 ; k
< n
; k
++)
213 X
[3*k
] = Bz
[i
] / rs
;
214 //SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
215 X
[3*k
+ 1] = Bz
[i
+ d
] / rs
;
216 //SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
217 X
[3*k
+ 2] = Bz
[i
+ d
*2] / rs
;
218 //SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
224 for (k
= 0 ; k
< n
; k
++)
228 X
[4*k
] = Bz
[i
] / rs
;
229 //SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
230 X
[4*k
+ 1] = Bz
[i
+ d
] / rs
;
231 //SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
232 X
[4*k
+ 2] = Bz
[i
+ d
*2] / rs
;
233 //SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
234 X
[4*k
+ 3] = Bz
[i
+ d
*3] / rs
;
235 //SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
241 /* ------------------------------------------------------------------ */
242 /* solve X = (L*U + Off)\X */
243 /* ------------------------------------------------------------------ */
245 for (block
= nblocks
-1 ; block
>= 0 ; block
--)
248 /* -------------------------------------------------------------- */
249 /* the block of size nk is from rows/columns k1 to k2-1 */
250 /* -------------------------------------------------------------- */
255 PRINTF ("solve %d, k1 %d k2-1 %d nk %d\n", block
, k1
,k2
-1,nk
) ;
257 /* solve the block system */
265 X
[k1
] = X
[k1
] / s
;
266 //DIV (X [k1], X [k1], s) ;
270 X
[2*k1
] = X
[2*k1
] / s
;
271 //DIV (X [2*k1], X [2*k1], s) ;
272 X
[2*k1
+ 1] = X
[2*k1
+ 1] / s
;
273 //DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
277 X
[3*k1
] = X
[3*k1
] / s
;
278 //DIV (X [3*k1], X [3*k1], s) ;
279 X
[3*k1
+ 1] = X
[3*k1
+ 1] / s
;
280 //DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
281 X
[3*k1
+ 2] = X
[3*k1
+ 2] / s
;
282 //DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
286 X
[4*k1
] = X
[4*k1
] / s
;
287 //DIV (X [4*k1], X [4*k1], s) ;
288 X
[4*k1
+ 1] = X
[4*k1
+ 1] / s
;
289 //DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
290 X
[4*k1
+ 2] = X
[4*k1
+ 2] / s
;
291 //DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
292 X
[4*k1
+ 3] = X
[4*k1
+ 3] / s
;
293 //DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
300 klu_lsolve (nk
, Lip
+ k1
, Llen
+ k1
,
301 LUbx
[block
], nr
, X
+ nr
*k1
) ;
302 klu_usolve (nk
, Uip
+ k1
, Ulen
+ k1
,
303 LUbx
[block
], Udiag
+ k1
, nr
, X
+ nr
*k1
) ;
306 /* -------------------------------------------------------------- */
307 /* block back-substitution for the off-diagonal-block entries */
308 /* -------------------------------------------------------------- */
317 for (k
= k1
; k
< k2
; k
++)
321 for (p
= Offp
[k
] ; p
< pend
; p
++)
323 X
[Offi
[p
]] -= Offx
[p
] * x
[0] ;
324 //MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
331 for (k
= k1
; k
< k2
; k
++)
335 x
[1] = X
[2*k
+ 1] ;
336 for (p
= Offp
[k
] ; p
< pend
; p
++)
340 X
[2*i
] -= offik
* x
[0] ;
341 //MULT_SUB (X [2*i], offik, x [0]) ;
342 X
[2*i
+ 1] -= offik
* x
[1] ;
343 //MULT_SUB (X [2*i + 1], offik, x [1]) ;
350 for (k
= k1
; k
< k2
; k
++)
354 x
[1] = X
[3*k
+ 1] ;
355 x
[2] = X
[3*k
+ 2] ;
356 for (p
= Offp
[k
] ; p
< pend
; p
++)
360 X
[3*i
] -= offik
* x
[0] ;
361 //MULT_SUB (X [3*i], offik, x [0]) ;
362 X
[3*i
+ 1] -= offik
* x
[1] ;
363 //MULT_SUB (X [3*i + 1], offik, x [1]) ;
364 X
[3*i
+ 2] -= offik
* x
[2] ;
365 //MULT_SUB (X [3*i + 2], offik, x [2]) ;
372 for (k
= k1
; k
< k2
; k
++)
376 x
[1] = X
[4*k
+ 1] ;
377 x
[2] = X
[4*k
+ 2] ;
378 x
[3] = X
[4*k
+ 3] ;
379 for (p
= Offp
[k
] ; p
< pend
; p
++)
383 X
[4*i
] -= offik
* x
[0] ;
384 //MULT_SUB (X [4*i], offik, x [0]) ;
385 X
[4*i
+ 1] -= offik
* x
[1] ;
386 //MULT_SUB (X [4*i + 1], offik, x [1]) ;
387 X
[4*i
+ 2] -= offik
* x
[2] ;
388 //MULT_SUB (X [4*i + 2], offik, x [2]) ;
389 X
[4*i
+ 3] -= offik
* x
[3] ;
390 //MULT_SUB (X [4*i + 3], offik, x [3]) ;
398 /* ------------------------------------------------------------------ */
399 /* permute the result, Bz = Q*X */
400 /* ------------------------------------------------------------------ */
407 for (k
= 0 ; k
< n
; k
++)
415 for (k
= 0 ; k
< n
; k
++)
419 Bz
[i
+ d
] = X
[2*k
+ 1] ;
425 for (k
= 0 ; k
< n
; k
++)
429 Bz
[i
+ d
] = X
[3*k
+ 1] ;
430 Bz
[i
+ d
*2] = X
[3*k
+ 2] ;
436 for (k
= 0 ; k
< n
; k
++)
440 Bz
[i
+ d
] = X
[4*k
+ 1] ;
441 Bz
[i
+ d
*2] = X
[4*k
+ 2] ;
442 Bz
[i
+ d
*3] = X
[4*k
+ 3] ;
447 /* ------------------------------------------------------------------ */
448 /* go to the next chunk of B */
449 /* ------------------------------------------------------------------ */