2 * KLU: a sparse LU factorization algorithm.
3 * Copyright (C) 2004-2009, Timothy A. Davis.
4 * Copyright (C) 2011-2012, Richard W. Lincoln.
5 * http://www.cise.ufl.edu/research/sparse/klu
7 * -------------------------------------------------------------------------
9 * KLU is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * KLU is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this Module; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 package edu
.ufl
.cise
.klu
.tdouble
;
27 import edu
.ufl
.cise
.klu
.common
.KLU_common
;
28 import edu
.ufl
.cise
.klu
.common
.KLU_numeric
;
29 import edu
.ufl
.cise
.klu
.common
.KLU_symbolic
;
31 import static edu
.ufl
.cise
.klu
.tdouble
.Dklu_dump
.klu_valid
;
32 import static edu
.ufl
.cise
.klu
.tdouble
.Dklu
.klu_ltsolve
;
33 import static edu
.ufl
.cise
.klu
.tdouble
.Dklu
.klu_utsolve
;
36 * Solve A'x=b using the symbolic and numeric objects from KLU_analyze
37 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
38 * performed. Uses Numeric.Xwork as workspace (undefined on input and output),
39 * of size 4n double's (note that columns 2 to 4 of Xwork overlap with
42 public class Dklu_tsolve
extends Dklu_internal
{
48 * @param d leading dimension of B
49 * @param nrhs number of right-hand-sides
50 * @param B right-hand-side on input, overwritten with solution to Ax=b on
51 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
54 public static int klu_tsolve(KLU_symbolic Symbolic
,
55 KLU_numeric Numeric
, int d
, int nrhs
,
56 double[] B
, int B_offset
, KLU_common Common
)
58 double[] x
= new double[4] ;
62 double[] Offx
, X
, Bz
, Udiag
;
63 int[] Q
, R
, Pnum
, Offp
, Offi
, Lip
, Uip
, Llen
, Ulen
;
65 int k1
, k2
, nk
, k
, block
, pend
, n
, p
, nblocks
, chunk
, nr
, i
;
67 /* ---------------------------------------------------------------------- */
69 /* ---------------------------------------------------------------------- */
75 if (Numeric
== null || Symbolic
== null || d
< Symbolic
.n
|| nrhs
< 0 ||
78 Common
.status
= KLU_INVALID
;
81 Common
.status
= KLU_OK
;
83 /* ---------------------------------------------------------------------- */
84 /* get the contents of the Symbolic object */
85 /* ---------------------------------------------------------------------- */
89 nblocks
= Symbolic
.nblocks
;
93 /* ---------------------------------------------------------------------- */
94 /* get the contents of the Numeric object */
95 /* ---------------------------------------------------------------------- */
97 ASSERT (nblocks
== Numeric
.nblocks
) ;
100 Offi
= Numeric
.Offi
;
101 Offx
= Numeric
.Offx
;
104 Llen
= Numeric
.Llen
;
106 Ulen
= Numeric
.Ulen
;
107 LUbx
= Numeric
.LUbx
;
108 Udiag
= Numeric
.Udiag
;
112 ASSERT (klu_valid (n
, Offp
, Offi
, Offx
)) ;
114 /* ---------------------------------------------------------------------- */
115 /* solve in chunks of 4 columns at a time */
116 /* ---------------------------------------------------------------------- */
118 for (chunk
= 0 ; chunk
< nrhs
; chunk
+= 4)
121 /* ------------------------------------------------------------------ */
122 /* get the size of the current chunk */
123 /* ------------------------------------------------------------------ */
125 nr
= MIN (nrhs
- chunk
, 4) ;
127 /* ------------------------------------------------------------------ */
128 /* permute the right hand side, X = Q'*B */
129 /* ------------------------------------------------------------------ */
136 for (k
= 0 ; k
< n
; k
++)
138 X
[k
] = Bz
[B_offset
+ Q
[k
]] ;
144 for (k
= 0 ; k
< n
; k
++)
147 X
[2*k
] = Bz
[B_offset
+ i
] ;
148 X
[2*k
+ 1] = Bz
[B_offset
+ i
+ d
] ;
154 for (k
= 0 ; k
< n
; k
++)
157 X
[3*k
] = Bz
[B_offset
+ i
] ;
158 X
[3*k
+ 1] = Bz
[B_offset
+ i
+ d
] ;
159 X
[3*k
+ 2] = Bz
[B_offset
+ i
+ d
*2] ;
165 for (k
= 0 ; k
< n
; k
++)
168 X
[4*k
] = Bz
[B_offset
+ i
] ;
169 X
[4*k
+ 1] = Bz
[B_offset
+ i
+ d
] ;
170 X
[4*k
+ 2] = Bz
[B_offset
+ i
+ d
*2] ;
171 X
[4*k
+ 3] = Bz
[B_offset
+ i
+ d
*3] ;
177 /* ------------------------------------------------------------------ */
178 /* solve X = (L*U + Off)'\X */
179 /* ------------------------------------------------------------------ */
181 for (block
= 0 ; block
< nblocks
; block
++)
184 /* -------------------------------------------------------------- */
185 /* the block of size nk is from rows/columns k1 to k2-1 */
186 /* -------------------------------------------------------------- */
191 PRINTF ("tsolve %d, k1 %d k2-1 %d nk %d\n", block
, k1
,k2
-1,nk
) ;
193 /* -------------------------------------------------------------- */
194 /* block back-substitution for the off-diagonal-block entries */
195 /* -------------------------------------------------------------- */
204 for (k
= k1
; k
< k2
; k
++)
207 for (p
= Offp
[k
] ; p
< pend
; p
++)
210 X
[k
] -= Offx
[p
] * X
[Offi
[p
]] ;
211 //MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
219 for (k
= k1
; k
< k2
; k
++)
223 x
[1] = X
[2*k
+ 1] ;
224 for (p
= Offp
[k
] ; p
< pend
; p
++)
230 x
[0] -= offik
* X
[2*i
] ;
231 //MULT_SUB (x [0], offik, X [2*i]) ;
232 x
[1] -= offik
* X
[2*i
+ 1] ;
233 //MULT_SUB (x [1], offik, X [2*i + 1]) ;
236 X
[2*k
+ 1] = x
[1] ;
242 for (k
= k1
; k
< k2
; k
++)
246 x
[1] = X
[3*k
+ 1] ;
247 x
[2] = X
[3*k
+ 2] ;
248 for (p
= Offp
[k
] ; p
< pend
; p
++)
254 x
[0] -= offik
* X
[3*i
] ;
255 //MULT_SUB (x [0], offik, X [3*i]) ;
256 x
[1] -= offik
* X
[3*i
+ 1] ;
257 //MULT_SUB (x [1], offik, X [3*i + 1]) ;
258 x
[2] -= offik
* X
[3*i
+ 2] ;
259 //MULT_SUB (x [2], offik, X [3*i + 2]) ;
262 X
[3*k
+ 1] = x
[1] ;
263 X
[3*k
+ 2] = x
[2] ;
269 for (k
= k1
; k
< k2
; k
++)
273 x
[1] = X
[4*k
+ 1] ;
274 x
[2] = X
[4*k
+ 2] ;
275 x
[3] = X
[4*k
+ 3] ;
276 for (p
= Offp
[k
] ; p
< pend
; p
++)
282 x
[0] -= offik
* X
[4*i
] ;
283 //MULT_SUB (x [0], offik, X [4*i]) ;
284 x
[1] -= offik
* X
[4*i
+ 1] ;
285 //MULT_SUB (x [1], offik, X [4*i + 1]) ;
286 x
[2] -= offik
* X
[4*i
+ 2] ;
287 //MULT_SUB (x [2], offik, X [4*i + 2]) ;
288 x
[3] -= offik
* X
[4*i
+ 3] ;
289 //MULT_SUB (x [3], offik, X [4*i + 3]) ;
292 X
[4*k
+ 1] = x
[1] ;
293 X
[4*k
+ 2] = x
[2] ;
294 X
[4*k
+ 3] = x
[3] ;
300 /* -------------------------------------------------------------- */
301 /* solve the block system */
302 /* -------------------------------------------------------------- */
313 X
[k1
] = X
[k1
] / s
;
314 //DIV (X [k1], X [k1], s) ;
318 X
[2*k1
] = X
[2*k1
] / s
;
319 //DIV (X [2*k1], X [2*k1], s) ;
320 X
[2*k1
+ 1] = X
[2*k1
+ 1] / s
;
321 //DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
325 X
[3*k1
] = X
[3*k1
] / s
;
326 //DIV (X [3*k1], X [3*k1], s) ;
327 X
[3*k1
+ 1] = X
[3*k1
+ 1] / s
;
328 //DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
329 X
[3*k1
+ 2] = X
[3*k1
+ 2] / s
;
330 //DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
334 X
[4*k1
] = X
[4*k1
] / s
;
335 //DIV (X [4*k1], X [4*k1], s) ;
336 X
[4*k1
+ 1] = X
[4*k1
+ 1] / s
;
337 //DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
338 X
[4*k1
+ 2] = X
[4*k1
+ 2] / s
;
339 //DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
340 X
[4*k1
+ 3] = X
[4*k1
+ 3] / s
;
341 //DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
348 klu_utsolve (nk
, Uip
, k1
, Ulen
, k1
, LUbx
[block
],
349 Udiag
, k1
, nr
, X
, nr
*k1
) ;
350 klu_ltsolve (nk
, Lip
, k1
, Llen
, k1
, LUbx
[block
], nr
,
355 /* ------------------------------------------------------------------ */
356 /* scale and permute the result, Bz = P'(R\X) */
357 /* ------------------------------------------------------------------ */
368 for (k
= 0 ; k
< n
; k
++)
370 Bz
[B_offset
+ Pnum
[k
]] = X
[k
] ;
376 for (k
= 0 ; k
< n
; k
++)
379 Bz
[B_offset
+ i
] = X
[2*k
] ;
380 Bz
[B_offset
+ i
+ d
] = X
[2*k
+ 1] ;
386 for (k
= 0 ; k
< n
; k
++)
389 Bz
[B_offset
+ i
] = X
[3*k
] ;
390 Bz
[B_offset
+ i
+ d
] = X
[3*k
+ 1] ;
391 Bz
[B_offset
+ i
+ d
*2] = X
[3*k
+ 2] ;
397 for (k
= 0 ; k
< n
; k
++)
400 Bz
[B_offset
+ i
] = X
[4*k
] ;
401 Bz
[B_offset
+ i
+ d
] = X
[4*k
+ 1] ;
402 Bz
[B_offset
+ i
+ d
*2] = X
[4*k
+ 2] ;
403 Bz
[B_offset
+ i
+ d
*3] = X
[4*k
+ 3] ;
417 for (k
= 0 ; k
< n
; k
++)
419 Bz
[B_offset
+ Pnum
[k
]] = X
[k
] / Rs
[k
] ;
420 //SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
426 for (k
= 0 ; k
< n
; k
++)
430 Bz
[B_offset
+ i
] = X
[2*k
] / rs
;
431 //SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
432 Bz
[B_offset
+ i
+ d
] = X
[2*k
+ 1] / rs
;
433 //SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
439 for (k
= 0 ; k
< n
; k
++)
443 Bz
[B_offset
+ i
] = X
[3*k
] / rs
;
444 //SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
445 Bz
[B_offset
+ i
+ d
] = X
[3*k
+ 1] / rs
;
446 //SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
447 Bz
[B_offset
+ i
+ d
*2] = X
[3*k
+ 2] / rs
;
448 //SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
454 for (k
= 0 ; k
< n
; k
++)
458 Bz
[B_offset
+ i
] = X
[4*k
] / rs
;
459 //SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
460 Bz
[B_offset
+ i
+ d
] = X
[4*k
+ 1] / rs
;
461 //SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
462 Bz
[B_offset
+ i
+ d
*2] = X
[4*k
+ 2] / rs
;
463 //SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
464 Bz
[B_offset
+ i
+ d
*3] = X
[4*k
+ 3] / rs
;
465 //SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
471 /* ------------------------------------------------------------------ */
472 /* go to the next chunk of B */
473 /* ------------------------------------------------------------------ */