2 * KLU: a sparse LU factorization algorithm.
3 * Copyright (C) 2004-2009, Timothy A. Davis.
4 * Copyright (C) 2011-2012, Richard W. Lincoln.
5 * http://www.cise.ufl.edu/research/sparse/klu
7 * -------------------------------------------------------------------------
9 * KLU is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * KLU is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this Module; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 package edu
.ufl
.cise
.klu
.tdouble
;
27 import edu
.ufl
.cise
.klu
.common
.KLU_common
;
28 import edu
.ufl
.cise
.klu
.common
.KLU_numeric
;
29 import edu
.ufl
.cise
.klu
.common
.KLU_symbolic
;
31 import static edu
.ufl
.cise
.klu
.tdouble
.Dklu_dump
.klu_valid
;
32 import static edu
.ufl
.cise
.klu
.tdouble
.Dklu
.klu_ltsolve
;
33 import static edu
.ufl
.cise
.klu
.tdouble
.Dklu
.klu_utsolve
;
36 * Solve A'x=b using the symbolic and numeric objects from KLU_analyze
37 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
38 * performed. Uses Numeric.Xwork as workspace (undefined on input and output),
39 * of size 4n double's (note that columns 2 to 4 of Xwork overlap with
42 public class Dklu_tsolve
extends Dklu_internal
{
48 * @param d leading dimension of B
49 * @param nrhs number of right-hand-sides
50 * @param B right-hand-side on input, overwritten with solution to Ax=b on
51 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
54 public static int klu_tsolve(KLU_symbolic Symbolic
,
55 KLU_numeric Numeric
, int d
, int nrhs
,
56 double[] B
, int B_offset
, KLU_common Common
)
58 double[] x
= new double[4] ;
62 double[] Offx
, X
, Bz
, Udiag
;
63 int[] Q
, R
, Pnum
, Offp
, Offi
, Lip
, Uip
, Llen
, Ulen
;
65 int k1
, k2
, nk
, k
, block
, pend
, n
, p
, nblocks
, chunk
, nr
, i
;
67 /* ---------------------------------------------------------------------- */
69 /* ---------------------------------------------------------------------- */
75 if (Numeric
== null || Symbolic
== null || d
< Symbolic
.n
|| nrhs
< 0 ||
78 Common
.status
= KLU_INVALID
;
81 Common
.status
= KLU_OK
;
83 /* ---------------------------------------------------------------------- */
84 /* get the contents of the Symbolic object */
85 /* ---------------------------------------------------------------------- */
89 nblocks
= Symbolic
.nblocks
;
93 /* ---------------------------------------------------------------------- */
94 /* get the contents of the Numeric object */
95 /* ---------------------------------------------------------------------- */
97 ASSERT (nblocks
== Numeric
.nblocks
) ;
100 Offi
= Numeric
.Offi
;
101 Offx
= Numeric
.Offx
;
104 Llen
= Numeric
.Llen
;
106 Ulen
= Numeric
.Ulen
;
107 LUbx
= Numeric
.LUbx
;
108 Udiag
= Numeric
.Udiag
;
112 if (!NDEBUG
) ASSERT (klu_valid (n
, Offp
, Offi
, Offx
)) ;
114 /* ---------------------------------------------------------------------- */
115 /* solve in chunks of 4 columns at a time */
116 /* ---------------------------------------------------------------------- */
118 for (chunk
= 0 ; chunk
< nrhs
; chunk
+= 4)
121 /* ------------------------------------------------------------------ */
122 /* get the size of the current chunk */
123 /* ------------------------------------------------------------------ */
125 nr
= MIN (nrhs
- chunk
, 4) ;
127 /* ------------------------------------------------------------------ */
128 /* permute the right hand side, X = Q'*B */
129 /* ------------------------------------------------------------------ */
136 for (k
= 0 ; k
< n
; k
++)
138 X
[k
] = Bz
[B_offset
+ Q
[k
]] ;
144 for (k
= 0 ; k
< n
; k
++)
147 X
[2*k
] = Bz
[B_offset
+ i
] ;
148 X
[2*k
+ 1] = Bz
[B_offset
+ i
+ d
] ;
154 for (k
= 0 ; k
< n
; k
++)
157 X
[3*k
] = Bz
[B_offset
+ i
] ;
158 X
[3*k
+ 1] = Bz
[B_offset
+ i
+ d
] ;
159 X
[3*k
+ 2] = Bz
[B_offset
+ i
+ d
*2] ;
165 for (k
= 0 ; k
< n
; k
++)
168 X
[4*k
] = Bz
[B_offset
+ i
] ;
169 X
[4*k
+ 1] = Bz
[B_offset
+ i
+ d
] ;
170 X
[4*k
+ 2] = Bz
[B_offset
+ i
+ d
*2] ;
171 X
[4*k
+ 3] = Bz
[B_offset
+ i
+ d
*3] ;
177 /* ------------------------------------------------------------------ */
178 /* solve X = (L*U + Off)'\X */
179 /* ------------------------------------------------------------------ */
181 for (block
= 0 ; block
< nblocks
; block
++)
184 /* -------------------------------------------------------------- */
185 /* the block of size nk is from rows/columns k1 to k2-1 */
186 /* -------------------------------------------------------------- */
191 PRINTF ("tsolve %d, k1 %d k2-1 %d nk %d\n", block
, k1
,k2
-1,nk
) ;
193 /* -------------------------------------------------------------- */
194 /* block back-substitution for the off-diagonal-block entries */
195 /* -------------------------------------------------------------- */
204 for (k
= k1
; k
< k2
; k
++)
207 for (p
= Offp
[k
] ; p
< pend
; p
++)
210 //MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
211 X
[k
] -= Offx
[p
] * X
[Offi
[p
]] ;
219 for (k
= k1
; k
< k2
; k
++)
223 x
[1] = X
[2*k
+ 1] ;
224 for (p
= Offp
[k
] ; p
< pend
; p
++)
230 //MULT_SUB (x [0], offik, X [2*i]) ;
231 x
[0] -= offik
* X
[2*i
] ;
232 //MULT_SUB (x [1], offik, X [2*i + 1]) ;
233 x
[1] -= offik
* X
[2*i
+ 1] ;
236 X
[2*k
+ 1] = x
[1] ;
242 for (k
= k1
; k
< k2
; k
++)
246 x
[1] = X
[3*k
+ 1] ;
247 x
[2] = X
[3*k
+ 2] ;
248 for (p
= Offp
[k
] ; p
< pend
; p
++)
254 //MULT_SUB (x [0], offik, X [3*i]) ;
255 x
[0] -= offik
* X
[3*i
] ;
256 //MULT_SUB (x [1], offik, X [3*i + 1]) ;
257 x
[1] -= offik
* X
[3*i
+ 1] ;
258 //MULT_SUB (x [2], offik, X [3*i + 2]) ;
259 x
[2] -= offik
* X
[3*i
+ 2] ;
262 X
[3*k
+ 1] = x
[1] ;
263 X
[3*k
+ 2] = x
[2] ;
269 for (k
= k1
; k
< k2
; k
++)
273 x
[1] = X
[4*k
+ 1] ;
274 x
[2] = X
[4*k
+ 2] ;
275 x
[3] = X
[4*k
+ 3] ;
276 for (p
= Offp
[k
] ; p
< pend
; p
++)
282 //MULT_SUB (x [0], offik, X [4*i]) ;
283 x
[0] -= offik
* X
[4*i
] ;
284 //MULT_SUB (x [1], offik, X [4*i + 1]) ;
285 x
[1] -= offik
* X
[4*i
+ 1] ;
286 //MULT_SUB (x [2], offik, X [4*i + 2]) ;
287 x
[2] -= offik
* X
[4*i
+ 2] ;
288 //MULT_SUB (x [3], offik, X [4*i + 3]) ;
289 x
[3] -= offik
* X
[4*i
+ 3] ;
292 X
[4*k
+ 1] = x
[1] ;
293 X
[4*k
+ 2] = x
[2] ;
294 X
[4*k
+ 3] = x
[3] ;
300 /* -------------------------------------------------------------- */
301 /* solve the block system */
302 /* -------------------------------------------------------------- */
313 //DIV (X [k1], X [k1], s) ;
314 X
[k1
] = X
[k1
] / s
;
318 //DIV (X [2*k1], X [2*k1], s) ;
319 X
[2*k1
] = X
[2*k1
] / s
;
320 //DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
321 X
[2*k1
+ 1] = X
[2*k1
+ 1] / s
;
325 //DIV (X [3*k1], X [3*k1], s) ;
326 X
[3*k1
] = X
[3*k1
] / s
;
327 //DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
328 X
[3*k1
+ 1] = X
[3*k1
+ 1] / s
;
329 //DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
330 X
[3*k1
+ 2] = X
[3*k1
+ 2] / s
;
334 //DIV (X [4*k1], X [4*k1], s) ;
335 X
[4*k1
] = X
[4*k1
] / s
;
336 //DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
337 X
[4*k1
+ 1] = X
[4*k1
+ 1] / s
;
338 //DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
339 X
[4*k1
+ 2] = X
[4*k1
+ 2] / s
;
340 //DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
341 X
[4*k1
+ 3] = X
[4*k1
+ 3] / s
;
348 klu_utsolve (nk
, Uip
, k1
, Ulen
, k1
, LUbx
[block
],
349 Udiag
, k1
, nr
, X
, nr
*k1
) ;
350 klu_ltsolve (nk
, Lip
, k1
, Llen
, k1
, LUbx
[block
], nr
,
355 /* ------------------------------------------------------------------ */
356 /* scale and permute the result, Bz = P'(R\X) */
357 /* ------------------------------------------------------------------ */
368 for (k
= 0 ; k
< n
; k
++)
370 Bz
[B_offset
+ Pnum
[k
]] = X
[k
] ;
376 for (k
= 0 ; k
< n
; k
++)
379 Bz
[B_offset
+ i
] = X
[2*k
] ;
380 Bz
[B_offset
+ i
+ d
] = X
[2*k
+ 1] ;
386 for (k
= 0 ; k
< n
; k
++)
389 Bz
[B_offset
+ i
] = X
[3*k
] ;
390 Bz
[B_offset
+ i
+ d
] = X
[3*k
+ 1] ;
391 Bz
[B_offset
+ i
+ d
*2] = X
[3*k
+ 2] ;
397 for (k
= 0 ; k
< n
; k
++)
400 Bz
[B_offset
+ i
] = X
[4*k
] ;
401 Bz
[B_offset
+ i
+ d
] = X
[4*k
+ 1] ;
402 Bz
[B_offset
+ i
+ d
*2] = X
[4*k
+ 2] ;
403 Bz
[B_offset
+ i
+ d
*3] = X
[4*k
+ 3] ;
417 for (k
= 0 ; k
< n
; k
++)
419 //SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
420 Bz
[B_offset
+ Pnum
[k
]] = X
[k
] / Rs
[k
] ;
426 for (k
= 0 ; k
< n
; k
++)
430 //SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
431 Bz
[B_offset
+ i
] = X
[2*k
] / rs
;
432 //SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
433 Bz
[B_offset
+ i
+ d
] = X
[2*k
+ 1] / rs
;
439 for (k
= 0 ; k
< n
; k
++)
443 //SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
444 Bz
[B_offset
+ i
] = X
[3*k
] / rs
;
445 //SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
446 Bz
[B_offset
+ i
+ d
] = X
[3*k
+ 1] / rs
;
447 //SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
448 Bz
[B_offset
+ i
+ d
*2] = X
[3*k
+ 2] / rs
;
454 for (k
= 0 ; k
< n
; k
++)
458 //SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
459 Bz
[B_offset
+ i
] = X
[4*k
] / rs
;
460 //SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
461 Bz
[B_offset
+ i
+ d
] = X
[4*k
+ 1] / rs
;
462 //SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
463 Bz
[B_offset
+ i
+ d
*2] = X
[4*k
+ 2] / rs
;
464 //SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
465 Bz
[B_offset
+ i
+ d
*3] = X
[4*k
+ 3] / rs
;
471 /* ------------------------------------------------------------------ */
472 /* go to the next chunk of B */
473 /* ------------------------------------------------------------------ */