2 * KLU: a sparse LU factorization algorithm.
3 * Copyright (C) 2004-2009, Timothy A. Davis.
4 * Copyright (C) 2011, Richard W. Lincoln.
5 * http://www.cise.ufl.edu/research/sparse/klu
7 * -------------------------------------------------------------------------
9 * KLU is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * KLU is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this Module; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 package edu
.ufl
.cise
.klu
.tdouble
;
28 * Solve Ax=b using the symbolic and numeric objects from KLU_analyze
29 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
30 * performed. Uses Numeric.Xwork as workspace (undefined on input and output),
31 * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
34 public class Dklu_solve
{
40 * @param d leading dimension of B
41 * @param nrhs number of right-hand-sides
42 * @param B right-hand-side on input, overwritten with solution to Ax=b on
43 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
47 public static boolean klu_solve(KLU_symbolic Symbolic
, KLU_numeric Numeric
,
48 int d
, int nrhs
, double[] B
, KLU_common Common
)
51 Entry
[] x
= new Entry
[4];
53 Entry Offx
, X
, Bz
, Udiag
;
54 int[] Q
, R
, Pnum
, Offp
, Offi
, Lip
, Uip
, Llen
, Ulen
;
56 int k1
, k2
, nk
, k
, block
, pend
, n
, p
, nblocks
, chunk
, nr
, i
;
58 /* ------------------------------------------------------------------ */
60 /* ------------------------------------------------------------------ */
66 if (Numeric
== null || Symbolic
== null || d
< Symbolic
.n
|| nrhs
< 0 ||
69 Common
.status
= KLU_INVALID
;
72 Common
.status
= KLU_OK
;
74 /* ------------------------------------------------------------------ */
75 /* get the contents of the Symbolic object */
76 /* ------------------------------------------------------------------ */
80 nblocks
= Symbolic
.nblocks
;
84 /* ------------------------------------------------------------------ */
85 /* get the contents of the Numeric object */
86 /* ------------------------------------------------------------------ */
88 assert (nblocks
== Numeric
.nblocks
);
92 Offx
= (Entry
) Numeric
.Offx
;
98 LUbx
= (Unit
[]) Numeric
.LUbx
;
99 Udiag
= Numeric
.Udiag
;
102 X
= (Entry
) Numeric
.Xwork
;
104 assert Dklu_valid
.klu_valid(n
, Offp
, Offi
, Offx
);
106 /* ------------------------------------------------------------------ */
107 /* solve in chunks of 4 columns at a time */
108 /* ------------------------------------------------------------------ */
110 for (chunk
= 0; chunk
< nrhs
; chunk
+= 4)
113 /* -------------------------------------------------------------- */
114 /* get the size of the current chunk */
115 /* -------------------------------------------------------------- */
117 nr
= min(nrhs
- chunk
, 4);
119 /* -------------------------------------------------------------- */
120 /* scale and permute the right hand side, X = P*(R\B) */
121 /* -------------------------------------------------------------- */
132 for (k
= 0; k
< n
; k
++)
140 for (k
= 0; k
< n
; k
++)
144 X
[2*k
+ 1] = Bz
[i
+ d
];
150 for (k
= 0; k
< n
; k
++)
154 X
[3*k
+ 1] = Bz
[i
+ d
];
155 X
[3*k
+ 2] = Bz
[i
+ d
*2];
161 for (k
= 0; k
< n
; k
++)
165 X
[4*k
+ 1] = Bz
[i
+ d
];
166 X
[4*k
+ 2] = Bz
[i
+ d
*2];
167 X
[4*k
+ 3] = Bz
[i
+ d
*3];
181 for (k
= 0; k
< n
; k
++)
183 scale_div_assign(X
[k
], Bz
[Pnum
[k
]], Rs
[k
]);
189 for (k
= 0; k
< n
; k
++)
193 scale_div_assign(X
[2*k
], Bz
[i
], rs
);
194 scale_div_assign(X
[2*k
+ 1], Bz
[i
+ d
], rs
);
200 for (k
= 0; k
< n
; k
++)
204 scale_div_assign(X
[3*k
], Bz
[i
], rs
);
205 scale_div_assign(X
[3*k
+ 1], Bz
[i
+ d
], rs
);
206 scale_div_assign(X
[3*k
+ 2], Bz
[i
+ d
*2], rs
);
212 for (k
= 0; k
< n
; k
++)
216 scale_div_assign(X
[4*k
], Bz
[i
], rs
);
217 scale_div_assign(X
[4*k
+ 1], Bz
[i
+ d
], rs
);
218 scale_div_assign(X
[4*k
+ 2], Bz
[i
+ d
*2], rs
);
219 scale_div_assign(X
[4*k
+ 3], Bz
[i
+ d
*3], rs
);
226 /* -------------------------------------------------------------- */
227 /* solve X = (L*U + Off)\X */
228 /* -------------------------------------------------------------- */
230 for (block
= nblocks
-1; block
>= 0; block
--)
233 /* ---------------------------------------------------------- */
234 /* the block of size nk is from rows/columns k1 to k2-1 */
235 /* ---------------------------------------------------------- */
240 printf("solve %d, k1 %d k2-1 %d nk %d\n", block
, k1
, k2
-1, nk
);
242 /* solve the block system */
250 div(X
[k1
], X
[k1
], s
);
254 div(X
[2*k1
], X
[2*k1
], s
);
255 div(X
[2*k1
+ 1], X
[2*k1
+ 1], s
);
259 div(X
[3*k1
], X
[3*k1
], s
);
260 div(X
[3*k1
+ 1], X
[3*k1
+ 1], s
);
261 div(X
[3*k1
+ 2], X
[3*k1
+ 2], s
);
265 div(X
[4*k1
], X
[4*k1
], s
);
266 div(X
[4*k1
+ 1], X
[4*k1
+ 1], s
);
267 div(X
[4*k1
+ 2], X
[4*k1
+ 2], s
);
268 div(X
[4*k1
+ 3], X
[4*k1
+ 3], s
);
275 Dklu_lsolve
.klu_lsolve(nk
, Lip
+ k1
, Llen
+ k1
, LUbx
[block
],
277 Dklu_usolve
.klu_usolve(nk
, Uip
+ k1
, Ulen
+ k1
, LUbx
[block
],
278 Udiag
+ k1
, nr
, X
+ nr
*k1
);
281 /* ---------------------------------------------------------- */
282 /* block back-substitution for the off-diagonal-block entries */
283 /* ---------------------------------------------------------- */
292 for (k
= k1
; k
< k2
; k
++)
296 for (p
= Offp
[k
]; p
< pend
; p
++)
298 mult_sub(X
[Offi
[p
]], Offx
[p
], x
[0]);
305 for (k
= k1
; k
< k2
; k
++)
310 for (p
= Offp
[k
]; p
< pend
; p
++)
314 mult_sub(X
[2*i
], offik
, x
[0]);
315 mult_sub(X
[2*i
+ 1], offik
, x
[1]);
322 for (k
= k1
; k
< k2
; k
++)
328 for (p
= Offp
[k
]; p
< pend
; p
++)
332 mult_sub(X
[3*i
], offik
, x
[0]);
333 mult_sub(X
[3*i
+ 1], offik
, x
[1]);
334 mult_sub(X
[3*i
+ 2], offik
, x
[2]);
341 for (k
= k1
; k
< k2
; k
++)
348 for (p
= Offp
[k
]; p
< pend
; p
++)
352 mult_sub(X
[4*i
], offik
, x
[0]);
353 mult_sub(X
[4*i
+ 1], offik
, x
[1]);
354 mult_sub(X
[4*i
+ 2], offik
, x
[2]);
355 mult_sub(X
[4*i
+ 3], offik
, x
[3]);
364 /* -------------------------------------------------------------- */
365 /* permute the result, Bz = Q*X */
366 /* -------------------------------------------------------------- */
372 for (k
= 0; k
< n
; k
++)
380 for (k
= 0; k
< n
; k
++)
384 Bz
[i
+ d
] = X
[2*k
+ 1];
390 for (k
= 0; k
< n
; k
++)
394 Bz
[i
+ d
] = X
[3*k
+ 1];
395 Bz
[i
+ d
*2] = X
[3*k
+ 2];
401 for (k
= 0; k
< n
; k
++)
405 Bz
[i
+ d
] = X
[4*k
+ 1];
406 Bz
[i
+ d
*2] = X
[4*k
+ 2];
407 Bz
[i
+ d
*3] = X
[4*k
+ 3];
412 /* -------------------------------------------------------------- */
413 /* go to the next chunk of B */
414 /* -------------------------------------------------------------- */