Updating copyright statements.
[JKLU.git] / src / main / java / edu / ufl / cise / klu / tdouble / Dklu_solve.java
blob9efc31575f5434e31bfb117ba7fb083133180ac6
1 /**
2 * KLU: a sparse LU factorization algorithm.
3 * Copyright (C) 2004-2009, Timothy A. Davis.
4 * Copyright (C) 2011-2012, Richard W. Lincoln.
5 * http://www.cise.ufl.edu/research/sparse/klu
7 * -------------------------------------------------------------------------
9 * KLU is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * KLU is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this Module; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 package edu.ufl.cise.klu.tdouble;
27 import edu.ufl.cise.klu.common.KLU_common;
28 import edu.ufl.cise.klu.common.KLU_numeric;
29 import edu.ufl.cise.klu.common.KLU_symbolic;
31 import static edu.ufl.cise.klu.tdouble.Dklu_dump.klu_valid;
33 /**
34 * Solve Ax=b using the symbolic and numeric objects from KLU_analyze
35 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
36 * performed. Uses Numeric.Xwork as workspace (undefined on input and output),
37 * of size 4n double's (note that columns 2 to 4 of Xwork overlap with
38 * Numeric.Iwork).
40 public class Dklu_solve extends Dklu_internal {
42 /**
44 * @param Symbolic
45 * @param Numeric
46 * @param d leading dimension of B
47 * @param nrhs number of right-hand-sides
48 * @param B right-hand-side on input, overwritten with solution to Ax=b on
49 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
50 * @param Common
51 * @return
53 public static int klu_solve(KLU_symbolic Symbolic, KLU_numeric Numeric,
54 int d, int nrhs, double[] B, KLU_common Common)
56 double offik, s ;
57 double[] x = new double[4] ;
58 double rs ;
59 double[] Offx, X, Bz, Udiag, Rs ;
60 int[] Q, R, Pnum, Offp, Offi, Lip, Uip, Llen, Ulen ;
61 double[][] LUbx ;
62 int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
64 /* ---------------------------------------------------------------------- */
65 /* check inputs */
66 /* ---------------------------------------------------------------------- */
68 if (Common == null)
70 return (FALSE) ;
72 if (Numeric == null || Symbolic == null || d < Symbolic.n || nrhs < 0 ||
73 B == null)
75 Common.status = KLU_INVALID ;
76 return (FALSE) ;
78 Common.status = KLU_OK ;
80 /* ---------------------------------------------------------------------- */
81 /* get the contents of the Symbolic object */
82 /* ---------------------------------------------------------------------- */
84 Bz = B ;
85 n = Symbolic.n ;
86 nblocks = Symbolic.nblocks ;
87 Q = Symbolic.Q ;
88 R = Symbolic.R ;
90 /* ---------------------------------------------------------------------- */
91 /* get the contents of the Numeric object */
92 /* ---------------------------------------------------------------------- */
94 ASSERT (nblocks == Numeric.nblocks) ;
95 Pnum = Numeric.Pnum ;
96 Offp = Numeric.Offp ;
97 Offi = Numeric.Offi ;
98 Offx = Numeric.Offx ;
100 Lip = Numeric.Lip ;
101 Llen = Numeric.Llen ;
102 Uip = Numeric.Uip ;
103 Ulen = Numeric.Ulen ;
104 LUbx = Numeric.LUbx ;
105 Udiag = Numeric.Udiag ;
107 Rs = Numeric.Rs ;
108 X = Numeric.Xwork ;
110 ASSERT (klu_valid (n, Offp, Offi, Offx)) ;
112 /* ---------------------------------------------------------------------- */
113 /* solve in chunks of 4 columns at a time */
114 /* ---------------------------------------------------------------------- */
116 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
119 /* ------------------------------------------------------------------ */
120 /* get the size of the current chunk */
121 /* ------------------------------------------------------------------ */
123 nr = MIN (nrhs - chunk, 4) ;
125 /* ------------------------------------------------------------------ */
126 /* scale and permute the right hand side, X = P*(R\B) */
127 /* ------------------------------------------------------------------ */
129 if (Rs == null)
132 /* no scaling */
133 switch (nr)
136 case 1:
138 for (k = 0 ; k < n ; k++)
140 X [k] = Bz [Pnum [k]] ;
142 break ;
144 case 2:
146 for (k = 0 ; k < n ; k++)
148 i = Pnum [k] ;
149 X [2*k ] = Bz [i ] ;
150 X [2*k + 1] = Bz [i + d ] ;
152 break ;
154 case 3:
156 for (k = 0 ; k < n ; k++)
158 i = Pnum [k] ;
159 X [3*k ] = Bz [i ] ;
160 X [3*k + 1] = Bz [i + d ] ;
161 X [3*k + 2] = Bz [i + d*2] ;
163 break ;
165 case 4:
167 for (k = 0 ; k < n ; k++)
169 i = Pnum [k] ;
170 X [4*k ] = Bz [i ] ;
171 X [4*k + 1] = Bz [i + d ] ;
172 X [4*k + 2] = Bz [i + d*2] ;
173 X [4*k + 3] = Bz [i + d*3] ;
175 break ;
179 else
182 switch (nr)
185 case 1:
187 for (k = 0 ; k < n ; k++)
189 X [k] = Bz [Pnum [k]] / Rs [k] ;
190 //SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
192 break ;
194 case 2:
196 for (k = 0 ; k < n ; k++)
198 i = Pnum [k] ;
199 rs = Rs [k] ;
200 X [2*k] = Bz [i] / rs ;
201 //SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
202 X [2*k + 1] = Bz [i + d] / rs ;
203 //SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
205 break ;
207 case 3:
209 for (k = 0 ; k < n ; k++)
211 i = Pnum [k] ;
212 rs = Rs [k] ;
213 X [3*k] = Bz [i] / rs ;
214 //SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
215 X [3*k + 1] = Bz [i + d] / rs ;
216 //SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
217 X [3*k + 2] = Bz [i + d*2] / rs ;
218 //SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
220 break ;
222 case 4:
224 for (k = 0 ; k < n ; k++)
226 i = Pnum [k] ;
227 rs = Rs [k] ;
228 X [4*k] = Bz [i] / rs ;
229 //SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
230 X [4*k + 1] = Bz [i + d] / rs ;
231 //SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
232 X [4*k + 2] = Bz [i + d*2] / rs ;
233 //SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
234 X [4*k + 3] = Bz [i + d*3] / rs ;
235 //SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
237 break ;
241 /* ------------------------------------------------------------------ */
242 /* solve X = (L*U + Off)\X */
243 /* ------------------------------------------------------------------ */
245 for (block = nblocks-1 ; block >= 0 ; block--)
248 /* -------------------------------------------------------------- */
249 /* the block of size nk is from rows/columns k1 to k2-1 */
250 /* -------------------------------------------------------------- */
252 k1 = R [block] ;
253 k2 = R [block+1] ;
254 nk = k2 - k1 ;
255 PRINTF ("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk) ;
257 /* solve the block system */
258 if (nk == 1)
260 s = Udiag [k1] ;
261 switch (nr)
264 case 1:
265 X [k1] = X [k1] / s ;
266 //DIV (X [k1], X [k1], s) ;
267 break ;
269 case 2:
270 X [2*k1] = X [2*k1] / s ;
271 //DIV (X [2*k1], X [2*k1], s) ;
272 X [2*k1 + 1] = X [2*k1 + 1] / s ;
273 //DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
274 break ;
276 case 3:
277 X [3*k1] = X [3*k1] / s ;
278 //DIV (X [3*k1], X [3*k1], s) ;
279 X [3*k1 + 1] = X [3*k1 + 1] / s ;
280 //DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
281 X [3*k1 + 2] = X [3*k1 + 2] / s ;
282 //DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
283 break ;
285 case 4:
286 X [4*k1] = X [4*k1] / s ;
287 //DIV (X [4*k1], X [4*k1], s) ;
288 X [4*k1 + 1] = X [4*k1 + 1] / s ;
289 //DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
290 X [4*k1 + 2] = X [4*k1 + 2] / s ;
291 //DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
292 X [4*k1 + 3] = X [4*k1 + 3] / s ;
293 //DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
294 break ;
298 else
300 klu_lsolve (nk, Lip + k1, Llen + k1,
301 LUbx [block], nr, X + nr*k1) ;
302 klu_usolve (nk, Uip + k1, Ulen + k1,
303 LUbx [block], Udiag + k1, nr, X + nr*k1) ;
306 /* -------------------------------------------------------------- */
307 /* block back-substitution for the off-diagonal-block entries */
308 /* -------------------------------------------------------------- */
310 if (block > 0)
312 switch (nr)
315 case 1:
317 for (k = k1 ; k < k2 ; k++)
319 pend = Offp [k+1] ;
320 x [0] = X [k] ;
321 for (p = Offp [k] ; p < pend ; p++)
323 X [Offi [p]] -= Offx [p] * x [0] ;
324 //MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
327 break ;
329 case 2:
331 for (k = k1 ; k < k2 ; k++)
333 pend = Offp [k+1] ;
334 x [0] = X [2*k ] ;
335 x [1] = X [2*k + 1] ;
336 for (p = Offp [k] ; p < pend ; p++)
338 i = Offi [p] ;
339 offik = Offx [p] ;
340 X [2*i] -= offik * x [0] ;
341 //MULT_SUB (X [2*i], offik, x [0]) ;
342 X [2*i + 1] -= offik * x [1] ;
343 //MULT_SUB (X [2*i + 1], offik, x [1]) ;
346 break ;
348 case 3:
350 for (k = k1 ; k < k2 ; k++)
352 pend = Offp [k+1] ;
353 x [0] = X [3*k ] ;
354 x [1] = X [3*k + 1] ;
355 x [2] = X [3*k + 2] ;
356 for (p = Offp [k] ; p < pend ; p++)
358 i = Offi [p] ;
359 offik = Offx [p] ;
360 X [3*i] -= offik * x [0] ;
361 //MULT_SUB (X [3*i], offik, x [0]) ;
362 X [3*i + 1] -= offik * x [1] ;
363 //MULT_SUB (X [3*i + 1], offik, x [1]) ;
364 X [3*i + 2] -= offik * x [2] ;
365 //MULT_SUB (X [3*i + 2], offik, x [2]) ;
368 break ;
370 case 4:
372 for (k = k1 ; k < k2 ; k++)
374 pend = Offp [k+1] ;
375 x [0] = X [4*k ] ;
376 x [1] = X [4*k + 1] ;
377 x [2] = X [4*k + 2] ;
378 x [3] = X [4*k + 3] ;
379 for (p = Offp [k] ; p < pend ; p++)
381 i = Offi [p] ;
382 offik = Offx [p] ;
383 X [4*i] -= offik * x [0] ;
384 //MULT_SUB (X [4*i], offik, x [0]) ;
385 X [4*i + 1] -= offik * x [1] ;
386 //MULT_SUB (X [4*i + 1], offik, x [1]) ;
387 X [4*i + 2] -= offik * x [2] ;
388 //MULT_SUB (X [4*i + 2], offik, x [2]) ;
389 X [4*i + 3] -= offik * x [3] ;
390 //MULT_SUB (X [4*i + 3], offik, x [3]) ;
393 break ;
398 /* ------------------------------------------------------------------ */
399 /* permute the result, Bz = Q*X */
400 /* ------------------------------------------------------------------ */
402 switch (nr)
405 case 1:
407 for (k = 0 ; k < n ; k++)
409 Bz [Q [k]] = X [k] ;
411 break ;
413 case 2:
415 for (k = 0 ; k < n ; k++)
417 i = Q [k] ;
418 Bz [i ] = X [2*k ] ;
419 Bz [i + d ] = X [2*k + 1] ;
421 break ;
423 case 3:
425 for (k = 0 ; k < n ; k++)
427 i = Q [k] ;
428 Bz [i ] = X [3*k ] ;
429 Bz [i + d ] = X [3*k + 1] ;
430 Bz [i + d*2] = X [3*k + 2] ;
432 break ;
434 case 4:
436 for (k = 0 ; k < n ; k++)
438 i = Q [k] ;
439 Bz [i ] = X [4*k ] ;
440 Bz [i + d ] = X [4*k + 1] ;
441 Bz [i + d*2] = X [4*k + 2] ;
442 Bz [i + d*3] = X [4*k + 3] ;
444 break ;
447 /* ------------------------------------------------------------------ */
448 /* go to the next chunk of B */
449 /* ------------------------------------------------------------------ */
451 Bz += d*4 ;
453 return (TRUE) ;