Fixing pointers in solvers.
[JKLU.git] / src / main / java / edu / ufl / cise / klu / tdouble / Dklu_tsolve.java
blob107ca5bb10d6ac19a6988c5e6193d03808f28029
1 /**
2 * KLU: a sparse LU factorization algorithm.
3 * Copyright (C) 2004-2009, Timothy A. Davis.
4 * Copyright (C) 2011-2012, Richard W. Lincoln.
5 * http://www.cise.ufl.edu/research/sparse/klu
7 * -------------------------------------------------------------------------
9 * KLU is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * KLU is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this Module; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 package edu.ufl.cise.klu.tdouble;
27 import edu.ufl.cise.klu.common.KLU_common;
28 import edu.ufl.cise.klu.common.KLU_numeric;
29 import edu.ufl.cise.klu.common.KLU_symbolic;
31 import static edu.ufl.cise.klu.tdouble.Dklu_dump.klu_valid;
32 import static edu.ufl.cise.klu.tdouble.Dklu.klu_ltsolve;
33 import static edu.ufl.cise.klu.tdouble.Dklu.klu_utsolve;
35 /**
36 * Solve A'x=b using the symbolic and numeric objects from KLU_analyze
37 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
38 * performed. Uses Numeric.Xwork as workspace (undefined on input and output),
39 * of size 4n double's (note that columns 2 to 4 of Xwork overlap with
40 * Numeric.Iwork).
42 public class Dklu_tsolve extends Dklu_internal {
44 /**
46 * @param Symbolic
47 * @param Numeric
48 * @param d leading dimension of B
49 * @param nrhs number of right-hand-sides
50 * @param B right-hand-side on input, overwritten with solution to Ax=b on
51 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
52 * @return
54 public static int klu_tsolve(KLU_symbolic Symbolic,
55 KLU_numeric Numeric, int d, int nrhs,
56 double[] B, int B_offset, KLU_common Common)
58 double[] x = new double[4] ;
59 double offik, s ;
60 double rs ;
61 double[] Rs ;
62 double[] Offx, X, Bz, Udiag ;
63 int[] Q, R, Pnum, Offp, Offi, Lip, Uip, Llen, Ulen ;
64 double[][] LUbx ;
65 int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
67 /* ---------------------------------------------------------------------- */
68 /* check inputs */
69 /* ---------------------------------------------------------------------- */
71 if (Common == null)
73 return (FALSE) ;
75 if (Numeric == null || Symbolic == null || d < Symbolic.n || nrhs < 0 ||
76 B == null)
78 Common.status = KLU_INVALID ;
79 return (FALSE) ;
81 Common.status = KLU_OK ;
83 /* ---------------------------------------------------------------------- */
84 /* get the contents of the Symbolic object */
85 /* ---------------------------------------------------------------------- */
87 Bz = B ;
88 n = Symbolic.n ;
89 nblocks = Symbolic.nblocks ;
90 Q = Symbolic.Q ;
91 R = Symbolic.R ;
93 /* ---------------------------------------------------------------------- */
94 /* get the contents of the Numeric object */
95 /* ---------------------------------------------------------------------- */
97 ASSERT (nblocks == Numeric.nblocks) ;
98 Pnum = Numeric.Pnum ;
99 Offp = Numeric.Offp ;
100 Offi = Numeric.Offi ;
101 Offx = Numeric.Offx ;
103 Lip = Numeric.Lip ;
104 Llen = Numeric.Llen ;
105 Uip = Numeric.Uip ;
106 Ulen = Numeric.Ulen ;
107 LUbx = Numeric.LUbx ;
108 Udiag = Numeric.Udiag ;
110 Rs = Numeric.Rs ;
111 X = Numeric.Xwork ;
112 ASSERT (klu_valid (n, Offp, Offi, Offx)) ;
114 /* ---------------------------------------------------------------------- */
115 /* solve in chunks of 4 columns at a time */
116 /* ---------------------------------------------------------------------- */
118 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
121 /* ------------------------------------------------------------------ */
122 /* get the size of the current chunk */
123 /* ------------------------------------------------------------------ */
125 nr = MIN (nrhs - chunk, 4) ;
127 /* ------------------------------------------------------------------ */
128 /* permute the right hand side, X = Q'*B */
129 /* ------------------------------------------------------------------ */
131 switch (nr)
134 case 1:
136 for (k = 0 ; k < n ; k++)
138 X [k] = Bz [B_offset + Q [k]] ;
140 break ;
142 case 2:
144 for (k = 0 ; k < n ; k++)
146 i = Q [k] ;
147 X [2*k ] = Bz [B_offset + i ] ;
148 X [2*k + 1] = Bz [B_offset + i + d ] ;
150 break ;
152 case 3:
154 for (k = 0 ; k < n ; k++)
156 i = Q [k] ;
157 X [3*k ] = Bz [B_offset + i ] ;
158 X [3*k + 1] = Bz [B_offset + i + d ] ;
159 X [3*k + 2] = Bz [B_offset + i + d*2] ;
161 break ;
163 case 4:
165 for (k = 0 ; k < n ; k++)
167 i = Q [k] ;
168 X [4*k ] = Bz [B_offset + i ] ;
169 X [4*k + 1] = Bz [B_offset + i + d ] ;
170 X [4*k + 2] = Bz [B_offset + i + d*2] ;
171 X [4*k + 3] = Bz [B_offset + i + d*3] ;
173 break ;
177 /* ------------------------------------------------------------------ */
178 /* solve X = (L*U + Off)'\X */
179 /* ------------------------------------------------------------------ */
181 for (block = 0 ; block < nblocks ; block++)
184 /* -------------------------------------------------------------- */
185 /* the block of size nk is from rows/columns k1 to k2-1 */
186 /* -------------------------------------------------------------- */
188 k1 = R [block] ;
189 k2 = R [block+1] ;
190 nk = k2 - k1 ;
191 PRINTF ("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk) ;
193 /* -------------------------------------------------------------- */
194 /* block back-substitution for the off-diagonal-block entries */
195 /* -------------------------------------------------------------- */
197 if (block > 0)
199 switch (nr)
202 case 1:
204 for (k = k1 ; k < k2 ; k++)
206 pend = Offp [k+1] ;
207 for (p = Offp [k] ; p < pend ; p++)
210 X [k] -= Offx [p] * X [Offi [p]] ;
211 //MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
215 break ;
217 case 2:
219 for (k = k1 ; k < k2 ; k++)
221 pend = Offp [k+1] ;
222 x [0] = X [2*k ] ;
223 x [1] = X [2*k + 1] ;
224 for (p = Offp [k] ; p < pend ; p++)
226 i = Offi [p] ;
228 offik = Offx [p] ;
230 x [0] -= offik * X [2*i] ;
231 //MULT_SUB (x [0], offik, X [2*i]) ;
232 x [1] -= offik * X [2*i + 1] ;
233 //MULT_SUB (x [1], offik, X [2*i + 1]) ;
235 X [2*k ] = x [0] ;
236 X [2*k + 1] = x [1] ;
238 break ;
240 case 3:
242 for (k = k1 ; k < k2 ; k++)
244 pend = Offp [k+1] ;
245 x [0] = X [3*k ] ;
246 x [1] = X [3*k + 1] ;
247 x [2] = X [3*k + 2] ;
248 for (p = Offp [k] ; p < pend ; p++)
250 i = Offi [p] ;
252 offik = Offx [p] ;
254 x [0] -= offik * X [3*i] ;
255 //MULT_SUB (x [0], offik, X [3*i]) ;
256 x [1] -= offik * X [3*i + 1] ;
257 //MULT_SUB (x [1], offik, X [3*i + 1]) ;
258 x [2] -= offik * X [3*i + 2] ;
259 //MULT_SUB (x [2], offik, X [3*i + 2]) ;
261 X [3*k ] = x [0] ;
262 X [3*k + 1] = x [1] ;
263 X [3*k + 2] = x [2] ;
265 break ;
267 case 4:
269 for (k = k1 ; k < k2 ; k++)
271 pend = Offp [k+1] ;
272 x [0] = X [4*k ] ;
273 x [1] = X [4*k + 1] ;
274 x [2] = X [4*k + 2] ;
275 x [3] = X [4*k + 3] ;
276 for (p = Offp [k] ; p < pend ; p++)
278 i = Offi [p] ;
280 offik = Offx [p] ;
282 x [0] -= offik * X [4*i] ;
283 //MULT_SUB (x [0], offik, X [4*i]) ;
284 x [1] -= offik * X [4*i + 1] ;
285 //MULT_SUB (x [1], offik, X [4*i + 1]) ;
286 x [2] -= offik * X [4*i + 2] ;
287 //MULT_SUB (x [2], offik, X [4*i + 2]) ;
288 x [3] -= offik * X [4*i + 3] ;
289 //MULT_SUB (x [3], offik, X [4*i + 3]) ;
291 X [4*k ] = x [0] ;
292 X [4*k + 1] = x [1] ;
293 X [4*k + 2] = x [2] ;
294 X [4*k + 3] = x [3] ;
296 break ;
300 /* -------------------------------------------------------------- */
301 /* solve the block system */
302 /* -------------------------------------------------------------- */
304 if (nk == 1)
307 s = Udiag [k1] ;
309 switch (nr)
312 case 1:
313 X [k1] = X [k1] / s ;
314 //DIV (X [k1], X [k1], s) ;
315 break ;
317 case 2:
318 X [2*k1] = X [2*k1] / s ;
319 //DIV (X [2*k1], X [2*k1], s) ;
320 X [2*k1 + 1] = X [2*k1 + 1] / s ;
321 //DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
322 break ;
324 case 3:
325 X [3*k1] = X [3*k1] / s ;
326 //DIV (X [3*k1], X [3*k1], s) ;
327 X [3*k1 + 1] = X [3*k1 + 1] / s ;
328 //DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
329 X [3*k1 + 2] = X [3*k1 + 2] / s ;
330 //DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
331 break ;
333 case 4:
334 X [4*k1] = X [4*k1] / s ;
335 //DIV (X [4*k1], X [4*k1], s) ;
336 X [4*k1 + 1] = X [4*k1 + 1] / s ;
337 //DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
338 X [4*k1 + 2] = X [4*k1 + 2] / s ;
339 //DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
340 X [4*k1 + 3] = X [4*k1 + 3] / s ;
341 //DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
342 break ;
346 else
348 klu_utsolve (nk, Uip, k1, Ulen, k1, LUbx [block],
349 Udiag, k1, nr, X, nr*k1) ;
350 klu_ltsolve (nk, Lip, k1, Llen, k1, LUbx [block], nr,
351 X, nr*k1) ;
355 /* ------------------------------------------------------------------ */
356 /* scale and permute the result, Bz = P'(R\X) */
357 /* ------------------------------------------------------------------ */
359 if (Rs == null)
362 /* no scaling */
363 switch (nr)
366 case 1:
368 for (k = 0 ; k < n ; k++)
370 Bz [B_offset + Pnum [k]] = X [k] ;
372 break ;
374 case 2:
376 for (k = 0 ; k < n ; k++)
378 i = Pnum [k] ;
379 Bz [B_offset + i ] = X [2*k ] ;
380 Bz [B_offset + i + d ] = X [2*k + 1] ;
382 break ;
384 case 3:
386 for (k = 0 ; k < n ; k++)
388 i = Pnum [k] ;
389 Bz [B_offset + i ] = X [3*k ] ;
390 Bz [B_offset + i + d ] = X [3*k + 1] ;
391 Bz [B_offset + i + d*2] = X [3*k + 2] ;
393 break ;
395 case 4:
397 for (k = 0 ; k < n ; k++)
399 i = Pnum [k] ;
400 Bz [B_offset + i ] = X [4*k ] ;
401 Bz [B_offset + i + d ] = X [4*k + 1] ;
402 Bz [B_offset + i + d*2] = X [4*k + 2] ;
403 Bz [B_offset + i + d*3] = X [4*k + 3] ;
405 break ;
409 else
412 switch (nr)
415 case 1:
417 for (k = 0 ; k < n ; k++)
419 Bz [B_offset + Pnum [k]] = X [k] / Rs [k] ;
420 //SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
422 break ;
424 case 2:
426 for (k = 0 ; k < n ; k++)
428 i = Pnum [k] ;
429 rs = Rs [k] ;
430 Bz [B_offset + i] = X [2*k] / rs ;
431 //SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
432 Bz [B_offset + i + d] = X [2*k + 1] / rs ;
433 //SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
435 break ;
437 case 3:
439 for (k = 0 ; k < n ; k++)
441 i = Pnum [k] ;
442 rs = Rs [k] ;
443 Bz [B_offset + i] = X [3*k] / rs ;
444 //SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
445 Bz [B_offset + i + d] = X [3*k + 1] / rs ;
446 //SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
447 Bz [B_offset + i + d*2] = X [3*k + 2] / rs ;
448 //SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
450 break ;
452 case 4:
454 for (k = 0 ; k < n ; k++)
456 i = Pnum [k] ;
457 rs = Rs [k] ;
458 Bz [B_offset + i] = X [4*k] / rs ;
459 //SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
460 Bz [B_offset + i + d] = X [4*k + 1] / rs ;
461 //SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
462 Bz [B_offset + i + d*2] = X [4*k + 2] / rs ;
463 //SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
464 Bz [B_offset + i + d*3] = X [4*k + 3] / rs ;
465 //SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
467 break ;
471 /* ------------------------------------------------------------------ */
472 /* go to the next chunk of B */
473 /* ------------------------------------------------------------------ */
475 B_offset += d*4 ;
477 return (TRUE) ;