Adding tdouble package.
[JKLU.git] / src / main / java / edu / ufl / cise / klu / tdouble / Dklu_solve.java
blob04a021efe4eabbd1f21a68798838c7353af43a2e
1 /**
2 * KLU: a sparse LU factorization algorithm.
3 * Copyright (C) 2004-2009, Timothy A. Davis.
4 * Copyright (C) 2011, Richard W. Lincoln.
5 * http://www.cise.ufl.edu/research/sparse/klu
7 * -------------------------------------------------------------------------
9 * KLU is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * KLU is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this Module; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 package edu.ufl.cise.klu.tdouble;
27 /**
28 * Solve Ax=b using the symbolic and numeric objects from KLU_analyze
29 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
30 * performed. Uses Numeric.Xwork as workspace (undefined on input and output),
31 * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
32 * Numeric.Iwork).
34 public class Dklu_solve {
36 /**
38 * @param Symbolic
39 * @param Numeric
40 * @param d leading dimension of B
41 * @param nrhs number of right-hand-sides
42 * @param B right-hand-side on input, overwritten with solution to Ax=b on
43 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
44 * @param Common
45 * @return
47 public static boolean klu_solve(KLU_symbolic Symbolic, KLU_numeric Numeric,
48 int d, int nrhs, double[] B, KLU_common Common)
50 Entry offik, s;
51 Entry[] x = new Entry[4];
52 double rs, Rs;
53 Entry Offx, X, Bz, Udiag;
54 int[] Q, R, Pnum, Offp, Offi, Lip, Uip, Llen, Ulen;
55 Unit[] LUbx;
56 int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i;
58 /* ------------------------------------------------------------------ */
59 /* check inputs */
60 /* ------------------------------------------------------------------ */
62 if (Common == null)
64 return false;
66 if (Numeric == null || Symbolic == null || d < Symbolic.n || nrhs < 0 ||
67 B == null)
69 Common.status = KLU_INVALID;
70 return false;
72 Common.status = KLU_OK;
74 /* ------------------------------------------------------------------ */
75 /* get the contents of the Symbolic object */
76 /* ------------------------------------------------------------------ */
78 Bz = (Entry) B;
79 n = Symbolic.n;
80 nblocks = Symbolic.nblocks;
81 Q = Symbolic.Q;
82 R = Symbolic.R;
84 /* ------------------------------------------------------------------ */
85 /* get the contents of the Numeric object */
86 /* ------------------------------------------------------------------ */
88 assert (nblocks == Numeric.nblocks);
89 Pnum = Numeric.Pnum;
90 Offp = Numeric.Offp;
91 Offi = Numeric.Offi;
92 Offx = (Entry) Numeric.Offx;
94 Lip = Numeric.Lip;
95 Llen = Numeric.Llen;
96 Uip = Numeric.Uip;
97 Ulen = Numeric.Ulen;
98 LUbx = (Unit[]) Numeric.LUbx;
99 Udiag = Numeric.Udiag;
101 Rs = Numeric.Rs;
102 X = (Entry) Numeric.Xwork;
104 assert Dklu_valid.klu_valid(n, Offp, Offi, Offx);
106 /* ------------------------------------------------------------------ */
107 /* solve in chunks of 4 columns at a time */
108 /* ------------------------------------------------------------------ */
110 for (chunk = 0; chunk < nrhs; chunk += 4)
113 /* -------------------------------------------------------------- */
114 /* get the size of the current chunk */
115 /* -------------------------------------------------------------- */
117 nr = min(nrhs - chunk, 4);
119 /* -------------------------------------------------------------- */
120 /* scale and permute the right hand side, X = P*(R\B) */
121 /* -------------------------------------------------------------- */
123 if (Rs == null)
126 /* no scaling */
127 switch (nr)
130 case 1:
132 for (k = 0; k < n; k++)
134 X[k] = Bz[Pnum[k]];
136 break;
138 case 2:
140 for (k = 0; k < n; k++)
142 i = Pnum[k];
143 X[2*k ] = Bz[i ];
144 X[2*k + 1] = Bz[i + d];
146 break;
148 case 3:
150 for (k = 0; k < n; k++)
152 i = Pnum[k];
153 X[3*k ] = Bz[i ];
154 X[3*k + 1] = Bz[i + d ];
155 X[3*k + 2] = Bz[i + d*2];
157 break;
159 case 4:
161 for (k = 0; k < n; k++)
163 i = Pnum[k];
164 X[4*k ] = Bz[i ];
165 X[4*k + 1] = Bz[i + d ];
166 X[4*k + 2] = Bz[i + d*2];
167 X[4*k + 3] = Bz[i + d*3];
169 break;
173 else
176 switch (nr)
179 case 1:
181 for (k = 0; k < n; k++)
183 scale_div_assign(X[k], Bz[Pnum[k]], Rs[k]);
185 break;
187 case 2:
189 for (k = 0; k < n; k++)
191 i = Pnum[k];
192 rs = Rs[k];
193 scale_div_assign(X[2*k], Bz[i], rs);
194 scale_div_assign(X[2*k + 1], Bz[i + d], rs);
196 break;
198 case 3:
200 for (k = 0; k < n; k++)
202 i = Pnum[k];
203 rs = Rs[k];
204 scale_div_assign(X[3*k], Bz[i], rs);
205 scale_div_assign(X[3*k + 1], Bz[i + d], rs);
206 scale_div_assign(X[3*k + 2], Bz[i + d*2], rs);
208 break;
210 case 4:
212 for (k = 0; k < n; k++)
214 i = Pnum[k];
215 rs = Rs[k];
216 scale_div_assign(X[4*k], Bz[i], rs);
217 scale_div_assign(X[4*k + 1], Bz[i + d], rs);
218 scale_div_assign(X[4*k + 2], Bz[i + d*2], rs);
219 scale_div_assign(X[4*k + 3], Bz[i + d*3], rs);
221 break;
226 /* -------------------------------------------------------------- */
227 /* solve X = (L*U + Off)\X */
228 /* -------------------------------------------------------------- */
230 for (block = nblocks-1; block >= 0; block--)
233 /* ---------------------------------------------------------- */
234 /* the block of size nk is from rows/columns k1 to k2-1 */
235 /* ---------------------------------------------------------- */
237 k1 = R[block];
238 k2 = R[block+1];
239 nk = k2 - k1;
240 printf("solve %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk);
242 /* solve the block system */
243 if (nk == 1)
245 s = Udiag[k1];
246 switch (nr)
249 case 1:
250 div(X[k1], X[k1], s);
251 break;
253 case 2:
254 div(X[2*k1], X[2*k1], s);
255 div(X[2*k1 + 1], X[2*k1 + 1], s);
256 break;
258 case 3:
259 div(X[3*k1], X[3*k1], s);
260 div(X[3*k1 + 1], X[3*k1 + 1], s);
261 div(X[3*k1 + 2], X[3*k1 + 2], s);
262 break;
264 case 4:
265 div(X[4*k1], X[4*k1], s);
266 div(X[4*k1 + 1], X[4*k1 + 1], s);
267 div(X[4*k1 + 2], X[4*k1 + 2], s);
268 div(X[4*k1 + 3], X[4*k1 + 3], s);
269 break;
273 else
275 Dklu_lsolve.klu_lsolve(nk, Lip + k1, Llen + k1, LUbx[block],
276 nr, X + nr*k1);
277 Dklu_usolve.klu_usolve(nk, Uip + k1, Ulen + k1, LUbx[block],
278 Udiag + k1, nr, X + nr*k1);
281 /* ---------------------------------------------------------- */
282 /* block back-substitution for the off-diagonal-block entries */
283 /* ---------------------------------------------------------- */
285 if (block > 0)
287 switch (nr)
290 case 1:
292 for (k = k1; k < k2; k++)
294 pend = Offp[k+1];
295 x[0] = X[k];
296 for (p = Offp[k]; p < pend; p++)
298 mult_sub(X[Offi[p]], Offx[p], x[0]);
301 break;
303 case 2:
305 for (k = k1; k < k2; k++)
307 pend = Offp[k+1];
308 x[0] = X[2*k ];
309 x[1] = X[2*k + 1];
310 for (p = Offp[k]; p < pend; p++)
312 i = Offi[p];
313 offik = Offx[p];
314 mult_sub(X[2*i], offik, x[0]);
315 mult_sub(X[2*i + 1], offik, x[1]);
318 break;
320 case 3:
322 for (k = k1; k < k2; k++)
324 pend = Offp[k+1];
325 x[0] = X[3*k ];
326 x[1] = X[3*k + 1];
327 x[2] = X[3*k + 2];
328 for (p = Offp[k]; p < pend; p++)
330 i = Offi[p];
331 offik = Offx[p];
332 mult_sub(X[3*i], offik, x[0]);
333 mult_sub(X[3*i + 1], offik, x[1]);
334 mult_sub(X[3*i + 2], offik, x[2]);
337 break;
339 case 4:
341 for (k = k1; k < k2; k++)
343 pend = Offp[k+1];
344 x[0] = X[4*k ];
345 x[1] = X[4*k + 1];
346 x[2] = X[4*k + 2];
347 x[3] = X[4*k + 3];
348 for (p = Offp[k]; p < pend; p++)
350 i = Offi[p];
351 offik = Offx[p];
352 mult_sub(X[4*i], offik, x[0]);
353 mult_sub(X[4*i + 1], offik, x[1]);
354 mult_sub(X[4*i + 2], offik, x[2]);
355 mult_sub(X[4*i + 3], offik, x[3]);
358 break;
364 /* -------------------------------------------------------------- */
365 /* permute the result, Bz = Q*X */
366 /* -------------------------------------------------------------- */
368 switch (nr) {
370 case 1:
372 for (k = 0; k < n; k++)
374 Bz[Q[k]] = X[k];
376 break;
378 case 2:
380 for (k = 0; k < n; k++)
382 i = Q[k];
383 Bz[i ] = X[2*k ];
384 Bz[i + d ] = X[2*k + 1];
386 break;
388 case 3:
390 for (k = 0; k < n; k++)
392 i = Q[k];
393 Bz [i ] = X[3*k ];
394 Bz [i + d ] = X[3*k + 1];
395 Bz [i + d*2] = X[3*k + 2];
397 break;
399 case 4:
401 for (k = 0; k < n; k++)
403 i = Q[k];
404 Bz [i ] = X[4*k ];
405 Bz [i + d ] = X[4*k + 1];
406 Bz [i + d*2] = X[4*k + 2];
407 Bz [i + d*3] = X[4*k + 3];
409 break;
412 /* -------------------------------------------------------------- */
413 /* go to the next chunk of B */
414 /* -------------------------------------------------------------- */
416 Bz += d*4;
418 return true;