From 0f68e1c8c108c92fe90a661bb0b9466ed4952239 Mon Sep 17 00:00:00 2001 From: Richard Lincoln Date: Sat, 30 Jul 2011 19:50:12 +0100 Subject: [PATCH] Adding tdouble package. --- .../edu/ufl/cise/klu/tdcomplex/KLU_common.java | 2 +- .../java/edu/ufl/cise/klu/tdouble/Dklu_solve.java | 421 ++++++++++++++++++++ .../java/edu/ufl/cise/klu/tdouble/Dklu_sort.java | 171 ++++++++ .../java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java | 435 +++++++++++++++++++++ 4 files changed, 1028 insertions(+), 1 deletion(-) create mode 100644 src/main/java/edu/ufl/cise/klu/tdouble/Dklu_solve.java create mode 100644 src/main/java/edu/ufl/cise/klu/tdouble/Dklu_sort.java create mode 100644 src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java diff --git a/src/main/java/edu/ufl/cise/klu/tdcomplex/KLU_common.java b/src/main/java/edu/ufl/cise/klu/tdcomplex/KLU_common.java index 5889ede..4f1e859 100644 --- a/src/main/java/edu/ufl/cise/klu/tdcomplex/KLU_common.java +++ b/src/main/java/edu/ufl/cise/klu/tdcomplex/KLU_common.java @@ -76,5 +76,5 @@ public class KLU_common { NativeLong memusage ; /* current memory usage, in bytes */ NativeLong mempeak ; /* peak memory usage, in bytes */ - + } diff --git a/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_solve.java b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_solve.java new file mode 100644 index 0000000..04a021e --- /dev/null +++ b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_solve.java @@ -0,0 +1,421 @@ +/** + * KLU: a sparse LU factorization algorithm. + * Copyright (C) 2004-2009, Timothy A. Davis. + * Copyright (C) 2011, Richard W. Lincoln. + * http://www.cise.ufl.edu/research/sparse/klu + * + * ------------------------------------------------------------------------- + * + * KLU is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * KLU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this Module; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +package edu.ufl.cise.klu.tdouble; + +/** + * Solve Ax=b using the symbolic and numeric objects from KLU_analyze + * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is + * performed. Uses Numeric.Xwork as workspace (undefined on input and output), + * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with + * Numeric.Iwork). + */ +public class Dklu_solve { + + /** + * + * @param Symbolic + * @param Numeric + * @param d leading dimension of B + * @param nrhs number of right-hand-sides + * @param B right-hand-side on input, overwritten with solution to Ax=b on + * output. Size n*nrhs, in column-oriented form, with leading dimension d. + * @param Common + * @return + */ + public static boolean klu_solve(KLU_symbolic Symbolic, KLU_numeric Numeric, + int d, int nrhs, double[] B, KLU_common Common) + { + Entry offik, s; + Entry[] x = new Entry[4]; + double rs, Rs; + Entry Offx, X, Bz, Udiag; + int[] Q, R, Pnum, Offp, Offi, Lip, Uip, Llen, Ulen; + Unit[] LUbx; + int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i; + + /* ------------------------------------------------------------------ */ + /* check inputs */ + /* ------------------------------------------------------------------ */ + + if (Common == null) + { + return false; + } + if (Numeric == null || Symbolic == null || d < Symbolic.n || nrhs < 0 || + B == null) + { + Common.status = KLU_INVALID; + return false; + } + Common.status = KLU_OK; + + /* ------------------------------------------------------------------ */ + /* get the contents of the Symbolic object */ + /* ------------------------------------------------------------------ */ + + Bz = (Entry) B; + n = Symbolic.n; + nblocks = Symbolic.nblocks; + Q = Symbolic.Q; + R = Symbolic.R; + + /* ------------------------------------------------------------------ */ + /* get the contents of the Numeric object */ + /* ------------------------------------------------------------------ */ + + assert (nblocks == Numeric.nblocks); + Pnum = Numeric.Pnum; + Offp = Numeric.Offp; + Offi = Numeric.Offi; + Offx = (Entry) Numeric.Offx; + + Lip = Numeric.Lip; + Llen = Numeric.Llen; + Uip = Numeric.Uip; + Ulen = Numeric.Ulen; + LUbx = (Unit[]) Numeric.LUbx; + Udiag = Numeric.Udiag; + + Rs = Numeric.Rs; + X = (Entry) Numeric.Xwork; + + assert Dklu_valid.klu_valid(n, Offp, Offi, Offx); + + /* ------------------------------------------------------------------ */ + /* solve in chunks of 4 columns at a time */ + /* ------------------------------------------------------------------ */ + + for (chunk = 0; chunk < nrhs; chunk += 4) + { + + /* -------------------------------------------------------------- */ + /* get the size of the current chunk */ + /* -------------------------------------------------------------- */ + + nr = min(nrhs - chunk, 4); + + /* -------------------------------------------------------------- */ + /* scale and permute the right hand side, X = P*(R\B) */ + /* -------------------------------------------------------------- */ + + if (Rs == null) + { + + /* no scaling */ + switch (nr) + { + + case 1: + + for (k = 0; k < n; k++) + { + X[k] = Bz[Pnum[k]]; + } + break; + + case 2: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + X[2*k ] = Bz[i ]; + X[2*k + 1] = Bz[i + d]; + } + break; + + case 3: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + X[3*k ] = Bz[i ]; + X[3*k + 1] = Bz[i + d ]; + X[3*k + 2] = Bz[i + d*2]; + } + break; + + case 4: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + X[4*k ] = Bz[i ]; + X[4*k + 1] = Bz[i + d ]; + X[4*k + 2] = Bz[i + d*2]; + X[4*k + 3] = Bz[i + d*3]; + } + break; + } + + } + else + { + + switch (nr) + { + + case 1: + + for (k = 0; k < n; k++) + { + scale_div_assign(X[k], Bz[Pnum[k]], Rs[k]); + } + break; + + case 2: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + rs = Rs[k]; + scale_div_assign(X[2*k], Bz[i], rs); + scale_div_assign(X[2*k + 1], Bz[i + d], rs); + } + break; + + case 3: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + rs = Rs[k]; + scale_div_assign(X[3*k], Bz[i], rs); + scale_div_assign(X[3*k + 1], Bz[i + d], rs); + scale_div_assign(X[3*k + 2], Bz[i + d*2], rs); + } + break; + + case 4: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + rs = Rs[k]; + scale_div_assign(X[4*k], Bz[i], rs); + scale_div_assign(X[4*k + 1], Bz[i + d], rs); + scale_div_assign(X[4*k + 2], Bz[i + d*2], rs); + scale_div_assign(X[4*k + 3], Bz[i + d*3], rs); + } + break; + } + + } + + /* -------------------------------------------------------------- */ + /* solve X = (L*U + Off)\X */ + /* -------------------------------------------------------------- */ + + for (block = nblocks-1; block >= 0; block--) + { + + /* ---------------------------------------------------------- */ + /* the block of size nk is from rows/columns k1 to k2-1 */ + /* ---------------------------------------------------------- */ + + k1 = R[block]; + k2 = R[block+1]; + nk = k2 - k1; + printf("solve %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk); + + /* solve the block system */ + if (nk == 1) + { + s = Udiag[k1]; + switch (nr) + { + + case 1: + div(X[k1], X[k1], s); + break; + + case 2: + div(X[2*k1], X[2*k1], s); + div(X[2*k1 + 1], X[2*k1 + 1], s); + break; + + case 3: + div(X[3*k1], X[3*k1], s); + div(X[3*k1 + 1], X[3*k1 + 1], s); + div(X[3*k1 + 2], X[3*k1 + 2], s); + break; + + case 4: + div(X[4*k1], X[4*k1], s); + div(X[4*k1 + 1], X[4*k1 + 1], s); + div(X[4*k1 + 2], X[4*k1 + 2], s); + div(X[4*k1 + 3], X[4*k1 + 3], s); + break; + + } + } + else + { + Dklu_lsolve.klu_lsolve(nk, Lip + k1, Llen + k1, LUbx[block], + nr, X + nr*k1); + Dklu_usolve.klu_usolve(nk, Uip + k1, Ulen + k1, LUbx[block], + Udiag + k1, nr, X + nr*k1); + } + + /* ---------------------------------------------------------- */ + /* block back-substitution for the off-diagonal-block entries */ + /* ---------------------------------------------------------- */ + + if (block > 0) + { + switch (nr) + { + + case 1: + + for (k = k1; k < k2; k++) + { + pend = Offp[k+1]; + x[0] = X[k]; + for (p = Offp[k]; p < pend; p++) + { + mult_sub(X[Offi[p]], Offx[p], x[0]); + } + } + break; + + case 2: + + for (k = k1; k < k2; k++) + { + pend = Offp[k+1]; + x[0] = X[2*k ]; + x[1] = X[2*k + 1]; + for (p = Offp[k]; p < pend; p++) + { + i = Offi[p]; + offik = Offx[p]; + mult_sub(X[2*i], offik, x[0]); + mult_sub(X[2*i + 1], offik, x[1]); + } + } + break; + + case 3: + + for (k = k1; k < k2; k++) + { + pend = Offp[k+1]; + x[0] = X[3*k ]; + x[1] = X[3*k + 1]; + x[2] = X[3*k + 2]; + for (p = Offp[k]; p < pend; p++) + { + i = Offi[p]; + offik = Offx[p]; + mult_sub(X[3*i], offik, x[0]); + mult_sub(X[3*i + 1], offik, x[1]); + mult_sub(X[3*i + 2], offik, x[2]); + } + } + break; + + case 4: + + for (k = k1; k < k2; k++) + { + pend = Offp[k+1]; + x[0] = X[4*k ]; + x[1] = X[4*k + 1]; + x[2] = X[4*k + 2]; + x[3] = X[4*k + 3]; + for (p = Offp[k]; p < pend; p++) + { + i = Offi[p]; + offik = Offx[p]; + mult_sub(X[4*i], offik, x[0]); + mult_sub(X[4*i + 1], offik, x[1]); + mult_sub(X[4*i + 2], offik, x[2]); + mult_sub(X[4*i + 3], offik, x[3]); + } + } + break; + } + + } + } + + /* -------------------------------------------------------------- */ + /* permute the result, Bz = Q*X */ + /* -------------------------------------------------------------- */ + + switch (nr) { + + case 1: + + for (k = 0; k < n; k++) + { + Bz[Q[k]] = X[k]; + } + break; + + case 2: + + for (k = 0; k < n; k++) + { + i = Q[k]; + Bz[i ] = X[2*k ]; + Bz[i + d ] = X[2*k + 1]; + } + break; + + case 3: + + for (k = 0; k < n; k++) + { + i = Q[k]; + Bz [i ] = X[3*k ]; + Bz [i + d ] = X[3*k + 1]; + Bz [i + d*2] = X[3*k + 2]; + } + break; + + case 4: + + for (k = 0; k < n; k++) + { + i = Q[k]; + Bz [i ] = X[4*k ]; + Bz [i + d ] = X[4*k + 1]; + Bz [i + d*2] = X[4*k + 2]; + Bz [i + d*3] = X[4*k + 3]; + } + break; + } + + /* -------------------------------------------------------------- */ + /* go to the next chunk of B */ + /* -------------------------------------------------------------- */ + + Bz += d*4; + } + return true; + } + +} diff --git a/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_sort.java b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_sort.java new file mode 100644 index 0000000..db1f92c --- /dev/null +++ b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_sort.java @@ -0,0 +1,171 @@ +/** + * KLU: a sparse LU factorization algorithm. + * Copyright (C) 2004-2009, Timothy A. Davis. + * Copyright (C) 2011, Richard W. Lincoln. + * http://www.cise.ufl.edu/research/sparse/klu + * + * ------------------------------------------------------------------------- + * + * KLU is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * KLU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this Module; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +package edu.ufl.cise.klu.tdouble; + +/** + * Sorts the columns of L and U so that the row indices appear in strictly + * increasing order. + * + */ +public class Dklu_sort { + + /** + * Sort L or U using a double-transpose. + */ + public static void sort(int n, int[] Xip, int[] Xlen, Unit LU, int[] Tp, + int[] Tj, Entry Tx, int[] W) + { + int[] Xi; + Entry Xx; + int p, i, j, len, nz, tp, xlen, pend; + + assert Dklu_valid_LU.klu_valid_LU(n, false, Xip, Xlen, LU); + + /* count the number of entries in each row of L or U */ + for (i = 0; i < n; i++) + { + W[i] = 0; + } + for (j = 0; j < n; j++) + { + get_pointer(LU, Xip, Xlen, Xi, Xx, j, len); + for (p = 0; p < len; p++) + { + W[Xi[p]]++; + } + } + + /* construct the row pointers for T */ + nz = 0; + for (i = 0; i < n; i++) + { + Tp[i] = nz; + nz += W[i]; + } + Tp[n] = nz; + for (i = 0; i < n; i++) + { + W[i] = Tp[i]; + } + + /* transpose the matrix into Tp, Ti, Tx */ + for (j = 0; j < n; j++) + { + get_pointer(LU, Xip, Xlen, Xi, Xx, j, len); + for (p = 0; p < len; p++) + { + tp = W[Xi[p]]++; + Tj[tp] = j; + Tx[tp] = Xx[p]; + } + } + + /* transpose the matrix back into Xip, Xlen, Xi, Xx */ + for (j = 0; j < n; j++) + { + W[j] = 0; + } + for (i = 0; i < n; i++) + { + pend = Tp[i+1]; + for (p = Tp[i]; p < pend; p++) + { + j = Tj[p]; + get_pointer(LU, Xip, Xlen, Xi, Xx, j, len); + xlen = W[j]++; + Xi[xlen] = i; + Xx[xlen] = Tx[p]; + } + } + + assert Dklu_valid_LU.klu_valid_LU(n, false, Xip, Xlen, LU); + } + + + public static boolean klu_sort(KLU_symbolic Symbolic, KLU_numeric Numeric, + KLU_common Common) + { + int[] R, W, Tp, Ti, Lip, Uip, Llen, Ulen; + Entry Tx; + Unit[] LUbx; + int n, nk, nz, block, nblocks, maxblock, k1; + size_t m1; + + if (Common == null) + { + return false; + } + Common.status = KLU_OK; + + n = Symbolic.n; + R = Symbolic.R; + nblocks = Symbolic.nblocks; + maxblock = Symbolic.maxblock; + + Lip = Numeric.Lip; + Llen = Numeric.Llen; + Uip = Numeric.Uip; + Ulen = Numeric.Ulen; + LUbx = (Unit[]) Numeric.LUbx; + + m1 = ((size_t) maxblock) + 1; + + /* allocate workspace */ + nz = max(Numeric.max_lnz_block, Numeric.max_unz_block); + W = Dklu_malloc.klu_malloc(maxblock, (Int) sizeof, Common); + Tp = Dklu_malloc.klu_malloc(m1, (Int) sizeof, Common); + Ti = Dklu_malloc.klu_malloc(nz, (Int) sizeof, Common); + Tx = Dklu_malloc.klu_malloc(nz, (Entry) sizeof, Common); + + printf("\n======================= Start sort:\n"); + + if (Common.status == KLU_OK) + { + /* sort each block of L and U */ + for (block = 0; block < nblocks; block++) + { + k1 = R[block]; + nk = R[block+1] - k1; + if (nk > 1) + { + printf("\n-------------------block: %d nk %d\n", block, nk); + sort(nk, Lip + k1, Llen + k1, LUbx[block], Tp, Ti, Tx, W); + sort(nk, Uip + k1, Ulen + k1, LUbx[block], Tp, Ti, Tx, W); + } + } + } + + printf("\n======================= sort done.\n"); + + /* free workspace */ + Dklu_free.klu_free(W, maxblock, (Int) sizeof, Common); + Dklu_free.klu_free(Tp, m1, (Int) sizeof, Common); + Dklu_free.klu_free(Ti, nz, (Int) sizeof, Common); + Dklu_free.klu_free(Tx, nz, (Entry) sizeof, Common); + + return Common.status == KLU_OK; + } + +} diff --git a/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java new file mode 100644 index 0000000..7b840b2 --- /dev/null +++ b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java @@ -0,0 +1,435 @@ +/** + * KLU: a sparse LU factorization algorithm. + * Copyright (C) 2004-2009, Timothy A. Davis. + * Copyright (C) 2011, Richard W. Lincoln. + * http://www.cise.ufl.edu/research/sparse/klu + * + * ------------------------------------------------------------------------- + * + * KLU is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * KLU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this Module; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +package edu.ufl.cise.klu.tdouble; + +/** + * Solve A'x=b using the symbolic and numeric objects from KLU_analyze + * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is + * performed. Uses Numeric.Xwork as workspace (undefined on input and output), + * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with + * Numeric.Iwork). + */ +public class Dklu_tsolve { + + /** + * + * @param Symbolic + * @param Numeric + * @param d leading dimension of B + * @param nrhs number of right-hand-sides + * @param B right-hand-side on input, overwritten with solution to Ax=b on + * output. Size n*nrhs, in column-oriented form, with leading dimension d. + * @return + */ + public static boolean klu_tsolve(KLU_symbolic Symbolic, + KLU_numeric Numeric, int d, int nrhs, double[] B, + KLU_common Common) + { + Entry[] x = new Entry[4]; + Entry offik, s; + double rs; + double[] Rs; + Entry[] Offx, X, Bz, Udiag; + int[] Q, R, Pnum, Offp, Offi, Lip, Uip, Llen, Ulen; + Unit[] LUbx; + int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i; + + /* ------------------------------------------------------------------ */ + /* check inputs */ + /* ------------------------------------------------------------------ */ + + if (Common == null) + { + return (false); + } + if (Numeric == null || Symbolic == null || d < Symbolic.n || nrhs < 0 || + B == null) + { + Common.status = KLU_INVALID; + return false; + } + Common.status = KLU_OK; + + /* ------------------------------------------------------------------ */ + /* get the contents of the Symbolic object */ + /* ------------------------------------------------------------------ */ + + Bz = (Entry) B; + n = Symbolic.n; + nblocks = Symbolic.nblocks; + Q = Symbolic.Q; + R = Symbolic.R; + + /* ------------------------------------------------------------------ */ + /* get the contents of the Numeric object */ + /* ------------------------------------------------------------------ */ + + assert (nblocks == Numeric.nblocks); + Pnum = Numeric.Pnum; + Offp = Numeric.Offp; + Offi = Numeric.Offi; + Offx = (Entry) Numeric.Offx; + + Lip = Numeric.Lip; + Llen = Numeric.Llen; + Uip = Numeric.Uip; + Ulen = Numeric.Ulen; + LUbx = (Unit[]) Numeric.LUbx; + Udiag = Numeric.Udiag; + + Rs = Numeric.Rs; + X = (Entry) Numeric.Xwork; + assert Dklu_valid.klu_valid(n, Offp, Offi, Offx); + + /* ------------------------------------------------------------------ */ + /* solve in chunks of 4 columns at a time */ + /* ------------------------------------------------------------------ */ + + for (chunk = 0; chunk < nrhs; chunk += 4) + { + + /* -------------------------------------------------------------- */ + /* get the size of the current chunk */ + /* -------------------------------------------------------------- */ + + nr = min(nrhs - chunk, 4); + + /* -------------------------------------------------------------- */ + /* permute the right hand side, X = Q'*B */ + /* -------------------------------------------------------------- */ + + switch (nr) + { + + case 1: + + for (k = 0; k < n; k++) + { + X[k] = Bz[Q[k]]; + } + break; + + case 2: + + for (k = 0; k < n; k++) + { + i = Q[k]; + X[2*k ] = Bz[i ]; + X[2*k + 1] = Bz[i + d ]; + } + break; + + case 3: + + for (k = 0; k < n; k++) + { + i = Q[k]; + X[3*k ] = Bz[i ]; + X[3*k + 1] = Bz[i + d ]; + X[3*k + 2] = Bz[i + d*2]; + } + break; + + case 4: + + for (k = 0; k < n; k++) + { + i = Q[k]; + X[4*k ] = Bz[i ]; + X[4*k + 1] = Bz[i + d ]; + X[4*k + 2] = Bz[i + d*2]; + X[4*k + 3] = Bz[i + d*3]; + } + break; + + } + + /* -------------------------------------------------------------- */ + /* solve X = (L*U + Off)'\X */ + /* -------------------------------------------------------------- */ + + for (block = 0; block < nblocks; block++) + { + + /* ---------------------------------------------------------- */ + /* the block of size nk is from rows/columns k1 to k2-1 */ + /* ---------------------------------------------------------- */ + + k1 = R[block]; + k2 = R[block+1]; + nk = k2 - k1; + printf("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk); + + /* ---------------------------------------------------------- */ + /* block back-substitution for the off-diagonal-block entries */ + /* ---------------------------------------------------------- */ + + if (block > 0) + { + switch (nr) + { + + case 1: + + for (k = k1; k < k2; k++) + { + pend = Offp[k+1]; + for (p = Offp[k]; p < pend; p++) + { + mult_sub(X[k], Offx[p], X[Offi[p]]); + } + } + break; + + case 2: + + for (k = k1; k < k2; k++) + { + pend = Offp[k+1]; + x[0] = X[2*k ]; + x[1] = X[2*k + 1]; + for (p = Offp[k]; p < pend; p++) + { + i = Offi[p]; + offik = Offx[p]; + mult_sub(x[0], offik, X[2*i]); + mult_sub(x[1], offik, X[2*i + 1]); + } + X[2*k ] = x[0]; + X[2*k + 1] = x[1]; + } + break; + + case 3: + + for (k = k1; k < k2; k++) + { + pend = Offp[k+1]; + x[0] = X[3*k ]; + x[1] = X[3*k + 1]; + x[2] = X[3*k + 2]; + for (p = Offp[k]; p < pend; p++) + { + i = Offi[p]; + { + offik = Offx[p]; + } + mult_sub(x[0], offik, X[3*i]); + mult_sub(x[1], offik, X[3*i + 1]); + mult_sub(x[2], offik, X[3*i + 2]); + } + X[3*k ] = x[0]; + X[3*k + 1] = x[1]; + X[3*k + 2] = x[2]; + } + break; + + case 4: + + for (k = k1; k < k2; k++) + { + pend = Offp[k+1]; + x[0] = X[4*k ]; + x[1] = X[4*k + 1]; + x[2] = X[4*k + 2]; + x[3] = X[4*k + 3]; + for (p = Offp[k]; p < pend; p++) + { + i = Offi[p]; + offik = Offx[p]; + mult_sub(x[0], offik, X[4*i]); + mult_sub(x[1], offik, X[4*i + 1]); + mult_sub(x[2], offik, X[4*i + 2]); + mult_sub(x[3], offik, X[4*i + 3]); + } + X[4*k ] = x[0]; + X[4*k + 1] = x[1]; + X[4*k + 2] = x[2]; + X[4*k + 3] = x[3]; + } + break; + } + } + + /* ---------------------------------------------------------- */ + /* solve the block system */ + /* ---------------------------------------------------------- */ + + if (nk == 1) + { + s = Udiag[k1]; + switch (nr) + { + + case 1: + div(X[k1], X[k1], s); + break; + + case 2: + div(X[2*k1], X[2*k1], s); + div(X[2*k1 + 1], X[2*k1 + 1], s); + break; + + case 3: + div(X[3*k1], X[3*k1], s); + div(X[3*k1 + 1], X[3*k1 + 1], s); + div(X[3*k1 + 2], X[3*k1 + 2], s); + break; + + case 4: + div(X[4*k1], X[4*k1], s); + div(X[4*k1 + 1], X[4*k1 + 1], s); + div(X[4*k1 + 2], X[4*k1 + 2], s); + div(X[4*k1 + 3], X[4*k1 + 3], s); + break; + + } + } + else + { + KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx[block], + Udiag + k1, nr, + X + nr*k1); + KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx[block], nr, + X + nr*k1); + } + } + + /* -------------------------------------------------------------- */ + /* scale and permute the result, Bz = P'(R\X) */ + /* -------------------------------------------------------------- */ + + if (Rs == null) + { + + /* no scaling */ + switch (nr) + { + + case 1: + + for (k = 0; k < n; k++) + { + Bz[Pnum[k]] = X[k]; + } + break; + + case 2: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + Bz[i ] = X[2*k ]; + Bz[i + d ] = X[2*k + 1]; + } + break; + + case 3: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + Bz[i ] = X[3*k ]; + Bz[i + d ] = X[3*k + 1]; + Bz[i + d*2] = X[3*k + 2]; + } + break; + + case 4: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + Bz[i ] = X[4*k ]; + Bz[i + d ] = X[4*k + 1]; + Bz[i + d*2] = X[4*k + 2]; + Bz[i + d*3] = X[4*k + 3]; + } + break; + } + + } + else + { + + switch (nr) + { + + case 1: + + for (k = 0; k < n; k++) + { + scale_div_assign(Bz[Pnum[k]], X[k], Rs[k]); + } + break; + + case 2: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + rs = Rs[k]; + scale_div_assign(Bz[i], X[2*k], rs); + scale_div_assign(Bz[i + d], X[2*k + 1], rs); + } + break; + + case 3: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + rs = Rs[k]; + scale_div_assign(Bz[i], X[3*k], rs); + scale_div_assign(Bz[i + d], X[3*k + 1], rs); + scale_div_assign(Bz[i + d*2], X[3*k + 2], rs); + } + break; + + case 4: + + for (k = 0; k < n; k++) + { + i = Pnum[k]; + rs = Rs[k]; + scale_div_assign(Bz[i], X[4*k], rs); + scale_div_assign(Bz[i + d], X[4*k + 1], rs); + scale_div_assign(Bz[i + d*2], X[4*k + 2], rs); + scale_div_assign(Bz[i + d*3], X[4*k + 3], rs); + } + break; + } + } + + /* -------------------------------------------------------------- */ + /* go to the next chunk of B */ + /* -------------------------------------------------------------- */ + + Bz += d*4; + } + return true; + } + +} -- 2.11.4.GIT