From 0f68e1c8c108c92fe90a661bb0b9466ed4952239 Mon Sep 17 00:00:00 2001
From: Richard Lincoln <r.w.lincoln@gmail.com>
Date: Sat, 30 Jul 2011 19:50:12 +0100
Subject: [PATCH] Adding tdouble package.

---
 .../edu/ufl/cise/klu/tdcomplex/KLU_common.java     |   2 +-
 .../java/edu/ufl/cise/klu/tdouble/Dklu_solve.java  | 421 ++++++++++++++++++++
 .../java/edu/ufl/cise/klu/tdouble/Dklu_sort.java   | 171 ++++++++
 .../java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java | 435 +++++++++++++++++++++
 4 files changed, 1028 insertions(+), 1 deletion(-)
 create mode 100644 src/main/java/edu/ufl/cise/klu/tdouble/Dklu_solve.java
 create mode 100644 src/main/java/edu/ufl/cise/klu/tdouble/Dklu_sort.java
 create mode 100644 src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java

diff --git a/src/main/java/edu/ufl/cise/klu/tdcomplex/KLU_common.java b/src/main/java/edu/ufl/cise/klu/tdcomplex/KLU_common.java
index 5889ede..4f1e859 100644
--- a/src/main/java/edu/ufl/cise/klu/tdcomplex/KLU_common.java
+++ b/src/main/java/edu/ufl/cise/klu/tdcomplex/KLU_common.java
@@ -76,5 +76,5 @@ public class KLU_common {
 
 	NativeLong memusage ;   /* current memory usage, in bytes */
 	NativeLong mempeak ;    /* peak memory usage, in bytes */
-	
+
 }
diff --git a/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_solve.java b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_solve.java
new file mode 100644
index 0000000..04a021e
--- /dev/null
+++ b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_solve.java
@@ -0,0 +1,421 @@
+/**
+ * KLU: a sparse LU factorization algorithm.
+ * Copyright (C) 2004-2009, Timothy A. Davis.
+ * Copyright (C) 2011, Richard W. Lincoln.
+ * http://www.cise.ufl.edu/research/sparse/klu
+ *
+ * -------------------------------------------------------------------------
+ *
+ * KLU is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * KLU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this Module; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+package edu.ufl.cise.klu.tdouble;
+
+/**
+ * Solve Ax=b using the symbolic and numeric objects from KLU_analyze
+ * (or KLU_analyze_given) and KLU_factor.  Note that no iterative refinement is
+ * performed.  Uses Numeric.Xwork as workspace (undefined on input and output),
+ * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
+ * Numeric.Iwork).
+ */
+public class Dklu_solve {
+
+	/**
+	 *
+	 * @param Symbolic
+	 * @param Numeric
+	 * @param d leading dimension of B
+	 * @param nrhs number of right-hand-sides
+	 * @param B right-hand-side on input, overwritten with solution to Ax=b on
+	 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
+	 * @param Common
+	 * @return
+	 */
+	public static boolean klu_solve(KLU_symbolic Symbolic, KLU_numeric Numeric,
+			int d, int nrhs, double[] B, KLU_common Common)
+	{
+		Entry offik, s;
+		Entry[] x = new Entry[4];
+		double rs, Rs;
+		Entry Offx, X, Bz, Udiag;
+		int[] Q, R, Pnum, Offp, Offi, Lip, Uip, Llen, Ulen;
+		Unit[] LUbx;
+		int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i;
+
+		/* ------------------------------------------------------------------ */
+		/* check inputs */
+		/* ------------------------------------------------------------------ */
+
+		if (Common == null)
+		{
+			return false;
+		}
+		if (Numeric == null || Symbolic == null || d < Symbolic.n || nrhs < 0 ||
+			B == null)
+		{
+			Common.status = KLU_INVALID;
+			return false;
+		}
+		Common.status = KLU_OK;
+
+		/* ------------------------------------------------------------------ */
+		/* get the contents of the Symbolic object */
+		/* ------------------------------------------------------------------ */
+
+		Bz = (Entry) B;
+		n = Symbolic.n;
+		nblocks = Symbolic.nblocks;
+		Q = Symbolic.Q;
+		R = Symbolic.R;
+
+		/* ------------------------------------------------------------------ */
+		/* get the contents of the Numeric object */
+		/* ------------------------------------------------------------------ */
+
+		assert (nblocks == Numeric.nblocks);
+		Pnum = Numeric.Pnum;
+		Offp = Numeric.Offp;
+		Offi = Numeric.Offi;
+		Offx = (Entry) Numeric.Offx;
+
+		Lip  = Numeric.Lip;
+		Llen = Numeric.Llen;
+		Uip  = Numeric.Uip;
+		Ulen = Numeric.Ulen;
+		LUbx = (Unit[]) Numeric.LUbx;
+		Udiag = Numeric.Udiag;
+
+		Rs = Numeric.Rs;
+		X = (Entry) Numeric.Xwork;
+
+		assert Dklu_valid.klu_valid(n, Offp, Offi, Offx);
+
+		/* ------------------------------------------------------------------ */
+		/* solve in chunks of 4 columns at a time */
+		/* ------------------------------------------------------------------ */
+
+		for (chunk = 0; chunk < nrhs; chunk += 4)
+		{
+
+			/* -------------------------------------------------------------- */
+			/* get the size of the current chunk */
+			/* -------------------------------------------------------------- */
+
+			nr = min(nrhs - chunk, 4);
+
+			/* -------------------------------------------------------------- */
+			/* scale and permute the right hand side, X = P*(R\B) */
+			/* -------------------------------------------------------------- */
+
+			if (Rs == null)
+			{
+
+				/* no scaling */
+				switch (nr)
+				{
+
+					case 1:
+
+						for (k = 0; k < n; k++)
+						{
+							X[k] = Bz[Pnum[k]];
+						}
+						break;
+
+					case 2:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							X[2*k    ] = Bz[i    ];
+							X[2*k + 1] = Bz[i + d];
+						}
+						break;
+
+					case 3:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							X[3*k    ] = Bz[i      ];
+							X[3*k + 1] = Bz[i + d  ];
+							X[3*k + 2] = Bz[i + d*2];
+						}
+						break;
+
+					case 4:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							X[4*k    ] = Bz[i      ];
+							X[4*k + 1] = Bz[i + d  ];
+							X[4*k + 2] = Bz[i + d*2];
+							X[4*k + 3] = Bz[i + d*3];
+						}
+						break;
+				}
+
+			}
+			else
+			{
+
+				switch (nr)
+				{
+
+					case 1:
+
+						for (k = 0; k < n; k++)
+						{
+							scale_div_assign(X[k], Bz[Pnum[k]], Rs[k]);
+						}
+						break;
+
+					case 2:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							rs = Rs[k];
+							scale_div_assign(X[2*k], Bz[i], rs);
+							scale_div_assign(X[2*k + 1], Bz[i + d], rs);
+						}
+						break;
+
+					case 3:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							rs = Rs[k];
+							scale_div_assign(X[3*k], Bz[i], rs);
+							scale_div_assign(X[3*k + 1], Bz[i + d], rs);
+							scale_div_assign(X[3*k + 2], Bz[i + d*2], rs);
+						}
+						break;
+
+					case 4:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							rs = Rs[k];
+							scale_div_assign(X[4*k], Bz[i], rs);
+							scale_div_assign(X[4*k + 1], Bz[i + d], rs);
+							scale_div_assign(X[4*k + 2], Bz[i + d*2], rs);
+							scale_div_assign(X[4*k + 3], Bz[i + d*3], rs);
+						}
+						break;
+				}
+
+			}
+
+			/* -------------------------------------------------------------- */
+			/* solve X = (L*U + Off)\X */
+			/* -------------------------------------------------------------- */
+
+			for (block = nblocks-1; block >= 0; block--)
+			{
+
+				/* ---------------------------------------------------------- */
+				/* the block of size nk is from rows/columns k1 to k2-1 */
+				/* ---------------------------------------------------------- */
+
+				k1 = R[block];
+				k2 = R[block+1];
+				nk = k2 - k1;
+				printf("solve %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk);
+
+				/* solve the block system */
+				if (nk == 1)
+				{
+					s = Udiag[k1];
+					switch (nr)
+					{
+
+						case 1:
+							div(X[k1], X[k1], s);
+							break;
+
+						case 2:
+							div(X[2*k1], X[2*k1], s);
+							div(X[2*k1 + 1], X[2*k1 + 1], s);
+							break;
+
+						case 3:
+							div(X[3*k1], X[3*k1], s);
+							div(X[3*k1 + 1], X[3*k1 + 1], s);
+							div(X[3*k1 + 2], X[3*k1 + 2], s);
+							break;
+
+						case 4:
+							div(X[4*k1], X[4*k1], s);
+							div(X[4*k1 + 1], X[4*k1 + 1], s);
+							div(X[4*k1 + 2], X[4*k1 + 2], s);
+							div(X[4*k1 + 3], X[4*k1 + 3], s);
+							break;
+
+					}
+				}
+				else
+				{
+					Dklu_lsolve.klu_lsolve(nk, Lip + k1, Llen + k1, LUbx[block],
+							nr, X + nr*k1);
+					Dklu_usolve.klu_usolve(nk, Uip + k1, Ulen + k1, LUbx[block],
+							Udiag + k1, nr, X + nr*k1);
+				}
+
+				/* ---------------------------------------------------------- */
+				/* block back-substitution for the off-diagonal-block entries */
+				/* ---------------------------------------------------------- */
+
+				if (block > 0)
+				{
+					switch (nr)
+					{
+
+						case 1:
+
+							for (k = k1; k < k2; k++)
+							{
+								pend = Offp[k+1];
+								x[0] = X[k];
+								for (p = Offp[k]; p < pend; p++)
+								{
+									mult_sub(X[Offi[p]], Offx[p], x[0]);
+								}
+							}
+							break;
+
+						case 2:
+
+							for (k = k1; k < k2; k++)
+							{
+								pend = Offp[k+1];
+								x[0] = X[2*k    ];
+								x[1] = X[2*k + 1];
+								for (p = Offp[k]; p < pend; p++)
+								{
+									i = Offi[p];
+									offik = Offx[p];
+									mult_sub(X[2*i], offik, x[0]);
+									mult_sub(X[2*i + 1], offik, x[1]);
+								}
+							}
+							break;
+
+						case 3:
+
+							for (k = k1; k < k2; k++)
+							{
+								pend = Offp[k+1];
+								x[0] = X[3*k    ];
+								x[1] = X[3*k + 1];
+								x[2] = X[3*k + 2];
+								for (p = Offp[k]; p < pend; p++)
+								{
+									i = Offi[p];
+									offik = Offx[p];
+									mult_sub(X[3*i], offik, x[0]);
+									mult_sub(X[3*i + 1], offik, x[1]);
+									mult_sub(X[3*i + 2], offik, x[2]);
+								}
+							}
+							break;
+
+						case 4:
+
+							for (k = k1; k < k2; k++)
+							{
+								pend = Offp[k+1];
+								x[0] = X[4*k    ];
+								x[1] = X[4*k + 1];
+								x[2] = X[4*k + 2];
+								x[3] = X[4*k + 3];
+								for (p = Offp[k]; p < pend; p++)
+								{
+									i = Offi[p];
+									offik = Offx[p];
+									mult_sub(X[4*i], offik, x[0]);
+									mult_sub(X[4*i + 1], offik, x[1]);
+									mult_sub(X[4*i + 2], offik, x[2]);
+									mult_sub(X[4*i + 3], offik, x[3]);
+								}
+							}
+							break;
+					}
+
+				}
+			}
+
+			/* -------------------------------------------------------------- */
+			/* permute the result, Bz  = Q*X */
+			/* -------------------------------------------------------------- */
+
+			switch (nr) {
+
+				case 1:
+
+					for (k = 0; k < n; k++)
+					{
+						Bz[Q[k]] = X[k];
+					}
+					break;
+
+				case 2:
+
+					for (k = 0; k < n; k++)
+					{
+						i = Q[k];
+						Bz[i      ] = X[2*k    ];
+						Bz[i + d  ] = X[2*k + 1];
+					}
+					break;
+
+				case 3:
+
+					for (k = 0; k < n; k++)
+					{
+						i = Q[k];
+						Bz [i      ] = X[3*k    ];
+						Bz [i + d  ] = X[3*k + 1];
+						Bz [i + d*2] = X[3*k + 2];
+					}
+					break;
+
+				case 4:
+
+					for (k = 0; k < n; k++)
+					{
+						i = Q[k];
+						Bz [i      ] = X[4*k    ];
+						Bz [i + d  ] = X[4*k + 1];
+						Bz [i + d*2] = X[4*k + 2];
+						Bz [i + d*3] = X[4*k + 3];
+					}
+					break;
+			}
+
+			/* -------------------------------------------------------------- */
+			/* go to the next chunk of B */
+			/* -------------------------------------------------------------- */
+
+			Bz += d*4;
+		}
+		return true;
+	}
+
+}
diff --git a/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_sort.java b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_sort.java
new file mode 100644
index 0000000..db1f92c
--- /dev/null
+++ b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_sort.java
@@ -0,0 +1,171 @@
+/**
+ * KLU: a sparse LU factorization algorithm.
+ * Copyright (C) 2004-2009, Timothy A. Davis.
+ * Copyright (C) 2011, Richard W. Lincoln.
+ * http://www.cise.ufl.edu/research/sparse/klu
+ *
+ * -------------------------------------------------------------------------
+ *
+ * KLU is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * KLU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this Module; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+package edu.ufl.cise.klu.tdouble;
+
+/**
+ * Sorts the columns of L and U so that the row indices appear in strictly
+ * increasing order.
+ *
+ */
+public class Dklu_sort {
+
+	/**
+	 * Sort L or U using a double-transpose.
+	 */
+	public static void sort(int n, int[] Xip, int[] Xlen, Unit LU, int[] Tp,
+			int[] Tj, Entry Tx, int[] W)
+	{
+		int[] Xi;
+		Entry Xx;
+		int p, i, j, len, nz, tp, xlen, pend;
+
+		assert Dklu_valid_LU.klu_valid_LU(n, false, Xip, Xlen, LU);
+
+		/* count the number of entries in each row of L or U */
+		for (i = 0; i < n; i++)
+		{
+			W[i] = 0;
+		}
+		for (j = 0; j < n; j++)
+		{
+			get_pointer(LU, Xip, Xlen, Xi, Xx, j, len);
+			for (p = 0; p < len; p++)
+			{
+				W[Xi[p]]++;
+			}
+		}
+
+		/* construct the row pointers for T */
+		nz = 0;
+		for (i = 0; i < n; i++)
+		{
+			Tp[i] = nz;
+			nz += W[i];
+		}
+		Tp[n] = nz;
+		for (i = 0; i < n; i++)
+		{
+			W[i] = Tp[i];
+		}
+
+		/* transpose the matrix into Tp, Ti, Tx */
+		for (j = 0; j < n; j++)
+		{
+			get_pointer(LU, Xip, Xlen, Xi, Xx, j, len);
+			for (p = 0; p < len; p++)
+			{
+				tp = W[Xi[p]]++;
+				Tj[tp] = j;
+				Tx[tp] = Xx[p];
+			}
+		}
+
+		/* transpose the matrix back into Xip, Xlen, Xi, Xx */
+		for (j = 0; j < n; j++)
+		{
+			W[j] = 0;
+		}
+		for (i = 0; i < n; i++)
+		{
+			pend = Tp[i+1];
+			for (p = Tp[i]; p < pend; p++)
+			{
+				j = Tj[p];
+				get_pointer(LU, Xip, Xlen, Xi, Xx, j, len);
+				xlen = W[j]++;
+				Xi[xlen] = i;
+				Xx[xlen] = Tx[p];
+			}
+		}
+
+		assert Dklu_valid_LU.klu_valid_LU(n, false, Xip, Xlen, LU);
+	}
+
+
+	public static boolean klu_sort(KLU_symbolic Symbolic, KLU_numeric Numeric,
+			KLU_common Common)
+	{
+		int[] R, W, Tp, Ti, Lip, Uip, Llen, Ulen;
+		Entry Tx;
+		Unit[] LUbx;
+		int n, nk, nz, block, nblocks, maxblock, k1;
+		size_t m1;
+
+		if (Common == null)
+		{
+			return false;
+		}
+		Common.status = KLU_OK;
+
+		n = Symbolic.n;
+		R = Symbolic.R;
+		nblocks = Symbolic.nblocks;
+		maxblock = Symbolic.maxblock;
+
+		Lip  = Numeric.Lip;
+		Llen = Numeric.Llen;
+		Uip  = Numeric.Uip;
+		Ulen = Numeric.Ulen;
+		LUbx = (Unit[]) Numeric.LUbx;
+
+		m1 = ((size_t) maxblock) + 1;
+
+		/* allocate workspace */
+		nz = max(Numeric.max_lnz_block, Numeric.max_unz_block);
+		W  = Dklu_malloc.klu_malloc(maxblock, (Int) sizeof, Common);
+		Tp = Dklu_malloc.klu_malloc(m1, (Int) sizeof, Common);
+		Ti = Dklu_malloc.klu_malloc(nz, (Int) sizeof, Common);
+		Tx = Dklu_malloc.klu_malloc(nz, (Entry) sizeof, Common);
+
+		printf("\n======================= Start sort:\n");
+
+		if (Common.status == KLU_OK)
+		{
+			/* sort each block of L and U */
+			for (block = 0; block < nblocks; block++)
+			{
+				k1 = R[block];
+				nk = R[block+1] - k1;
+				if (nk > 1)
+				{
+					printf("\n-------------------block: %d nk %d\n", block, nk);
+					sort(nk, Lip + k1, Llen + k1, LUbx[block], Tp, Ti, Tx, W);
+					sort(nk, Uip + k1, Ulen + k1, LUbx[block], Tp, Ti, Tx, W);
+				}
+			}
+		}
+
+		printf("\n======================= sort done.\n");
+
+		/* free workspace */
+		Dklu_free.klu_free(W, maxblock, (Int) sizeof, Common);
+		Dklu_free.klu_free(Tp, m1, (Int) sizeof, Common);
+		Dklu_free.klu_free(Ti, nz, (Int) sizeof, Common);
+		Dklu_free.klu_free(Tx, nz, (Entry) sizeof, Common);
+
+		return Common.status == KLU_OK;
+	}
+
+}
diff --git a/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java
new file mode 100644
index 0000000..7b840b2
--- /dev/null
+++ b/src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java
@@ -0,0 +1,435 @@
+/**
+ * KLU: a sparse LU factorization algorithm.
+ * Copyright (C) 2004-2009, Timothy A. Davis.
+ * Copyright (C) 2011, Richard W. Lincoln.
+ * http://www.cise.ufl.edu/research/sparse/klu
+ *
+ * -------------------------------------------------------------------------
+ *
+ * KLU is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * KLU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this Module; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+package edu.ufl.cise.klu.tdouble;
+
+/**
+ * Solve A'x=b using the symbolic and numeric objects from KLU_analyze
+ * (or KLU_analyze_given) and KLU_factor.  Note that no iterative refinement is
+ * performed.  Uses Numeric.Xwork as workspace (undefined on input and output),
+ * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
+ * Numeric.Iwork).
+ */
+public class Dklu_tsolve {
+
+	/**
+	 *
+	 * @param Symbolic
+	 * @param Numeric
+	 * @param d leading dimension of B
+	 * @param nrhs number of right-hand-sides
+	 * @param B right-hand-side on input, overwritten with solution to Ax=b on
+	 * output. Size n*nrhs, in column-oriented form, with leading dimension d.
+	 * @return
+	 */
+	public static boolean klu_tsolve(KLU_symbolic Symbolic,
+			KLU_numeric Numeric, int d, int nrhs, double[] B,
+			KLU_common Common)
+	{
+		Entry[] x = new Entry[4];
+		Entry offik, s;
+		double rs;
+		double[] Rs;
+		Entry[] Offx, X, Bz, Udiag;
+		int[] Q, R, Pnum, Offp, Offi, Lip, Uip, Llen, Ulen;
+		Unit[] LUbx;
+		int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i;
+
+		/* ------------------------------------------------------------------ */
+		/* check inputs */
+		/* ------------------------------------------------------------------ */
+
+		if (Common == null)
+		{
+			return (false);
+		}
+		if (Numeric == null || Symbolic == null || d < Symbolic.n || nrhs < 0 ||
+			B == null)
+		{
+			Common.status = KLU_INVALID;
+			return false;
+		}
+		Common.status = KLU_OK;
+
+		/* ------------------------------------------------------------------ */
+		/* get the contents of the Symbolic object */
+		/* ------------------------------------------------------------------ */
+
+		Bz = (Entry) B;
+		n = Symbolic.n;
+		nblocks = Symbolic.nblocks;
+		Q = Symbolic.Q;
+		R = Symbolic.R;
+
+		/* ------------------------------------------------------------------ */
+		/* get the contents of the Numeric object */
+		/* ------------------------------------------------------------------ */
+
+		assert (nblocks == Numeric.nblocks);
+		Pnum = Numeric.Pnum;
+		Offp = Numeric.Offp;
+		Offi = Numeric.Offi;
+		Offx = (Entry) Numeric.Offx;
+
+		Lip  = Numeric.Lip;
+		Llen = Numeric.Llen;
+		Uip  = Numeric.Uip;
+		Ulen = Numeric.Ulen;
+		LUbx = (Unit[]) Numeric.LUbx;
+		Udiag = Numeric.Udiag;
+
+		Rs = Numeric.Rs;
+		X = (Entry) Numeric.Xwork;
+		assert Dklu_valid.klu_valid(n, Offp, Offi, Offx);
+
+		/* ------------------------------------------------------------------ */
+		/* solve in chunks of 4 columns at a time */
+		/* ------------------------------------------------------------------ */
+
+		for (chunk = 0; chunk < nrhs; chunk += 4)
+		{
+
+			/* -------------------------------------------------------------- */
+			/* get the size of the current chunk */
+			/* -------------------------------------------------------------- */
+
+			nr = min(nrhs - chunk, 4);
+
+			/* -------------------------------------------------------------- */
+			/* permute the right hand side, X = Q'*B */
+			/* -------------------------------------------------------------- */
+
+			switch (nr)
+			{
+
+				case 1:
+
+					for (k = 0; k < n; k++)
+					{
+						X[k] = Bz[Q[k]];
+					}
+					break;
+
+				case 2:
+
+					for (k = 0; k < n; k++)
+					{
+						i = Q[k];
+						X[2*k    ] = Bz[i      ];
+						X[2*k + 1] = Bz[i + d  ];
+					}
+					break;
+
+				case 3:
+
+					for (k = 0; k < n; k++)
+					{
+						i = Q[k];
+						X[3*k    ] = Bz[i      ];
+						X[3*k + 1] = Bz[i + d  ];
+						X[3*k + 2] = Bz[i + d*2];
+					}
+					break;
+
+				case 4:
+
+					for (k = 0; k < n; k++)
+					{
+						i = Q[k];
+						X[4*k    ] = Bz[i      ];
+						X[4*k + 1] = Bz[i + d  ];
+						X[4*k + 2] = Bz[i + d*2];
+						X[4*k + 3] = Bz[i + d*3];
+					}
+					break;
+
+			}
+
+			/* -------------------------------------------------------------- */
+			/* solve X = (L*U + Off)'\X */
+			/* -------------------------------------------------------------- */
+
+			for (block = 0; block < nblocks; block++)
+			{
+
+				/* ---------------------------------------------------------- */
+				/* the block of size nk is from rows/columns k1 to k2-1 */
+				/* ---------------------------------------------------------- */
+
+				k1 = R[block];
+				k2 = R[block+1];
+				nk = k2 - k1;
+				printf("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk);
+
+				/* ---------------------------------------------------------- */
+				/* block back-substitution for the off-diagonal-block entries */
+				/* ---------------------------------------------------------- */
+
+				if (block > 0)
+				{
+					switch (nr)
+						{
+
+						case 1:
+
+							for (k = k1; k < k2; k++)
+							{
+								pend = Offp[k+1];
+								for (p = Offp[k]; p < pend; p++)
+								{
+									mult_sub(X[k], Offx[p], X[Offi[p]]);
+								}
+							}
+							break;
+
+						case 2:
+
+							for (k = k1; k < k2; k++)
+							{
+								pend = Offp[k+1];
+								x[0] = X[2*k    ];
+								x[1] = X[2*k + 1];
+								for (p = Offp[k]; p < pend; p++)
+								{
+									i = Offi[p];
+									offik = Offx[p];
+									mult_sub(x[0], offik, X[2*i]);
+									mult_sub(x[1], offik, X[2*i + 1]);
+								}
+								X[2*k    ] = x[0];
+								X[2*k + 1] = x[1];
+							}
+							break;
+
+						case 3:
+
+							for (k = k1; k < k2; k++)
+							{
+								pend = Offp[k+1];
+								x[0] = X[3*k    ];
+								x[1] = X[3*k + 1];
+								x[2] = X[3*k + 2];
+								for (p = Offp[k]; p < pend; p++)
+								{
+									i = Offi[p];
+									{
+										offik = Offx[p];
+									}
+									mult_sub(x[0], offik, X[3*i]);
+									mult_sub(x[1], offik, X[3*i + 1]);
+									mult_sub(x[2], offik, X[3*i + 2]);
+								}
+								X[3*k    ] = x[0];
+								X[3*k + 1] = x[1];
+								X[3*k + 2] = x[2];
+							}
+							break;
+
+						case 4:
+
+							for (k = k1; k < k2; k++)
+							{
+								pend = Offp[k+1];
+								x[0] = X[4*k    ];
+								x[1] = X[4*k + 1];
+								x[2] = X[4*k + 2];
+								x[3] = X[4*k + 3];
+								for (p = Offp[k]; p < pend; p++)
+								{
+									i = Offi[p];
+									offik = Offx[p];
+									mult_sub(x[0], offik, X[4*i]);
+									mult_sub(x[1], offik, X[4*i + 1]);
+									mult_sub(x[2], offik, X[4*i + 2]);
+									mult_sub(x[3], offik, X[4*i + 3]);
+								}
+								X[4*k    ] = x[0];
+								X[4*k + 1] = x[1];
+								X[4*k + 2] = x[2];
+								X[4*k + 3] = x[3];
+							}
+							break;
+						}
+				}
+
+				/* ---------------------------------------------------------- */
+				/* solve the block system */
+				/* ---------------------------------------------------------- */
+
+				if (nk == 1)
+				{
+					s = Udiag[k1];
+					switch (nr)
+					{
+
+						case 1:
+							div(X[k1], X[k1], s);
+							break;
+
+						case 2:
+							div(X[2*k1], X[2*k1], s);
+							div(X[2*k1 + 1], X[2*k1 + 1], s);
+							break;
+
+						case 3:
+							div(X[3*k1], X[3*k1], s);
+							div(X[3*k1 + 1], X[3*k1 + 1], s);
+							div(X[3*k1 + 2], X[3*k1 + 2], s);
+							break;
+
+						case 4:
+							div(X[4*k1], X[4*k1], s);
+							div(X[4*k1 + 1], X[4*k1 + 1], s);
+							div(X[4*k1 + 2], X[4*k1 + 2], s);
+							div(X[4*k1 + 3], X[4*k1 + 3], s);
+							break;
+
+					}
+				}
+				else
+				{
+					KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx[block],
+							Udiag + k1, nr,
+							X + nr*k1);
+					KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx[block], nr,
+							X + nr*k1);
+				}
+			}
+
+			/* -------------------------------------------------------------- */
+			/* scale and permute the result, Bz  = P'(R\X) */
+			/* -------------------------------------------------------------- */
+
+			if (Rs == null)
+			{
+
+				/* no scaling */
+				switch (nr)
+				{
+
+					case 1:
+
+						for (k = 0; k < n; k++)
+						{
+							Bz[Pnum[k]] = X[k];
+						}
+						break;
+
+					case 2:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							Bz[i      ] = X[2*k    ];
+							Bz[i + d  ] = X[2*k + 1];
+						}
+						break;
+
+					case 3:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							Bz[i      ] = X[3*k    ];
+							Bz[i + d  ] = X[3*k + 1];
+							Bz[i + d*2] = X[3*k + 2];
+						}
+						break;
+
+					case 4:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							Bz[i      ] = X[4*k    ];
+							Bz[i + d  ] = X[4*k + 1];
+							Bz[i + d*2] = X[4*k + 2];
+							Bz[i + d*3] = X[4*k + 3];
+						}
+						break;
+				}
+
+			}
+			else
+			{
+
+				switch (nr)
+				{
+
+					case 1:
+
+						for (k = 0; k < n; k++)
+						{
+							scale_div_assign(Bz[Pnum[k]], X[k], Rs[k]);
+						}
+						break;
+
+					case 2:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							rs = Rs[k];
+							scale_div_assign(Bz[i], X[2*k], rs);
+							scale_div_assign(Bz[i + d], X[2*k + 1], rs);
+						}
+						break;
+
+					case 3:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							rs = Rs[k];
+							scale_div_assign(Bz[i], X[3*k], rs);
+							scale_div_assign(Bz[i + d], X[3*k + 1], rs);
+							scale_div_assign(Bz[i + d*2], X[3*k + 2], rs);
+						}
+						break;
+
+					case 4:
+
+						for (k = 0; k < n; k++)
+						{
+							i = Pnum[k];
+							rs = Rs[k];
+							scale_div_assign(Bz[i], X[4*k], rs);
+							scale_div_assign(Bz[i + d], X[4*k + 1], rs);
+							scale_div_assign(Bz[i + d*2], X[4*k + 2], rs);
+							scale_div_assign(Bz[i + d*3], X[4*k + 3], rs);
+						}
+						break;
+				}
+			}
+
+			/* -------------------------------------------------------------- */
+			/* go to the next chunk of B */
+			/* -------------------------------------------------------------- */
+
+			Bz  += d*4;
+		}
+		return true;
+	}
+
+}
-- 
2.11.4.GIT