src/main/java/edu/ufl/cise/klu/tdouble/Dklu_tsolve.java

   1 /**
   2  * KLU: a sparse LU factorization algorithm.
   3  * Copyright (C) 2004-2009, Timothy A. Davis.
   4  * Copyright (C) 2011-2012, Richard W. Lincoln.
   5  * http://www.cise.ufl.edu/research/sparse/klu
   6  *
   7  * -------------------------------------------------------------------------
   8  *
   9  * KLU is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * KLU is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this Module; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  22  *
  23  */
  24
  25 package edu.ufl.cise.klu.tdouble;
  26
  27 import edu.ufl.cise.klu.common.KLU_common;
  28 import edu.ufl.cise.klu.common.KLU_numeric;
  29 import edu.ufl.cise.klu.common.KLU_symbolic;
  30
  31 import static edu.ufl.cise.klu.tdouble.Dklu_dump.klu_valid;
  32 import static edu.ufl.cise.klu.tdouble.Dklu.klu_ltsolve;
  33 import static edu.ufl.cise.klu.tdouble.Dklu.klu_utsolve;
  34
  35 /**
  36  * Solve A'x=b using the symbolic and numeric objects from KLU_analyze
  37  * (or KLU_analyze_given) and KLU_factor.  Note that no iterative refinement is
  38  * performed.  Uses Numeric.Xwork as workspace (undefined on input and output),
  39  * of size 4n double's (note that columns 2 to 4 of Xwork overlap with
  40  * Numeric.Iwork).
  41  */
  42 public class Dklu_tsolve extends Dklu_internal {
  43
  44         /**
  45          *
  46          * @param Symbolic
  47          * @param Numeric
  48          * @param d leading dimension of B
  49          * @param nrhs number of right-hand-sides
  50          * @param B right-hand-side on input, overwritten with solution to Ax=b on
  51          * output. Size n*nrhs, in column-oriented form, with leading dimension d.
  52          * @return
  53          */
  54         public static int klu_tsolve(KLU_symbolic Symbolic,
  55                         KLU_numeric Numeric, int d, int nrhs,
  56                         double[] B, int B_offset, KLU_common Common)
  57         {
  58                 double[] x = new double[4] ;
  59                 double offik, s ;
  60                 double rs ;
  61                 double[] Rs ;
  62                 double[] Offx, X, Bz, Udiag ;
  63                 int[] Q, R, Pnum, Offp, Offi, Lip, Uip, Llen, Ulen ;
  64                 double[][] LUbx ;
  65                 int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
  66
  67                 /* ---------------------------------------------------------------------- */
  68                 /* check inputs */
  69                 /* ---------------------------------------------------------------------- */
  70
  71                 if (Common == null)
  72                 {
  73                         return (FALSE) ;
  74                 }
  75                 if (Numeric == null || Symbolic == null || d < Symbolic.n || nrhs < 0 ||
  76                         B == null)
  77                 {
  78                         Common.status = KLU_INVALID ;
  79                         return (FALSE) ;
  80                 }
  81                 Common.status = KLU_OK ;
  82
  83                 /* ---------------------------------------------------------------------- */
  84                 /* get the contents of the Symbolic object */
  85                 /* ---------------------------------------------------------------------- */
  86
  87                 Bz = B ;
  88                 n = Symbolic.n ;
  89                 nblocks = Symbolic.nblocks ;
  90                 Q = Symbolic.Q ;
  91                 R = Symbolic.R ;
  92
  93                 /* ---------------------------------------------------------------------- */
  94                 /* get the contents of the Numeric object */
  95                 /* ---------------------------------------------------------------------- */
  96
  97                 ASSERT (nblocks == Numeric.nblocks) ;
  98                 Pnum = Numeric.Pnum ;
  99                 Offp = Numeric.Offp ;
 100                 Offi = Numeric.Offi ;
 101                 Offx = Numeric.Offx ;
 102
 103                 Lip  = Numeric.Lip ;
 104                 Llen = Numeric.Llen ;
 105                 Uip  = Numeric.Uip ;
 106                 Ulen = Numeric.Ulen ;
 107                 LUbx = Numeric.LUbx ;
 108                 Udiag = Numeric.Udiag ;
 109
 110                 Rs = Numeric.Rs ;
 111                 X = Numeric.Xwork ;
 112                 if (!NDEBUG) ASSERT (klu_valid (n, Offp, Offi, Offx)) ;
 113
 114                 /* ---------------------------------------------------------------------- */
 115                 /* solve in chunks of 4 columns at a time */
 116                 /* ---------------------------------------------------------------------- */
 117
 118                 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
 119                 {
 120
 121                         /* ------------------------------------------------------------------ */
 122                         /* get the size of the current chunk */
 123                         /* ------------------------------------------------------------------ */
 124
 125                         nr = MIN (nrhs - chunk, 4) ;
 126
 127                         /* ------------------------------------------------------------------ */
 128                         /* permute the right hand side, X = Q'*B */
 129                         /* ------------------------------------------------------------------ */
 130
 131                         switch (nr)
 132                         {
 133
 134                                 case 1:
 135
 136                                         for (k = 0 ; k < n ; k++)
 137                                         {
 138                                                 X [k] = Bz  [B_offset + Q [k]] ;
 139                                         }
 140                                         break ;
 141
 142                                 case 2:
 143
 144                                         for (k = 0 ; k < n ; k++)
 145                                         {
 146                                                 i = Q [k] ;
 147                                                 X [2*k    ] = Bz [B_offset + i      ] ;
 148                                                 X [2*k + 1] = Bz [B_offset + i + d  ] ;
 149                                         }
 150                                         break ;
 151
 152                                 case 3:
 153
 154                                         for (k = 0 ; k < n ; k++)
 155                                         {
 156                                                 i = Q [k] ;
 157                                                 X [3*k    ] = Bz [B_offset + i      ] ;
 158                                                 X [3*k + 1] = Bz [B_offset + i + d  ] ;
 159                                                 X [3*k + 2] = Bz [B_offset + i + d*2] ;
 160                                         }
 161                                         break ;
 162
 163                                 case 4:
 164
 165                                         for (k = 0 ; k < n ; k++)
 166                                         {
 167                                                 i = Q [k] ;
 168                                                 X [4*k    ] = Bz [B_offset + i      ] ;
 169                                                 X [4*k + 1] = Bz [B_offset + i + d  ] ;
 170                                                 X [4*k + 2] = Bz [B_offset + i + d*2] ;
 171                                                 X [4*k + 3] = Bz [B_offset + i + d*3] ;
 172                                         }
 173                                         break ;
 174
 175                         }
 176
 177                         /* ------------------------------------------------------------------ */
 178                         /* solve X = (L*U + Off)'\X */
 179                         /* ------------------------------------------------------------------ */
 180
 181                         for (block = 0 ; block < nblocks ; block++)
 182                         {
 183
 184                                 /* -------------------------------------------------------------- */
 185                                 /* the block of size nk is from rows/columns k1 to k2-1 */
 186                                 /* -------------------------------------------------------------- */
 187
 188                                 k1 = R [block] ;
 189                                 k2 = R [block+1] ;
 190                                 nk = k2 - k1 ;
 191                                 PRINTF ("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk) ;
 192
 193                                 /* -------------------------------------------------------------- */
 194                                 /* block back-substitution for the off-diagonal-block entries */
 195                                 /* -------------------------------------------------------------- */
 196
 197                                 if (block > 0)
 198                                 {
 199                                         switch (nr)
 200                                         {
 201
 202                                         case 1:
 203
 204                                                 for (k = k1 ; k < k2 ; k++)
 205                                                 {
 206                                                         pend = Offp [k+1] ;
 207                                                         for (p = Offp [k] ; p < pend ; p++)
 208                                                         {
 209                                                                 {
 210                                                                         //MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
 211                                                                         X [k] -= Offx [p] * X [Offi [p]] ;
 212                                                                 }
 213                                                         }
 214                                                 }
 215                                                 break ;
 216
 217                                         case 2:
 218
 219                                                 for (k = k1 ; k < k2 ; k++)
 220                                                 {
 221                                                         pend = Offp [k+1] ;
 222                                                         x [0] = X [2*k    ] ;
 223                                                         x [1] = X [2*k + 1] ;
 224                                                         for (p = Offp [k] ; p < pend ; p++)
 225                                                         {
 226                                                                 i = Offi [p] ;
 227                                                                 {
 228                                                                         offik = Offx [p] ;
 229                                                                 }
 230                                                                 //MULT_SUB (x [0], offik, X [2*i]) ;
 231                                                                 x [0] -= offik * X [2*i] ;
 232                                                                 //MULT_SUB (x [1], offik, X [2*i + 1]) ;
 233                                                                 x [1] -= offik * X [2*i + 1] ;
 234                                                         }
 235                                                         X [2*k    ] = x [0] ;
 236                                                         X [2*k + 1] = x [1] ;
 237                                                 }
 238                                                 break ;
 239
 240                                         case 3:
 241
 242                                                 for (k = k1 ; k < k2 ; k++)
 243                                                 {
 244                                                         pend = Offp [k+1] ;
 245                                                         x [0] = X [3*k    ] ;
 246                                                         x [1] = X [3*k + 1] ;
 247                                                         x [2] = X [3*k + 2] ;
 248                                                         for (p = Offp [k] ; p < pend ; p++)
 249                                                         {
 250                                                                 i = Offi [p] ;
 251                                                                 {
 252                                                                         offik = Offx [p] ;
 253                                                                 }
 254                                                                 //MULT_SUB (x [0], offik, X [3*i]) ;
 255                                                                 x [0] -= offik * X [3*i] ;
 256                                                                 //MULT_SUB (x [1], offik, X [3*i + 1]) ;
 257                                                                 x [1] -= offik * X [3*i + 1] ;
 258                                                                 //MULT_SUB (x [2], offik, X [3*i + 2]) ;
 259                                                                 x [2] -= offik * X [3*i + 2] ;
 260                                                         }
 261                                                         X [3*k    ] = x [0] ;
 262                                                         X [3*k + 1] = x [1] ;
 263                                                         X [3*k + 2] = x [2] ;
 264                                                 }
 265                                                 break ;
 266
 267                                         case 4:
 268
 269                                                 for (k = k1 ; k < k2 ; k++)
 270                                                 {
 271                                                         pend = Offp [k+1] ;
 272                                                         x [0] = X [4*k    ] ;
 273                                                         x [1] = X [4*k + 1] ;
 274                                                         x [2] = X [4*k + 2] ;
 275                                                         x [3] = X [4*k + 3] ;
 276                                                         for (p = Offp [k] ; p < pend ; p++)
 277                                                         {
 278                                                                 i = Offi [p] ;
 279                                                                 {
 280                                                                         offik = Offx [p] ;
 281                                                                 }
 282                                                                 //MULT_SUB (x [0], offik, X [4*i]) ;
 283                                                                 x [0] -= offik * X [4*i] ;
 284                                                                 //MULT_SUB (x [1], offik, X [4*i + 1]) ;
 285                                                                 x [1] -= offik * X [4*i + 1] ;
 286                                                                 //MULT_SUB (x [2], offik, X [4*i + 2]) ;
 287                                                                 x [2] -= offik * X [4*i + 2] ;
 288                                                                 //MULT_SUB (x [3], offik, X [4*i + 3]) ;
 289                                                                 x [3] -= offik * X [4*i + 3] ;
 290                                                         }
 291                                                         X [4*k    ] = x [0] ;
 292                                                         X [4*k + 1] = x [1] ;
 293                                                         X [4*k + 2] = x [2] ;
 294                                                         X [4*k + 3] = x [3] ;
 295                                                 }
 296                                                 break ;
 297                                         }
 298                                 }
 299
 300                                 /* -------------------------------------------------------------- */
 301                                 /* solve the block system */
 302                                 /* -------------------------------------------------------------- */
 303
 304                                 if (nk == 1)
 305                                 {
 306                                         {
 307                                                 s = Udiag [k1] ;
 308                                         }
 309                                         switch (nr)
 310                                         {
 311
 312                                                 case 1:
 313                                                         //DIV (X [k1], X [k1], s) ;
 314                                                         X [k1] = X [k1] / s ;
 315                                                         break ;
 316
 317                                                 case 2:
 318                                                         //DIV (X [2*k1], X [2*k1], s) ;
 319                                                         X [2*k1] = X [2*k1] / s ;
 320                                                         //DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
 321                                                         X [2*k1 + 1] = X [2*k1 + 1] / s ;
 322                                                         break ;
 323
 324                                                 case 3:
 325                                                         //DIV (X [3*k1], X [3*k1], s) ;
 326                                                         X [3*k1] = X [3*k1] / s ;
 327                                                         //DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
 328                                                         X [3*k1 + 1] = X [3*k1 + 1] / s ;
 329                                                         //DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
 330                                                         X [3*k1 + 2] = X [3*k1 + 2] / s ;
 331                                                         break ;
 332
 333                                                 case 4:
 334                                                         //DIV (X [4*k1], X [4*k1], s) ;
 335                                                         X [4*k1] = X [4*k1] / s ;
 336                                                         //DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
 337                                                         X [4*k1 + 1] = X [4*k1 + 1] / s ;
 338                                                         //DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
 339                                                         X [4*k1 + 2] = X [4*k1 + 2] / s ;
 340                                                         //DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
 341                                                         X [4*k1 + 3] = X [4*k1 + 3] / s ;
 342                                                         break ;
 343
 344                                         }
 345                                 }
 346                                 else
 347                                 {
 348                                         klu_utsolve (nk, Uip, k1, Ulen, k1, LUbx [block],
 349                                                         Udiag, k1, nr, X, nr*k1) ;
 350                                         klu_ltsolve (nk, Lip, k1, Llen, k1, LUbx [block], nr,
 351                                                         X, nr*k1) ;
 352                                 }
 353                         }
 354
 355                         /* ------------------------------------------------------------------ */
 356                         /* scale and permute the result, Bz  = P'(R\X) */
 357                         /* ------------------------------------------------------------------ */
 358
 359                         if (Rs == null)
 360                         {
 361
 362                                 /* no scaling */
 363                                 switch (nr)
 364                                 {
 365
 366                                         case 1:
 367
 368                                                 for (k = 0 ; k < n ; k++)
 369                                                 {
 370                                                         Bz  [B_offset + Pnum [k]] = X [k] ;
 371                                                 }
 372                                                 break ;
 373
 374                                         case 2:
 375
 376                                                 for (k = 0 ; k < n ; k++)
 377                                                 {
 378                                                         i = Pnum [k] ;
 379                                                         Bz  [B_offset + i      ] = X [2*k    ] ;
 380                                                         Bz  [B_offset + i + d  ] = X [2*k + 1] ;
 381                                                 }
 382                                                 break ;
 383
 384                                         case 3:
 385
 386                                                 for (k = 0 ; k < n ; k++)
 387                                                 {
 388                                                         i = Pnum [k] ;
 389                                                         Bz  [B_offset + i      ] = X [3*k    ] ;
 390                                                         Bz  [B_offset + i + d  ] = X [3*k + 1] ;
 391                                                         Bz  [B_offset + i + d*2] = X [3*k + 2] ;
 392                                                 }
 393                                                 break ;
 394
 395                                         case 4:
 396
 397                                                 for (k = 0 ; k < n ; k++)
 398                                                 {
 399                                                         i = Pnum [k] ;
 400                                                         Bz  [B_offset + i      ] = X [4*k    ] ;
 401                                                         Bz  [B_offset + i + d  ] = X [4*k + 1] ;
 402                                                         Bz  [B_offset + i + d*2] = X [4*k + 2] ;
 403                                                         Bz  [B_offset + i + d*3] = X [4*k + 3] ;
 404                                                 }
 405                                                 break ;
 406                                 }
 407
 408                         }
 409                         else
 410                         {
 411
 412                                 switch (nr)
 413                                 {
 414
 415                                         case 1:
 416
 417                                                 for (k = 0 ; k < n ; k++)
 418                                                 {
 419                                                         //SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
 420                                                         Bz [B_offset + Pnum [k]] = X [k] / Rs [k] ;
 421                                                 }
 422                                                 break ;
 423
 424                                         case 2:
 425
 426                                                 for (k = 0 ; k < n ; k++)
 427                                                 {
 428                                                         i = Pnum [k] ;
 429                                                         rs = Rs [k] ;
 430                                                         //SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
 431                                                         Bz [B_offset + i] = X [2*k] / rs ;
 432                                                         //SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
 433                                                         Bz [B_offset + i + d] = X [2*k + 1] / rs ;
 434                                                 }
 435                                                 break ;
 436
 437                                         case 3:
 438
 439                                                 for (k = 0 ; k < n ; k++)
 440                                                 {
 441                                                         i = Pnum [k] ;
 442                                                         rs = Rs [k] ;
 443                                                         //SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
 444                                                         Bz [B_offset + i] = X [3*k] / rs ;
 445                                                         //SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
 446                                                         Bz [B_offset + i + d] = X [3*k + 1] / rs ;
 447                                                         //SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
 448                                                         Bz [B_offset + i + d*2] = X [3*k + 2] / rs ;
 449                                                 }
 450                                                 break ;
 451
 452                                         case 4:
 453
 454                                                 for (k = 0 ; k < n ; k++)
 455                                                 {
 456                                                         i = Pnum [k] ;
 457                                                         rs = Rs [k] ;
 458                                                         //SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
 459                                                         Bz [B_offset + i] = X [4*k] / rs ;
 460                                                         //SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
 461                                                         Bz [B_offset + i + d] = X [4*k + 1] / rs ;
 462                                                         //SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
 463                                                         Bz [B_offset + i + d*2] = X [4*k + 2] / rs ;
 464                                                         //SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
 465                                                         Bz [B_offset + i + d*3] = X [4*k + 3] / rs ;
 466                                                 }
 467                                                 break ;
 468                                 }
 469                         }
 470
 471                         /* ------------------------------------------------------------------ */
 472                         /* go to the next chunk of B */
 473                         /* ------------------------------------------------------------------ */
 474
 475                         B_offset += d*4 ;
 476                 }
 477                 return (TRUE) ;
 478         }
 479
 480 }