src/Math/newuoa.hh

   1 /*
   2   This is NEWUOA for unconstrained minimization. The codes were written
   3   by Powell in Fortran and then translated to C with f2c. I further
   4   modified the code to make it independent of libf2c and f2c.h. Please
   5   refer to "The NEWUOA software for unconstrained optimization without
   6   derivatives", which is available at www.damtp.cam.ac.uk, for more
   7   information.
   8  */
   9 /*
  10   The original fortran codes are distributed without restrictions. The
  11   C++ codes are distributed under MIT license.
  12  */
  13 /* The MIT License
  14
  15    Copyright (c) 2004, by M.J.D. Powell <mjdp@cam.ac.uk>
  16                  2008, by Attractive Chaos <attractivechaos@aol.co.uk>
  17
  18    Permission is hereby granted, free of charge, to any person obtaining
  19    a copy of this software and associated documentation files (the
  20    "Software"), to deal in the Software without restriction, including
  21    without limitation the rights to use, copy, modify, merge, publish,
  22    distribute, sublicense, and/or sell copies of the Software, and to
  23    permit persons to whom the Software is furnished to do so, subject to
  24    the following conditions:
  25
  26    The above copyright notice and this permission notice shall be
  27    included in all copies or substantial portions of the Software.
  28
  29    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  30    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  31    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  32    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  33    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  34    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  35    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  36    SOFTWARE.
  37 */
  38
  39 /*
  40   XCSoar notes:
  41   - this has not been tested with fixed math type
  42   - error conditions are not checked (printf disabled)
  43 */
  44
  45 #ifndef AC_NEWUOA_HH_
  46 #define AC_NEWUOA_HH_
  47 #include <math.h>
  48 #include <algorithm>
  49
  50 template<class TYPE, class Func>
  51 TYPE min_newuoa(int n, TYPE *x, Func &func, TYPE r_start=1e7, TYPE tol=1e-8, int max_iter=5000);
  52
  53 template<class TYPE, class Func>
  54 static int biglag_(int n, int npt, TYPE *xopt, TYPE *xpt, TYPE *bmat, TYPE *zmat, int *idz,
  55                                    int *ndim, int *knew, TYPE *delta, TYPE *d__, TYPE *alpha, TYPE *hcol, TYPE *gc,
  56                                    TYPE *gd, TYPE *s, TYPE *w, Func &func)
  57 {
  58         /* N is the number of variables. NPT is the number of interpolation
  59          * equations. XOPT is the best interpolation point so far. XPT
  60          * contains the coordinates of the current interpolation
  61          * points. BMAT provides the last N columns of H.  ZMAT and IDZ give
  62          * a factorization of the first NPT by NPT submatrix of H. NDIM is
  63          * the first dimension of BMAT and has the value NPT+N.  KNEW is the
  64          * index of the interpolation point that is going to be moved. DEBLLTA
  65          * is the current trust region bound. D will be set to the step from
  66          * XOPT to the new point. ABLLPHA will be set to the KNEW-th diagonal
  67          * element of the H matrix. HCOBLL, GC, GD, S and W will be used for
  68          * working space. */
  69         /* The step D is calculated in a way that attempts to maximize the
  70          * modulus of BLLFUNC(XOPT+D), subject to the bound ||D|| .BLLE. DEBLLTA,
  71          * where BLLFUNC is the KNEW-th BLLagrange function. */
  72
  73         int xpt_dim1, xpt_offset, bmat_dim1, bmat_offset, zmat_dim1, zmat_offset,
  74                 i__1, i__2, i__, j, k, iu, nptm, iterc, isave;
  75         TYPE sp, ss, cf1, cf2, cf3, cf4, cf5, dhd, cth, tau, sth, sum, temp, step,
  76                 angle, scale, denom, delsq, tempa, tempb, twopi, taubeg, tauold, taumax,
  77                 d__1, dd, gg;
  78
  79         /* Parameter adjustments */
  80         tempa = tempb = 0.0;
  81         zmat_dim1 = npt;
  82         zmat_offset = 1 + zmat_dim1;
  83         zmat -= zmat_offset;
  84         xpt_dim1 = npt;
  85         xpt_offset = 1 + xpt_dim1;
  86         xpt -= xpt_offset;
  87         --xopt;
  88         bmat_dim1 = *ndim;
  89         bmat_offset = 1 + bmat_dim1;
  90         bmat -= bmat_offset;
  91         --d__; --hcol; --gc; --gd; --s; --w;
  92         /* Function Body */
  93         twopi = 2.0 * M_PI;
  94         delsq = *delta * *delta;
  95         nptm = npt - n - 1;
  96         /* Set the first NPT components of HCOBLL to the leading elements of
  97          * the KNEW-th column of H. */
  98         iterc = 0;
  99         i__1 = npt;
 100         for (k = 1; k <= i__1; ++k) hcol[k] = 0;
 101         i__1 = nptm;
 102         for (j = 1; j <= i__1; ++j) {
 103                 temp = zmat[*knew + j * zmat_dim1];
 104                 if (j < *idz) temp = -temp;
 105                 i__2 = npt;
 106                 for (k = 1; k <= i__2; ++k)
 107                         hcol[k] += temp * zmat[k + j * zmat_dim1];
 108         }
 109         *alpha = hcol[*knew];
 110         /* Set the unscaled initial direction D. Form the gradient of BLLFUNC
 111          * atXOPT, and multiply D by the second derivative matrix of
 112          * BLLFUNC. */
 113         dd = 0;
 114         i__2 = n;
 115         for (i__ = 1; i__ <= i__2; ++i__) {
 116                 d__[i__] = xpt[*knew + i__ * xpt_dim1] - xopt[i__];
 117                 gc[i__] = bmat[*knew + i__ * bmat_dim1];
 118                 gd[i__] = 0;
 119                 /* Computing 2nd power */
 120                 d__1 = d__[i__];
 121                 dd += d__1 * d__1;
 122         }
 123         i__2 = npt;
 124         for (k = 1; k <= i__2; ++k) {
 125                 temp = 0;
 126                 sum = 0;
 127                 i__1 = n;
 128                 for (j = 1; j <= i__1; ++j) {
 129                         temp += xpt[k + j * xpt_dim1] * xopt[j];
 130                         sum += xpt[k + j * xpt_dim1] * d__[j];
 131                 }
 132                 temp = hcol[k] * temp;
 133                 sum = hcol[k] * sum;
 134                 i__1 = n;
 135                 for (i__ = 1; i__ <= i__1; ++i__) {
 136                         gc[i__] += temp * xpt[k + i__ * xpt_dim1];
 137                         gd[i__] += sum * xpt[k + i__ * xpt_dim1];
 138                 }
 139         }
 140         /* Scale D and GD, with a sign change if required. Set S to another
 141          * vector in the initial two dimensional subspace. */
 142         gg = sp = dhd = 0;
 143         i__1 = n;
 144         for (i__ = 1; i__ <= i__1; ++i__) {
 145                 /* Computing 2nd power */
 146                 d__1 = gc[i__];
 147                 gg += d__1 * d__1;
 148                 sp += d__[i__] * gc[i__];
 149                 dhd += d__[i__] * gd[i__];
 150         }
 151         scale = *delta / sqrt(dd);
 152         if (sp * dhd < 0) scale = -scale;
 153         temp = 0;
 154         if (sp * sp > dd * .99 * gg) temp = 1.0;
 155         tau = scale * (fabs(sp) + 0.5 * scale * fabs(dhd));
 156         if (gg * delsq < tau * .01 * tau) temp = 1.0;
 157         i__1 = n;
 158         for (i__ = 1; i__ <= i__1; ++i__) {
 159                 d__[i__] = scale * d__[i__];
 160                 gd[i__] = scale * gd[i__];
 161                 s[i__] = gc[i__] + temp * gd[i__];
 162         }
 163         /* Begin the iteration by overwriting S with a vector that has the
 164          * required length and direction, except that termination occurs if
 165          * the given D and S are nearly parallel. */
 166         for (iterc = 0; iterc != n; ++iterc) {
 167                 dd = sp = ss = 0;
 168                 i__1 = n;
 169                 for (i__ = 1; i__ <= i__1; ++i__) {
 170                         /* Computing 2nd power */
 171                         d__1 = d__[i__];
 172                         dd += d__1 * d__1;
 173                         sp += d__[i__] * s[i__];
 174                         /* Computing 2nd power */
 175                         d__1 = s[i__];
 176                         ss += d__1 * d__1;
 177                 }
 178                 temp = dd * ss - sp * sp;
 179                 if (temp <= dd * 1e-8 * ss) return 0;
 180                 denom = sqrt(temp);
 181                 i__1 = n;
 182                 for (i__ = 1; i__ <= i__1; ++i__) {
 183                         s[i__] = (dd * s[i__] - sp * d__[i__]) / denom;
 184                         w[i__] = 0;
 185                 }
 186                 /* Calculate the coefficients of the objective function on the
 187                  * circle, beginning with the multiplication of S by the second
 188                  * derivative matrix. */
 189                 i__1 = npt;
 190                 for (k = 1; k <= i__1; ++k) {
 191                         sum = 0;
 192                         i__2 = n;
 193                         for (j = 1; j <= i__2; ++j)
 194                                 sum += xpt[k + j * xpt_dim1] * s[j];
 195                         sum = hcol[k] * sum;
 196                         i__2 = n;
 197                         for (i__ = 1; i__ <= i__2; ++i__)
 198                                 w[i__] += sum * xpt[k + i__ * xpt_dim1];
 199                 }
 200                 cf1 = cf2 = cf3 = cf4 = cf5 = 0;
 201                 i__2 = n;
 202                 for (i__ = 1; i__ <= i__2; ++i__) {
 203                         cf1 += s[i__] * w[i__];
 204                         cf2 += d__[i__] * gc[i__];
 205                         cf3 += s[i__] * gc[i__];
 206                         cf4 += d__[i__] * gd[i__];
 207                         cf5 += s[i__] * gd[i__];
 208                 }
 209                 cf1 = 0.5 * cf1;
 210                 cf4 = 0.5 * cf4 - cf1;
 211                 /* Seek the value of the angle that maximizes the modulus of TAU. */
 212                 taubeg = cf1 + cf2 + cf4;
 213                 taumax = tauold = taubeg;
 214                 isave = 0;
 215                 iu = 49;
 216                 temp = twopi / (TYPE) (iu + 1);
 217                 i__2 = iu;
 218                 for (i__ = 1; i__ <= i__2; ++i__) {
 219                         angle = (TYPE) i__ *temp;
 220                         cth = cos(angle);
 221                         sth = sin(angle);
 222                         tau = cf1 + (cf2 + cf4 * cth) * cth + (cf3 + cf5 * cth) * sth;
 223                         if (fabs(tau) > fabs(taumax)) {
 224                                 taumax = tau;
 225                                 isave = i__;
 226                                 tempa = tauold;
 227                         } else if (i__ == isave + 1) tempb = tau;
 228                         tauold = tau;
 229                 }
 230                 if (isave == 0) tempa = tau;
 231                 if (isave == iu) tempb = taubeg;
 232                 step = 0;
 233                 if (tempa != tempb) {
 234                         tempa -= taumax;
 235                         tempb -= taumax;
 236                         step = 0.5 * (tempa - tempb) / (tempa + tempb);
 237                 }
 238                 angle = temp * ((TYPE) isave + step);
 239                 /* Calculate the new D and GD. Then test for convergence. */
 240                 cth = cos(angle);
 241                 sth = sin(angle);
 242                 tau = cf1 + (cf2 + cf4 * cth) * cth + (cf3 + cf5 * cth) * sth;
 243                 i__2 = n;
 244                 for (i__ = 1; i__ <= i__2; ++i__) {
 245                         d__[i__] = cth * d__[i__] + sth * s[i__];
 246                         gd[i__] = cth * gd[i__] + sth * w[i__];
 247                         s[i__] = gc[i__] + gd[i__];
 248                 }
 249                 if (fabs(tau) <= fabs(taubeg) * 1.1) return 0;
 250         }
 251         return 0;
 252 }
 253
 254 template<class TYPE>
 255 static int bigden_(int n, int npt, TYPE *xopt, TYPE *xpt, TYPE *bmat, TYPE *zmat, int *idz,
 256                                    int *ndim, int *kopt, int *knew, TYPE *d__, TYPE *w, TYPE *vlag, TYPE *beta,
 257                                    TYPE *s, TYPE *wvec, TYPE *prod)
 258 {
 259         /* N is the number of variables.
 260          * NPT is the number of interpolation equations.
 261          * XOPT is the best interpolation point so far.
 262          * XPT contains the coordinates of the current interpolation points.
 263          * BMAT provides the last N columns of H.
 264          * ZMAT and IDZ give a factorization of the first NPT by NPT
 265            submatrix of H.
 266          * NDIM is the first dimension of BMAT and has the value NPT+N.
 267          * KOPT is the index of the optimal interpolation point.
 268          * KNEW is the index of the interpolation point that is going to be
 269            moved.
 270          * D will be set to the step from XOPT to the new point, and on
 271            entry it should be the D that was calculated by the last call of
 272            BIGBDLAG. The length of the initial D provides a trust region bound
 273            on the final D.
 274          * W will be set to Wcheck for the final choice of D.
 275          * VBDLAG will be set to Theta*Wcheck+e_b for the final choice of D.
 276          * BETA will be set to the value that will occur in the updating
 277            formula when the KNEW-th interpolation point is moved to its new
 278            position.
 279          * S, WVEC, PROD and the private arrays DEN, DENEX and PAR will be
 280            used for working space.
 281          * D is calculated in a way that should provide a denominator with a
 282            large modulus in the updating formula when the KNEW-th
 283            interpolation point is shifted to the new position XOPT+D. */
 284
 285         int xpt_dim1, xpt_offset, bmat_dim1, bmat_offset, zmat_dim1, zmat_offset,
 286                 wvec_dim1, wvec_offset, prod_dim1, prod_offset, i__1, i__2, i__, j, k,
 287                 isave, iterc, jc, ip, iu, nw, ksav, nptm;
 288         TYPE dd, d__1, ds, ss, den[9], par[9], tau, sum, diff, temp, step,
 289                 alpha, angle, denex[9], tempa, tempb, tempc, ssden, dtest, xoptd,
 290                 twopi, xopts, denold, denmax, densav, dstemp, sumold, sstemp, xoptsq;
 291
 292         /* Parameter adjustments */
 293         zmat_dim1 = npt;
 294         zmat_offset = 1 + zmat_dim1;
 295         zmat -= zmat_offset;
 296         xpt_dim1 = npt;
 297         xpt_offset = 1 + xpt_dim1;
 298         xpt -= xpt_offset;
 299         --xopt;
 300         prod_dim1 = *ndim;
 301         prod_offset = 1 + prod_dim1;
 302         prod -= prod_offset;
 303         wvec_dim1 = *ndim;
 304         wvec_offset = 1 + wvec_dim1;
 305         wvec -= wvec_offset;
 306         bmat_dim1 = *ndim;
 307         bmat_offset = 1 + bmat_dim1;
 308         bmat -= bmat_offset;
 309         --d__; --w; --vlag; --s;
 310         /* Function Body */
 311         twopi = atan(1.0) * 8.;
 312         nptm = npt - n - 1;
 313         /* Store the first NPT elements of the KNEW-th column of H in W(N+1)
 314          * to W(N+NPT). */
 315         i__1 = npt;
 316         for (k = 1; k <= i__1; ++k) w[n + k] = 0;
 317         i__1 = nptm;
 318         for (j = 1; j <= i__1; ++j) {
 319                 temp = zmat[*knew + j * zmat_dim1];
 320                 if (j < *idz) temp = -temp;
 321                 i__2 = npt;
 322                 for (k = 1; k <= i__2; ++k)
 323                         w[n + k] += temp * zmat[k + j * zmat_dim1];
 324         }
 325         alpha = w[n + *knew];
 326         /* The initial search direction D is taken from the last call of
 327          * BIGBDLAG, and the initial S is set below, usually to the direction
 328          * from X_OPT to X_KNEW, but a different direction to an
 329          * interpolation point may be chosen, in order to prevent S from
 330          * being nearly parallel to D. */
 331         dd = ds = ss = xoptsq = 0;
 332         i__2 = n;
 333         for (i__ = 1; i__ <= i__2; ++i__) {
 334                 /* Computing 2nd power */
 335                 d__1 = d__[i__];
 336                 dd += d__1 * d__1;
 337                 s[i__] = xpt[*knew + i__ * xpt_dim1] - xopt[i__];
 338                 ds += d__[i__] * s[i__];
 339                 /* Computing 2nd power */
 340                 d__1 = s[i__];
 341                 ss += d__1 * d__1;
 342                 /* Computing 2nd power */
 343                 d__1 = xopt[i__];
 344                 xoptsq += d__1 * d__1;
 345         }
 346         if (ds * ds > dd * .99 * ss) {
 347                 ksav = *knew;
 348                 dtest = ds * ds / ss;
 349                 i__2 = npt;
 350                 for (k = 1; k <= i__2; ++k) {
 351                         if (k != *kopt) {
 352                                 dstemp = 0;
 353                                 sstemp = 0;
 354                                 i__1 = n;
 355                                 for (i__ = 1; i__ <= i__1; ++i__) {
 356                                         diff = xpt[k + i__ * xpt_dim1] - xopt[i__];
 357                                         dstemp += d__[i__] * diff;
 358                                         sstemp += diff * diff;
 359                                 }
 360                                 if (dstemp * dstemp / sstemp < dtest) {
 361                                         ksav = k;
 362                                         dtest = dstemp * dstemp / sstemp;
 363                                         ds = dstemp;
 364                                         ss = sstemp;
 365                                 }
 366                         }
 367                 }
 368                 i__2 = n;
 369                 for (i__ = 1; i__ <= i__2; ++i__)
 370                         s[i__] = xpt[ksav + i__ * xpt_dim1] - xopt[i__];
 371         }
 372         ssden = dd * ss - ds * ds;
 373         iterc = 0;
 374         densav = 0;
 375         /* Begin the iteration by overwriting S with a vector that has the
 376          * required length and direction. */
 377 BDL70:
 378         ++iterc;
 379         temp = 1.0 / sqrt(ssden);
 380         xoptd = xopts = 0;
 381         i__2 = n;
 382         for (i__ = 1; i__ <= i__2; ++i__) {
 383                 s[i__] = temp * (dd * s[i__] - ds * d__[i__]);
 384                 xoptd += xopt[i__] * d__[i__];
 385                 xopts += xopt[i__] * s[i__];
 386         }
 387         /* Set the coefficients of the first 2.0 terms of BETA. */
 388         tempa = 0.5 * xoptd * xoptd;
 389         tempb = 0.5 * xopts * xopts;
 390         den[0] = dd * (xoptsq + 0.5 * dd) + tempa + tempb;
 391         den[1] = 2.0 * xoptd * dd;
 392         den[2] = 2.0 * xopts * dd;
 393         den[3] = tempa - tempb;
 394         den[4] = xoptd * xopts;
 395         for (i__ = 6; i__ <= 9; ++i__) den[i__ - 1] = 0;
 396         /* Put the coefficients of Wcheck in WVEC. */
 397         i__2 = npt;
 398         for (k = 1; k <= i__2; ++k) {
 399                 tempa = tempb = tempc = 0;
 400                 i__1 = n;
 401                 for (i__ = 1; i__ <= i__1; ++i__) {
 402                         tempa += xpt[k + i__ * xpt_dim1] * d__[i__];
 403                         tempb += xpt[k + i__ * xpt_dim1] * s[i__];
 404                         tempc += xpt[k + i__ * xpt_dim1] * xopt[i__];
 405                 }
 406                 wvec[k + wvec_dim1] = 0.25 * (tempa * tempa + tempb * tempb);
 407                 wvec[k + (wvec_dim1 << 1)] = tempa * tempc;
 408                 wvec[k + wvec_dim1 * 3] = tempb * tempc;
 409                 wvec[k + (wvec_dim1 << 2)] = 0.25 * (tempa * tempa - tempb * tempb);
 410                 wvec[k + wvec_dim1 * 5] = 0.5 * tempa * tempb;
 411         }
 412         i__2 = n;
 413         for (i__ = 1; i__ <= i__2; ++i__) {
 414                 ip = i__ + npt;
 415                 wvec[ip + wvec_dim1] = 0;
 416                 wvec[ip + (wvec_dim1 << 1)] = d__[i__];
 417                 wvec[ip + wvec_dim1 * 3] = s[i__];
 418                 wvec[ip + (wvec_dim1 << 2)] = 0;
 419                 wvec[ip + wvec_dim1 * 5] = 0;
 420         }
 421         /* Put the coefficents of THETA*Wcheck in PROD. */
 422         for (jc = 1; jc <= 5; ++jc) {
 423                 nw = npt;
 424                 if (jc == 2 || jc == 3) nw = *ndim;
 425                 i__2 = npt;
 426                 for (k = 1; k <= i__2; ++k) prod[k + jc * prod_dim1] = 0;
 427                 i__2 = nptm;
 428                 for (j = 1; j <= i__2; ++j) {
 429                         sum = 0;
 430                         i__1 = npt;
 431                         for (k = 1; k <= i__1; ++k) sum += zmat[k + j * zmat_dim1] * wvec[k + jc * wvec_dim1];
 432                         if (j < *idz) sum = -sum;
 433                         i__1 = npt;
 434                         for (k = 1; k <= i__1; ++k)
 435                                 prod[k + jc * prod_dim1] += sum * zmat[k + j * zmat_dim1];
 436                 }
 437                 if (nw == *ndim) {
 438                         i__1 = npt;
 439                         for (k = 1; k <= i__1; ++k) {
 440                                 sum = 0;
 441                                 i__2 = n;
 442                                 for (j = 1; j <= i__2; ++j)
 443                                         sum += bmat[k + j * bmat_dim1] * wvec[npt + j + jc * wvec_dim1];
 444                                 prod[k + jc * prod_dim1] += sum;
 445                         }
 446                 }
 447                 i__1 = n;
 448                 for (j = 1; j <= i__1; ++j) {
 449                         sum = 0;
 450                         i__2 = nw;
 451                         for (i__ = 1; i__ <= i__2; ++i__)
 452                                 sum += bmat[i__ + j * bmat_dim1] * wvec[i__ + jc * wvec_dim1];
 453                         prod[npt + j + jc * prod_dim1] = sum;
 454                 }
 455         }
 456         /* Include in DEN the part of BETA that depends on THETA. */
 457         i__1 = *ndim;
 458         for (k = 1; k <= i__1; ++k) {
 459                 sum = 0;
 460                 for (i__ = 1; i__ <= 5; ++i__) {
 461                         par[i__ - 1] = 0.5 * prod[k + i__ * prod_dim1] * wvec[k + i__ * wvec_dim1];
 462                         sum += par[i__ - 1];
 463                 }
 464                 den[0] = den[0] - par[0] - sum;
 465                 tempa = prod[k + prod_dim1] * wvec[k + (wvec_dim1 << 1)] + prod[k + (
 466                                      prod_dim1 << 1)] * wvec[k + wvec_dim1];
 467                 tempb = prod[k + (prod_dim1 << 1)] * wvec[k + (wvec_dim1 << 2)] +
 468                         prod[k + (prod_dim1 << 2)] * wvec[k + (wvec_dim1 << 1)];
 469                 tempc = prod[k + prod_dim1 * 3] * wvec[k + wvec_dim1 * 5] + prod[k +
 470                                    prod_dim1 * 5] * wvec[k + wvec_dim1 * 3];
 471                 den[1] = den[1] - tempa - 0.5 * (tempb + tempc);
 472                 den[5] -= 0.5 * (tempb - tempc);
 473                 tempa = prod[k + prod_dim1] * wvec[k + wvec_dim1 * 3] + prod[k +
 474                                        prod_dim1 * 3] * wvec[k + wvec_dim1];
 475                 tempb = prod[k + (prod_dim1 << 1)] * wvec[k + wvec_dim1 * 5] + prod[k
 476                               + prod_dim1 * 5] * wvec[k + (wvec_dim1 << 1)];
 477                 tempc = prod[k + prod_dim1 * 3] * wvec[k + (wvec_dim1 << 2)] + prod[k
 478                               + (prod_dim1 << 2)] * wvec[k + wvec_dim1 * 3];
 479                 den[2] = den[2] - tempa - 0.5 * (tempb - tempc);
 480                 den[6] -= 0.5 * (tempb + tempc);
 481                 tempa = prod[k + prod_dim1] * wvec[k + (wvec_dim1 << 2)] + prod[k + (
 482                                      prod_dim1 << 2)] * wvec[k + wvec_dim1];
 483                 den[3] = den[3] - tempa - par[1] + par[2];
 484                 tempa = prod[k + prod_dim1] * wvec[k + wvec_dim1 * 5] + prod[k +
 485                                        prod_dim1 * 5] * wvec[k + wvec_dim1];
 486                 tempb = prod[k + (prod_dim1 << 1)] * wvec[k + wvec_dim1 * 3] + prod[k
 487                               + prod_dim1 * 3] * wvec[k + (wvec_dim1 << 1)];
 488                 den[4] = den[4] - tempa - 0.5 * tempb;
 489                 den[7] = den[7] - par[3] + par[4];
 490                 tempa = prod[k + (prod_dim1 << 2)] * wvec[k + wvec_dim1 * 5] + prod[k
 491                               + prod_dim1 * 5] * wvec[k + (wvec_dim1 << 2)];
 492                 den[8] -= 0.5 * tempa;
 493         }
 494         /* Extend DEN so that it holds all the coefficients of DENOM. */
 495         sum = 0;
 496         for (i__ = 1; i__ <= 5; ++i__) {
 497                 /* Computing 2nd power */
 498                 d__1 = prod[*knew + i__ * prod_dim1];
 499                 par[i__ - 1] = 0.5 * (d__1 * d__1);
 500                 sum += par[i__ - 1];
 501         }
 502         denex[0] = alpha * den[0] + par[0] + sum;
 503         tempa = 2.0 * prod[*knew + prod_dim1] * prod[*knew + (prod_dim1 << 1)];
 504         tempb = prod[*knew + (prod_dim1 << 1)] * prod[*knew + (prod_dim1 << 2)];
 505         tempc = prod[*knew + prod_dim1 * 3] * prod[*knew + prod_dim1 * 5];
 506         denex[1] = alpha * den[1] + tempa + tempb + tempc;
 507         denex[5] = alpha * den[5] + tempb - tempc;
 508         tempa = 2.0 * prod[*knew + prod_dim1] * prod[*knew + prod_dim1 * 3];
 509         tempb = prod[*knew + (prod_dim1 << 1)] * prod[*knew + prod_dim1 * 5];
 510         tempc = prod[*knew + prod_dim1 * 3] * prod[*knew + (prod_dim1 << 2)];
 511         denex[2] = alpha * den[2] + tempa + tempb - tempc;
 512         denex[6] = alpha * den[6] + tempb + tempc;
 513         tempa = 2.0 * prod[*knew + prod_dim1] * prod[*knew + (prod_dim1 << 2)];
 514         denex[3] = alpha * den[3] + tempa + par[1] - par[2];
 515         tempa = 2.0 * prod[*knew + prod_dim1] * prod[*knew + prod_dim1 * 5];
 516         denex[4] = alpha * den[4] + tempa + prod[*knew + (prod_dim1 << 1)] * prod[
 517                                                      *knew + prod_dim1 * 3];
 518         denex[7] = alpha * den[7] + par[3] - par[4];
 519         denex[8] = alpha * den[8] + prod[*knew + (prod_dim1 << 2)] * prod[*knew +
 520                                                              prod_dim1 * 5];
 521         /* Seek the value of the angle that maximizes the modulus of DENOM. */
 522         sum = denex[0] + denex[1] + denex[3] + denex[5] + denex[7];
 523         denold = denmax = sum;
 524         isave = 0;
 525         iu = 49;
 526         temp = twopi / (TYPE) (iu + 1);
 527         par[0] = 1.0;
 528         i__1 = iu;
 529         for (i__ = 1; i__ <= i__1; ++i__) {
 530                 angle = (TYPE) i__ *temp;
 531                 par[1] = cos(angle);
 532                 par[2] = sin(angle);
 533                 for (j = 4; j <= 8; j += 2) {
 534                         par[j - 1] = par[1] * par[j - 3] - par[2] * par[j - 2];
 535                         par[j] = par[1] * par[j - 2] + par[2] * par[j - 3];
 536                 }
 537                 sumold = sum;
 538                 sum = 0;
 539                 for (j = 1; j <= 9; ++j)
 540                         sum += denex[j - 1] * par[j - 1];
 541                 if (fabs(sum) > fabs(denmax)) {
 542                         denmax = sum;
 543                         isave = i__;
 544                         tempa = sumold;
 545                 } else if (i__ == isave + 1) {
 546                         tempb = sum;
 547                 }
 548         }
 549         if (isave == 0) tempa = sum;
 550         if (isave == iu) tempb = denold;
 551         step = 0;
 552         if (tempa != tempb) {
 553                 tempa -= denmax;
 554                 tempb -= denmax;
 555                 step = 0.5 * (tempa - tempb) / (tempa + tempb);
 556         }
 557         angle = temp * ((TYPE) isave + step);
 558         /* Calculate the new parameters of the denominator, the new VBDLAG
 559          * vector and the new D. Then test for convergence. */
 560         par[1] = cos(angle);
 561         par[2] = sin(angle);
 562         for (j = 4; j <= 8; j += 2) {
 563                 par[j - 1] = par[1] * par[j - 3] - par[2] * par[j - 2];
 564                 par[j] = par[1] * par[j - 2] + par[2] * par[j - 3];
 565         }
 566         *beta = 0;
 567         denmax = 0;
 568         for (j = 1; j <= 9; ++j) {
 569                 *beta += den[j - 1] * par[j - 1];
 570                 denmax += denex[j - 1] * par[j - 1];
 571         }
 572         i__1 = *ndim;
 573         for (k = 1; k <= i__1; ++k) {
 574                 vlag[k] = 0;
 575                 for (j = 1; j <= 5; ++j)
 576                         vlag[k] += prod[k + j * prod_dim1] * par[j - 1];
 577         }
 578         tau = vlag[*knew];
 579         dd = tempa = tempb = 0;
 580         i__1 = n;
 581         for (i__ = 1; i__ <= i__1; ++i__) {
 582                 d__[i__] = par[1] * d__[i__] + par[2] * s[i__];
 583                 w[i__] = xopt[i__] + d__[i__];
 584                 /* Computing 2nd power */
 585                 d__1 = d__[i__];
 586                 dd += d__1 * d__1;
 587                 tempa += d__[i__] * w[i__];
 588                 tempb += w[i__] * w[i__];
 589         }
 590         if (iterc >= n) goto BDL340;
 591         if (iterc > 1) densav = max(densav, denold);
 592         if (fabs(denmax) <= fabs(densav) * 1.1) goto BDL340;
 593         densav = denmax;
 594         /* Set S to 0.5 the gradient of the denominator with respect to
 595          * D. Then branch for the next iteration. */
 596         i__1 = n;
 597         for (i__ = 1; i__ <= i__1; ++i__) {
 598                 temp = tempa * xopt[i__] + tempb * d__[i__] - vlag[npt + i__];
 599                 s[i__] = tau * bmat[*knew + i__ * bmat_dim1] + alpha * temp;
 600         }
 601         i__1 = npt;
 602         for (k = 1; k <= i__1; ++k) {
 603                 sum = 0;
 604                 i__2 = n;
 605                 for (j = 1; j <= i__2; ++j)
 606                         sum += xpt[k + j * xpt_dim1] * w[j];
 607                 temp = (tau * w[n + k] - alpha * vlag[k]) * sum;
 608                 i__2 = n;
 609                 for (i__ = 1; i__ <= i__2; ++i__)
 610                         s[i__] += temp * xpt[k + i__ * xpt_dim1];
 611         }
 612         ss = 0;
 613         ds = 0;
 614         i__2 = n;
 615         for (i__ = 1; i__ <= i__2; ++i__) {
 616                 /* Computing 2nd power */
 617                 d__1 = s[i__];
 618                 ss += d__1 * d__1;
 619                 ds += d__[i__] * s[i__];
 620         }
 621         ssden = dd * ss - ds * ds;
 622         if (ssden >= dd * 1e-8 * ss) goto BDL70;
 623         /* Set the vector W before the RETURN from the subroutine. */
 624 BDL340:
 625         i__2 = *ndim;
 626         for (k = 1; k <= i__2; ++k) {
 627                 w[k] = 0;
 628                 for (j = 1; j <= 5; ++j) w[k] += wvec[k + j * wvec_dim1] * par[j - 1];
 629         }
 630         vlag[*kopt] += 1.0;
 631         return 0;
 632 }
 633
 634 template<class TYPE>
 635 int trsapp_(int n, int npt, TYPE * xopt, TYPE * xpt, TYPE * gq, TYPE * hq, TYPE * pq,
 636                         TYPE * delta, TYPE * step, TYPE * d__, TYPE * g, TYPE * hd, TYPE * hs, TYPE * crvmin)
 637 {
 638         /* The arguments NPT, XOPT, XPT, GQ, HQ and PQ have their usual
 639          * meanings, in order to define the current quadratic model Q.
 640          * DETRLTA is the trust region radius, and has to be positive. STEP
 641          * will be set to the calculated trial step. The arrays D, G, HD and
 642          * HS will be used for working space. CRVMIN will be set to the
 643          * least curvature of H aint the conjugate directions that occur,
 644          * except that it is set to 0 if STEP goes all the way to the trust
 645          * region boundary. The calculation of STEP begins with the
 646          * truncated conjugate gradient method. If the boundary of the trust
 647          * region is reached, then further changes to STEP may be made, each
 648          * one being in the 2D space spanned by the current STEP and the
 649          * corresponding gradient of Q. Thus STEP should provide a
 650          * substantial reduction to Q within the trust region. */
 651
 652         int xpt_dim1, xpt_offset, i__1, i__2, i__, j, k, ih, iu, iterc,
 653                 isave, itersw, itermax;
 654         TYPE d__1, d__2, dd, cf, dg, gg, ds, sg, ss, dhd, dhs,
 655                 cth, sgk, shs, sth, qadd, qbeg, qred, qmin, temp,
 656                 qsav, qnew, ggbeg, alpha, angle, reduc, ggsav, delsq,
 657                 tempa, tempb, bstep, ratio, twopi, angtest;
 658
 659         /* Parameter adjustments */
 660         tempa = tempb = shs = sg = bstep = ggbeg = gg = qred = dd = 0.0;
 661         xpt_dim1 = npt;
 662         xpt_offset = 1 + xpt_dim1;
 663         xpt -= xpt_offset;
 664         --xopt; --gq; --hq; --pq; --step; --d__; --g; --hd; --hs;
 665         /* Function Body */
 666         twopi = 2.0 * M_PI;
 667         delsq = *delta * *delta;
 668         iterc = 0;
 669         itermax = n;
 670         itersw = itermax;
 671         i__1 = n;
 672         for (i__ = 1; i__ <= i__1; ++i__) d__[i__] = xopt[i__];
 673         goto TRL170;
 674         /* Prepare for the first line search. */
 675 TRL20:
 676         qred = dd = 0;
 677         i__1 = n;
 678         for (i__ = 1; i__ <= i__1; ++i__) {
 679                 step[i__] = 0;
 680                 hs[i__] = 0;
 681                 g[i__] = gq[i__] + hd[i__];
 682                 d__[i__] = -g[i__];
 683                 /* Computing 2nd power */
 684                 d__1 = d__[i__];
 685                 dd += d__1 * d__1;
 686         }
 687         *crvmin = 0;
 688         if (dd == 0) goto TRL160;
 689         ds = ss = 0;
 690         gg = dd;
 691         ggbeg = gg;
 692         /* Calculate the step to the trust region boundary and the product
 693          * HD. */
 694 TRL40:
 695         ++iterc;
 696         temp = delsq - ss;
 697         bstep = temp / (ds + sqrt(ds * ds + dd * temp));
 698         goto TRL170;
 699 TRL50:
 700         dhd = 0;
 701         i__1 = n;
 702         for (j = 1; j <= i__1; ++j) dhd += d__[j] * hd[j];
 703         /* Update CRVMIN and set the step-length ATRLPHA. */
 704         alpha = bstep;
 705         if (dhd > 0) {
 706                 temp = dhd / dd;
 707                 if (iterc == 1) *crvmin = temp;
 708                 *crvmin = min(*crvmin, temp);
 709                 /* Computing MIN */
 710                 d__1 = alpha, d__2 = gg / dhd;
 711                 alpha = min(d__1, d__2);
 712         }
 713         qadd = alpha * (gg - 0.5 * alpha * dhd);
 714         qred += qadd;
 715         /* Update STEP and HS. */
 716         ggsav = gg;
 717         gg = 0;
 718         i__1 = n;
 719         for (i__ = 1; i__ <= i__1; ++i__) {
 720                 step[i__] += alpha * d__[i__];
 721                 hs[i__] += alpha * hd[i__];
 722                 /* Computing 2nd power */
 723                 d__1 = g[i__] + hs[i__];
 724                 gg += d__1 * d__1;
 725         }
 726         /* Begin another conjugate direction iteration if required. */
 727         if (alpha < bstep) {
 728                 if (qadd <= qred * .01 || gg <= ggbeg * 1e-4 || iterc == itermax) goto TRL160;
 729                 temp = gg / ggsav;
 730                 dd = ds = ss = 0;
 731                 i__1 = n;
 732                 for (i__ = 1; i__ <= i__1; ++i__) {
 733                         d__[i__] = temp * d__[i__] - g[i__] - hs[i__];
 734                         /* Computing 2nd power */
 735                         d__1 = d__[i__];
 736                         dd += d__1 * d__1;
 737                         ds += d__[i__] * step[i__];
 738                         /* Computing 2nd power */
 739                         d__1 = step[i__];
 740                         ss += d__1 * d__1;
 741                 }
 742                 if (ds <= 0) goto TRL160;
 743                 if (ss < delsq) goto TRL40;
 744         }
 745         *crvmin = 0;
 746         itersw = iterc;
 747         /* Test whether an alternative iteration is required. */
 748 TRL90:
 749         if (gg <= ggbeg * 1e-4) goto TRL160;
 750         sg = 0;
 751         shs = 0;
 752         i__1 = n;
 753         for (i__ = 1; i__ <= i__1; ++i__) {
 754                 sg += step[i__] * g[i__];
 755                 shs += step[i__] * hs[i__];
 756         }
 757         sgk = sg + shs;
 758         angtest = sgk / sqrt(gg * delsq);
 759         if (angtest <= -.99) goto TRL160;
 760         /* Begin the alternative iteration by calculating D and HD and some
 761          * scalar products. */
 762         ++iterc;
 763         temp = sqrt(delsq * gg - sgk * sgk);
 764         tempa = delsq / temp;
 765         tempb = sgk / temp;
 766         i__1 = n;
 767         for (i__ = 1; i__ <= i__1; ++i__)
 768                 d__[i__] = tempa * (g[i__] + hs[i__]) - tempb * step[i__];
 769         goto TRL170;
 770 TRL120:
 771         dg = dhd = dhs = 0;
 772         i__1 = n;
 773         for (i__ = 1; i__ <= i__1; ++i__) {
 774                 dg += d__[i__] * g[i__];
 775                 dhd += hd[i__] * d__[i__];
 776                 dhs += hd[i__] * step[i__];
 777         }
 778         /* Seek the value of the angle that minimizes Q. */
 779         cf = 0.5 * (shs - dhd);
 780         qbeg = sg + cf;
 781         qsav = qmin = qbeg;
 782         isave = 0;
 783         iu = 49;
 784         temp = twopi / (TYPE) (iu + 1);
 785         i__1 = iu;
 786         for (i__ = 1; i__ <= i__1; ++i__) {
 787                 angle = (TYPE) i__ *temp;
 788                 cth = cos(angle);
 789                 sth = sin(angle);
 790                 qnew = (sg + cf * cth) * cth + (dg + dhs * cth) * sth;
 791                 if (qnew < qmin) {
 792                         qmin = qnew;
 793                         isave = i__;
 794                         tempa = qsav;
 795                 } else if (i__ == isave + 1) tempb = qnew;
 796                 qsav = qnew;
 797         }
 798         if ((TYPE) isave == 0) tempa = qnew;
 799         if (isave == iu) tempb = qbeg;
 800         angle = 0;
 801         if (tempa != tempb) {
 802                 tempa -= qmin;
 803                 tempb -= qmin;
 804                 angle = 0.5 * (tempa - tempb) / (tempa + tempb);
 805         }
 806         angle = temp * ((TYPE) isave + angle);
 807         /* Calculate the new STEP and HS. Then test for convergence. */
 808         cth = cos(angle);
 809         sth = sin(angle);
 810         reduc = qbeg - (sg + cf * cth) * cth - (dg + dhs * cth) * sth;
 811         gg = 0;
 812         i__1 = n;
 813         for (i__ = 1; i__ <= i__1; ++i__) {
 814                 step[i__] = cth * step[i__] + sth * d__[i__];
 815                 hs[i__] = cth * hs[i__] + sth * hd[i__];
 816                 /* Computing 2nd power */
 817                 d__1 = g[i__] + hs[i__];
 818                 gg += d__1 * d__1;
 819         }
 820         qred += reduc;
 821         ratio = reduc / qred;
 822         if (iterc < itermax && ratio > .01) goto TRL90;
 823 TRL160:
 824         return 0;
 825         /* The following instructions act as a subroutine for setting the
 826          * vector HD to the vector D multiplied by the second derivative
 827          * matrix of Q.  They are called from three different places, which
 828          * are distinguished by the value of ITERC. */
 829 TRL170:
 830         i__1 = n;
 831         for (i__ = 1; i__ <= i__1; ++i__) hd[i__] = 0;
 832         i__1 = npt;
 833         for (k = 1; k <= i__1; ++k) {
 834                 temp = 0;
 835                 i__2 = n;
 836                 for (j = 1; j <= i__2; ++j)
 837                         temp += xpt[k + j * xpt_dim1] * d__[j];
 838                 temp *= pq[k];
 839                 i__2 = n;
 840                 for (i__ = 1; i__ <= i__2; ++i__)
 841                         hd[i__] += temp * xpt[k + i__ * xpt_dim1];
 842         }
 843         ih = 0;
 844         i__2 = n;
 845         for (j = 1; j <= i__2; ++j) {
 846                 i__1 = j;
 847                 for (i__ = 1; i__ <= i__1; ++i__) {
 848                         ++ih;
 849                         if (i__ < j) hd[j] += hq[ih] * d__[i__];
 850                         hd[i__] += hq[ih] * d__[j];
 851                 }
 852         }
 853         if (iterc == 0) goto TRL20;
 854         if (iterc <= itersw) goto TRL50;
 855         goto TRL120;
 856 }
 857
 858 template<class TYPE>
 859 static int update_(int n, int npt, TYPE *bmat, TYPE *zmat, int *idz, int *ndim, TYPE *vlag,
 860                                    TYPE *beta, int *knew, TYPE *w)
 861 {
 862         /* The arrays BMAT and ZMAT with IDZ are updated, in order to shift
 863          * the interpolation point that has index KNEW. On entry, VLAG
 864          * contains the components of the vector Theta*Wcheck+e_b of the
 865          * updating formula (6.11), and BETA holds the value of the
 866          * parameter that has this name. The vector W is used for working
 867          * space. */
 868
 869         int bmat_dim1, bmat_offset, zmat_dim1, zmat_offset, i__1, i__2, i__,
 870                 j, ja, jb, jl, jp, nptm, iflag;
 871         TYPE d__1, d__2, tau, temp, scala, scalb, alpha, denom, tempa, tempb, tausq;
 872
 873         /* Parameter adjustments */
 874         tempb = 0.0;
 875         zmat_dim1 = npt;
 876         zmat_offset = 1 + zmat_dim1;
 877         zmat -= zmat_offset;
 878         bmat_dim1 = *ndim;
 879         bmat_offset = 1 + bmat_dim1;
 880         bmat -= bmat_offset;
 881         --vlag;
 882         --w;
 883         /* Function Body */
 884         nptm = npt - n - 1;
 885         /* Apply the rotations that put zeros in the KNEW-th row of ZMAT. */
 886         jl = 1;
 887         i__1 = nptm;
 888         for (j = 2; j <= i__1; ++j) {
 889                 if (j == *idz) {
 890                         jl = *idz;
 891                 } else if (zmat[*knew + j * zmat_dim1] != 0) {
 892                         /* Computing 2nd power */
 893                         d__1 = zmat[*knew + jl * zmat_dim1];
 894                         /* Computing 2nd power */
 895                         d__2 = zmat[*knew + j * zmat_dim1];
 896                         temp = sqrt(d__1 * d__1 + d__2 * d__2);
 897                         tempa = zmat[*knew + jl * zmat_dim1] / temp;
 898                         tempb = zmat[*knew + j * zmat_dim1] / temp;
 899                         i__2 = npt;
 900                         for (i__ = 1; i__ <= i__2; ++i__) {
 901                                 temp = tempa * zmat[i__ + jl * zmat_dim1] + tempb * zmat[i__
 902                                                            + j * zmat_dim1];
 903                                 zmat[i__ + j * zmat_dim1] = tempa * zmat[i__ + j * zmat_dim1]
 904                                         - tempb * zmat[i__ + jl * zmat_dim1];
 905                                 zmat[i__ + jl * zmat_dim1] = temp;
 906                         }
 907                         zmat[*knew + j * zmat_dim1] = 0;
 908                 }
 909         }
 910         /* Put the first NPT components of the KNEW-th column of HLAG into
 911          * W, and calculate the parameters of the updating formula. */
 912         tempa = zmat[*knew + zmat_dim1];
 913         if (*idz >= 2) tempa = -tempa;
 914         if (jl > 1) tempb = zmat[*knew + jl * zmat_dim1];
 915         i__1 = npt;
 916         for (i__ = 1; i__ <= i__1; ++i__) {
 917                 w[i__] = tempa * zmat[i__ + zmat_dim1];
 918                 if (jl > 1) w[i__] += tempb * zmat[i__ + jl * zmat_dim1];
 919         }
 920         alpha = w[*knew];
 921         tau = vlag[*knew];
 922         tausq = tau * tau;
 923         denom = alpha * *beta + tausq;
 924         vlag[*knew] -= 1.0;
 925         /* Complete the updating of ZMAT when there is only 1.0 nonzero
 926          * element in the KNEW-th row of the new matrix ZMAT, but, if IFLAG
 927          * is set to 1.0, then the first column of ZMAT will be exchanged
 928          * with another 1.0 later. */
 929         iflag = 0;
 930         if (jl == 1) {
 931                 temp = sqrt((fabs(denom)));
 932                 tempb = tempa / temp;
 933                 tempa = tau / temp;
 934                 i__1 = npt;
 935                 for (i__ = 1; i__ <= i__1; ++i__)
 936                         zmat[i__ + zmat_dim1] = tempa * zmat[i__ + zmat_dim1] - tempb *
 937                                 vlag[i__];
 938                 if (*idz == 1 && temp < 0) *idz = 2;
 939                 if (*idz >= 2 && temp >= 0) iflag = 1;
 940         } else {
 941                 /* Complete the updating of ZMAT in the alternative case. */
 942                 ja = 1;
 943                 if (*beta >= 0) {
 944                         ja = jl;
 945                 }
 946                 jb = jl + 1 - ja;
 947                 temp = zmat[*knew + jb * zmat_dim1] / denom;
 948                 tempa = temp * *beta;
 949                 tempb = temp * tau;
 950                 temp = zmat[*knew + ja * zmat_dim1];
 951                 scala = 1.0 / sqrt(fabs(*beta) * temp * temp + tausq);
 952                 scalb = scala * sqrt((fabs(denom)));
 953                 i__1 = npt;
 954                 for (i__ = 1; i__ <= i__1; ++i__) {
 955                         zmat[i__ + ja * zmat_dim1] = scala * (tau * zmat[i__ + ja *
 956                                              zmat_dim1] - temp * vlag[i__]);
 957                         zmat[i__ + jb * zmat_dim1] = scalb * (zmat[i__ + jb * zmat_dim1]
 958                                       - tempa * w[i__] - tempb * vlag[i__]);
 959                 }
 960                 if (denom <= 0) {
 961                         if (*beta < 0) ++(*idz);
 962                         if (*beta >= 0) iflag = 1;
 963                 }
 964         }
 965         /* IDZ is reduced in the following case, and usually the first
 966          * column of ZMAT is exchanged with a later 1.0. */
 967         if (iflag == 1) {
 968                 --(*idz);
 969                 i__1 = npt;
 970                 for (i__ = 1; i__ <= i__1; ++i__) {
 971                         temp = zmat[i__ + zmat_dim1];
 972                         zmat[i__ + zmat_dim1] = zmat[i__ + *idz * zmat_dim1];
 973                         zmat[i__ + *idz * zmat_dim1] = temp;
 974                 }
 975         }
 976         /* Finally, update the matrix BMAT. */
 977         i__1 = n;
 978         for (j = 1; j <= i__1; ++j) {
 979                 jp = npt + j;
 980                 w[jp] = bmat[*knew + j * bmat_dim1];
 981                 tempa = (alpha * vlag[jp] - tau * w[jp]) / denom;
 982                 tempb = (-(*beta) * w[jp] - tau * vlag[jp]) / denom;
 983                 i__2 = jp;
 984                 for (i__ = 1; i__ <= i__2; ++i__) {
 985                         bmat[i__ + j * bmat_dim1] = bmat[i__ + j * bmat_dim1] + tempa *
 986                                 vlag[i__] + tempb * w[i__];
 987                         if (i__ > npt) {
 988                                 bmat[jp + (i__ - npt) * bmat_dim1] = bmat[i__ + j *
 989                                                                  bmat_dim1];
 990                         }
 991                 }
 992         }
 993         return 0;
 994 }
 995
 996 template<class TYPE, class Func>
 997 static TYPE newuob_(int n, int npt, TYPE *x,
 998                                         TYPE rhobeg, TYPE rhoend, int *ret_nf, int maxfun,
 999                                         TYPE *xbase, TYPE *xopt, TYPE *xnew,
1000                                         TYPE *xpt, TYPE *fval, TYPE *gq, TYPE *hq,
1001                                         TYPE *pq, TYPE *bmat, TYPE *zmat, int *ndim,
1002                                         TYPE *d__, TYPE *vlag, TYPE *w, Func &func)
1003 {
1004         /* XBASE will hold a shift of origin that should reduce the
1005            contributions from rounding errors to values of the model and
1006            Lagrange functions.
1007          * XOPT will be set to the displacement from XBASE of the vector of
1008            variables that provides the least calculated F so far.
1009          * XNEW will be set to the displacement from XBASE of the vector of
1010            variables for the current calculation of F.
1011          * XPT will contain the interpolation point coordinates relative to
1012            XBASE.
1013          * FVAL will hold the values of F at the interpolation points.
1014          * GQ will hold the gradient of the quadratic model at XBASE.
1015          * HQ will hold the explicit second derivatives of the quadratic
1016            model.
1017          * PQ will contain the parameters of the implicit second derivatives
1018            of the quadratic model.
1019          * BMAT will hold the last N columns of H.
1020          * ZMAT will hold the factorization of the leading NPT by NPT
1021            submatrix of H, this factorization being ZMAT times Diag(DZ)
1022            times ZMAT^T, where the elements of DZ are plus or minus 1.0, as
1023            specified by IDZ.
1024          * NDIM is the first dimension of BMAT and has the value NPT+N.
1025          * D is reserved for trial steps from XOPT.
1026          * VLAG will contain the values of the Lagrange functions at a new
1027            point X.  They are part of a product that requires VLAG to be of
1028            length NDIM.
1029          * The array W will be used for working space. Its length must be at
1030            least 10*NDIM = 10*(NPT+N). Set some constants. */
1031
1032         int xpt_dim1, xpt_offset, bmat_dim1, bmat_offset, zmat_dim1, zmat_offset,
1033                 i__1, i__2, i__3, i__, j, k, ih, nf, nh, ip, jp, np, nfm, idz, ipt, jpt,
1034                 nfmm, knew, kopt, nptm, ksave, nfsav, itemp, ktemp, itest, nftest;
1035         TYPE d__1, d__2, d__3, f, dx, dsq, rho, sum, fbeg, diff, beta, gisq,
1036                 temp, suma, sumb, fopt, bsum, gqsq, xipt, xjpt, sumz, diffa, diffb,
1037                 diffc, hdiag, alpha, delta, recip, reciq, fsave, dnorm, ratio, dstep,
1038                 vquad, tempq, rhosq, detrat, crvmin, distsq, xoptsq;
1039
1040         /* Parameter adjustments */
1041         diffc = ratio = dnorm = diffa = diffb = xoptsq = f = 0.0;
1042         beta = 0;
1043         nfsav = 0;
1044         kopt = 1;
1045         rho = fbeg = fopt = xjpt = xipt = 0.0;
1046         itest = ipt = jpt = 0;
1047         alpha = dstep = 0.0;
1048         zmat_dim1 = npt;
1049         zmat_offset = 1 + zmat_dim1;
1050         zmat -= zmat_offset;
1051         xpt_dim1 = npt;
1052         xpt_offset = 1 + xpt_dim1;
1053         xpt -= xpt_offset;
1054         --x; --xbase; --xopt; --xnew; --fval; --gq; --hq; --pq;
1055         bmat_dim1 = *ndim;
1056         bmat_offset = 1 + bmat_dim1;
1057         bmat -= bmat_offset;
1058         --d__;
1059         --vlag;
1060         --w;
1061         /* Function Body */
1062         np = n + 1;
1063         nh = n * np / 2;
1064         nptm = npt - np;
1065         nftest = (maxfun > 1)? maxfun : 1;
1066         /* Set the initial elements of XPT, BMAT, HQ, PQ and ZMAT to 0. */
1067         i__1 = n;
1068         for (j = 1; j <= i__1; ++j) {
1069                 xbase[j] = x[j];
1070                 i__2 = npt;
1071                 for (k = 1; k <= i__2; ++k)
1072                         xpt[k + j * xpt_dim1] = 0;
1073                 i__2 = *ndim;
1074                 for (i__ = 1; i__ <= i__2; ++i__)
1075                         bmat[i__ + j * bmat_dim1] = 0;
1076         }
1077         i__2 = nh;
1078         for (ih = 1; ih <= i__2; ++ih) hq[ih] = 0;
1079         i__2 = npt;
1080         for (k = 1; k <= i__2; ++k) {
1081                 pq[k] = 0;
1082                 i__1 = nptm;
1083                 for (j = 1; j <= i__1; ++j)
1084                         zmat[k + j * zmat_dim1] = 0;
1085         }
1086         /* Begin the initialization procedure. NF becomes 1.0 more than the
1087          * number of function values so far. The coordinates of the
1088          * displacement of the next initial interpolation point from XBASE
1089          * are set in XPT(NF,.). */
1090         rhosq = rhobeg * rhobeg;
1091         recip = 1.0 / rhosq;
1092         reciq = sqrt(.5) / rhosq;
1093         nf = 0;
1094 L50:
1095         nfm = nf;
1096         nfmm = nf - n;
1097         ++nf;
1098         if (nfm <= n << 1) {
1099                 if (nfm >= 1 && nfm <= n) {
1100                         xpt[nf + nfm * xpt_dim1] = rhobeg;
1101                 } else if (nfm > n) {
1102                         xpt[nf + nfmm * xpt_dim1] = -(rhobeg);
1103                 }
1104         } else {
1105                 itemp = (nfmm - 1) / n;
1106                 jpt = nfm - itemp * n - n;
1107                 ipt = jpt + itemp;
1108                 if (ipt > n) {
1109                         itemp = jpt;
1110                         jpt = ipt - n;
1111                         ipt = itemp;
1112                 }
1113                 xipt = rhobeg;
1114                 if (fval[ipt + np] < fval[ipt + 1]) xipt = -xipt;
1115                 xjpt = rhobeg;
1116                 if (fval[jpt + np] < fval[jpt + 1]) xjpt = -xjpt;
1117                 xpt[nf + ipt * xpt_dim1] = xipt;
1118                 xpt[nf + jpt * xpt_dim1] = xjpt;
1119         }
1120         /* Calculate the next value of F, label 70 being reached immediately
1121          * after this calculation. The least function value so far and its
1122          * index are required. */
1123         i__1 = n;
1124         for (j = 1; j <= i__1; ++j)
1125                 x[j] = xpt[nf + j * xpt_dim1] + xbase[j];
1126         goto L310;
1127 L70:
1128         fval[nf] = f;
1129         if (nf == 1) {
1130                 fbeg = fopt = f;
1131                 kopt = 1;
1132         } else if (f < fopt) {
1133                 fopt = f;
1134                 kopt = nf;
1135         }
1136         /* Set the non0 initial elements of BMAT and the quadratic model
1137          * in the cases when NF is at most 2*N+1. */
1138         if (nfm <= n << 1) {
1139                 if (nfm >= 1 && nfm <= n) {
1140                         gq[nfm] = (f - fbeg) / rhobeg;
1141                         if (npt < nf + n) {
1142                                 bmat[nfm * bmat_dim1 + 1] = -1.0 / rhobeg;
1143                                 bmat[nf + nfm * bmat_dim1] = 1.0 / rhobeg;
1144                                 bmat[npt + nfm + nfm * bmat_dim1] = -.5 * rhosq;
1145                         }
1146                 } else if (nfm > n) {
1147                         bmat[nf - n + nfmm * bmat_dim1] = .5 / rhobeg;
1148                         bmat[nf + nfmm * bmat_dim1] = -.5 / rhobeg;
1149                         zmat[nfmm * zmat_dim1 + 1] = -reciq - reciq;
1150                         zmat[nf - n + nfmm * zmat_dim1] = reciq;
1151                         zmat[nf + nfmm * zmat_dim1] = reciq;
1152                         ih = nfmm * (nfmm + 1) / 2;
1153                         temp = (fbeg - f) / rhobeg;
1154                         hq[ih] = (gq[nfmm] - temp) / rhobeg;
1155                         gq[nfmm] = .5 * (gq[nfmm] + temp);
1156                 }
1157                 /* Set the off-diagonal second derivatives of the Lagrange
1158                  * functions and the initial quadratic model. */
1159         } else {
1160                 ih = ipt * (ipt - 1) / 2 + jpt;
1161                 if (xipt < 0) ipt += n;
1162                 if (xjpt < 0) jpt += n;
1163                 zmat[nfmm * zmat_dim1 + 1] = recip;
1164                 zmat[nf + nfmm * zmat_dim1] = recip;
1165                 zmat[ipt + 1 + nfmm * zmat_dim1] = -recip;
1166                 zmat[jpt + 1 + nfmm * zmat_dim1] = -recip;
1167                 hq[ih] = (fbeg - fval[ipt + 1] - fval[jpt + 1] + f) / (xipt * xjpt);
1168         }
1169         if (nf < npt) goto L50;
1170         /* Begin the iterative procedure, because the initial model is
1171          * complete. */
1172         rho = rhobeg;
1173         delta = rho;
1174         idz = 1;
1175         diffa = diffb = itest = 0;
1176         xoptsq = 0.0;
1177         i__1 = n;
1178         for (i__ = 1; i__ <= i__1; ++i__) {
1179                 xopt[i__] = xpt[kopt + i__ * xpt_dim1];
1180                 /* Computing 2nd power */
1181                 d__1 = xopt[i__];
1182                 xoptsq += d__1 * d__1;
1183         }
1184 L90:
1185         nfsav = nf;
1186         /* Generate the next trust region step and test its length. Set KNEW
1187          * to -1 if the purpose of the next F will be to improve the
1188          * model. */
1189 L100:
1190         knew = 0;
1191         trsapp_(n, npt, &xopt[1], &xpt[xpt_offset], &gq[1], &hq[1], &pq[1], &
1192            delta, &d__[1], &w[1], &w[np], &w[np + n], &w[np + (n << 1)], &
1193                 crvmin);
1194         dsq = 0;
1195         i__1 = n;
1196         for (i__ = 1; i__ <= i__1; ++i__) {
1197                 /* Computing 2nd power */
1198                 d__1 = d__[i__];
1199                 dsq += d__1 * d__1;
1200         }
1201         /* Computing MIN */
1202         d__1 = delta, d__2 = sqrt(dsq);
1203         dnorm = min(d__1, d__2);
1204         if (dnorm < .5 * rho) {
1205                 knew = -1;
1206                 delta = 0.1 * delta;
1207                 ratio = -1.;
1208                 if (delta <= rho * 1.5) delta = rho;
1209                 if (nf <= nfsav + 2) goto L460;
1210                 temp = crvmin * .125 * rho * rho;
1211                 /* Computing MAX */
1212                 d__1 = max(diffa, diffb);
1213                 if (temp <= max(d__1, diffc)) goto L460;
1214                 goto L490;
1215         }
1216         /* Shift XBASE if XOPT may be too far from XBASE. First make the
1217          * changes to BMAT that do not depend on ZMAT. */
1218 L120:
1219         if (dsq <= xoptsq * .001) {
1220                 tempq = xoptsq * .25;
1221                 i__1 = npt;
1222                 for (k = 1; k <= i__1; ++k) {
1223                         sum = 0;
1224                         i__2 = n;
1225                         for (i__ = 1; i__ <= i__2; ++i__)
1226                                 sum += xpt[k + i__ * xpt_dim1] * xopt[i__];
1227                         temp = pq[k] * sum;
1228                         sum -= .5 * xoptsq;
1229                         w[npt + k] = sum;
1230                         i__2 = n;
1231                         for (i__ = 1; i__ <= i__2; ++i__) {
1232                                 gq[i__] += temp * xpt[k + i__ * xpt_dim1];
1233                                 xpt[k + i__ * xpt_dim1] -= .5 * xopt[i__];
1234                                 vlag[i__] = bmat[k + i__ * bmat_dim1];
1235                                 w[i__] = sum * xpt[k + i__ * xpt_dim1] + tempq * xopt[i__];
1236                                 ip = npt + i__;
1237                                 i__3 = i__;
1238                                 for (j = 1; j <= i__3; ++j)
1239                                         bmat[ip + j * bmat_dim1] = bmat[ip + j * bmat_dim1] +
1240                                                 vlag[i__] * w[j] + w[i__] * vlag[j];
1241                         }
1242                 }
1243                 /* Then the revisions of BMAT that depend on ZMAT are
1244                  * calculated. */
1245                 i__3 = nptm;
1246                 for (k = 1; k <= i__3; ++k) {
1247                         sumz = 0;
1248                         i__2 = npt;
1249                         for (i__ = 1; i__ <= i__2; ++i__) {
1250                                 sumz += zmat[i__ + k * zmat_dim1];
1251                                 w[i__] = w[npt + i__] * zmat[i__ + k * zmat_dim1];
1252                         }
1253                         i__2 = n;
1254                         for (j = 1; j <= i__2; ++j) {
1255                                 sum = tempq * sumz * xopt[j];
1256                                 i__1 = npt;
1257                                 for (i__ = 1; i__ <= i__1; ++i__)
1258                                         sum += w[i__] * xpt[i__ + j * xpt_dim1];
1259                                 vlag[j] = sum;
1260                                 if (k < idz) sum = -sum;
1261                                 i__1 = npt;
1262                                 for (i__ = 1; i__ <= i__1; ++i__)
1263                                         bmat[i__ + j * bmat_dim1] += sum * zmat[i__ + k * zmat_dim1];
1264                         }
1265                         i__1 = n;
1266                         for (i__ = 1; i__ <= i__1; ++i__) {
1267                                 ip = i__ + npt;
1268                                 temp = vlag[i__];
1269                                 if (k < idz) temp = -temp;
1270                                 i__2 = i__;
1271                                 for (j = 1; j <= i__2; ++j)
1272                                         bmat[ip + j * bmat_dim1] += temp * vlag[j];
1273                         }
1274                 }
1275                 /* The following instructions complete the shift of XBASE,
1276                  * including the changes to the parameters of the quadratic
1277                  * model. */
1278                 ih = 0;
1279                 i__2 = n;
1280                 for (j = 1; j <= i__2; ++j) {
1281                         w[j] = 0;
1282                         i__1 = npt;
1283                         for (k = 1; k <= i__1; ++k) {
1284                                 w[j] += pq[k] * xpt[k + j * xpt_dim1];
1285                                 xpt[k + j * xpt_dim1] -= .5 * xopt[j];
1286                         }
1287                         i__1 = j;
1288                         for (i__ = 1; i__ <= i__1; ++i__) {
1289                                 ++ih;
1290                                 if (i__ < j) gq[j] += hq[ih] * xopt[i__];
1291                                 gq[i__] += hq[ih] * xopt[j];
1292                                 hq[ih] = hq[ih] + w[i__] * xopt[j] + xopt[i__] * w[j];
1293                                 bmat[npt + i__ + j * bmat_dim1] = bmat[npt + j + i__ *
1294                                                                  bmat_dim1];
1295                         }
1296                 }
1297                 i__1 = n;
1298                 for (j = 1; j <= i__1; ++j) {
1299                         xbase[j] += xopt[j];
1300                         xopt[j] = 0;
1301                 }
1302                 xoptsq = 0;
1303         }
1304         /* Pick the model step if KNEW is positive. A different choice of D
1305          * may be made later, if the choice of D by BIGLAG causes
1306          * substantial cancellation in DENOM. */
1307         if (knew > 0) {
1308                 biglag_(n, npt, &xopt[1], &xpt[xpt_offset], &bmat[bmat_offset], &zmat[zmat_offset], &idz,
1309                                 ndim, &knew, &dstep, &d__[1], &alpha, &vlag[1], &vlag[npt + 1], &w[1], &w[np], &w[np + n], func);
1310         }
1311         /* Calculate VLAG and BETA for the current choice of D. The first
1312          * NPT components of W_check will be held in W. */
1313         i__1 = npt;
1314         for (k = 1; k <= i__1; ++k) {
1315                 suma = 0;
1316                 sumb = 0;
1317                 sum = 0;
1318                 i__2 = n;
1319                 for (j = 1; j <= i__2; ++j) {
1320                         suma += xpt[k + j * xpt_dim1] * d__[j];
1321                         sumb += xpt[k + j * xpt_dim1] * xopt[j];
1322                         sum += bmat[k + j * bmat_dim1] * d__[j];
1323                 }
1324                 w[k] = suma * (.5 * suma + sumb);
1325                 vlag[k] = sum;
1326         }
1327         beta = 0;
1328         i__1 = nptm;
1329         for (k = 1; k <= i__1; ++k) {
1330                 sum = 0;
1331                 i__2 = npt;
1332                 for (i__ = 1; i__ <= i__2; ++i__)
1333                         sum += zmat[i__ + k * zmat_dim1] * w[i__];
1334                 if (k < idz) {
1335                         beta += sum * sum;
1336                         sum = -sum;
1337                 } else beta -= sum * sum;
1338                 i__2 = npt;
1339                 for (i__ = 1; i__ <= i__2; ++i__)
1340                         vlag[i__] += sum * zmat[i__ + k * zmat_dim1];
1341         }
1342         bsum = 0;
1343         dx = 0;
1344         i__2 = n;
1345         for (j = 1; j <= i__2; ++j) {
1346                 sum = 0;
1347                 i__1 = npt;
1348                 for (i__ = 1; i__ <= i__1; ++i__)
1349                         sum += w[i__] * bmat[i__ + j * bmat_dim1];
1350                 bsum += sum * d__[j];
1351                 jp = npt + j;
1352                 i__1 = n;
1353                 for (k = 1; k <= i__1; ++k)
1354                         sum += bmat[jp + k * bmat_dim1] * d__[k];
1355                 vlag[jp] = sum;
1356                 bsum += sum * d__[j];
1357                 dx += d__[j] * xopt[j];
1358         }
1359         beta = dx * dx + dsq * (xoptsq + dx + dx + .5 * dsq) + beta - bsum;
1360         vlag[kopt] += 1.0;
1361         /* If KNEW is positive and if the cancellation in DENOM is
1362          * unacceptable, then BIGDEN calculates an alternative model step,
1363          * XNEW being used for working space. */
1364         if (knew > 0) {
1365                 /* Computing 2nd power */
1366                 d__1 = vlag[knew];
1367                 temp = 1.0 + alpha * beta / (d__1 * d__1);
1368                 if (fabs(temp) <= .8) {
1369                         bigden_(n, npt, &xopt[1], &xpt[xpt_offset], &bmat[bmat_offset], &
1370                                 zmat[zmat_offset], &idz, ndim, &kopt, &knew, &d__[1], &w[
1371                                                                                          1], &vlag[1], &beta, &xnew[1], &w[*ndim + 1], &w[*ndim *
1372                                                                     6 + 1]);
1373                 }
1374         }
1375         /* Calculate the next value of the objective function. */
1376 L290:
1377         i__2 = n;
1378         for (i__ = 1; i__ <= i__2; ++i__) {
1379                 xnew[i__] = xopt[i__] + d__[i__];
1380                 x[i__] = xbase[i__] + xnew[i__];
1381         }
1382         ++nf;
1383 L310:
1384         if (nf > nftest) {
1385                 --nf;
1386 //              fprintf(stderr, "++ Return from NEWUOA because CALFUN has been called MAXFUN times.\n");
1387                 goto L530;
1388         }
1389         f = func(n, &x[1]);
1390         if (nf <= npt) goto L70;
1391         if (knew == -1) goto L530;
1392         /* Use the quadratic model to predict the change in F due to the
1393          * step D, and set DIFF to the error of this prediction. */
1394         vquad = ih = 0;
1395         i__2 = n;
1396         for (j = 1; j <= i__2; ++j) {
1397                 vquad += d__[j] * gq[j];
1398                 i__1 = j;
1399                 for (i__ = 1; i__ <= i__1; ++i__) {
1400                         ++ih;
1401                         temp = d__[i__] * xnew[j] + d__[j] * xopt[i__];
1402                         if (i__ == j) temp = .5 * temp;
1403                         vquad += temp * hq[ih];
1404                 }
1405         }
1406         i__1 = npt;
1407         for (k = 1; k <= i__1; ++k) vquad += pq[k] * w[k];
1408         diff = f - fopt - vquad;
1409         diffc = diffb;
1410         diffb = diffa;
1411         diffa = fabs(diff);
1412         if (dnorm > rho) nfsav = nf;
1413         /* Update FOPT and XOPT if the new F is the least value of the
1414          * objective function so far. The branch when KNEW is positive
1415          * occurs if D is not a trust region step. */
1416         fsave = fopt;
1417         if (f < fopt) {
1418                 fopt = f;
1419                 xoptsq = 0;
1420                 i__1 = n;
1421                 for (i__ = 1; i__ <= i__1; ++i__) {
1422                         xopt[i__] = xnew[i__];
1423                         /* Computing 2nd power */
1424                         d__1 = xopt[i__];
1425                         xoptsq += d__1 * d__1;
1426                 }
1427         }
1428         ksave = knew;
1429         if (knew > 0) goto L410;
1430         /* Pick the next value of DELTA after a trust region step. */
1431         if (vquad >= 0) {
1432 //              fprintf(stderr, "++ Return from NEWUOA because a trust region step has failed to reduce Q.\n");
1433                 goto L530;
1434         }
1435         ratio = (f - fsave) / vquad;
1436         if (ratio <= 0.1) {
1437                 delta = .5 * dnorm;
1438         } else if (ratio <= .7) {
1439                 /* Computing MAX */
1440                 d__1 = .5 * delta;
1441                 delta = max(d__1, dnorm);
1442         } else {
1443                 /* Computing MAX */
1444                 d__1 = .5 * delta, d__2 = dnorm + dnorm;
1445                 delta = max(d__1, d__2);
1446         }
1447         if (delta <= rho * 1.5) delta = rho;
1448         /* Set KNEW to the index of the next interpolation point to be
1449          * deleted. */
1450         /* Computing MAX */
1451         d__2 = 0.1 * delta;
1452         /* Computing 2nd power */
1453         d__1 = max(d__2, rho);
1454         rhosq = d__1 * d__1;
1455         ktemp = 0;
1456         detrat = 0.0;
1457         if (f >= fsave) {
1458                 ktemp = kopt;
1459                 detrat = 1.0;
1460         }
1461         i__1 = npt;
1462         for (k = 1; k <= i__1; ++k) {
1463                 hdiag = 0;
1464                 i__2 = nptm;
1465                 for (j = 1; j <= i__2; ++j) {
1466                         temp = 1.0;
1467                         if (j < idz) temp = -1.0;
1468                         /* Computing 2nd power */
1469                         d__1 = zmat[k + j * zmat_dim1];
1470                         hdiag += temp * (d__1 * d__1);
1471                 }
1472                 /* Computing 2nd power */
1473                 d__2 = vlag[k];
1474                 temp = (d__1 = beta * hdiag + d__2 * d__2, fabs(d__1));
1475                 distsq = 0;
1476                 i__2 = n;
1477                 for (j = 1; j <= i__2; ++j) {
1478                         /* Computing 2nd power */
1479                         d__1 = xpt[k + j * xpt_dim1] - xopt[j];
1480                         distsq += d__1 * d__1;
1481                 }
1482                 if (distsq > rhosq) {
1483                         /* Computing 3rd power */
1484                         d__1 = distsq / rhosq;
1485                         temp *= d__1 * (d__1 * d__1);
1486                 }
1487                 if (temp > detrat && k != ktemp) {
1488                         detrat = temp;
1489                         knew = k;
1490                 }
1491         }
1492         if (knew == 0) goto L460;
1493         /* Update BMAT, ZMAT and IDZ, so that the KNEW-th interpolation
1494          * point can be moved. Begin the updating of the quadratic model,
1495          * starting with the explicit second derivative term. */
1496 L410:
1497         update_(n, npt, &bmat[bmat_offset], &zmat[zmat_offset], &idz, ndim, &vlag[1], &beta, &knew, &w[1]);
1498         fval[knew] = f;
1499         ih = 0;
1500         i__1 = n;
1501         for (i__ = 1; i__ <= i__1; ++i__) {
1502                 temp = pq[knew] * xpt[knew + i__ * xpt_dim1];
1503                 i__2 = i__;
1504                 for (j = 1; j <= i__2; ++j) {
1505                         ++ih;
1506                         hq[ih] += temp * xpt[knew + j * xpt_dim1];
1507                 }
1508         }
1509         pq[knew] = 0;
1510         /* Update the other second derivative parameters, and then the
1511          * gradient vector of the model. Also include the new interpolation
1512          * point. */
1513         i__2 = nptm;
1514         for (j = 1; j <= i__2; ++j) {
1515                 temp = diff * zmat[knew + j * zmat_dim1];
1516                 if (j < idz) temp = -temp;
1517                 i__1 = npt;
1518                 for (k = 1; k <= i__1; ++k) {
1519                         pq[k] += temp * zmat[k + j * zmat_dim1];
1520                 }
1521         }
1522         gqsq = 0;
1523         i__1 = n;
1524         for (i__ = 1; i__ <= i__1; ++i__) {
1525                 gq[i__] += diff * bmat[knew + i__ * bmat_dim1];
1526                 /* Computing 2nd power */
1527                 d__1 = gq[i__];
1528                 gqsq += d__1 * d__1;
1529                 xpt[knew + i__ * xpt_dim1] = xnew[i__];
1530         }
1531         /* If a trust region step makes a small change to the objective
1532          * function, then calculate the gradient of the least Frobenius norm
1533          * interpolant at XBASE, and store it in W, using VLAG for a vector
1534          * of right hand sides. */
1535         if (ksave == 0 && delta == rho) {
1536                 if (fabs(ratio) > .01) {
1537                         itest = 0;
1538                 } else {
1539                         i__1 = npt;
1540                         for (k = 1; k <= i__1; ++k)
1541                                 vlag[k] = fval[k] - fval[kopt];
1542                         gisq = 0;
1543                         i__1 = n;
1544                         for (i__ = 1; i__ <= i__1; ++i__) {
1545                                 sum = 0;
1546                                 i__2 = npt;
1547                                 for (k = 1; k <= i__2; ++k)
1548                                         sum += bmat[k + i__ * bmat_dim1] * vlag[k];
1549                                 gisq += sum * sum;
1550                                 w[i__] = sum;
1551                         }
1552                         /* Test whether to replace the new quadratic model by the
1553                          * least Frobenius norm interpolant, making the replacement
1554                          * if the test is satisfied. */
1555                         ++itest;
1556                         if (gqsq < gisq * 100.) itest = 0;
1557                         if (itest >= 3) {
1558                                 i__1 = n;
1559                                 for (i__ = 1; i__ <= i__1; ++i__) gq[i__] = w[i__];
1560                                 i__1 = nh;
1561                                 for (ih = 1; ih <= i__1; ++ih) hq[ih] = 0;
1562                                 i__1 = nptm;
1563                                 for (j = 1; j <= i__1; ++j) {
1564                                         w[j] = 0;
1565                                         i__2 = npt;
1566                                         for (k = 1; k <= i__2; ++k)
1567                                                 w[j] += vlag[k] * zmat[k + j * zmat_dim1];
1568                                         if (j < idz) w[j] = -w[j];
1569                                 }
1570                                 i__1 = npt;
1571                                 for (k = 1; k <= i__1; ++k) {
1572                                         pq[k] = 0;
1573                                         i__2 = nptm;
1574                                         for (j = 1; j <= i__2; ++j)
1575                                                 pq[k] += zmat[k + j * zmat_dim1] * w[j];
1576                                 }
1577                                 itest = 0;
1578                         }
1579                 }
1580         }
1581         if (f < fsave) kopt = knew;
1582         /* If a trust region step has provided a sufficient decrease in F,
1583          * then branch for another trust region calculation. The case
1584          * KSAVE>0 occurs when the new function value was calculated by a
1585          * model step. */
1586         if (f <= fsave + 0.1 * vquad) goto L100;
1587         if (ksave > 0) goto L100;
1588         /* Alternatively, find out if the interpolation points are close
1589          * enough to the best point so far. */
1590         knew = 0;
1591 L460:
1592         distsq = delta * 4. * delta;
1593         i__2 = npt;
1594         for (k = 1; k <= i__2; ++k) {
1595                 sum = 0;
1596                 i__1 = n;
1597                 for (j = 1; j <= i__1; ++j) {
1598                         /* Computing 2nd power */
1599                         d__1 = xpt[k + j * xpt_dim1] - xopt[j];
1600                         sum += d__1 * d__1;
1601                 }
1602                 if (sum > distsq) {
1603                         knew = k;
1604                         distsq = sum;
1605                 }
1606         }
1607         /* If KNEW is positive, then set DSTEP, and branch back for the next
1608          * iteration, which will generate a "model step". */
1609         if (knew > 0) {
1610                 /* Computing MAX and MIN*/
1611                 d__2 = 0.1 * sqrt(distsq), d__3 = .5 * delta;
1612                 d__1 = min(d__2, d__3);
1613                 dstep = max(d__1, rho);
1614                 dsq = dstep * dstep;
1615                 goto L120;
1616         }
1617         if (ratio > 0) goto L100;
1618         if (max(delta, dnorm) > rho) goto L100;
1619         /* The calculations with the current value of RHO are complete. Pick
1620          * the next values of RHO and DELTA. */
1621 L490:
1622         if (rho > rhoend) {
1623                 delta = .5 * rho;
1624                 ratio = rho / rhoend;
1625                 if (ratio <= 16.) rho = rhoend;
1626                 else if (ratio <= 250.) rho = sqrt(ratio) * rhoend;
1627                 else rho = 0.1 * rho;
1628                 delta = max(delta, rho);
1629                 goto L90;
1630         }
1631         /* Return from the calculation, after another Newton-Raphson step,
1632          * if it is too short to have been tried before. */
1633         if (knew == -1) goto L290;
1634 L530:
1635         if (fopt <= f) {
1636                 i__2 = n;
1637                 for (i__ = 1; i__ <= i__2; ++i__)
1638                         x[i__] = xbase[i__] + xopt[i__];
1639                 f = fopt;
1640         }
1641         *ret_nf = nf;
1642         return f;
1643 }
1644
1645 template<class TYPE, class Func>
1646 static TYPE newuoa_(int n, int npt, TYPE *x, TYPE rhobeg, TYPE rhoend, int *ret_nf, int maxfun, TYPE *w, Func &func)
1647 {
1648         /* This subroutine seeks the least value of a function of many
1649          * variables, by a trust region method that forms quadratic models
1650          * by interpolation. There can be some freedom in the interpolation
1651          * conditions, which is taken up by minimizing the Frobenius norm of
1652          * the change to the second derivative of the quadratic model,
1653          * beginning with a zero matrix. The arguments of the subroutine are
1654          * as follows. */
1655
1656         /* N must be set to the number of variables and must be at least
1657          * two. NPT is the number of interpolation conditions. Its value
1658          * must be in the interval [N+2,(N+1)(N+2)/2]. Initial values of the
1659          * variables must be set in X(1),X(2),...,X(N). They will be changed
1660          * to the values that give the least calculated F. RHOBEG and RHOEND
1661          * must be set to the initial and final values of a trust region
1662          * radius, so both must be positive with RHOEND<=RHOBEG. Typically
1663          * RHOBEG should be about one tenth of the greatest expected change
1664          * to a variable, and RHOEND should indicate the accuracy that is
1665          * required in the final values of the variables. MAXFUN must be set
1666          * to an upper bound on the number of calls of CALFUN.  The array W
1667          * will be used for working space. Its length must be at least
1668          * (NPT+13)*(NPT+N)+3*N*(N+3)/2. */
1669
1670         /* SUBROUTINE CALFUN (N,X,F) must be provided by the user. It must
1671          * set F to the value of the objective function for the variables
1672          * X(1),X(2),...,X(N). Partition the working space array, so that
1673          * different parts of it can be treated separately by the subroutine
1674          * that performs the main calculation. */
1675
1676         int id, np, iw, igq, ihq, ixb, ifv, ipq, ivl, ixn,
1677                 ixo, ixp, ndim, nptm, ibmat, izmat;
1678
1679         /* Parameter adjustments */
1680         --w; --x;
1681         /* Function Body */
1682         np = n + 1;
1683         nptm = npt - np;
1684         if (npt < n + 2 || npt > (n + 2) * np / 2) {
1685 //              fprintf(stderr, "** Return from NEWUOA because NPT is not in the required interval.\n");
1686                 return 1;
1687         }
1688         ndim = npt + n;
1689         ixb = 1;
1690         ixo = ixb + n;
1691         ixn = ixo + n;
1692         ixp = ixn + n;
1693         ifv = ixp + n * npt;
1694         igq = ifv + npt;
1695         ihq = igq + n;
1696         ipq = ihq + n * np / 2;
1697         ibmat = ipq + npt;
1698         izmat = ibmat + ndim * n;
1699         id = izmat + npt * nptm;
1700         ivl = id + n;
1701         iw = ivl + ndim;
1702         /* The above settings provide a partition of W for subroutine
1703          * NEWUOB. The partition requires the first NPT*(NPT+N)+5*N*(N+3)/2
1704          * elements of W plus the space that is needed by the last array of
1705          * NEWUOB. */
1706         return newuob_(n, npt, &x[1], rhobeg, rhoend, ret_nf, maxfun, &w[ixb], &w[ixo], &w[ixn],
1707                                    &w[ixp], &w[ifv], &w[igq], &w[ihq], &w[ipq], &w[ibmat], &w[izmat],
1708                                    &ndim, &w[id], &w[ivl], &w[iw], func);
1709 }
1710
1711 template<class TYPE, class Func>
1712 TYPE min_newuoa(int n, TYPE *x, Func &func, TYPE rb, TYPE tol, int max_iter)
1713 {
1714         int npt = 2 * n + 1, rnf;
1715         TYPE ret;
1716         TYPE *w = (TYPE*)calloc((npt+13)*(npt+n) + 3*n*(n+3)/2 + 11, sizeof(TYPE));
1717         ret = newuoa_(n, 2*n+1, x, rb, tol, &rnf, max_iter, w, func);
1718         free(w);
1719         return ret;
1720 }
1721
1722 #endif