src/external/lmfit/lmmin.cpp

   1 /*
   2  * Library:   lmfit (Levenberg-Marquardt least squares fitting)
   3  *
   4  * File:      lmmin.c
   5  *
   6  * Contents:  Levenberg-Marquardt minimization.
   7  *
   8  * Copyright: MINPACK authors, The University of Chikago (1980-1999)
   9  *            Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
  10  *
  11  * License:   see ../COPYING (FreeBSD)
  12  *
  13  * Homepage:  apps.jcns.fz-juelich.de/lmfit
  14  */
  15
  16 #include <assert.h>
  17 #include <stdlib.h>
  18 #include <stdio.h>
  19 #include <math.h>
  20 #include <float.h>
  21 #include "lmmin.h"
  22
  23 #define MIN(a,b) (((a)<=(b)) ? (a) : (b))
  24 #define MAX(a,b) (((a)>=(b)) ? (a) : (b))
  25 #define SQR(x)   (x)*(x)
  26
  27 /* Declare functions that do the heavy numerics.
   28    Implementations are in this source file, below lmmin.
   29    Dependencies: lmmin calls qrfac and lmpar; lmpar calls qrsolv. */
  30 static void lm_lmpar(
  31     const int n, double *const r, const int ldr, int *const ipvt,
  32     double *const diag, double *const qtb, double delta, double *const par,
  33     double *const x,
  34     double *const sdiag, double *const aux, double *const xdi);
  35 static void lm_qrfac(
  36     const int m, const int n, double *const a, int *const ipvt,
  37     double *const rdiag, double *const acnorm, double *const wa );
  38 static void lm_qrsolv(
  39     const int n, double *const r, const int ldr, int *const ipvt,
  40     double *const diag, double *const qtb, double *const x,
  41     double *const sdiag, double *const wa);
  42
  43
  44 /*****************************************************************************/
  45 /*  Numeric constants                                                        */
  46 /*****************************************************************************/
  47
  48 /* machine-dependent constants from float.h */
  49 #define LM_MACHEP     DBL_EPSILON   /* resolution of arithmetic */
  50 #define LM_DWARF      DBL_MIN       /* smallest nonzero number */
  51 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
  52 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
  53 #define LM_USERTOL    30*LM_MACHEP  /* users are recommended to require this */
  54
  55 /* If the above values do not work, the following seem good for an x86:
  56  LM_MACHEP     .555e-16
  57  LM_DWARF      9.9e-324
  58  LM_SQRT_DWARF 1.e-160
  59  LM_SQRT_GIANT 1.e150
   60  LM_USERTOL    1.e-14
  61    The following values should work on any machine:
  62  LM_MACHEP     1.2e-16
  63  LM_DWARF      1.0e-38
  64  LM_SQRT_DWARF 3.834e-20
  65  LM_SQRT_GIANT 1.304e19
   66  LM_USERTOL    1.e-14
  67 */
  68
  69 const lm_control_struct lm_control_double = {
  70     LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL, 100., 100, 1,
  71     NULL, 0, -1, -1 };
  72 const lm_control_struct lm_control_float = {
  73     1.e-7,      1.e-7,      1.e-7,      1.e-7,      100., 100, 1,
  74     NULL, 0, -1, -1 };
  75
  76
  77 /*****************************************************************************/
  78 /*  Message texts (indexed by status.info)                                   */
  79 /*****************************************************************************/
  80
/* Long, human-readable explanation for each outcome code.
 * The array index is the code that lmmin stores in its status record
 * (S->outcome), so the order of the entries must not be changed. */
const char *lm_infmsg[] = {
    /*  0 */
    "found zero (sum of squares below underflow limit)",
    /*  1 */
    "converged  (the relative error in the sum of squares is at most tol)",
    /*  2 */
    "converged  (the relative error of the parameter vector is at most tol)",
    /*  3 */
    "converged  (both errors are at most tol)",
    /*  4 */
    "trapped    (by degeneracy; increasing epsilon might help)",
    /*  5 */
    "exhausted  (number of function calls exceeding preset patience)",
    /*  6 */
    "failed     (ftol<tol: cannot reduce sum of squares any further)",
    /*  7 */
    "failed     (xtol<tol: cannot improve approximate solution any further)",
    /*  8 */
    "failed     (gtol<tol: cannot improve approximate solution any further)",
    /*  9 */
    "crashed    (not enough memory)",
    /* 10 */
    "exploded   (fatal coding error: improper input parameters)",
    /* 11 */
    "stopped    (break requested within function evaluation)",
    /* 12 */
    "found nan  (function value is not-a-number or infinite)",
    /* 13 */
    "won't fit  (no free parameter)"
};
  97
/* Short label for each outcome code; entries parallel those of lm_infmsg.
 * The array index is the code that lmmin stores in S->outcome, so the
 * order of the entries must not be changed. */
const char *lm_shortmsg[] = {
    "found zero",      //  0
    "converged (f)",   //  1
    "converged (p)",   //  2
    "converged (2)",   //  3
    "degenerate",      //  4
    "call limit",      //  5
    "failed (f)",      //  6
    "failed (p)",      //  7
    "failed (o)",      //  8
    "no memory",       //  9
    "invalid input",   // 10
    "user break",      // 11
    "found nan",       // 12
    "no free par"      // 13
};
 114
 115
 116 /*****************************************************************************/
 117 /*  Monitoring auxiliaries.                                                  */
 118 /*****************************************************************************/
 119
 120 static void lm_print_pars(const int nout, const double *par, FILE* fout)
 121 {
 122     fprintf(fout, "  pars:");
 123     for (int i = 0; i < nout; ++i)
 124         fprintf(fout, " %23.16g", par[i]);
 125     fprintf(fout, "\n");
 126 }
 127
 128
 129 /*****************************************************************************/
 130 /*  lmmin (main minimization routine)                                        */
 131 /*****************************************************************************/
 132
 133 void lmmin(
 134     const int n, double *const x, const int m, const double* y,
 135     const void *const data,
 136     void (*const evaluate)(
 137         const double *const par, const int m_dat, const void *const data,
 138         double *const fvec, int *const userbreak),
 139     const lm_control_struct *const C, lm_status_struct *const S)
 140 {
 141     int j, i;
 142     double actred, dirder, fnorm, fnorm1, gnorm, pnorm,
 143         prered, ratio, step, sum, temp, temp1, temp2, temp3;
 144     static double p1 = 0.1, p0001 = 1.0e-4;
 145
 146     int maxfev = C->patience * (n+1);
 147
 148     int    inner_success; /* flag for loop control */
 149     double lmpar = 0;     /* Levenberg-Marquardt parameter */
 150     double delta = 0;
 151     double xnorm = 0;
 152     double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */
 153
 154     int nout = C->n_maxpri==-1 ? n : MIN(C->n_maxpri, n);
 155
 156     /* The workaround msgfile=NULL is needed for default initialization */
 157     FILE* msgfile = C->msgfile ? C->msgfile : stdout;
 158
 159     /* Default status info; must be set ahead of first return statements */
 160     S->outcome = 0;      /* status code */
 161     S->userbreak = 0;
 162     S->nfev = 0;      /* function evaluation counter */
 163
 164 /***  Check input parameters for errors.  ***/
 165
 166     if ( n < 0 ) {
 167         fprintf(stderr, "lmmin: invalid number of parameters %i\n", n);
 168         S->outcome = 10; /* invalid parameter */
 169         return;
 170     }
 171     if (m < n) {
 172         fprintf(stderr, "lmmin: number of data points (%i) "
 173                 "smaller than number of parameters (%i)\n", m, n);
 174         S->outcome = 10;
 175         return;
 176     }
 177     if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) {
 178         fprintf(stderr,
 179                 "lmmin: negative tolerance (at least one of %g %g %g)\n",
 180                 C->ftol, C->xtol, C->gtol);
 181         S->outcome = 10;
 182         return;
 183     }
 184     if (maxfev <= 0) {
 185         fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n",
 186                 maxfev);
 187         S->outcome = 10;
 188         return;
 189     }
 190     if (C->stepbound <= 0) {
 191         fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound);
 192         S->outcome = 10;
 193         return;
 194     }
 195     if (C->scale_diag != 0 && C->scale_diag != 1) {
 196         fprintf(stderr, "lmmin: logical variable scale_diag=%i, "
 197                 "should be 0 or 1\n", C->scale_diag);
 198         S->outcome = 10;
 199         return;
 200     }
 201
 202 /***  Allocate work space.  ***/
 203
 204     /* Allocate total workspace with just one system call */
 205     char *ws;
 206     if ( ( ws = static_cast<char *>(malloc(
 207                (2*m+5*n+m*n)*sizeof(double) + n*sizeof(int)) ) ) == NULL ) {
 208         S->outcome = 9;
 209         return;
 210     }
 211
 212     /* Assign workspace segments. */
 213     char *pws = ws;
 214     double *fvec = (double*) pws; pws += m * sizeof(double)/sizeof(char);
 215     double *diag = (double*) pws; pws += n * sizeof(double)/sizeof(char);
 216     double *qtf  = (double*) pws; pws += n * sizeof(double)/sizeof(char);
 217     double *fjac = (double*) pws; pws += n*m*sizeof(double)/sizeof(char);
 218     double *wa1  = (double*) pws; pws += n * sizeof(double)/sizeof(char);
 219     double *wa2  = (double*) pws; pws += n * sizeof(double)/sizeof(char);
 220     double *wa3  = (double*) pws; pws += n * sizeof(double)/sizeof(char);
 221     double *wf   = (double*) pws; pws += m * sizeof(double)/sizeof(char);
 222     int    *ipvt = (int*)    pws; /*pws += n * sizeof(int)   /sizeof(char);*/
 223
 224     /* Initialize diag */ // TODO: check whether this is still needed
 225     if (!C->scale_diag) {
 226         for (j = 0; j < n; j++)
 227             diag[j] = 1.;
 228     }
 229
 230 /***  Evaluate function at starting point and calculate norm.  ***/
 231
 232     if( C->verbosity&1 )
 233         fprintf(msgfile, "lmmin start (ftol=%g gtol=%g xtol=%g)\n",
 234                 C->ftol, C->gtol, C->xtol);
 235     if( C->verbosity&2 )
 236         lm_print_pars(nout, x, msgfile);
 237     (*evaluate)(x, m, data, fvec, &(S->userbreak));
 238     if( C->verbosity&8 )
 239     {
 240         if (y) {
 241             for( i=0; i<m; ++i )
 242                 fprintf(msgfile, "    i, f, y-f: %4i %18.8g %18.8g\n",
 243                         i, fvec[i], y[i]-fvec[i]);
 244         } else {
 245             for( i=0; i<m; ++i )
 246                 fprintf(msgfile, "    i, f: %4i %18.8g\n", i, fvec[i]);
 247         }
 248     }
 249     S->nfev = 1;
 250     if ( S->userbreak )
 251         goto terminate;
 252     if ( n == 0 ) {
 253         S->outcome = 13; /* won't fit */
 254         goto terminate;
 255     }
 256     fnorm = lm_fnorm(m, fvec, y);
 257     if( C->verbosity&2 )
 258         fprintf(msgfile, "  fnorm = %24.16g\n", fnorm);
 259     if( !isfinite(fnorm) ){
 260         if( C->verbosity )
 261             fprintf(msgfile, "nan case 1\n");
 262         S->outcome = 12; /* nan */
 263         goto terminate;
 264     } else if( fnorm <= LM_DWARF ){
 265         S->outcome = 0; /* sum of squares almost zero, nothing to do */
 266         goto terminate;
 267     }
 268
 269 /***  The outer loop: compute gradient, then descend.  ***/
 270
 271     for( int outer=0; ; ++outer ) {
 272
 273 /***  [outer]  Calculate the Jacobian.  ***/
 274
 275         for (j = 0; j < n; j++) {
 276             temp = x[j];
 277             step = MAX(eps*eps, eps * fabs(temp));
 278             x[j] += step; /* replace temporarily */
 279             (*evaluate)(x, m, data, wf, &(S->userbreak));
 280             ++(S->nfev);
 281             if ( S->userbreak )
 282                 goto terminate;
 283             for (i = 0; i < m; i++)
 284                 fjac[j*m+i] = (wf[i] - fvec[i]) / step;
 285             x[j] = temp; /* restore */
 286         }
 287         if ( C->verbosity&16 ) {
 288             /* print the entire matrix */
 289             printf("Jacobian\n");
 290             for (i = 0; i < m; i++) {
 291                 printf("  ");
 292                 for (j = 0; j < n; j++)
 293                     printf("%.5e ", fjac[j*m+i]);
 294                 printf("\n");
 295             }
 296         }
 297
 298 /***  [outer]  Compute the QR factorization of the Jacobian.  ***/
 299
 300 /*      fjac is an m by n array. The upper n by n submatrix of fjac
 301  *        is made to contain an upper triangular matrix R with diagonal
 302  *        elements of nonincreasing magnitude such that
 303  *
 304  *              P^T*(J^T*J)*P = R^T*R
 305  *
 306  *              (NOTE: ^T stands for matrix transposition),
 307  *
 308  *        where P is a permutation matrix and J is the final calculated
 309  *        Jacobian. Column j of P is column ipvt(j) of the identity matrix.
 310  *        The lower trapezoidal part of fjac contains information generated
 311  *        during the computation of R.
 312  *
 313  *      ipvt is an integer array of length n. It defines a permutation
 314  *        matrix P such that jac*P = Q*R, where jac is the final calculated
 315  *        Jacobian, Q is orthogonal (not stored), and R is upper triangular
 316  *        with diagonal elements of nonincreasing magnitude. Column j of P
 317  *        is column ipvt(j) of the identity matrix.
 318  */
 319
 320         lm_qrfac(m, n, fjac, ipvt, wa1, wa2, wa3);
 321         /* return values are ipvt, wa1=rdiag, wa2=acnorm */
 322
 323 /***  [outer]  Form Q^T * fvec, and store first n components in qtf.  ***/
 324
 325         if (y)
 326             for (i = 0; i < m; i++)
 327                 wf[i] = fvec[i] - y[i];
 328         else
 329             for (i = 0; i < m; i++)
 330                 wf[i] = fvec[i];
 331
 332         for (j = 0; j < n; j++) {
 333             temp3 = fjac[j*m+j];
 334             if (temp3 != 0) {
 335                 sum = 0;
 336                 for (i = j; i < m; i++)
 337                     sum += fjac[j*m+i] * wf[i];
 338                 temp = -sum / temp3;
 339                 for (i = j; i < m; i++)
 340                     wf[i] += fjac[j*m+i] * temp;
 341             }
 342             fjac[j*m+j] = wa1[j];
 343             qtf[j] = wf[j];
 344         }
 345
 346 /***  [outer]  Compute norm of scaled gradient and detect degeneracy.  ***/
 347
 348         gnorm = 0;
 349         for (j = 0; j < n; j++) {
 350             if (wa2[ipvt[j]] == 0)
 351                 continue;
 352             sum = 0;
 353             for (i = 0; i <= j; i++)
 354                 sum += fjac[j*m+i] * qtf[i];
 355             gnorm = MAX(gnorm, fabs( sum / wa2[ipvt[j]] / fnorm ));
 356         }
 357
 358         if (gnorm <= C->gtol) {
 359             S->outcome = 4;
 360             goto terminate;
 361         }
 362
 363 /***  [outer]  Initialize / update diag and delta. ***/
 364
 365         if ( !outer ) {
 366             /* first iteration only */
 367             if (C->scale_diag) {
 368                 /* diag := norms of the columns of the initial Jacobian */
 369                 for (j = 0; j < n; j++)
 370                     diag[j] = wa2[j] ? wa2[j] : 1;
 371                 /* xnorm := || D x || */
 372                 for (j = 0; j < n; j++)
 373                     wa3[j] = diag[j] * x[j];
 374                 xnorm = lm_enorm(n, wa3);
 375             } else {
 376                 xnorm = lm_enorm(n, x);
 377             }
 378             if( !isfinite(xnorm) ){
 379                 if( C->verbosity )
 380                     fprintf(msgfile, "nan case 2\n");
 381                 S->outcome = 12; /* nan */
 382                 goto terminate;
 383             }
 384             /* initialize the step bound delta. */
 385             if ( xnorm )
 386                 delta = C->stepbound * xnorm;
 387             else
 388                 delta = C->stepbound;
 389             /* only now print the header for the loop table */
 390             if( C->verbosity&2 ) {
 391                 fprintf(msgfile, " #o #i     lmpar    prered  actred"
 392                         "        ratio    dirder      delta"
 393                         "      pnorm                 fnorm");
 394                 for (i = 0; i < nout; ++i)
 395                     fprintf(msgfile, "               p%i", i);
 396                 fprintf(msgfile, "\n");
 397             }
 398         } else {
 399             if (C->scale_diag) {
 400                 for (j = 0; j < n; j++)
 401                     diag[j] = MAX( diag[j], wa2[j] );
 402             }
 403         }
 404
 405 /***  The inner loop. ***/
 406         int inner = 0;
 407         do {
 408
 409 /***  [inner]  Determine the Levenberg-Marquardt parameter.  ***/
 410
 411             lm_lmpar(n, fjac, m, ipvt, diag, qtf, delta, &lmpar,
 412                      wa1, wa2, wf, wa3);
 413             /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
 414
 415             /* predict scaled reduction */
 416             pnorm = lm_enorm(n, wa3);
 417             if( !isfinite(pnorm) ){
 418                 if( C->verbosity )
 419                     fprintf(msgfile, "nan case 3\n");
 420                 S->outcome = 12; /* nan */
 421                 goto terminate;
 422             }
 423             temp2 = lmpar * SQR( pnorm / fnorm );
 424             for (j = 0; j < n; j++) {
 425                 wa3[j] = 0;
 426                 for (i = 0; i <= j; i++)
 427                     wa3[i] -= fjac[j*m+i] * wa1[ipvt[j]];
 428             }
 429             temp1 = SQR( lm_enorm(n, wa3) / fnorm );
 430             if( !isfinite(temp1) ){
 431                 if( C->verbosity )
 432                     fprintf(msgfile, "nan case 4\n");
 433                 S->outcome = 12; /* nan */
 434                 goto terminate;
 435             }
 436             prered = temp1 + 2 * temp2;
 437             dirder = -temp1 + temp2; /* scaled directional derivative */
 438
 439             /* at first call, adjust the initial step bound. */
 440             if ( !outer && !inner && pnorm < delta )
 441                 delta = pnorm;
 442
 443 /***  [inner]  Evaluate the function at x + p.  ***/
 444
 445             for (j = 0; j < n; j++)
 446                 wa2[j] = x[j] - wa1[j];
 447
 448             (*evaluate)( wa2, m, data, wf, &(S->userbreak) );
 449             ++(S->nfev);
 450             if ( S->userbreak )
 451                 goto terminate;
 452             fnorm1 = lm_fnorm(m, wf, y);
 453             // exceptionally, for this norm we do not test for infinity
 454             // because we can deal with it without terminating.
 455
 456 /***  [inner]  Evaluate the scaled reduction.  ***/
 457
 458             /* actual scaled reduction (supports even the case fnorm1=infty) */
 459             if (p1 * fnorm1 < fnorm)
 460                 actred = 1 - SQR(fnorm1 / fnorm);
 461             else
 462                 actred = -1;
 463
 464             /* ratio of actual to predicted reduction */
 465             ratio = prered ? actred/prered : 0;
 466
 467             if( C->verbosity&32 ) {
 468                 if (y) {
 469                     for( i=0; i<m; ++i )
 470                         fprintf(msgfile, "    i, f, y-f: %4i %18.8g %18.8g\n",
 471                                 i, fvec[i], y[i]-fvec[i]);
 472                 } else {
 473                     for( i=0; i<m; ++i )
 474                         fprintf(msgfile, "    i, f, y-f: %4i %18.8g\n",
 475                                 i, fvec[i]);
 476                 }
 477             }
 478             if( C->verbosity&2 ) {
 479                 printf("%3i %2i %9.2g %9.2g %9.2g %14.6g"
 480                        " %9.2g %10.3e %10.3e %21.15e",
 481                        outer, inner, lmpar, prered, actred, ratio,
 482                        dirder, delta, pnorm, fnorm1);
 483                 for (i = 0; i < nout; ++i)
 484                     fprintf(msgfile, " %16.9g", wa2[i]);
 485                 fprintf(msgfile, "\n");
 486             }
 487
 488             /* update the step bound */
 489             if (ratio <= 0.25) {
 490                 if (actred >= 0)
 491                     temp = 0.5;
 492                 else
 493                     temp = 0.5 * dirder / (dirder + 0.5 * actred);
 494                 if (p1 * fnorm1 >= fnorm || temp < p1)
 495                     temp = p1;
 496                 delta = temp * MIN(delta, pnorm / p1);
 497                 lmpar /= temp;
 498             } else if (lmpar == 0 || ratio >= 0.75) {
 499                 delta = 2 * pnorm;
 500                 lmpar *= 0.5;
 501             }
 502
 503 /***  [inner]  On success, update solution, and test for convergence.  ***/
 504
 505             inner_success = ratio >= p0001;
 506             if ( inner_success ) {
 507
 508                 /* update x, fvec, and their norms */
 509                 if (C->scale_diag) {
 510                     for (j = 0; j < n; j++) {
 511                         x[j] = wa2[j];
 512                         wa2[j] = diag[j] * x[j];
 513                     }
 514                 } else {
 515                     for (j = 0; j < n; j++)
 516                         x[j] = wa2[j];
 517                 }
 518                 for (i = 0; i < m; i++)
 519                     fvec[i] = wf[i];
 520                 xnorm = lm_enorm(n, wa2);
 521                 if( !isfinite(xnorm) ){
 522                     if( C->verbosity )
 523                         fprintf(msgfile, "nan case 6\n");
 524                     S->outcome = 12; /* nan */
 525                     goto terminate;
 526                 }
 527                 fnorm = fnorm1;
 528             }
 529
 530             /* convergence tests */
 531             S->outcome = 0;
 532             if( fnorm<=LM_DWARF )
 533                 goto terminate;  /* success: sum of squares almost zero */
 534             /* test two criteria (both may be fulfilled) */
 535             if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2)
 536                 S->outcome = 1;  /* success: x almost stable */
 537             if (delta <= C->xtol * xnorm)
 538                 S->outcome += 2; /* success: sum of squares almost stable */
 539             if (S->outcome != 0) {
 540                 goto terminate;
 541             }
 542
 543 /***  [inner]  Tests for termination and stringent tolerances.  ***/
 544
 545             if ( S->nfev >= maxfev ){
 546                 S->outcome = 5;
 547                 goto terminate;
 548             }
 549             if ( fabs(actred) <= LM_MACHEP &&
 550                  prered <= LM_MACHEP && ratio <= 2 ){
 551                 S->outcome = 6;
 552                 goto terminate;
 553             }
 554             if ( delta <= LM_MACHEP*xnorm ){
 555                 S->outcome = 7;
 556                 goto terminate;
 557             }
 558             if ( gnorm <= LM_MACHEP ){
 559                 S->outcome = 8;
 560                 goto terminate;
 561             }
 562
 563 /***  [inner]  End of the loop. Repeat if iteration unsuccessful.  ***/
 564
 565             ++inner;
 566         } while ( !inner_success );
 567
 568 /***  [outer]  End of the loop. ***/
 569
 570     };
 571
 572 terminate:
 573     S->fnorm = lm_fnorm(m, fvec, y);
 574     if( C->verbosity&1 )
 575         fprintf(msgfile, "lmmin terminates with outcome %i\n", S->outcome);
 576     if( C->verbosity&2 )
 577         lm_print_pars(nout, x, msgfile);
 578     if( C->verbosity&8 ) {
 579         if (y) {
 580             for( i=0; i<m; ++i )
 581                 fprintf(msgfile, "    i, f, y-f: %4i %18.8g %18.8g\n",
 582                         i, fvec[i], y[i]-fvec[i] );
 583         } else {
 584             for( i=0; i<m; ++i )
 585                 fprintf(msgfile, "    i, f, y-f: %4i %18.8g\n", i, fvec[i]);
 586         }
 587     }
 588     if( C->verbosity&2 )
 589         fprintf(msgfile, "  fnorm=%24.16g xnorm=%24.16g\n", S->fnorm, xnorm);
 590     if ( S->userbreak ) /* user-requested break */
 591         S->outcome = 11;
 592
 593 /***  Deallocate the workspace.  ***/
 594     free(ws);
 595
 596 } /*** lmmin. ***/
 597
 598
 599 /*****************************************************************************/
 600 /*  lm_lmpar (determine Levenberg-Marquardt parameter)                       */
 601 /*****************************************************************************/
 602
 603 void lm_lmpar(
 604     const int n, double *const r, const int ldr, int *const ipvt,
 605     double *const diag, double *const qtb, double delta, double *const par,
 606     double *const x, double *const sdiag, double *const aux, double *const xdi)
 607 {
 608 /*     Given an m by n matrix A, an n by n nonsingular diagonal matrix D,
 609  *     an m-vector b, and a positive number delta, the problem is to
 610  *     determine a parameter value par such that if x solves the system
 611  *
 612  *          A*x = b  and  sqrt(par)*D*x = 0
 613  *
 614  *     in the least squares sense, and dxnorm is the euclidean
 615  *     norm of D*x, then either par=0 and (dxnorm-delta) < 0.1*delta,
 616  *     or par>0 and abs(dxnorm-delta) < 0.1*delta.
 617  *
 618  *     Using lm_qrsolv, this subroutine completes the solution of the
 619  *     problem if it is provided with the necessary information from
 620  *     the QR factorization, with column pivoting, of A. That is, if
 621  *     A*P = Q*R, where P is a permutation matrix, Q has orthogonal
 622  *     columns, and R is an upper triangular matrix with diagonal
 623  *     elements of nonincreasing magnitude, then lmpar expects the
 624  *     full upper triangle of R, the permutation matrix P, and the
 625  *     first n components of Q^T*b. On output lmpar also provides an
 626  *     upper triangular matrix S such that
 627  *
 628  *          P^T*(A^T*A + par*D*D)*P = S^T*S.
 629  *
 630  *     S is employed within lmpar and may be of separate interest.
 631  *
 632  *     Only a few iterations are generally needed for convergence
 633  *     of the algorithm. If, however, the limit of 10 iterations
 634  *     is reached, then the output par will contain the best value
 635  *     obtained so far.
 636  *
 637  *     Parameters:
 638  *
 639  *      n is a positive integer INPUT variable set to the order of r.
 640  *
 641  *      r is an n by n array. On INPUT the full upper triangle
 642  *        must contain the full upper triangle of the matrix R.
 643  *        On OUTPUT the full upper triangle is unaltered, and the
 644  *        strict lower triangle contains the strict upper triangle
 645  *        (transposed) of the upper triangular matrix S.
 646  *
 647  *      ldr is a positive integer INPUT variable not less than n
 648  *        which specifies the leading dimension of the array R.
 649  *
 650  *      ipvt is an integer INPUT array of length n which defines the
 651  *        permutation matrix P such that A*P = Q*R. Column j of P
 652  *        is column ipvt(j) of the identity matrix.
 653  *
 654  *      diag is an INPUT array of length n which must contain the
 655  *        diagonal elements of the matrix D.
 656  *
 657  *      qtb is an INPUT array of length n which must contain the first
 658  *        n elements of the vector Q^T*b.
 659  *
 660  *      delta is a positive INPUT variable which specifies an upper
 661  *        bound on the euclidean norm of D*x.
 662  *
 663  *      par is a nonnegative variable. On INPUT par contains an
 664  *        initial estimate of the Levenberg-Marquardt parameter.
 665  *        On OUTPUT par contains the final estimate.
 666  *
 667  *      x is an OUTPUT array of length n which contains the least
 668  *        squares solution of the system A*x = b, sqrt(par)*D*x = 0,
 669  *        for the output par.
 670  *
 671  *      sdiag is an array of length n needed as workspace; on OUTPUT
 672  *        it contains the diagonal elements of the upper triangular
 673  *        matrix S.
 674  *
 675  *      aux is a multi-purpose work array of length n.
 676  *
 677  *      xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
 678  *
 679  */
 680     int i, iter, j, nsing;
 681     double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
 682     double sum, temp;
 683     static double p1 = 0.1;
 684
 685 /*** lmpar: compute and store in x the gauss-newton direction. if the
 686      jacobian is rank-deficient, obtain a least squares solution. ***/
 687
 688     nsing = n;
 689     for (j = 0; j < n; j++) {
 690         aux[j] = qtb[j];
 691         if (r[j * ldr + j] == 0 && nsing == n)
 692             nsing = j;
 693         if (nsing < n)
 694             aux[j] = 0;
 695     }
 696     for (j = nsing - 1; j >= 0; j--) {
 697         aux[j] = aux[j] / r[j + ldr * j];
 698         temp = aux[j];
 699         for (i = 0; i < j; i++)
 700             aux[i] -= r[j * ldr + i] * temp;
 701     }
 702
 703     for (j = 0; j < n; j++)
 704         x[ipvt[j]] = aux[j];
 705
 706 /*** lmpar: initialize the iteration counter, evaluate the function at the
 707      origin, and test for acceptance of the gauss-newton direction. ***/
 708
 709     for (j = 0; j < n; j++)
 710         xdi[j] = diag[j] * x[j];
 711     dxnorm = lm_enorm(n, xdi);
 712     fp = dxnorm - delta;
 713     if (fp <= p1 * delta) {
 714 #ifdef LMFIT_DEBUG_MESSAGES
 715         printf("debug lmpar nsing %d n %d, terminate (fp<p1*delta)\n",
 716                nsing, n);
 717 #endif
 718         *par = 0;
 719         return;
 720     }
 721
 722 /*** lmpar: if the jacobian is not rank deficient, the newton
 723      step provides a lower bound, parl, for the zero of
 724      the function. otherwise set this bound to zero. ***/
 725
 726     parl = 0;
 727     if (nsing >= n) {
 728         for (j = 0; j < n; j++)
 729             aux[j] = diag[ipvt[j]] * xdi[ipvt[j]] / dxnorm;
 730
 731         for (j = 0; j < n; j++) {
 732             sum = 0;
 733             for (i = 0; i < j; i++)
 734                 sum += r[j * ldr + i] * aux[i];
 735             aux[j] = (aux[j] - sum) / r[j + ldr * j];
 736         }
 737         temp = lm_enorm(n, aux);
 738         parl = fp / delta / temp / temp;
 739     }
 740
 741 /*** lmpar: calculate an upper bound, paru, for the zero of the function. ***/
 742
 743     for (j = 0; j < n; j++) {
 744         sum = 0;
 745         for (i = 0; i <= j; i++)
 746             sum += r[j * ldr + i] * qtb[i];
 747         aux[j] = sum / diag[ipvt[j]];
 748     }
 749     gnorm = lm_enorm(n, aux);
 750     paru = gnorm / delta;
 751     if (paru == 0)
 752         paru = LM_DWARF / MIN(delta, p1);
 753
 754 /*** lmpar: if the input par lies outside of the interval (parl,paru),
 755      set par to the closer endpoint. ***/
 756
 757     *par = MAX(*par, parl);
 758     *par = MIN(*par, paru);
 759     if (*par == 0)
 760         *par = gnorm / dxnorm;
 761
 762 /*** lmpar: iterate. ***/
 763
 764     for (iter=0; ; iter++) {
 765
 766         /** evaluate the function at the current value of par. **/
 767
 768         if (*par == 0)
 769             *par = MAX(LM_DWARF, 0.001 * paru);
 770         temp = sqrt(*par);
 771         for (j = 0; j < n; j++)
 772             aux[j] = temp * diag[j];
 773
 774         lm_qrsolv(n, r, ldr, ipvt, aux, qtb, x, sdiag, xdi);
 775         /* return values are r, x, sdiag */
 776
 777         for (j = 0; j < n; j++)
 778             xdi[j] = diag[j] * x[j]; /* used as output */
 779         dxnorm = lm_enorm(n, xdi);
 780         fp_old = fp;
 781         fp = dxnorm - delta;
 782
 783         /** if the function is small enough, accept the current value
 784             of par. Also test for the exceptional cases where parl
 785             is zero or the number of iterations has reached 10. **/
 786
 787         if (fabs(fp) <= p1 * delta
 788             || (parl == 0 && fp <= fp_old && fp_old < 0)
 789             || iter == 10) {
 790 #ifdef LMFIT_DEBUG_MESSAGES
 791             printf("debug lmpar nsing %d iter %d "
 792                    "par %.4e [%.4e %.4e] delta %.4e fp %.4e\n",
 793                    nsing, iter, *par, parl, paru, delta, fp);
 794 #endif
 795             break; /* the only exit from the iteration. */
 796         }
 797
 798         /** compute the Newton correction. **/
 799
 800         for (j = 0; j < n; j++)
 801             aux[j] = diag[ipvt[j]] * xdi[ipvt[j]] / dxnorm;
 802
 803         for (j = 0; j < n; j++) {
 804             aux[j] = aux[j] / sdiag[j];
 805             for (i = j + 1; i < n; i++)
 806                 aux[i] -= r[j * ldr + i] * aux[j];
 807         }
 808         temp = lm_enorm(n, aux);
 809         parc = fp / delta / temp / temp;
 810
 811         /** depending on the sign of the function, update parl or paru. **/
 812
 813         if (fp > 0)
 814             parl = MAX(parl, *par);
 815         else if (fp < 0)
 816             paru = MIN(paru, *par);
 817         /* the case fp==0 is precluded by the break condition  */
 818
 819         /** compute an improved estimate for par. **/
 820
 821         *par = MAX(parl, *par + parc);
 822
 823     }
 824
 825 } /*** lm_lmpar. ***/
 826
 827 /*****************************************************************************/
 828 /*  lm_qrfac (QR factorization, from lapack)                                 */
 829 /*****************************************************************************/
 830
 831 void lm_qrfac(
 832     const int m, const int n, double *const A, int *const Pivot,
 833     double *const Rdiag, double *const Acnorm, double *const W)
 834 {
 835 /*
 836  *     This subroutine uses Householder transformations with column pivoting
 837  *     to compute a QR factorization of the m by n matrix A. That is, qrfac
 838  *     determines an orthogonal matrix Q, a permutation matrix P, and an
 839  *     upper trapezoidal matrix R with diagonal elements of nonincreasing
 840  *     magnitude, such that A*P = Q*R. The Householder transformation for
 841  *     column k, k = 1,2,...,n, is of the form
 842  *
 843  *          I - 2*w*wT/|w|^2
 844  *
 845  *     where w has zeroes in the first k-1 positions.
 846  *
 847  *     Parameters:
 848  *
 849  *      m is an INPUT parameter set to the number of rows of A.
 850  *
 851  *      n is an INPUT parameter set to the number of columns of A.
 852  *
 853  *      A is an m by n array. On INPUT, A contains the matrix for
 854  *        which the QR factorization is to be computed. On OUTPUT
 855  *        the strict upper trapezoidal part of A contains the strict
 856  *        upper trapezoidal part of R, and the lower trapezoidal
 857  *        part of A contains a factored form of Q (the non-trivial
 858  *        elements of the vectors w described above).
 859  *
 860  *      Pivot is an integer OUTPUT array of length n that describes the
 861  *        permutation matrix P:
 862  *        Column j of P is column ipvt(j) of the identity matrix.
 863  *
 864  *      Rdiag is an OUTPUT array of length n which contains the
 865  *        diagonal elements of R.
 866  *
 867  *      Acnorm is an OUTPUT array of length n which contains the norms
 868  *        of the corresponding columns of the input matrix A. If this
 869  *        information is not needed, then Acnorm can share storage with Rdiag.
 870  *
 871  *      W is a work array of length n.
 872  *
 873  */
 874     int i, j, k, kmax;
 875     double ajnorm, sum, temp;
 876
 877 #ifdef LMFIT_DEBUG_MESSAGES
 878     printf("debug qrfac\n");
 879 #endif
 880
 881     /** Compute initial column norms;
 882         initialize Pivot with identity permutation. ***/
 883
 884     for (j = 0; j < n; j++) {
 885         W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]);
 886         Pivot[j] = j;
 887     }
 888
 889     /** Loop over columns of A. **/
 890
 891     assert( n <= m );
 892     for (j = 0; j < n; j++) {
 893
 894         /** Bring the column of largest norm into the pivot position. **/
 895
 896         kmax = j;
 897         for (k = j+1; k < n; k++)
 898             if (Rdiag[k] > Rdiag[kmax])
 899                 kmax = k;
 900
 901         if (kmax != j) {
 902             /* Swap columns j and kmax. */
 903             k = Pivot[j];
 904             Pivot[j] = Pivot[kmax];
 905             Pivot[kmax] = k;
 906             for (i = 0; i < m; i++) {
 907                 temp = A[j*m+i];
 908                 A[j*m+i] = A[kmax*m+i];
 909                 A[kmax*m+i] = temp;
 910             }
 911             /* Half-swap: Rdiag[j], W[j] won't be needed any further. */
 912             Rdiag[kmax] = Rdiag[j];
 913             W[kmax] = W[j];
 914         }
 915
 916         /** Compute the Householder reflection vector w_j to reduce the
 917             j-th column of A to a multiple of the j-th unit vector. **/
 918
 919         ajnorm = lm_enorm(m-j, &A[j*m+j]);
 920         if (ajnorm == 0) {
 921             Rdiag[j] = 0;
 922             continue;
 923         }
 924
 925         /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|,
 926            where the sign +- is chosen to avoid cancellation in w_jj. */
 927         if (A[j*m+j] < 0)
 928             ajnorm = -ajnorm;
 929         for (i = j; i < m; i++)
 930             A[j*m+i] /= ajnorm;
 931         A[j*m+j] += 1;
 932
 933         /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2
 934             to the remaining columns, and update the norms. **/
 935
 936         for (k = j+1; k < n; k++) {
 937             /* Compute scalar product w_j * a_j. */
 938             sum = 0;
 939             for (i = j; i < m; i++)
 940                 sum += A[j*m+i] * A[k*m+i];
 941
 942             /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */
 943             temp = sum / A[j*m+j];
 944
 945             /* Carry out transform U_w_j * a_k. */
 946             for (i = j; i < m; i++)
 947                 A[k*m+i] -= temp * A[j*m+i];
 948
 949             /* No idea what happens here. */
 950             if (Rdiag[k] != 0) {
 951                 temp = A[m*k+j] / Rdiag[k];
 952                 if ( fabs(temp)<1 ) {
 953                     Rdiag[k] *= sqrt(1-SQR(temp));
 954                     temp = Rdiag[k] / W[k];
 955                 } else
 956                     temp = 0;
 957                 if ( temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP ) {
 958                     Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]);
 959                     W[k] = Rdiag[k];
 960                 }
 961             }
 962         }
 963
 964         Rdiag[j] = -ajnorm;
 965     }
 966 } /*** lm_qrfac. ***/
 967
 968
 969 /*****************************************************************************/
 970 /*  lm_qrsolv (linear least-squares)                                         */
 971 /*****************************************************************************/
 972
 973 void lm_qrsolv(
 974     const int n, double *const r, const int ldr, int *const ipvt,
 975     double *const diag, double *const qtb, double *const x,
 976     double *const sdiag, double *const wa)
 977 {
 978 /*
 979  *     Given an m by n matrix A, an n by n diagonal matrix D, and an
 980  *     m-vector b, the problem is to determine an x which solves the
 981  *     system
 982  *
 983  *          A*x = b  and  D*x = 0
 984  *
 985  *     in the least squares sense.
 986  *
 987  *     This subroutine completes the solution of the problem if it is
 988  *     provided with the necessary information from the QR factorization,
 989  *     with column pivoting, of A. That is, if A*P = Q*R, where P is a
 990  *     permutation matrix, Q has orthogonal columns, and R is an upper
 991  *     triangular matrix with diagonal elements of nonincreasing magnitude,
 992  *     then qrsolv expects the full upper triangle of R, the permutation
 993  *     matrix P, and the first n components of Q^T*b. The system
 994  *     A*x = b, D*x = 0, is then equivalent to
 995  *
 996  *          R*z = Q^T*b,  P^T*D*P*z = 0,
 997  *
 998  *     where x = P*z. If this system does not have full rank, then a least
 999  *     squares solution is obtained. On output qrsolv also provides an upper
1000  *     triangular matrix S such that
1001  *
1002  *          P^T*(A^T*A + D*D)*P = S^T*S.
1003  *
1004  *     S is computed within qrsolv and may be of separate interest.
1005  *
1006  *     Parameters:
1007  *
1008  *      n is a positive integer INPUT variable set to the order of R.
1009  *
1010  *      r is an n by n array. On INPUT the full upper triangle must
1011  *        contain the full upper triangle of the matrix R. On OUTPUT
1012  *        the full upper triangle is unaltered, and the strict lower
1013  *        triangle contains the strict upper triangle (transposed) of
1014  *        the upper triangular matrix S.
1015  *
1016  *      ldr is a positive integer INPUT variable not less than n
1017  *        which specifies the leading dimension of the array R.
1018  *
1019  *      ipvt is an integer INPUT array of length n which defines the
1020  *        permutation matrix P such that A*P = Q*R. Column j of P
1021  *        is column ipvt(j) of the identity matrix.
1022  *
1023  *      diag is an INPUT array of length n which must contain the
1024  *        diagonal elements of the matrix D.
1025  *
1026  *      qtb is an INPUT array of length n which must contain the first
1027  *        n elements of the vector Q^T*b.
1028  *
1029  *      x is an OUTPUT array of length n which contains the least
1030  *        squares solution of the system A*x = b, D*x = 0.
1031  *
1032  *      sdiag is an OUTPUT array of length n which contains the
1033  *        diagonal elements of the upper triangular matrix S.
1034  *
1035  *      wa is a work array of length n.
1036  *
1037  */
1038     int i, kk, j, k, nsing;
1039     double qtbpj, sum, temp;
1040     double _sin, _cos, _tan, _cot; /* local variables, not functions */
1041
1042 /*** qrsolv: copy R and Q^T*b to preserve input and initialize S.
1043      In particular, save the diagonal elements of R in x. ***/
1044
1045     for (j = 0; j < n; j++) {
1046         for (i = j; i < n; i++)
1047             r[j * ldr + i] = r[i * ldr + j];
1048         x[j] = r[j * ldr + j];
1049         wa[j] = qtb[j];
1050     }
1051
1052 /*** qrsolv: eliminate the diagonal matrix D using a Givens rotation. ***/
1053
1054     for (j = 0; j < n; j++) {
1055
1056 /*** qrsolv: prepare the row of D to be eliminated, locating the
1057      diagonal element using P from the QR factorization. ***/
1058
1059         if (diag[ipvt[j]] == 0)
1060             goto L90;
1061         for (k = j; k < n; k++)
1062             sdiag[k] = 0;
1063         sdiag[j] = diag[ipvt[j]];
1064
1065 /*** qrsolv: the transformations to eliminate the row of D modify only
1066      a single element of Q^T*b beyond the first n, which is initially 0. ***/
1067
1068         qtbpj = 0;
1069         for (k = j; k < n; k++) {
1070
1071             /** determine a Givens rotation which eliminates the
1072                 appropriate element in the current row of D. **/
1073
1074             if (sdiag[k] == 0)
1075                 continue;
1076             kk = k + ldr * k;
1077             if (fabs(r[kk]) < fabs(sdiag[k])) {
1078                 _cot = r[kk] / sdiag[k];
1079                 _sin = 1 / sqrt(1 + SQR(_cot));
1080                 _cos = _sin * _cot;
1081             } else {
1082                 _tan = sdiag[k] / r[kk];
1083                 _cos = 1 / sqrt(1 + SQR(_tan));
1084                 _sin = _cos * _tan;
1085             }
1086
1087             /** compute the modified diagonal element of R and
1088                 the modified element of (Q^T*b,0). **/
1089
1090             r[kk] = _cos * r[kk] + _sin * sdiag[k];
1091             temp = _cos * wa[k] + _sin * qtbpj;
1092             qtbpj = -_sin * wa[k] + _cos * qtbpj;
1093             wa[k] = temp;
1094
1095             /** accumulate the tranformation in the row of S. **/
1096
1097             for (i = k + 1; i < n; i++) {
1098                 temp = _cos * r[k * ldr + i] + _sin * sdiag[i];
1099                 sdiag[i] = -_sin * r[k * ldr + i] + _cos * sdiag[i];
1100                 r[k * ldr + i] = temp;
1101             }
1102         }
1103
1104       L90:
1105         /** store the diagonal element of S and restore
1106             the corresponding diagonal element of R. **/
1107
1108         sdiag[j] = r[j * ldr + j];
1109         r[j * ldr + j] = x[j];
1110     }
1111
1112 /*** qrsolv: solve the triangular system for z. If the system is
1113      singular, then obtain a least squares solution. ***/
1114
1115     nsing = n;
1116     for (j = 0; j < n; j++) {
1117         if (sdiag[j] == 0 && nsing == n)
1118             nsing = j;
1119         if (nsing < n)
1120             wa[j] = 0;
1121     }
1122
1123     for (j = nsing - 1; j >= 0; j--) {
1124         sum = 0;
1125         for (i = j + 1; i < nsing; i++)
1126             sum += r[j * ldr + i] * wa[i];
1127         wa[j] = (wa[j] - sum) / sdiag[j];
1128     }
1129
1130 /*** qrsolv: permute the components of z back to components of x. ***/
1131
1132     for (j = 0; j < n; j++)
1133         x[ipvt[j]] = wa[j];
1134
1135 } /*** lm_qrsolv. ***/
1136
1137
1138 /*****************************************************************************/
1139 /*  lm_enorm (Euclidean norm)                                                */
1140 /*****************************************************************************/
1141
1142 double lm_enorm(const int n, const double *const x)
1143 {
1144 /*     This function calculates the Euclidean norm of an n-vector x.
1145  *
1146  *     The Euclidean norm is computed by accumulating the sum of
1147  *     squares in three different sums. The sums of squares for the
1148  *     small and large components are scaled so that no overflows
1149  *     occur. Non-destructive underflows are permitted. Underflows
1150  *     and overflows do not occur in the computation of the unscaled
1151  *     sum of squares for the intermediate components.
1152  *     The definitions of small, intermediate and large components
1153  *     depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1154  *     restrictions on these constants are that LM_SQRT_DWARF**2 not
1155  *     underflow and LM_SQRT_GIANT**2 not overflow.
1156  *
1157  *     Parameters:
1158  *
1159  *      n is a positive integer INPUT variable.
1160  *
1161  *      x is an INPUT array of length n.
1162  */
1163     int i;
1164     double agiant, s1, s2, s3, xabs, x1max, x3max, temp;
1165
1166     s1 = 0;
1167     s2 = 0;
1168     s3 = 0;
1169     x1max = 0;
1170     x3max = 0;
1171     agiant = LM_SQRT_GIANT / n;
1172
1173     /** sum squares. **/
1174
1175     for (i = 0; i < n; i++) {
1176         xabs = fabs(x[i]);
1177         if (xabs > LM_SQRT_DWARF) {
1178             if ( xabs < agiant ) {
1179                 s2 += xabs * xabs;
1180             } else if ( xabs > x1max ) {
1181                 temp = x1max / xabs;
1182                 s1 = 1 + s1 * SQR(temp);
1183                 x1max = xabs;
1184             } else {
1185                 temp = xabs / x1max;
1186                 s1 += SQR(temp);
1187             }
1188         } else if ( xabs > x3max ) {
1189             temp = x3max / xabs;
1190             s3 = 1 + s3 * SQR(temp);
1191             x3max = xabs;
1192         } else if (xabs != 0) {
1193             temp = xabs / x3max;
1194             s3 += SQR(temp);
1195         }
1196     }
1197
1198     /** calculation of norm. **/
1199
1200     if (s1 != 0)
1201         return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1202     else if (s2 != 0)
1203         if (s2 >= x3max)
1204             return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1205         else
1206             return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1207     else
1208         return x3max * sqrt(s3);
1209
1210 } /*** lm_enorm. ***/
1211
1212
1213 /*****************************************************************************/
1214 /*  lm_fnorm (Euclidean norm of difference)                                                */
1215 /*****************************************************************************/
1216
1217 double lm_fnorm(const int n, const double *const x, const double *const y)
1218 {
1219 /*     This function calculates the Euclidean norm of an n-vector x-y.
1220  *
1221  *     The Euclidean norm is computed by accumulating the sum of
1222  *     squares in three different sums. The sums of squares for the
1223  *     small and large components are scaled so that no overflows
1224  *     occur. Non-destructive underflows are permitted. Underflows
1225  *     and overflows do not occur in the computation of the unscaled
1226  *     sum of squares for the intermediate components.
1227  *     The definitions of small, intermediate and large components
1228  *     depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1229  *     restrictions on these constants are that LM_SQRT_DWARF**2 not
1230  *     underflow and LM_SQRT_GIANT**2 not overflow.
1231  *
1232  *     Parameters:
1233  *
1234  *      n is a positive integer INPUT variable.
1235  *
1236  *      x, y are INPUT arrays of length n.
1237  */
1238     if (!y)
1239         return lm_enorm(n, x);
1240     int i;
1241     double agiant, s1, s2, s3, xabs, x1max, x3max, temp;
1242
1243     s1 = 0;
1244     s2 = 0;
1245     s3 = 0;
1246     x1max = 0;
1247     x3max = 0;
1248     agiant = LM_SQRT_GIANT / n;
1249
1250     /** sum squares. **/
1251
1252     for (i = 0; i < n; i++) {
1253         xabs = fabs(x[i]-y[i]);
1254         if (xabs > LM_SQRT_DWARF) {
1255             if ( xabs < agiant ) {
1256                 s2 += xabs * xabs;
1257             } else if ( xabs > x1max ) {
1258                 temp = x1max / xabs;
1259                 s1 = 1 + s1 * SQR(temp);
1260                 x1max = xabs;
1261             } else {
1262                 temp = xabs / x1max;
1263                 s1 += SQR(temp);
1264             }
1265         } else if ( xabs > x3max ) {
1266             temp = x3max / xabs;
1267             s3 = 1 + s3 * SQR(temp);
1268             x3max = xabs;
1269         } else if (xabs != 0) {
1270             temp = xabs / x3max;
1271             s3 += SQR(temp);
1272         }
1273     }
1274
1275     /** calculation of norm. **/
1276
1277     if (s1 != 0)
1278         return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1279     else if (s2 != 0)
1280         if (s2 >= x3max)
1281             return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1282         else
1283             return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1284     else
1285         return x3max * sqrt(s3);
1286
1287 } /*** lm_fnorm. ***/