2 * Library: lmfit (Levenberg-Marquardt least squares fitting)
6 * Contents: Levenberg-Marquardt minimization.
8 * Copyright: MINPACK authors, The University of Chikago (1980-1999)
9 * Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
11 * License: see ../COPYING (FreeBSD)
13 * Homepage: apps.jcns.fz-juelich.de/lmfit
23 #define MIN(a,b) (((a)<=(b)) ? (a) : (b))
24 #define MAX(a,b) (((a)>=(b)) ? (a) : (b))
25 #define SQR(x) (x)*(x)
27 /* Declare functions that do the heavy numerics.
28 Implementions are in this source file, below lmmin.
29 Dependences: lmmin calls qrfac and lmpar; lmpar calls qrsolv. */
31 const int n
, double *const r
, const int ldr
, int *const ipvt
,
32 double *const diag
, double *const qtb
, double delta
, double *const par
,
34 double *const sdiag
, double *const aux
, double *const xdi
);
36 const int m
, const int n
, double *const a
, int *const ipvt
,
37 double *const rdiag
, double *const acnorm
, double *const wa
);
38 static void lm_qrsolv(
39 const int n
, double *const r
, const int ldr
, int *const ipvt
,
40 double *const diag
, double *const qtb
, double *const x
,
41 double *const sdiag
, double *const wa
);
44 /*****************************************************************************/
45 /* Numeric constants */
46 /*****************************************************************************/
48 /* machine-dependent constants from float.h */
49 #define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */
50 #define LM_DWARF DBL_MIN /* smallest nonzero number */
51 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
52 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
53 #define LM_USERTOL 30*LM_MACHEP /* users are recommended to require this */
55 /* If the above values do not work, the following seem good for an x86:
61 The following values should work on any machine:
64 LM_SQRT_DWARF 3.834e-20
65 LM_SQRT_GIANT 1.304e19
69 const lm_control_struct lm_control_double
= {
70 LM_USERTOL
, LM_USERTOL
, LM_USERTOL
, LM_USERTOL
, 100., 100, 1,
72 const lm_control_struct lm_control_float
= {
73 1.e
-7, 1.e
-7, 1.e
-7, 1.e
-7, 100., 100, 1,
77 /*****************************************************************************/
78 /* Message texts (indexed by status.info) */
79 /*****************************************************************************/
81 const char *lm_infmsg
[] = {
82 "found zero (sum of squares below underflow limit)",
83 "converged (the relative error in the sum of squares is at most tol)",
84 "converged (the relative error of the parameter vector is at most tol)",
85 "converged (both errors are at most tol)",
86 "trapped (by degeneracy; increasing epsilon might help)",
87 "exhausted (number of function calls exceeding preset patience)",
88 "failed (ftol<tol: cannot reduce sum of squares any further)",
89 "failed (xtol<tol: cannot improve approximate solution any further)",
90 "failed (gtol<tol: cannot improve approximate solution any further)",
91 "crashed (not enough memory)",
92 "exploded (fatal coding error: improper input parameters)",
93 "stopped (break requested within function evaluation)",
94 "found nan (function value is not-a-number or infinite)",
95 "won't fit (no free parameter)"
98 const char *lm_shortmsg
[] = {
100 "converged (f)", // 1
101 "converged (p)", // 2
102 "converged (2)", // 3
109 "invalid input", // 10
116 /*****************************************************************************/
117 /* Monitoring auxiliaries. */
118 /*****************************************************************************/
120 static void lm_print_pars(const int nout
, const double *par
, FILE* fout
)
122 fprintf(fout
, " pars:");
123 for (int i
= 0; i
< nout
; ++i
)
124 fprintf(fout
, " %23.16g", par
[i
]);
129 /*****************************************************************************/
130 /* lmmin (main minimization routine) */
131 /*****************************************************************************/
134 const int n
, double *const x
, const int m
, const double* y
,
135 const void *const data
,
136 void (*const evaluate
)(
137 const double *const par
, const int m_dat
, const void *const data
,
138 double *const fvec
, int *const userbreak
),
139 const lm_control_struct
*const C
, lm_status_struct
*const S
)
142 double actred
, dirder
, fnorm
, fnorm1
, gnorm
, pnorm
,
143 prered
, ratio
, step
, sum
, temp
, temp1
, temp2
, temp3
;
144 static double p1
= 0.1, p0001
= 1.0e-4;
146 int maxfev
= C
->patience
* (n
+1);
148 int inner_success
; /* flag for loop control */
149 double lmpar
= 0; /* Levenberg-Marquardt parameter */
152 double eps
= sqrt(MAX(C
->epsilon
, LM_MACHEP
)); /* for forward differences */
154 int nout
= C
->n_maxpri
==-1 ? n
: MIN(C
->n_maxpri
, n
);
156 /* The workaround msgfile=NULL is needed for default initialization */
157 FILE* msgfile
= C
->msgfile
? C
->msgfile
: stdout
;
159 /* Default status info; must be set ahead of first return statements */
160 S
->outcome
= 0; /* status code */
162 S
->nfev
= 0; /* function evaluation counter */
164 /*** Check input parameters for errors. ***/
167 fprintf(stderr
, "lmmin: invalid number of parameters %i\n", n
);
168 S
->outcome
= 10; /* invalid parameter */
172 fprintf(stderr
, "lmmin: number of data points (%i) "
173 "smaller than number of parameters (%i)\n", m
, n
);
177 if (C
->ftol
< 0 || C
->xtol
< 0 || C
->gtol
< 0) {
179 "lmmin: negative tolerance (at least one of %g %g %g)\n",
180 C
->ftol
, C
->xtol
, C
->gtol
);
185 fprintf(stderr
, "lmmin: nonpositive function evaluations limit %i\n",
190 if (C
->stepbound
<= 0) {
191 fprintf(stderr
, "lmmin: nonpositive stepbound %g\n", C
->stepbound
);
195 if (C
->scale_diag
!= 0 && C
->scale_diag
!= 1) {
196 fprintf(stderr
, "lmmin: logical variable scale_diag=%i, "
197 "should be 0 or 1\n", C
->scale_diag
);
202 /*** Allocate work space. ***/
204 /* Allocate total workspace with just one system call */
206 if ( ( ws
= static_cast<char *>(malloc(
207 (2*m
+5*n
+m
*n
)*sizeof(double) + n
*sizeof(int)) ) ) == NULL
) {
212 /* Assign workspace segments. */
214 double *fvec
= (double*) pws
; pws
+= m
* sizeof(double)/sizeof(char);
215 double *diag
= (double*) pws
; pws
+= n
* sizeof(double)/sizeof(char);
216 double *qtf
= (double*) pws
; pws
+= n
* sizeof(double)/sizeof(char);
217 double *fjac
= (double*) pws
; pws
+= n
*m
*sizeof(double)/sizeof(char);
218 double *wa1
= (double*) pws
; pws
+= n
* sizeof(double)/sizeof(char);
219 double *wa2
= (double*) pws
; pws
+= n
* sizeof(double)/sizeof(char);
220 double *wa3
= (double*) pws
; pws
+= n
* sizeof(double)/sizeof(char);
221 double *wf
= (double*) pws
; pws
+= m
* sizeof(double)/sizeof(char);
222 int *ipvt
= (int*) pws
; /*pws += n * sizeof(int) /sizeof(char);*/
224 /* Initialize diag */ // TODO: check whether this is still needed
225 if (!C
->scale_diag
) {
226 for (j
= 0; j
< n
; j
++)
230 /*** Evaluate function at starting point and calculate norm. ***/
233 fprintf(msgfile
, "lmmin start (ftol=%g gtol=%g xtol=%g)\n",
234 C
->ftol
, C
->gtol
, C
->xtol
);
236 lm_print_pars(nout
, x
, msgfile
);
237 (*evaluate
)(x
, m
, data
, fvec
, &(S
->userbreak
));
242 fprintf(msgfile
, " i, f, y-f: %4i %18.8g %18.8g\n",
243 i
, fvec
[i
], y
[i
]-fvec
[i
]);
246 fprintf(msgfile
, " i, f: %4i %18.8g\n", i
, fvec
[i
]);
253 S
->outcome
= 13; /* won't fit */
256 fnorm
= lm_fnorm(m
, fvec
, y
);
258 fprintf(msgfile
, " fnorm = %24.16g\n", fnorm
);
259 if( !isfinite(fnorm
) ){
261 fprintf(msgfile
, "nan case 1\n");
262 S
->outcome
= 12; /* nan */
264 } else if( fnorm
<= LM_DWARF
){
265 S
->outcome
= 0; /* sum of squares almost zero, nothing to do */
269 /*** The outer loop: compute gradient, then descend. ***/
271 for( int outer
=0; ; ++outer
) {
273 /*** [outer] Calculate the Jacobian. ***/
275 for (j
= 0; j
< n
; j
++) {
277 step
= MAX(eps
*eps
, eps
* fabs(temp
));
278 x
[j
] += step
; /* replace temporarily */
279 (*evaluate
)(x
, m
, data
, wf
, &(S
->userbreak
));
283 for (i
= 0; i
< m
; i
++)
284 fjac
[j
*m
+i
] = (wf
[i
] - fvec
[i
]) / step
;
285 x
[j
] = temp
; /* restore */
287 if ( C
->verbosity
&16 ) {
288 /* print the entire matrix */
289 printf("Jacobian\n");
290 for (i
= 0; i
< m
; i
++) {
292 for (j
= 0; j
< n
; j
++)
293 printf("%.5e ", fjac
[j
*m
+i
]);
298 /*** [outer] Compute the QR factorization of the Jacobian. ***/
300 /* fjac is an m by n array. The upper n by n submatrix of fjac
301 * is made to contain an upper triangular matrix R with diagonal
302 * elements of nonincreasing magnitude such that
304 * P^T*(J^T*J)*P = R^T*R
306 * (NOTE: ^T stands for matrix transposition),
308 * where P is a permutation matrix and J is the final calculated
309 * Jacobian. Column j of P is column ipvt(j) of the identity matrix.
310 * The lower trapezoidal part of fjac contains information generated
311 * during the computation of R.
313 * ipvt is an integer array of length n. It defines a permutation
314 * matrix P such that jac*P = Q*R, where jac is the final calculated
315 * Jacobian, Q is orthogonal (not stored), and R is upper triangular
316 * with diagonal elements of nonincreasing magnitude. Column j of P
317 * is column ipvt(j) of the identity matrix.
320 lm_qrfac(m
, n
, fjac
, ipvt
, wa1
, wa2
, wa3
);
321 /* return values are ipvt, wa1=rdiag, wa2=acnorm */
323 /*** [outer] Form Q^T * fvec, and store first n components in qtf. ***/
326 for (i
= 0; i
< m
; i
++)
327 wf
[i
] = fvec
[i
] - y
[i
];
329 for (i
= 0; i
< m
; i
++)
332 for (j
= 0; j
< n
; j
++) {
336 for (i
= j
; i
< m
; i
++)
337 sum
+= fjac
[j
*m
+i
] * wf
[i
];
339 for (i
= j
; i
< m
; i
++)
340 wf
[i
] += fjac
[j
*m
+i
] * temp
;
342 fjac
[j
*m
+j
] = wa1
[j
];
346 /*** [outer] Compute norm of scaled gradient and detect degeneracy. ***/
349 for (j
= 0; j
< n
; j
++) {
350 if (wa2
[ipvt
[j
]] == 0)
353 for (i
= 0; i
<= j
; i
++)
354 sum
+= fjac
[j
*m
+i
] * qtf
[i
];
355 gnorm
= MAX(gnorm
, fabs( sum
/ wa2
[ipvt
[j
]] / fnorm
));
358 if (gnorm
<= C
->gtol
) {
363 /*** [outer] Initialize / update diag and delta. ***/
366 /* first iteration only */
368 /* diag := norms of the columns of the initial Jacobian */
369 for (j
= 0; j
< n
; j
++)
370 diag
[j
] = wa2
[j
] ? wa2
[j
] : 1;
371 /* xnorm := || D x || */
372 for (j
= 0; j
< n
; j
++)
373 wa3
[j
] = diag
[j
] * x
[j
];
374 xnorm
= lm_enorm(n
, wa3
);
376 xnorm
= lm_enorm(n
, x
);
378 if( !isfinite(xnorm
) ){
380 fprintf(msgfile
, "nan case 2\n");
381 S
->outcome
= 12; /* nan */
384 /* initialize the step bound delta. */
386 delta
= C
->stepbound
* xnorm
;
388 delta
= C
->stepbound
;
389 /* only now print the header for the loop table */
390 if( C
->verbosity
&2 ) {
391 fprintf(msgfile
, " #o #i lmpar prered actred"
392 " ratio dirder delta"
394 for (i
= 0; i
< nout
; ++i
)
395 fprintf(msgfile
, " p%i", i
);
396 fprintf(msgfile
, "\n");
400 for (j
= 0; j
< n
; j
++)
401 diag
[j
] = MAX( diag
[j
], wa2
[j
] );
405 /*** The inner loop. ***/
409 /*** [inner] Determine the Levenberg-Marquardt parameter. ***/
411 lm_lmpar(n
, fjac
, m
, ipvt
, diag
, qtf
, delta
, &lmpar
,
413 /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
415 /* predict scaled reduction */
416 pnorm
= lm_enorm(n
, wa3
);
417 if( !isfinite(pnorm
) ){
419 fprintf(msgfile
, "nan case 3\n");
420 S
->outcome
= 12; /* nan */
423 temp2
= lmpar
* SQR( pnorm
/ fnorm
);
424 for (j
= 0; j
< n
; j
++) {
426 for (i
= 0; i
<= j
; i
++)
427 wa3
[i
] -= fjac
[j
*m
+i
] * wa1
[ipvt
[j
]];
429 temp1
= SQR( lm_enorm(n
, wa3
) / fnorm
);
430 if( !isfinite(temp1
) ){
432 fprintf(msgfile
, "nan case 4\n");
433 S
->outcome
= 12; /* nan */
436 prered
= temp1
+ 2 * temp2
;
437 dirder
= -temp1
+ temp2
; /* scaled directional derivative */
439 /* at first call, adjust the initial step bound. */
440 if ( !outer
&& !inner
&& pnorm
< delta
)
443 /*** [inner] Evaluate the function at x + p. ***/
445 for (j
= 0; j
< n
; j
++)
446 wa2
[j
] = x
[j
] - wa1
[j
];
448 (*evaluate
)( wa2
, m
, data
, wf
, &(S
->userbreak
) );
452 fnorm1
= lm_fnorm(m
, wf
, y
);
453 // exceptionally, for this norm we do not test for infinity
454 // because we can deal with it without terminating.
456 /*** [inner] Evaluate the scaled reduction. ***/
458 /* actual scaled reduction (supports even the case fnorm1=infty) */
459 if (p1
* fnorm1
< fnorm
)
460 actred
= 1 - SQR(fnorm1
/ fnorm
);
464 /* ratio of actual to predicted reduction */
465 ratio
= prered
? actred
/prered
: 0;
467 if( C
->verbosity
&32 ) {
470 fprintf(msgfile
, " i, f, y-f: %4i %18.8g %18.8g\n",
471 i
, fvec
[i
], y
[i
]-fvec
[i
]);
474 fprintf(msgfile
, " i, f, y-f: %4i %18.8g\n",
478 if( C
->verbosity
&2 ) {
479 printf("%3i %2i %9.2g %9.2g %9.2g %14.6g"
480 " %9.2g %10.3e %10.3e %21.15e",
481 outer
, inner
, lmpar
, prered
, actred
, ratio
,
482 dirder
, delta
, pnorm
, fnorm1
);
483 for (i
= 0; i
< nout
; ++i
)
484 fprintf(msgfile
, " %16.9g", wa2
[i
]);
485 fprintf(msgfile
, "\n");
488 /* update the step bound */
493 temp
= 0.5 * dirder
/ (dirder
+ 0.5 * actred
);
494 if (p1
* fnorm1
>= fnorm
|| temp
< p1
)
496 delta
= temp
* MIN(delta
, pnorm
/ p1
);
498 } else if (lmpar
== 0 || ratio
>= 0.75) {
503 /*** [inner] On success, update solution, and test for convergence. ***/
505 inner_success
= ratio
>= p0001
;
506 if ( inner_success
) {
508 /* update x, fvec, and their norms */
510 for (j
= 0; j
< n
; j
++) {
512 wa2
[j
] = diag
[j
] * x
[j
];
515 for (j
= 0; j
< n
; j
++)
518 for (i
= 0; i
< m
; i
++)
520 xnorm
= lm_enorm(n
, wa2
);
521 if( !isfinite(xnorm
) ){
523 fprintf(msgfile
, "nan case 6\n");
524 S
->outcome
= 12; /* nan */
530 /* convergence tests */
532 if( fnorm
<=LM_DWARF
)
533 goto terminate
; /* success: sum of squares almost zero */
534 /* test two criteria (both may be fulfilled) */
535 if (fabs(actred
) <= C
->ftol
&& prered
<= C
->ftol
&& ratio
<= 2)
536 S
->outcome
= 1; /* success: x almost stable */
537 if (delta
<= C
->xtol
* xnorm
)
538 S
->outcome
+= 2; /* success: sum of squares almost stable */
539 if (S
->outcome
!= 0) {
543 /*** [inner] Tests for termination and stringent tolerances. ***/
545 if ( S
->nfev
>= maxfev
){
549 if ( fabs(actred
) <= LM_MACHEP
&&
550 prered
<= LM_MACHEP
&& ratio
<= 2 ){
554 if ( delta
<= LM_MACHEP
*xnorm
){
558 if ( gnorm
<= LM_MACHEP
){
563 /*** [inner] End of the loop. Repeat if iteration unsuccessful. ***/
566 } while ( !inner_success
);
568 /*** [outer] End of the loop. ***/
573 S
->fnorm
= lm_fnorm(m
, fvec
, y
);
575 fprintf(msgfile
, "lmmin terminates with outcome %i\n", S
->outcome
);
577 lm_print_pars(nout
, x
, msgfile
);
578 if( C
->verbosity
&8 ) {
581 fprintf(msgfile
, " i, f, y-f: %4i %18.8g %18.8g\n",
582 i
, fvec
[i
], y
[i
]-fvec
[i
] );
585 fprintf(msgfile
, " i, f, y-f: %4i %18.8g\n", i
, fvec
[i
]);
589 fprintf(msgfile
, " fnorm=%24.16g xnorm=%24.16g\n", S
->fnorm
, xnorm
);
590 if ( S
->userbreak
) /* user-requested break */
593 /*** Deallocate the workspace. ***/
599 /*****************************************************************************/
600 /* lm_lmpar (determine Levenberg-Marquardt parameter) */
601 /*****************************************************************************/
604 const int n
, double *const r
, const int ldr
, int *const ipvt
,
605 double *const diag
, double *const qtb
, double delta
, double *const par
,
606 double *const x
, double *const sdiag
, double *const aux
, double *const xdi
)
608 /* Given an m by n matrix A, an n by n nonsingular diagonal matrix D,
609 * an m-vector b, and a positive number delta, the problem is to
610 * determine a parameter value par such that if x solves the system
612 * A*x = b and sqrt(par)*D*x = 0
614 * in the least squares sense, and dxnorm is the euclidean
615 * norm of D*x, then either par=0 and (dxnorm-delta) < 0.1*delta,
616 * or par>0 and abs(dxnorm-delta) < 0.1*delta.
618 * Using lm_qrsolv, this subroutine completes the solution of the
619 * problem if it is provided with the necessary information from
620 * the QR factorization, with column pivoting, of A. That is, if
621 * A*P = Q*R, where P is a permutation matrix, Q has orthogonal
622 * columns, and R is an upper triangular matrix with diagonal
623 * elements of nonincreasing magnitude, then lmpar expects the
624 * full upper triangle of R, the permutation matrix P, and the
625 * first n components of Q^T*b. On output lmpar also provides an
626 * upper triangular matrix S such that
628 * P^T*(A^T*A + par*D*D)*P = S^T*S.
630 * S is employed within lmpar and may be of separate interest.
632 * Only a few iterations are generally needed for convergence
633 * of the algorithm. If, however, the limit of 10 iterations
634 * is reached, then the output par will contain the best value
639 * n is a positive integer INPUT variable set to the order of r.
641 * r is an n by n array. On INPUT the full upper triangle
642 * must contain the full upper triangle of the matrix R.
643 * On OUTPUT the full upper triangle is unaltered, and the
644 * strict lower triangle contains the strict upper triangle
645 * (transposed) of the upper triangular matrix S.
647 * ldr is a positive integer INPUT variable not less than n
648 * which specifies the leading dimension of the array R.
650 * ipvt is an integer INPUT array of length n which defines the
651 * permutation matrix P such that A*P = Q*R. Column j of P
652 * is column ipvt(j) of the identity matrix.
654 * diag is an INPUT array of length n which must contain the
655 * diagonal elements of the matrix D.
657 * qtb is an INPUT array of length n which must contain the first
658 * n elements of the vector Q^T*b.
660 * delta is a positive INPUT variable which specifies an upper
661 * bound on the euclidean norm of D*x.
663 * par is a nonnegative variable. On INPUT par contains an
664 * initial estimate of the Levenberg-Marquardt parameter.
665 * On OUTPUT par contains the final estimate.
667 * x is an OUTPUT array of length n which contains the least
668 * squares solution of the system A*x = b, sqrt(par)*D*x = 0,
669 * for the output par.
671 * sdiag is an array of length n needed as workspace; on OUTPUT
672 * it contains the diagonal elements of the upper triangular
675 * aux is a multi-purpose work array of length n.
677 * xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
680 int i
, iter
, j
, nsing
;
681 double dxnorm
, fp
, fp_old
, gnorm
, parc
, parl
, paru
;
683 static double p1
= 0.1;
685 /*** lmpar: compute and store in x the gauss-newton direction. if the
686 jacobian is rank-deficient, obtain a least squares solution. ***/
689 for (j
= 0; j
< n
; j
++) {
691 if (r
[j
* ldr
+ j
] == 0 && nsing
== n
)
696 for (j
= nsing
- 1; j
>= 0; j
--) {
697 aux
[j
] = aux
[j
] / r
[j
+ ldr
* j
];
699 for (i
= 0; i
< j
; i
++)
700 aux
[i
] -= r
[j
* ldr
+ i
] * temp
;
703 for (j
= 0; j
< n
; j
++)
706 /*** lmpar: initialize the iteration counter, evaluate the function at the
707 origin, and test for acceptance of the gauss-newton direction. ***/
709 for (j
= 0; j
< n
; j
++)
710 xdi
[j
] = diag
[j
] * x
[j
];
711 dxnorm
= lm_enorm(n
, xdi
);
713 if (fp
<= p1
* delta
) {
714 #ifdef LMFIT_DEBUG_MESSAGES
715 printf("debug lmpar nsing %d n %d, terminate (fp<p1*delta)\n",
722 /*** lmpar: if the jacobian is not rank deficient, the newton
723 step provides a lower bound, parl, for the zero of
724 the function. otherwise set this bound to zero. ***/
728 for (j
= 0; j
< n
; j
++)
729 aux
[j
] = diag
[ipvt
[j
]] * xdi
[ipvt
[j
]] / dxnorm
;
731 for (j
= 0; j
< n
; j
++) {
733 for (i
= 0; i
< j
; i
++)
734 sum
+= r
[j
* ldr
+ i
] * aux
[i
];
735 aux
[j
] = (aux
[j
] - sum
) / r
[j
+ ldr
* j
];
737 temp
= lm_enorm(n
, aux
);
738 parl
= fp
/ delta
/ temp
/ temp
;
741 /*** lmpar: calculate an upper bound, paru, for the zero of the function. ***/
743 for (j
= 0; j
< n
; j
++) {
745 for (i
= 0; i
<= j
; i
++)
746 sum
+= r
[j
* ldr
+ i
] * qtb
[i
];
747 aux
[j
] = sum
/ diag
[ipvt
[j
]];
749 gnorm
= lm_enorm(n
, aux
);
750 paru
= gnorm
/ delta
;
752 paru
= LM_DWARF
/ MIN(delta
, p1
);
754 /*** lmpar: if the input par lies outside of the interval (parl,paru),
755 set par to the closer endpoint. ***/
757 *par
= MAX(*par
, parl
);
758 *par
= MIN(*par
, paru
);
760 *par
= gnorm
/ dxnorm
;
762 /*** lmpar: iterate. ***/
764 for (iter
=0; ; iter
++) {
766 /** evaluate the function at the current value of par. **/
769 *par
= MAX(LM_DWARF
, 0.001 * paru
);
771 for (j
= 0; j
< n
; j
++)
772 aux
[j
] = temp
* diag
[j
];
774 lm_qrsolv(n
, r
, ldr
, ipvt
, aux
, qtb
, x
, sdiag
, xdi
);
775 /* return values are r, x, sdiag */
777 for (j
= 0; j
< n
; j
++)
778 xdi
[j
] = diag
[j
] * x
[j
]; /* used as output */
779 dxnorm
= lm_enorm(n
, xdi
);
783 /** if the function is small enough, accept the current value
784 of par. Also test for the exceptional cases where parl
785 is zero or the number of iterations has reached 10. **/
787 if (fabs(fp
) <= p1
* delta
788 || (parl
== 0 && fp
<= fp_old
&& fp_old
< 0)
790 #ifdef LMFIT_DEBUG_MESSAGES
791 printf("debug lmpar nsing %d iter %d "
792 "par %.4e [%.4e %.4e] delta %.4e fp %.4e\n",
793 nsing
, iter
, *par
, parl
, paru
, delta
, fp
);
795 break; /* the only exit from the iteration. */
798 /** compute the Newton correction. **/
800 for (j
= 0; j
< n
; j
++)
801 aux
[j
] = diag
[ipvt
[j
]] * xdi
[ipvt
[j
]] / dxnorm
;
803 for (j
= 0; j
< n
; j
++) {
804 aux
[j
] = aux
[j
] / sdiag
[j
];
805 for (i
= j
+ 1; i
< n
; i
++)
806 aux
[i
] -= r
[j
* ldr
+ i
] * aux
[j
];
808 temp
= lm_enorm(n
, aux
);
809 parc
= fp
/ delta
/ temp
/ temp
;
811 /** depending on the sign of the function, update parl or paru. **/
814 parl
= MAX(parl
, *par
);
816 paru
= MIN(paru
, *par
);
817 /* the case fp==0 is precluded by the break condition */
819 /** compute an improved estimate for par. **/
821 *par
= MAX(parl
, *par
+ parc
);
825 } /*** lm_lmpar. ***/
827 /*****************************************************************************/
828 /* lm_qrfac (QR factorization, from lapack) */
829 /*****************************************************************************/
832 const int m
, const int n
, double *const A
, int *const Pivot
,
833 double *const Rdiag
, double *const Acnorm
, double *const W
)
836 * This subroutine uses Householder transformations with column pivoting
837 * to compute a QR factorization of the m by n matrix A. That is, qrfac
838 * determines an orthogonal matrix Q, a permutation matrix P, and an
839 * upper trapezoidal matrix R with diagonal elements of nonincreasing
840 * magnitude, such that A*P = Q*R. The Householder transformation for
841 * column k, k = 1,2,...,n, is of the form
845 * where w has zeroes in the first k-1 positions.
849 * m is an INPUT parameter set to the number of rows of A.
851 * n is an INPUT parameter set to the number of columns of A.
853 * A is an m by n array. On INPUT, A contains the matrix for
854 * which the QR factorization is to be computed. On OUTPUT
855 * the strict upper trapezoidal part of A contains the strict
856 * upper trapezoidal part of R, and the lower trapezoidal
857 * part of A contains a factored form of Q (the non-trivial
858 * elements of the vectors w described above).
860 * Pivot is an integer OUTPUT array of length n that describes the
861 * permutation matrix P:
862 * Column j of P is column ipvt(j) of the identity matrix.
864 * Rdiag is an OUTPUT array of length n which contains the
865 * diagonal elements of R.
867 * Acnorm is an OUTPUT array of length n which contains the norms
868 * of the corresponding columns of the input matrix A. If this
869 * information is not needed, then Acnorm can share storage with Rdiag.
871 * W is a work array of length n.
875 double ajnorm
, sum
, temp
;
877 #ifdef LMFIT_DEBUG_MESSAGES
878 printf("debug qrfac\n");
881 /** Compute initial column norms;
882 initialize Pivot with identity permutation. ***/
884 for (j
= 0; j
< n
; j
++) {
885 W
[j
] = Rdiag
[j
] = Acnorm
[j
] = lm_enorm(m
, &A
[j
*m
]);
889 /** Loop over columns of A. **/
892 for (j
= 0; j
< n
; j
++) {
894 /** Bring the column of largest norm into the pivot position. **/
897 for (k
= j
+1; k
< n
; k
++)
898 if (Rdiag
[k
] > Rdiag
[kmax
])
902 /* Swap columns j and kmax. */
904 Pivot
[j
] = Pivot
[kmax
];
906 for (i
= 0; i
< m
; i
++) {
908 A
[j
*m
+i
] = A
[kmax
*m
+i
];
911 /* Half-swap: Rdiag[j], W[j] won't be needed any further. */
912 Rdiag
[kmax
] = Rdiag
[j
];
916 /** Compute the Householder reflection vector w_j to reduce the
917 j-th column of A to a multiple of the j-th unit vector. **/
919 ajnorm
= lm_enorm(m
-j
, &A
[j
*m
+j
]);
925 /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|,
926 where the sign +- is chosen to avoid cancellation in w_jj. */
929 for (i
= j
; i
< m
; i
++)
933 /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2
934 to the remaining columns, and update the norms. **/
936 for (k
= j
+1; k
< n
; k
++) {
937 /* Compute scalar product w_j * a_j. */
939 for (i
= j
; i
< m
; i
++)
940 sum
+= A
[j
*m
+i
] * A
[k
*m
+i
];
942 /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */
943 temp
= sum
/ A
[j
*m
+j
];
945 /* Carry out transform U_w_j * a_k. */
946 for (i
= j
; i
< m
; i
++)
947 A
[k
*m
+i
] -= temp
* A
[j
*m
+i
];
949 /* No idea what happens here. */
951 temp
= A
[m
*k
+j
] / Rdiag
[k
];
952 if ( fabs(temp
)<1 ) {
953 Rdiag
[k
] *= sqrt(1-SQR(temp
));
954 temp
= Rdiag
[k
] / W
[k
];
957 if ( temp
== 0 || 0.05 * SQR(temp
) <= LM_MACHEP
) {
958 Rdiag
[k
] = lm_enorm(m
-j
-1, &A
[m
*k
+j
+1]);
966 } /*** lm_qrfac. ***/
969 /*****************************************************************************/
970 /* lm_qrsolv (linear least-squares) */
971 /*****************************************************************************/
974 const int n
, double *const r
, const int ldr
, int *const ipvt
,
975 double *const diag
, double *const qtb
, double *const x
,
976 double *const sdiag
, double *const wa
)
979 * Given an m by n matrix A, an n by n diagonal matrix D, and an
980 * m-vector b, the problem is to determine an x which solves the
983 * A*x = b and D*x = 0
985 * in the least squares sense.
987 * This subroutine completes the solution of the problem if it is
988 * provided with the necessary information from the QR factorization,
989 * with column pivoting, of A. That is, if A*P = Q*R, where P is a
990 * permutation matrix, Q has orthogonal columns, and R is an upper
991 * triangular matrix with diagonal elements of nonincreasing magnitude,
992 * then qrsolv expects the full upper triangle of R, the permutation
993 * matrix P, and the first n components of Q^T*b. The system
994 * A*x = b, D*x = 0, is then equivalent to
996 * R*z = Q^T*b, P^T*D*P*z = 0,
998 * where x = P*z. If this system does not have full rank, then a least
999 * squares solution is obtained. On output qrsolv also provides an upper
1000 * triangular matrix S such that
1002 * P^T*(A^T*A + D*D)*P = S^T*S.
1004 * S is computed within qrsolv and may be of separate interest.
1008 * n is a positive integer INPUT variable set to the order of R.
1010 * r is an n by n array. On INPUT the full upper triangle must
1011 * contain the full upper triangle of the matrix R. On OUTPUT
1012 * the full upper triangle is unaltered, and the strict lower
1013 * triangle contains the strict upper triangle (transposed) of
1014 * the upper triangular matrix S.
1016 * ldr is a positive integer INPUT variable not less than n
1017 * which specifies the leading dimension of the array R.
1019 * ipvt is an integer INPUT array of length n which defines the
1020 * permutation matrix P such that A*P = Q*R. Column j of P
1021 * is column ipvt(j) of the identity matrix.
1023 * diag is an INPUT array of length n which must contain the
1024 * diagonal elements of the matrix D.
1026 * qtb is an INPUT array of length n which must contain the first
1027 * n elements of the vector Q^T*b.
1029 * x is an OUTPUT array of length n which contains the least
1030 * squares solution of the system A*x = b, D*x = 0.
1032 * sdiag is an OUTPUT array of length n which contains the
1033 * diagonal elements of the upper triangular matrix S.
1035 * wa is a work array of length n.
1038 int i
, kk
, j
, k
, nsing
;
1039 double qtbpj
, sum
, temp
;
1040 double _sin
, _cos
, _tan
, _cot
; /* local variables, not functions */
1042 /*** qrsolv: copy R and Q^T*b to preserve input and initialize S.
1043 In particular, save the diagonal elements of R in x. ***/
1045 for (j
= 0; j
< n
; j
++) {
1046 for (i
= j
; i
< n
; i
++)
1047 r
[j
* ldr
+ i
] = r
[i
* ldr
+ j
];
1048 x
[j
] = r
[j
* ldr
+ j
];
1052 /*** qrsolv: eliminate the diagonal matrix D using a Givens rotation. ***/
1054 for (j
= 0; j
< n
; j
++) {
1056 /*** qrsolv: prepare the row of D to be eliminated, locating the
1057 diagonal element using P from the QR factorization. ***/
1059 if (diag
[ipvt
[j
]] == 0)
1061 for (k
= j
; k
< n
; k
++)
1063 sdiag
[j
] = diag
[ipvt
[j
]];
1065 /*** qrsolv: the transformations to eliminate the row of D modify only
1066 a single element of Q^T*b beyond the first n, which is initially 0. ***/
1069 for (k
= j
; k
< n
; k
++) {
1071 /** determine a Givens rotation which eliminates the
1072 appropriate element in the current row of D. **/
1077 if (fabs(r
[kk
]) < fabs(sdiag
[k
])) {
1078 _cot
= r
[kk
] / sdiag
[k
];
1079 _sin
= 1 / sqrt(1 + SQR(_cot
));
1082 _tan
= sdiag
[k
] / r
[kk
];
1083 _cos
= 1 / sqrt(1 + SQR(_tan
));
1087 /** compute the modified diagonal element of R and
1088 the modified element of (Q^T*b,0). **/
1090 r
[kk
] = _cos
* r
[kk
] + _sin
* sdiag
[k
];
1091 temp
= _cos
* wa
[k
] + _sin
* qtbpj
;
1092 qtbpj
= -_sin
* wa
[k
] + _cos
* qtbpj
;
1095 /** accumulate the tranformation in the row of S. **/
1097 for (i
= k
+ 1; i
< n
; i
++) {
1098 temp
= _cos
* r
[k
* ldr
+ i
] + _sin
* sdiag
[i
];
1099 sdiag
[i
] = -_sin
* r
[k
* ldr
+ i
] + _cos
* sdiag
[i
];
1100 r
[k
* ldr
+ i
] = temp
;
1105 /** store the diagonal element of S and restore
1106 the corresponding diagonal element of R. **/
1108 sdiag
[j
] = r
[j
* ldr
+ j
];
1109 r
[j
* ldr
+ j
] = x
[j
];
1112 /*** qrsolv: solve the triangular system for z. If the system is
1113 singular, then obtain a least squares solution. ***/
1116 for (j
= 0; j
< n
; j
++) {
1117 if (sdiag
[j
] == 0 && nsing
== n
)
1123 for (j
= nsing
- 1; j
>= 0; j
--) {
1125 for (i
= j
+ 1; i
< nsing
; i
++)
1126 sum
+= r
[j
* ldr
+ i
] * wa
[i
];
1127 wa
[j
] = (wa
[j
] - sum
) / sdiag
[j
];
1130 /*** qrsolv: permute the components of z back to components of x. ***/
1132 for (j
= 0; j
< n
; j
++)
1135 } /*** lm_qrsolv. ***/
1138 /*****************************************************************************/
1139 /* lm_enorm (Euclidean norm) */
1140 /*****************************************************************************/
1142 double lm_enorm(const int n
, const double *const x
)
1144 /* This function calculates the Euclidean norm of an n-vector x.
1146 * The Euclidean norm is computed by accumulating the sum of
1147 * squares in three different sums. The sums of squares for the
1148 * small and large components are scaled so that no overflows
1149 * occur. Non-destructive underflows are permitted. Underflows
1150 * and overflows do not occur in the computation of the unscaled
1151 * sum of squares for the intermediate components.
1152 * The definitions of small, intermediate and large components
1153 * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1154 * restrictions on these constants are that LM_SQRT_DWARF**2 not
1155 * underflow and LM_SQRT_GIANT**2 not overflow.
1159 * n is a positive integer INPUT variable.
1161 * x is an INPUT array of length n.
1164 double agiant
, s1
, s2
, s3
, xabs
, x1max
, x3max
, temp
;
1171 agiant
= LM_SQRT_GIANT
/ n
;
1173 /** sum squares. **/
1175 for (i
= 0; i
< n
; i
++) {
1177 if (xabs
> LM_SQRT_DWARF
) {
1178 if ( xabs
< agiant
) {
1180 } else if ( xabs
> x1max
) {
1181 temp
= x1max
/ xabs
;
1182 s1
= 1 + s1
* SQR(temp
);
1185 temp
= xabs
/ x1max
;
1188 } else if ( xabs
> x3max
) {
1189 temp
= x3max
/ xabs
;
1190 s3
= 1 + s3
* SQR(temp
);
1192 } else if (xabs
!= 0) {
1193 temp
= xabs
/ x3max
;
1198 /** calculation of norm. **/
1201 return x1max
* sqrt(s1
+ (s2
/ x1max
) / x1max
);
1204 return sqrt(s2
* (1 + (x3max
/ s2
) * (x3max
* s3
)));
1206 return sqrt(x3max
* ((s2
/ x3max
) + (x3max
* s3
)));
1208 return x3max
* sqrt(s3
);
1210 } /*** lm_enorm. ***/
1213 /*****************************************************************************/
1214 /* lm_fnorm (Euclidean norm of difference) */
1215 /*****************************************************************************/
1217 double lm_fnorm(const int n
, const double *const x
, const double *const y
)
1219 /* This function calculates the Euclidean norm of an n-vector x-y.
1221 * The Euclidean norm is computed by accumulating the sum of
1222 * squares in three different sums. The sums of squares for the
1223 * small and large components are scaled so that no overflows
1224 * occur. Non-destructive underflows are permitted. Underflows
1225 * and overflows do not occur in the computation of the unscaled
1226 * sum of squares for the intermediate components.
1227 * The definitions of small, intermediate and large components
1228 * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1229 * restrictions on these constants are that LM_SQRT_DWARF**2 not
1230 * underflow and LM_SQRT_GIANT**2 not overflow.
1234 * n is a positive integer INPUT variable.
1236 * x, y are INPUT arrays of length n.
1239 return lm_enorm(n
, x
);
1241 double agiant
, s1
, s2
, s3
, xabs
, x1max
, x3max
, temp
;
1248 agiant
= LM_SQRT_GIANT
/ n
;
1250 /** sum squares. **/
1252 for (i
= 0; i
< n
; i
++) {
1253 xabs
= fabs(x
[i
]-y
[i
]);
1254 if (xabs
> LM_SQRT_DWARF
) {
1255 if ( xabs
< agiant
) {
1257 } else if ( xabs
> x1max
) {
1258 temp
= x1max
/ xabs
;
1259 s1
= 1 + s1
* SQR(temp
);
1262 temp
= xabs
/ x1max
;
1265 } else if ( xabs
> x3max
) {
1266 temp
= x3max
/ xabs
;
1267 s3
= 1 + s3
* SQR(temp
);
1269 } else if (xabs
!= 0) {
1270 temp
= xabs
/ x3max
;
1275 /** calculation of norm. **/
1278 return x1max
* sqrt(s1
+ (s2
/ x1max
) / x1max
);
1281 return sqrt(s2
* (1 + (x3max
/ s2
) * (x3max
* s3
)));
1283 return sqrt(x3max
* ((s2
/ x3max
) + (x3max
* s3
)));
1285 return x3max
* sqrt(s3
);
1287 } /*** lm_fnorm. ***/