share/lbfgs/lbfgs.f

   1 C     Modification of lbfgs.f as retrieved 1997/03/29 from
   2 C     ftp://ftp.netlib.org/opt/lbfgs_um.shar
   3 C
   4 C     This version copyright 2006 by Robert Dodier and released
   5 C     under the terms of the GNU General Public License.
   6 C
   7 C     ---------------- Message from the author ----------------
   8 C     From: Jorge Nocedal [mailto:nocedal@dario.ece.nwu.edu]
   9 C     Sent: Friday, August 17, 2001 9:09 AM
  10 C     To: Robert Dodier
  11 C     Subject: Re: Commercial licensing terms for LBFGS?
  12 C
  13 C     Robert:
  14 C     The code L-BFGS (for unconstrained problems) is in the public domain.
  15 C     It can be used in any commercial application.
  16 C
  17 C     The code L-BFGS-B (for bound constrained problems) belongs to
  18 C     ACM. You need to contact them for a commercial license. It is
  19 C     algorithm 778.
  20 C
  21 C     Jorge
  22 C     --------------------- End of message --------------------
  23
  24 C     ----------------------------------------------------------------------
  25 C     This file contains the LBFGS algorithm and supporting routines
  26 C
  27 C     ****************
  28 C     LBFGS SUBROUTINE
  29 C     ****************
  30 C
  31       SUBROUTINE LBFGS(N,M,X,F,G,DIAGCO,DIAG,IPRINT,EPS,XTOL,W,IFLAG
  32      &,SCACHE)
  33 C
  34       INTEGER N,M,IPRINT(2),IFLAG
  35       DOUBLE PRECISION X(N),G(N),DIAG(N),W(N*(2*M+1)+2*M),SCACHE(N)
  36       DOUBLE PRECISION F,EPS,XTOL
  37       LOGICAL DIAGCO
  38 C
  39 C        LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION
  40 C                          JORGE NOCEDAL
  41 C                        *** July 1990 ***
  42 C
  43 C
  44 C     This subroutine solves the unconstrained minimization problem
  45 C
  46 C                      min F(x),    x= (x1,x2,...,xN),
  47 C
  48 C      using the limited memory BFGS method. The routine is especially
  49 C      effective on problems involving a large number of variables. In
  50 C      a typical iteration of this method an approximation Hk to the
  51 C      inverse of the Hessian is obtained by applying M BFGS updates to
  52 C      a diagonal matrix Hk0, using information from the previous M steps.
  53 C      The user specifies the number M, which determines the amount of
  54 C      storage required by the routine. The user may also provide the
  55 C      diagonal matrices Hk0 if not satisfied with the default choice.
  56 C      The algorithm is described in "On the limited memory BFGS method
  57 C      for large scale optimization", by D. Liu and J. Nocedal,
  58 C      Mathematical Programming B 45 (1989) 503-528.
  59 C
  60 C      The user is required to calculate the function value F and its
  61 C      gradient G. In order to allow the user complete control over
  62 C      these computations, reverse  communication is used. The routine
  63 C      must be called repeatedly under the control of the parameter
  64 C      IFLAG.
  65 C
  66 C      The steplength is determined at each iteration by means of the
  67 C      line search routine MCVSRCH, which is a slight modification of
  68 C      the routine CSRCH written by More' and Thuente.
  69 C
  70 C      The calling statement is
  71 C
  72 C          CALL LBFGS(N,M,X,F,G,DIAGCO,DIAG,IPRINT,EPS,XTOL,W,IFLAG)
  73 C
  74 C      where
  75 C
  76 C     N       is an INTEGER variable that must be set by the user to the
  77 C             number of variables. It is not altered by the routine.
  78 C             Restriction: N>0.
  79 C
  80 C     M       is an INTEGER variable that must be set by the user to
  81 C             the number of corrections used in the BFGS update. It
  82 C             is not altered by the routine. Values of M less than 3 are
  83 C             not recommended; large values of M will result in excessive
  84 C             computing time. 3<= M <=7 is recommended. Restriction: M>0.
  85 C
  86 C     X       is a DOUBLE PRECISION array of length N. On initial entry
  87 C             it must be set by the user to the values of the initial
  88 C             estimate of the solution vector. On exit with IFLAG=0, it
  89 C             contains the values of the variables at the best point
  90 C             found (usually a solution).
  91 C
  92 C     F       is a DOUBLE PRECISION variable. Before initial entry and on
  93 C             a re-entry with IFLAG=1, it must be set by the user to
  94 C             contain the value of the function F at the point X.
  95 C
  96 C     G       is a DOUBLE PRECISION array of length N. Before initial
  97 C             entry and on a re-entry with IFLAG=1, it must be set by
  98 C             the user to contain the components of the gradient G at
  99 C             the point X.
 100 C
 101 C     DIAGCO  is a LOGICAL variable that must be set to .TRUE. if the
 102 C             user  wishes to provide the diagonal matrix Hk0 at each
 103 C             iteration. Otherwise it should be set to .FALSE., in which
 104 C             case  LBFGS will use a default value described below. If
 105 C             DIAGCO is set to .TRUE. the routine will return at each
 106 C             iteration of the algorithm with IFLAG=2, and the diagonal
 107 C              matrix Hk0  must be provided in the array DIAG.
 108 C
 109 C
 110 C     DIAG    is a DOUBLE PRECISION array of length N. If DIAGCO=.TRUE.,
 111 C             then on initial entry or on re-entry with IFLAG=2, DIAG
 112 C             it must be set by the user to contain the values of the
 113 C             diagonal matrix Hk0.  Restriction: all elements of DIAG
 114 C             must be positive.
 115 C
 116 C     IPRINT  is an INTEGER array of length two which must be set by the
 117 C             user.
 118 C
 119 C             IPRINT(1) specifies the frequency of the output:
 120 C                IPRINT(1) < 0 : no output is generated,
 121 C                IPRINT(1) = 0 : output only at first and last iteration,
 122 C                IPRINT(1) > 0 : output every IPRINT(1) iterations.
 123 C
 124 C             IPRINT(2) specifies the type of output generated:
 125 C                IPRINT(2) = 0 : iteration count, number of function
 126 C                                evaluations, function value, norm of the
 127 C                                gradient, and steplength,
 128 C                IPRINT(2) = 1 : same as IPRINT(2)=0, plus vector of
 129 C                                variables and  gradient vector at the
 130 C                                initial point,
 131 C                IPRINT(2) = 2 : same as IPRINT(2)=1, plus vector of
 132 C                                variables,
 133 C                IPRINT(2) = 3 : same as IPRINT(2)=2, plus gradient vector.
 134 C
 135 C
 136 C     EPS     is a positive DOUBLE PRECISION variable that must be set by
 137 C             the user, and determines the accuracy with which the solution
 138 C             is to be found. The subroutine terminates when
 139 C
 140 C                         ||G|| < EPS max(1,||X||),
 141 C
 142 C             where ||.|| denotes the Euclidean norm.
 143 C
 144 C     XTOL    is a  positive DOUBLE PRECISION variable that must be set by
 145 C             the user to an estimate of the machine precision (e.g.
 146 C             10**(-16) on a SUN station 3/60). The line search routine will
 147 C             terminate if the relative width of the interval of uncertainty
 148 C             is less than XTOL.
 149 C
 150 C     W       is a DOUBLE PRECISION array of length N(2M+1)+2M used as
 151 C             workspace for LBFGS. This array must not be altered by the
 152 C             user.
 153 C
 154 C     IFLAG   is an INTEGER variable that must be set to 0 on initial entry
 155 C             to the subroutine. A return with IFLAG<0 indicates an error,
 156 C             and IFLAG=0 indicates that the routine has terminated without
 157 C             detecting errors. On a return with IFLAG=1, the user must
 158 C             evaluate the function F and gradient G. On a return with
 159 C             IFLAG=2, the user must provide the diagonal matrix Hk0.
 160 C
 161 C             The following negative values of IFLAG, detecting an error,
 162 C             are possible:
 163 C
 164 C              IFLAG=-1  The line search routine MCSRCH failed. The
 165 C                        parameter INFO provides more detailed information
 166 C                        (see also the documentation of MCSRCH):
 167 C
 168 C                       INFO = 0  IMPROPER INPUT PARAMETERS.
 169 C
 170 C                       INFO = 2  RELATIVE WIDTH OF THE INTERVAL OF
 171 C                                 UNCERTAINTY IS AT MOST XTOL.
 172 C
 173 C                       INFO = 3  MORE THAN 20 FUNCTION EVALUATIONS WERE
 174 C                                 REQUIRED AT THE PRESENT ITERATION.
 175 C
 176 C                       INFO = 4  THE STEP IS TOO SMALL.
 177 C
 178 C                       INFO = 5  THE STEP IS TOO LARGE.
 179 C
 180 C                       INFO = 6  ROUNDING ERRORS PREVENT FURTHER PROGRESS.
 181 C                                 THERE MAY NOT BE A STEP WHICH SATISFIES
 182 C                                 THE SUFFICIENT DECREASE AND CURVATURE
 183 C                                 CONDITIONS. TOLERANCES MAY BE TOO SMALL.
 184 C
 185 C
 186 C              IFLAG=-2  The i-th diagonal element of the diagonal inverse
 187 C                        Hessian approximation, given in DIAG, is not
 188 C                        positive.
 189 C
 190 C              IFLAG=-3  Improper input parameters for LBFGS (N or M are
 191 C                        not positive).
 192 C
 193 C
 194 C
 195 C    ON THE DRIVER:
 196 C
 197 C    The program that calls LBFGS must contain the declaration:
 198 C
 199 C                       EXTERNAL LB2
 200 C
 201 C    LB2 is a BLOCK DATA that defines the default values of several
 202 C    parameters described in the COMMON section.
 203 C
 204 C
 205 C
 206 C    COMMON:
 207 C
 208 C     The subroutine contains one common area, which the user may wish to
 209 C    reference:
 210 C
 211          COMMON /LB3/MP,LP,GTOL,STPMIN,STPMAX
 212 C
 213 C    MP  is an INTEGER variable with default value 6. It is used as the
 214 C        unit number for the printing of the monitoring information
 215 C        controlled by IPRINT.
 216 C
 217 C    LP  is an INTEGER variable with default value 6. It is used as the
 218 C        unit number for the printing of error messages. This printing
 219 C        may be suppressed by setting LP to a non-positive value.
 220 C
 221 C    GTOL is a DOUBLE PRECISION variable with default value 0.9, which
 222 C        controls the accuracy of the line search routine MCSRCH. If the
 223 C        function and gradient evaluations are inexpensive with respect
 224 C        to the cost of the iteration (which is sometimes the case when
 225 C        solving very large problems) it may be advantageous to set GTOL
 226 C        to a small value. A typical small value is 0.1.  Restriction:
 227 C        GTOL should be greater than 1.D-04.
 228 C
 229 C    STPMIN and STPMAX are non-negative DOUBLE PRECISION variables which
 230 C        specify lower and uper bounds for the step in the line search.
 231 C        Their default values are 1.D-20 and 1.D+20, respectively. These
 232 C        values need not be modified unless the exponents are too large
 233 C        for the machine being used, or unless the problem is extremely
 234 C        badly scaled (in which case the exponents should be increased).
 235 C
 236 C
 237 C  MACHINE DEPENDENCIES
 238 C
 239 C        The only variables that are machine-dependent are XTOL,
 240 C        STPMIN and STPMAX.
 241 C
 242 C
 243 C  GENERAL INFORMATION
 244 C
 245 C    Other routines called directly:  DAXPY, DDOT, LB1, MCSRCH
 246 C
 247 C    Input/Output  :  No input; diagnostic messages on unit MP and
 248 C                     error messages on unit LP.
 249 C
 250 C
 251 C     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 252 C
 253       DOUBLE PRECISION GTOL,ONE,ZERO,GNORM,DDOT,STP1,FTOL,STPMIN,
 254      .                 STPMAX,STP,YS,YY,SQ,YR,BETA,XNORM
 255       INTEGER MP,LP,ITER,NFUN,POINT,ISPT,IYPT,MAXFEV,INFO,
 256      .        BOUND,NPT,CP,I,NFEV,INMC,IYCN,ISCN
 257       LOGICAL FINISH
 258 C
 259       SAVE
 260       DATA ONE,ZERO/1.0D+0,0.0D+0/
 261 C
 262 C     INITIALIZE
 263 C     ----------
 264 C
 265       IF(IFLAG.EQ.0) GO TO 10
 266       GO TO (172,100) IFLAG
 267   10  ITER= 0
 268       IF(N.LE.0.OR.M.LE.0) GO TO 196
 269       IF(GTOL.LE.1.D-04) THEN
 270         IF(LP.GT.0) WRITE(LP,245)
 271         GTOL=9.D-01
 272       ENDIF
 273       NFUN= 1
 274       POINT= 0
 275       FINISH= .FALSE.
 276       IF(DIAGCO) THEN
 277          DO 30 I=1,N
 278  30      IF (DIAG(I).LE.ZERO) GO TO 195
 279       ELSE
 280          DO 40 I=1,N
 281  40      DIAG(I)= 1.0D0
 282       ENDIF
 283 C
 284 C     THE WORK VECTOR W IS DIVIDED AS FOLLOWS:
 285 C     ---------------------------------------
 286 C     THE FIRST N LOCATIONS ARE USED TO STORE THE GRADIENT AND
 287 C         OTHER TEMPORARY INFORMATION.
 288 C     LOCATIONS (N+1)...(N+M) STORE THE SCALARS RHO.
 289 C     LOCATIONS (N+M+1)...(N+2M) STORE THE NUMBERS ALPHA USED
 290 C         IN THE FORMULA THAT COMPUTES H*G.
 291 C     LOCATIONS (N+2M+1)...(N+2M+NM) STORE THE LAST M SEARCH
 292 C         STEPS.
 293 C     LOCATIONS (N+2M+NM+1)...(N+2M+2NM) STORE THE LAST M
 294 C         GRADIENT DIFFERENCES.
 295 C
 296 C     THE SEARCH STEPS AND GRADIENT DIFFERENCES ARE STORED IN A
 297 C     CIRCULAR ORDER CONTROLLED BY THE PARAMETER POINT.
 298 C
 299       ISPT= N+2*M
 300       IYPT= ISPT+N*M
 301       DO 50 I=1,N
 302  50   W(ISPT+I)= -G(I)*DIAG(I)
 303       GNORM= DSQRT(DDOT(N,G,1,G,1))
 304       STP1= ONE/GNORM
 305 C
 306 C     PARAMETERS FOR LINE SEARCH ROUTINE
 307 C
 308       FTOL= 1.0D-4
 309       MAXFEV= 20
 310 C
 311       IF(IPRINT(1).GE.0) CALL LB1(IPRINT,ITER,NFUN,
 312      *                     GNORM,N,M,X,F,G,STP,FINISH)
 313 C
 314 C    --------------------
 315 C     MAIN ITERATION LOOP
 316 C    --------------------
 317 C
 318  80   ITER= ITER+1
 319       INFO=0
 320       BOUND=ITER-1
 321       IF(ITER.EQ.1) GO TO 165
 322       IF (ITER .GT. M)BOUND=M
 323 C
 324          YS= DDOT(N,W(IYPT+NPT+1),1,W(ISPT+NPT+1),1)
 325       IF(.NOT.DIAGCO) THEN
 326          YY= DDOT(N,W(IYPT+NPT+1),1,W(IYPT+NPT+1),1)
 327          DO 90 I=1,N
 328    90    DIAG(I)= YS/YY
 329       ELSE
 330          IFLAG=2
 331          RETURN
 332       ENDIF
 333  100  CONTINUE
 334       IF(DIAGCO) THEN
 335         DO 110 I=1,N
 336  110    IF (DIAG(I).LE.ZERO) GO TO 195
 337       ENDIF
 338 C
 339 C     COMPUTE -H*G USING THE FORMULA GIVEN IN: Nocedal, J. 1980,
 340 C     "Updating quasi-Newton matrices with limited storage",
 341 C     Mathematics of Computation, Vol.24, No.151, pp. 773-782.
 342 C     ---------------------------------------------------------
 343 C
 344       CP= POINT
 345       IF (POINT.EQ.0) CP=M
 346       W(N+CP)= ONE/YS
 347       DO 112 I=1,N
 348  112  W(I)= -G(I)
 349       CP= POINT
 350       DO 125 I= 1,BOUND
 351          CP=CP-1
 352          IF (CP.EQ. -1)CP=M-1
 353          SQ= DDOT(N,W(ISPT+CP*N+1),1,W,1)
 354          INMC=N+M+CP+1
 355          IYCN=IYPT+CP*N
 356          W(INMC)= W(N+CP+1)*SQ
 357          CALL DAXPY(N,-W(INMC),W(IYCN+1),1,W,1)
 358  125  CONTINUE
 359 C
 360       DO 130 I=1,N
 361  130  W(I)=DIAG(I)*W(I)
 362 C
 363       DO 145 I=1,BOUND
 364          YR= DDOT(N,W(IYPT+CP*N+1),1,W,1)
 365          BETA= W(N+CP+1)*YR
 366          INMC=N+M+CP+1
 367          BETA= W(INMC)-BETA
 368          ISCN=ISPT+CP*N
 369          CALL DAXPY(N,BETA,W(ISCN+1),1,W,1)
 370          CP=CP+1
 371          IF (CP.EQ.M)CP=0
 372  145  CONTINUE
 373 C
 374 C     STORE THE NEW SEARCH DIRECTION
 375 C     ------------------------------
 376 C
 377        DO 160 I=1,N
 378  160   W(ISPT+POINT*N+I)= W(I)
 379 C
 380 C     OBTAIN THE ONE-DIMENSIONAL MINIMIZER OF THE FUNCTION
 381 C     BY USING THE LINE SEARCH ROUTINE MCSRCH
 382 C     ----------------------------------------------------
 383  165  NFEV=0
 384       STP=ONE
 385       IF (ITER.EQ.1) STP=STP1
 386       DO 170 I=1,N
 387  170  W(I)=G(I)
 388  172  CONTINUE
 389       CALL MCSRCH(N,X,F,G,W(ISPT+POINT*N+1),STP,FTOL,
 390      *            XTOL,MAXFEV,INFO,NFEV,DIAG)
 391       IF (INFO .EQ. -1) THEN
 392         IFLAG=1
 393         RETURN
 394       ENDIF
 395       IF (INFO .NE. 1) GO TO 190
 396       NFUN= NFUN + NFEV
 397 C
 398 C     COMPUTE THE NEW STEP AND GRADIENT CHANGE
 399 C     -----------------------------------------
 400 C
 401       NPT=POINT*N
 402       DO 175 I=1,N
 403       W(ISPT+NPT+I)= STP*W(ISPT+NPT+I)
 404  175  W(IYPT+NPT+I)= G(I)-W(I)
 405       POINT=POINT+1
 406       IF (POINT.EQ.M)POINT=0
 407 C
 408 C     TERMINATION TEST
 409 C     ----------------
 410 C
 411       GNORM= DSQRT(DDOT(N,G,1,G,1))
 412       XNORM= DSQRT(DDOT(N,X,1,X,1))
 413       XNORM= DMAX1(1.0D0,XNORM)
 414       IF (GNORM/XNORM .LE. EPS) FINISH=.TRUE.
 415 C
 416       IF(IPRINT(1).GE.0) CALL LB1(IPRINT,ITER,NFUN,
 417      *               GNORM,N,M,X,F,G,STP,FINISH)
 418
 419 C     We've completed a line search. Cache the current solution vector.
 420 C
 421 C     At the end of searching, the solution cache is different from
 422 C     the current solution if the search is terminated in the
 423 C     middle of a line search. That is the case if, for example,
 424 C     the number of function evaluations is the criterion to stop
 425 C     the search instead of the number of line searches or convergence
 426 C     to a prescribed tolerance.
 427
 428       DO 177 I=1,N
 429       SCACHE(I) = X(I)
 430   177 CONTINUE
 431
 432       IF (FINISH) THEN
 433          IFLAG=0
 434          RETURN
 435       ENDIF
 436       GO TO 80
 437 C
 438 C     ------------------------------------------------------------
 439 C     END OF MAIN ITERATION LOOP. ERROR EXITS.
 440 C     ------------------------------------------------------------
 441 C
 442  190  IFLAG=-1
 443       IF(LP.GT.0) WRITE(LP,200) INFO
 444       RETURN
 445  195  IFLAG=-2
 446       IF(LP.GT.0) WRITE(LP,235) I
 447       RETURN
 448  196  IFLAG= -3
 449       IF(LP.GT.0) WRITE(LP,240)
 450 C
 451 C     FORMATS
 452 C     -------
 453 C
 454  200  FORMAT(/' IFLAG= -1 ',/' LINE SEARCH FAILED. SEE',
 455      .          ' DOCUMENTATION OF ROUTINE MCSRCH',/' ERROR RETURN',
 456      .          ' OF LINE SEARCH: INFO= ',I2,/
 457      .          ' POSSIBLE CAUSES: FUNCTION OR GRADIENT ARE INCORRECT',/,
 458      .          ' OR INCORRECT TOLERANCES')
 459  235  FORMAT(/' IFLAG= -2',/' THE',I5,'-TH DIAGONAL ELEMENT OF THE',/,
 460      .       ' INVERSE HESSIAN APPROXIMATION IS NOT POSITIVE')
 461  240  FORMAT(/' IFLAG= -3',/' IMPROPER INPUT PARAMETERS (N OR M',
 462      .       ' ARE NOT POSITIVE)')
 463  245  FORMAT(/'  GTOL IS LESS THAN OR EQUAL TO 1.D-04',
 464      .       / ' IT HAS BEEN RESET TO 9.D-01')
 465       RETURN
 466       END