src/kernel/genalg.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  34  */
  35 /**H*O*C**************************************************************
  36 **                                                                  **
  37 ** No.!Version! Date ! Request !    Modification           ! Author **
  38 ** ---+-------+------+---------+---------------------------+------- **
  39 **  1 + 3.1  +5/18/95+   -     + strategy DE/rand-to-best/1+  Storn **
  40 **    +      +       +         + included                  +        **
  41 **  1 + 3.2  +6/06/95+C.Fleiner+ change loops into memcpy  +  Storn **
  42 **  2 + 3.2  +6/06/95+   -     + update comments           +  Storn **
  43 **  1 + 3.3  +6/15/95+ K.Price + strategy DE/best/2 incl.  +  Storn **
  44 **  2 + 3.3  +6/16/95+   -     + comments and beautifying  +  Storn **
  45 **  3 + 3.3  +7/13/95+   -     + upper and lower bound for +  Storn **
  46 **    +      +       +         + initialization            +        **
  47 **  1 + 3.4  +2/12/96+   -     + increased printout prec.  +  Storn **
  48 **  1 + 3.5  +5/28/96+   -     + strategies revisited      +  Storn **
  49 **  2 + 3.5  +5/28/96+   -     + strategy DE/rand/2 incl.  +  Storn **
  50 **  1 + 3.6  +8/06/96+ K.Price + Binomial Crossover added  +  Storn **
  51 **  2 + 3.6  +9/30/96+ K.Price + cost variance output      +  Storn **
  52 **  3 + 3.6  +9/30/96+   -     + alternative to ASSIGND    +  Storn **
  53 **  4 + 3.6  +10/1/96+   -    + variable checking inserted +  Storn **
  54 **  5 + 3.6  +10/1/96+   -     + strategy indic. improved  +  Storn **
  55 **                                                                  **
  56 ***H*O*C*E***********************************************************/
  57
  58 /* Adopted for use in GROMACS by David van der Spoel, Oct 2001 */
  59 #ifdef HAVE_CONFIG_H
  60 #include <config.h>
  61 #endif
  62
  63 #include <stdio.h>
  64 #include <stdlib.h>
  65 #include <math.h>
  66 #include <memory.h>
  67 #include "typedefs.h"
  68 #include "smalloc.h"
  69 #include "futil.h"
  70 #include "genalg.h"
  71 #include "gmx_fatal.h"
  72 #include "random.h"
  73 #include "txtdump.h"
  74 #include "vec.h"
  75 #include "main.h"
  76 #include "gmxfio.h"
  77
  78 static const char  *strat[] = {
  79   "DE/best/1/exp",          "DE/rand/1/exp",
  80   "DE/rand-to-best/1/exp",  "DE/best/2/exp",
  81   "DE/rand/2/exp",          "DE/best/1/bin",
  82   "DE/rand/1/bin",          "DE/rand-to-best/1/bin",
  83   "DE/best/2/bin",          "DE/rand/2/bin"
  84 };
  85
  86 /*------------------------Macros----------------------------------------*/
  87 static void assignd(int D,real a[],real b[])
  88 {
  89   int i;
  90
  91   for(i=0; (i<D); i++)
  92     a[i] = b[i];
  93 }
  94
  95 static real **make2d(int n,int m)
  96 {
  97   int  i;
  98   real **r;
  99
 100   snew(r,n);
 101   for(i=0;(i<n); i++)
 102     snew(r[i],m);
 103   return r;
 104 }
 105
 106 static void copy2range(int D,real c[],t_range r[])
 107 {
 108   int i;
 109
 110   for(i=0; (i<D); i++) {
 111     /* Range check */
 112     while (c[i] < r[i].rmin)
 113       c[i] += r[i].dr;
 114     while (c[i] > r[i].rmax)
 115       c[i] -= r[i].dr;
 116     /*    if (c[i] < r[i].rmin)
 117           c[i] = r[i].rmin;
 118           if (c[i] > r[i].rmax)
 119           c[i] = r[i].rmax;
 120     */
 121     r[i].rval = c[i];
 122   }
 123 }
 124
 125 t_genalg *init_ga(FILE *fplog,const char *infile,int D,t_range range[])
 126 {
 127   FILE     *fpin_ptr;
 128   t_genalg *ga;
 129   double   ff,cr;
 130   int      i,j;
 131
 132   /*------Initializations----------------------------*/
 133   snew(ga,1);
 134   /*-----Read input data------------------------------------------------*/
 135   fpin_ptr   = gmx_fio_fopen(infile,"r");
 136   if ( fscanf(fpin_ptr,"%d",&ga->NP) != 1         ||         /*---choice of strategy---*/
 137        fscanf(fpin_ptr,"%d",&ga->strategy) != 1   ||         /*---choice of strategy---*/
 138        fscanf(fpin_ptr,"%lf",&ff) != 1            ||         /*---weight factor------------*/
 139        fscanf(fpin_ptr,"%lf",&cr) != 1            ||         /*---crossing over factor-----*/
 140        fscanf(fpin_ptr,"%d",&ga->seed) != 1       ||         /*---random seed----------*/
 141        gmx_fio_fclose(fpin_ptr) != 1)
 142   {
 143       gmx_fatal(FARGS,"Error reading from file %s",infile);
 144   }
 145
 146   ga->FF   = ff;
 147   ga->CR   = cr;
 148   ga->D    = D;
 149   ga->ipop = 0;
 150   ga->gen  = 0;
 151
 152   /* Allocate memory */
 153   ga->pold = make2d(ga->NP,ga->D);
 154   ga->pnew = make2d(ga->NP,ga->D);
 155   snew(ga->tmp,ga->D);
 156   snew(ga->best,ga->D);
 157   snew(ga->bestit,ga->D);
 158   snew(ga->cost,ga->NP);
 159   snew(ga->msf,ga->NP);
 160   snew(ga->pres,ga->NP);
 161   snew(ga->scale,ga->NP);
 162   snew(ga->energy,ga->NP);
 163
 164   /*-----Checking input variables for proper range--------------*/
 165   if ((ga->CR < 0) || (ga->CR > 1.0))
 166     gmx_fatal(FARGS,"CR=%f, should be ex [0,1]",ga->CR);
 167   if (ga->seed <= 0)
 168     gmx_fatal(FARGS,"seed=%d, should be > 0",ga->seed);
 169   if ((ga->strategy < 0) || (ga->strategy > 10))
 170     gmx_fatal(FARGS,"strategy=%d, should be ex {1-10}",ga->strategy);
 171
 172   /* spread initial population members */
 173   for (i=0; (i<ga->NP); i++) {
 174     for (j=0; (j<ga->D); j++) {
 175       ga->pold[i][j] = value_rand(&(range[j]),&ga->seed);
 176     }
 177   }
 178
 179   fprintf(fplog,"-----------------------------------------------\n");
 180   fprintf(fplog,"Genetic algorithm parameters\n");
 181   fprintf(fplog,"-----------------------------------------------\n");
 182   fprintf(fplog,"Number of variables:   %d\n",ga->D);
 183   fprintf(fplog,"Population size:       %d\n",ga->NP);
 184   fprintf(fplog,"Strategy:              %s\n",strat[ga->strategy]);
 185   fprintf(fplog,"Weight factor:         %g\n",ga->FF);
 186   fprintf(fplog,"Crossing over factor:  %g\n",ga->CR);
 187   fprintf(fplog,"Random seed:           %d\n",ga->seed);
 188   fprintf(fplog,"-----------------------------------------------\n");
 189
 190   return ga;
 191 }
 192
 193 void update_ga(FILE *fpout_ptr,t_range range[],t_genalg *ga)
 194 {
 195   static int  i_init=0;   /* Initialisation related stuff       */
 196   int   i, j, L, n;      /* counting variables                 */
 197   int   r1,r2,r3,r4,r5;  /* placeholders for random indexes    */
 198
 199   if (i_init < ga->NP) {
 200     /* Copy data for first force evaluation to range array  */
 201     copy2range(ga->D,ga->pold[i_init],range);
 202
 203     i_init++;
 204     return;
 205   }
 206   else {
 207     /* Now starts real genetic stuff, a new trial set is made */
 208     if (ga->ipop == ga->NP) {
 209       ga->gen++;
 210       i=ga->ipop=0;
 211     }
 212     else
 213       i=ga->ipop;
 214
 215     do {                        /* Pick a random population member */
 216       /* Endless loop for ga->NP < 2 !!!     */
 217       r1 = (int)(rando(&ga->seed)*ga->NP);
 218     } while(r1==i);
 219
 220     do {                        /* Pick a random population member */
 221       /* Endless loop for ga->NP < 3 !!!     */
 222       r2 = (int)(rando(&ga->seed)*ga->NP);
 223       } while((r2==i) || (r2==r1));
 224
 225     do {
 226       /* Pick a random population member */
 227       /* Endless loop for ga->NP < 4 !!!     */
 228       r3 = (int)(rando(&ga->seed)*ga->NP);
 229     } while((r3==i) || (r3==r1) || (r3==r2));
 230
 231     do {
 232       /* Pick a random population member */
 233       /* Endless loop for ga->NP < 5 !!!     */
 234       r4 = (int)(rando(&ga->seed)*ga->NP);
 235     } while((r4==i) || (r4==r1) || (r4==r2) || (r4==r3));
 236
 237     do {
 238       /* Pick a random population member */
 239       /* Endless loop for ga->NP < 6 !!!     */
 240       r5 = (int)(rando(&ga->seed)*ga->NP);
 241     } while((r5==i) || (r5==r1) || (r5==r2) || (r5==r3) || (r5==r4));
 242
 243
 244     /* Choice of strategy
 245      * We have tried to come up with a sensible naming-convention: DE/x/y/z
 246      * DE :  stands for Differential Evolution
 247      * x  :  a string which denotes the vector to be perturbed
 248      * y  :  number of difference vectors taken for perturbation of x
 249      * z  :  crossover method (exp = exponential, bin = binomial)
 250      *
 251      * There are some simple rules which are worth following:
 252      * 1)  ga->FF is usually between 0.5 and 1 (in rare cases > 1)
 253      * 2)  ga->CR is between 0 and 1 with 0., 0.3, 0.7 and 1. being worth to
 254      *     be tried first
 255      * 3)  To start off ga->NP = 10*ga->D is a reasonable choice. Increase ga->NP if
 256      *     misconvergence happens.
 257      * 4)  If you increase ga->NP, ga->FF usually has to be decreased
 258      * 5)  When the DE/ga->best... schemes fail DE/rand... usually works and
 259      *     vice versa
 260      * EXPONENTIAL ga->CROSSOVER
 261      *-------DE/ga->best/1/exp-------
 262      *-------Our oldest strategy but still not bad. However, we have found several
 263      *-------optimization problems where misconvergence occurs.
 264      */
 265     assignd(ga->D,ga->tmp,ga->pold[i]);
 266     n = (int)(rando(&ga->seed)*ga->D);
 267     L = 0;
 268
 269     switch (ga->strategy) {
 270     case 1:
 271       /* strategy DE0 (not in our paper) */
 272       do {
 273         ga->tmp[n] = ga->bestit[n] + ga->FF*(ga->pold[r2][n]-ga->pold[r3][n]);
 274         n = (n+1)%ga->D;
 275         L++;
 276       } while((rando(&ga->seed) < ga->CR) && (L < ga->D));
 277       break;
 278
 279       /* DE/rand/1/exp
 280        * This is one of my favourite strategies. It works especially
 281        * well when the ga->bestit[]"-schemes experience misconvergence.
 282        * Try e.g. ga->FF=0.7 and ga->CR=0.5 * as a first guess.
 283        */
 284     case 2:
 285       /* strategy DE1 in the techreport */
 286       do {
 287         ga->tmp[n] = ga->pold[r1][n] + ga->FF*(ga->pold[r2][n]-ga->pold[r3][n]);
 288         n = (n+1)%ga->D;
 289         L++;
 290       } while((rando(&ga->seed) < ga->CR) && (L < ga->D));
 291       break;
 292
 293       /* DE/rand-to-ga->best/1/exp
 294        * This strategy seems to be one of the ga->best strategies.
 295        * Try ga->FF=0.85 and ga->CR=1.
 296        * If you get misconvergence try to increase ga->NP.
 297        * If this doesn't help you should play around with all three
 298        * control variables.
 299        */
 300     case 3:
 301       /* similar to DE2 but generally better */
 302       do {
 303         ga->tmp[n] = ga->tmp[n] + ga->FF*(ga->bestit[n] - ga->tmp[n]) +
 304           ga->FF*(ga->pold[r1][n]-ga->pold[r2][n]);
 305         n = (n+1)%ga->D;
 306         L++;
 307       } while((rando(&ga->seed) < ga->CR) && (L < ga->D));
 308       break;
 309
 310       /* DE/ga->best/2/exp is another powerful strategy worth trying */
 311     case 4:
 312       do {
 313         ga->tmp[n] = ga->bestit[n] +
 314           (ga->pold[r1][n]+ga->pold[r2][n]-ga->pold[r3][n]-ga->pold[r4][n])*ga->FF;
 315         n = (n+1)%ga->D;
 316         L++;
 317       } while((rando(&ga->seed) < ga->CR) && (L < ga->D));
 318       break;
 319
 320       /*----DE/rand/2/exp seems to be a robust optimizer for many functions-----*/
 321     case 5:
 322       do {
 323         ga->tmp[n] = ga->pold[r5][n] +
 324           (ga->pold[r1][n]+ga->pold[r2][n]-ga->pold[r3][n]-ga->pold[r4][n])*ga->FF;
 325         n = (n+1)%ga->D;
 326         L++;
 327       } while((rando(&ga->seed) < ga->CR) && (L < ga->D));
 328       break;
 329
 330       /*===Essentially same strategies but BINOMIAL ga->CROSSOVER===*/
 331
 332       /*-------DE/ga->best/1/bin------*/
 333     case 6:
 334       for (L=0; L<ga->D; L++) {
 335         /* perform D binomial trials */
 336         if ((rando(&ga->seed) < ga->CR) || (L == (ga->D-1))) {
 337           /* change at least one parameter */
 338             ga->tmp[n] = ga->bestit[n] + ga->FF*(ga->pold[r2][n]-ga->pold[r3][n]);
 339         }
 340         n = (n+1)%ga->D;
 341       }
 342       break;
 343
 344       /*-------DE/rand/1/bin------*/
 345     case 7:
 346       for (L=0; L<ga->D; L++) {
 347         /* perform D binomial trials */
 348         if ((rando(&ga->seed) < ga->CR) || (L == (ga->D-1))) {
 349           /* change at least one parameter */
 350           ga->tmp[n] = ga->pold[r1][n] + ga->FF*(ga->pold[r2][n]-ga->pold[r3][n]);
 351         }
 352         n = (n+1)%ga->D;
 353       }
 354       break;
 355
 356       /*-------DE/rand-to-ga->best/1/bin------*/
 357     case 8:
 358       for (L=0; L<ga->D; L++) {
 359         /* perform ga->D binomial trials */
 360         if ((rando(&ga->seed) < ga->CR) || (L == (ga->D-1))) {
 361           /* change at least one parameter */
 362           ga->tmp[n] = ga->tmp[n] + ga->FF*(ga->bestit[n] - ga->tmp[n]) +
 363             ga->FF*(ga->pold[r1][n]-ga->pold[r2][n]);
 364         }
 365         n = (n+1)%ga->D;
 366       }
 367       break;
 368
 369       /*-------DE/ga->best/2/bin------*/
 370     case 9:
 371       for (L=0; L<ga->D; L++) {
 372         /* perform ga->D binomial trials */
 373         if ((rando(&ga->seed) < ga->CR) || (L == (ga->D-1))) {
 374           /* change at least one parameter */
 375           ga->tmp[n] = ga->bestit[n] +
 376             (ga->pold[r1][n]+ga->pold[r2][n]-ga->pold[r3][n]-ga->pold[r4][n])*ga->FF;
 377         }
 378         n = (n+1)%ga->D;
 379       }
 380       break;
 381
 382       /*-------DE/rand/2/bin-------*/
 383     default:
 384       for (L=0; L<ga->D; L++) {
 385         /* perform ga->D binomial trials */
 386         if ((rando(&ga->seed) < ga->CR) || (L == (ga->D-1))) {
 387           /* change at least one parameter */
 388           ga->tmp[n] = ga->pold[r5][n] +
 389             (ga->pold[r1][n]+ga->pold[r2][n]-ga->pold[r3][n]-ga->pold[r4][n])*ga->FF;
 390         }
 391         n = (n+1)%ga->D;
 392       }
 393       break;
 394     }
 395
 396     /*===Trial mutation now in ga->tmp[]. Test how good this choice really was.==*/
 397     copy2range(ga->D,ga->tmp,range);
 398   }
 399 }
 400
 401 bool print_ga(FILE *fp,t_genalg *ga,real msf,tensor pres,rvec scale,
 402               real energy,t_range range[],real tol)
 403 {
 404   static int nfeval=0;          /* number of function evaluations     */
 405   static bool bImproved;
 406   real trial_cost;
 407   real cvar;            /* computes the cost variance         */
 408   real cmean;           /* mean cost                          */
 409   int  i,j;
 410   real **pswap;
 411
 412   trial_cost = cost(pres,msf,energy);
 413   if (nfeval < ga->NP) {
 414     ga->cost[nfeval]   = trial_cost;
 415     ga->msf[nfeval]    = msf;
 416     ga->energy[nfeval] = energy;
 417     copy_mat(pres,ga->pres[nfeval]);
 418     copy_rvec(scale,ga->scale[nfeval]);
 419     if (debug) {
 420       pr_rvec(debug,0,"scale",scale,DIM,TRUE);
 421       pr_rvec(debug,0,"pold ",ga->pold[nfeval]+4,DIM,TRUE);
 422     }
 423     nfeval++;
 424     return FALSE;
 425   }
 426   /* When we get here we have done an initial evaluation for all
 427    * animals in the population
 428    */
 429   if (ga->ipop == 0)
 430     bImproved = FALSE;
 431
 432   /* First iteration after first round of trials */
 433   if (nfeval == ga->NP) {
 434     /* Evaluate who is ga->best */
 435     ga->imin = 0;
 436     for (j=1; (j<ga->NP); j++) {
 437       if (ga->cost[j] < ga->cost[ga->imin])
 438         ga->imin = j;
 439     }
 440     assignd(ga->D,ga->best,ga->pold[ga->imin]);
 441     /* save best member ever          */
 442     assignd(ga->D,ga->bestit,ga->pold[ga->imin]);
 443     /* save best member of generation */
 444   }
 445
 446   if (trial_cost < ga->cost[ga->ipop]) {
 447     if (trial_cost < ga->cost[ga->imin]) {
 448       /* Was this a new minimum? */
 449       /* if so, reset cmin to new low...*/
 450       ga->imin = ga->ipop;
 451       assignd(ga->D,ga->best,ga->tmp);
 452       bImproved = TRUE;
 453     }
 454     /* improved objective function value ? */
 455     ga->cost[ga->ipop]   = trial_cost;
 456
 457     ga->msf[ga->ipop]    = msf;
 458     ga->energy[ga->ipop] = energy;
 459     copy_mat(pres,ga->pres[ga->ipop]);
 460     copy_rvec(scale,ga->scale[ga->ipop]);
 461
 462     assignd(ga->D,ga->pnew[ga->ipop],ga->tmp);
 463
 464   }
 465   else {
 466     /* replace target with old value */
 467     assignd(ga->D,ga->pnew[ga->ipop],ga->pold[ga->ipop]);
 468   }
 469   /* #define SCALE_DEBUG*/
 470 #ifdef SCALE_DEBUG
 471   if (ga->D > 5) {
 472     rvec dscale;
 473     rvec_sub(ga->scale[ga->imin],&(ga->best[ga->D-3]),dscale);
 474     if (norm(dscale) > 0) {
 475       pr_rvec(fp,0,"scale",scale,DIM,TRUE);
 476       pr_rvec(fp,0,"best ",&(ga->best[ga->D-3]),DIM,TRUE);
 477       fprintf(fp,"imin = %d, ipop = %d, nfeval = %d\n",ga->imin,
 478               ga->ipop,nfeval);
 479       gmx_fatal(FARGS,"Scale inconsistency");
 480     }
 481   }
 482 #endif
 483
 484   /* Increase population member count */
 485   ga->ipop++;
 486
 487   /* End mutation loop through population? */
 488   if (ga->ipop == ga->NP) {
 489     /* Save ga->best population member of current iteration */
 490     assignd(ga->D,ga->bestit,ga->best);
 491
 492     /* swap population arrays. New generation becomes old one */
 493     pswap     = ga->pold;
 494     ga->pold  = ga->pnew;
 495     ga->pnew  = pswap;
 496
 497     /*----Compute the energy variance (just for monitoring purposes)-----------*/
 498     /* compute the mean value first */
 499     cmean = 0.0;
 500     for (j=0; (j<ga->NP); j++)
 501       cmean += ga->cost[j];
 502     cmean = cmean/ga->NP;
 503
 504     /* now the variance              */
 505     cvar = 0.0;
 506     for (j=0; (j<ga->NP); j++)
 507       cvar += sqr(ga->cost[j] - cmean);
 508     cvar = cvar/(ga->NP-1);
 509
 510     /*----Output part----------------------------------------------------------*/
 511     if (1 || bImproved || (nfeval == ga->NP)) {
 512       fprintf(fp,"\nGen: %5d\n  Cost: %12.3e  <Cost>: %12.3e\n"
 513               "  Ener: %8.4f RMSF: %8.3f Pres: %8.1f %8.1f  %8.1f (%8.1f)\n"
 514               "  Box-Scale: %15.10f  %15.10f  %15.10f\n",
 515               ga->gen,ga->cost[ga->imin],cmean,ga->energy[ga->imin],
 516               sqrt(ga->msf[ga->imin]),ga->pres[ga->imin][XX][XX],
 517               ga->pres[ga->imin][YY][YY],ga->pres[ga->imin][ZZ][ZZ],
 518               trace(ga->pres[ga->imin])/3.0,
 519               ga->scale[ga->imin][XX],ga->scale[ga->imin][YY],
 520               ga->scale[ga->imin][ZZ]);
 521
 522       for (j=0; (j<ga->D); j++)
 523         fprintf(fp,"\tbest[%d]=%-15.10f\n",j,ga->best[j]);
 524       if (ga->cost[ga->imin] < tol) {
 525         for (i=0; (i<ga->NP); i++) {
 526           fprintf(fp,"Animal: %3d Cost:%8.3f  Ener: %8.3f RMSF: %8.3f%s\n",
 527                   i,ga->cost[i],ga->energy[i],sqrt(ga->msf[i]),
 528                   (i == ga->imin) ? " ***" : "");
 529           for (j=0; (j<ga->D); j++)
 530             fprintf(fp,"\tParam[%d][%d]=%15.10g\n",i,j,ga->pold[i][j]);
 531         }
 532         return TRUE;
 533       }
 534       fflush(fp);
 535     }
 536   }
 537   nfeval++;
 538
 539   return FALSE;
 540 }
 541