src/tools/gmx_analyze.c
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 *                        VERSION 3.2.0
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Green Red Orange Magenta Azure Cyan Skyblue
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <math.h>
#include <string.h>
#include "statutil.h"
#include "sysstuff.h"
#include "typedefs.h"
#include "smalloc.h"
#include "macros.h"
#include "gmx_fatal.h"
#include "vec.h"
#include "copyrite.h"
#include "futil.h"
#include "readinp.h"
#include "statutil.h"
#include "txtdump.h"
#include "gstat.h"
#include "gmx_statistics.h"
#include "xvgr.h"
/* must correspond to char *avbar_opt[] declared in main() */
enum { avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR };
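
/* Summary comment for the routine below: power_fit() fits each set to
 * y = b t^a by linear least squares on log-log data, ln y = a ln t + ln b,
 * so lsq_y_ax_b() returns the exponent a directly and exp(b) is printed as
 * the prefactor. When the first time is not positive, ln(i+1) of the point
 * index is used instead of ln(t), and only the leading points of each set
 * are fitted, as described in the -power help text.
 */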
static void power_fit(int n,int nset,real **val,real *t)
{
    real *x,*y,quality,a,b,r;
    int  s,i;

    snew(x,n);
    snew(y,n);

    if (t[0]>0) {
        for(i=0; i<n; i++)
            if (t[0]>0)
                x[i] = log(t[i]);
    } else {
        fprintf(stdout,"First time is not larger than 0, using index number as time for power fit\n");
        for(i=0; i<n; i++)
            x[i] = log(i+1);
    }

    for(s=0; s<nset; s++) {
        i=0;
        for(i=0; i<n && val[s][i]>=0; i++)
            y[i] = log(val[s][i]);
        if (i < n)
            fprintf(stdout,"Will power fit up to point %d, since it is not larger than 0\n",i);
        lsq_y_ax_b(i,x,y,&a,&b,&r,&quality);
        fprintf(stdout,"Power fit set %3d: error %.3f a %g b %g\n",
                s+1,quality,a,exp(b));
    }

    sfree(y);
    sfree(x);
}
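
/* Summary comment for the routine below: cosine_content() evaluates the
 * discrete analogue of
 *   2 ( int_0^T y(t) cos(nhp pi t/T) dt )^2 / ( T int_0^T y(t)^2 dt )
 * for nhp half periods on n equidistant points, i.e. the quantity plotted by
 * the -cc option. A value close to 1 means the set resembles a cosine with
 * nhp/2 periods, as expected for principal components of random diffusion.
 */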
static real cosine_content(int nhp,int n,real *y)
/* Assumes n equidistant points */
{
    double fac,cosyint,yyint;
    int    i;

    if (n < 2)
        return 0;

    fac = M_PI*nhp/(n-1);

    cosyint = 0;
    yyint = 0;
    for(i=0; i<n; i++) {
        cosyint += cos(fac*i)*y[i];
        yyint += y[i]*y[i];
    }

    return 2*cosyint*cosyint/(n*yyint);
}
static void plot_coscont(const char *ccfile,int n,int nset,real **val,
                         const output_env_t oenv)
{
    FILE *fp;
    int  s;
    real cc;

    fp = xvgropen(ccfile,"Cosine content","set / half periods","cosine content",
                  oenv);

    for(s=0; s<nset; s++) {
        cc = cosine_content(s+1,n,val[s]);
        fprintf(fp," %d %g\n",s+1,cc);
        fprintf(stdout,"Cosine content of set %d with %.1f periods: %g\n",
                s+1,0.5*(s+1),cc);
    }
    fprintf(stdout,"\n");

    fclose(fp);
}
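
/* Summary comment for the routine below: regression_analysis() fits
 * f(x) = ax + b to the first data set. With -xydy the second set is passed to
 * lsq_y_ax_b_error() as the sigma values referred to in the printed message,
 * which then also provides the error estimates da and db.
 */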
static void regression_analysis(int n,bool bXYdy,real *x,real **val)
{
    real S,chi2,a,b,da,db,r=0;

    printf("Fitting data to a function f(x) = ax + b\n");
    printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]^2\n");
    printf("Error estimates will be given if w_i (sigma) values are given\n");
    printf("(use option -xydy).\n\n");
    if (bXYdy)
        lsq_y_ax_b_error(n,x,val[0],val[1],&a,&b,&da,&db,&r,&S);
    else
        lsq_y_ax_b(n,x,val[0],&a,&b,&r,&S);
    chi2 = sqr((n-2)*S);
    printf("Chi2                    = %g\n",chi2);
    printf("S (Sqrt(Chi2/(n-2)))    = %g\n",S);
    printf("Correlation coefficient = %.1f%%\n",100*r);
    printf("\n");
    if (bXYdy) {
        printf("a = %g +/- %g\n",a,da);
        printf("b = %g +/- %g\n",b,db);
    }
    else {
        printf("a = %g\n",a);
        printf("b = %g\n",b);
    }
}
void histogram(const char *distfile,real binwidth,int n, int nset, real **val,
               const output_env_t oenv)
{
    FILE   *fp;
    int    i,s;
    double min,max;
    int    nbin;
#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
    long long int *histo;
#else
    double        *histo;
#endif

    min = val[0][0];
    max = val[0][0];
    for(s=0; s<nset; s++)
        for(i=0; i<n; i++)
            if (val[s][i] < min)
                min = val[s][i];
            else if (val[s][i] > max)
                max = val[s][i];

    min = binwidth*floor(min/binwidth);
    max = binwidth*ceil(max/binwidth);
    if (min != 0)
        min -= binwidth;
    max += binwidth;

    nbin = (int)((max - min)/binwidth + 0.5) + 1;
    fprintf(stderr,"Making distributions with %d bins\n",nbin);
    snew(histo,nbin);
    fp = xvgropen(distfile,"Distribution","","",oenv);
    for(s=0; s<nset; s++) {
        for(i=0; i<nbin; i++)
            histo[i] = 0;
        for(i=0; i<n; i++)
            histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
        for(i=0; i<nbin; i++)
            fprintf(fp," %g %g\n",min+i*binwidth,(double)histo[i]/(n*binwidth));
        if (s < nset-1)
            fprintf(fp,"&\n");
    }
    fclose(fp);
}
static int real_comp(const void *a,const void *b)
{
    real dif = *(real *)a - *(real *)b;

    if (dif < 0)
        return -1;
    else if (dif > 0)
        return 1;
    else
        return 0;
}
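
/* Summary comment for the routine below: average() writes the average over
 * the nset sets at each time (the -av option). Depending on -errbar, the
 * extra column(s) hold the standard deviation sqrt(var/nset), the error of
 * the mean sqrt(var/(nset*(nset-1))), or, for "90", the distances from the
 * average to the values bounding the central ~90% interval obtained by
 * discarding about 5% of the set values at each end.
 */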
static void average(const char *avfile,int avbar_opt,
                    int n, int nset,real **val,real *t)
{
    FILE   *fp;
    int    i,s,edge=0;
    double av,var,err;
    real   *tmp=NULL;

    fp = ffopen(avfile,"w");
    if ((avbar_opt == avbarERROR) && (nset == 1))
        avbar_opt = avbarNONE;
    if (avbar_opt != avbarNONE) {
        if (avbar_opt == avbar90) {
            snew(tmp,nset);
            fprintf(fp,"@TYPE xydydy\n");
            edge = (int)(nset*0.05+0.5);
            fprintf(stdout,"Errorbars: discarding %d points on both sides: %d%%"
                    " interval\n",edge,(int)(100*(nset-2*edge)/nset+0.5));
        } else
            fprintf(fp,"@TYPE xydy\n");
    }

    for(i=0; i<n; i++) {
        av = 0;
        for(s=0; s<nset; s++)
            av += val[s][i];
        av /= nset;
        fprintf(fp," %g %g",t[i],av);
        var = 0;
        if (avbar_opt != avbarNONE) {
            if (avbar_opt == avbar90) {
                for(s=0; s<nset; s++)
                    tmp[s] = val[s][i];
                qsort(tmp,nset,sizeof(tmp[0]),real_comp);
                fprintf(fp," %g %g",tmp[nset-1-edge]-av,av-tmp[edge]);
            } else {
                for(s=0; s<nset; s++)
                    var += sqr(val[s][i]-av);
                if (avbar_opt == avbarSTDDEV)
                    err = sqrt(var/nset);
                else
                    err = sqrt(var/(nset*(nset-1)));
                fprintf(fp," %g",err);
            }
        }
        fprintf(fp,"\n");
    }
    fclose(fp);

    if (avbar_opt == avbar90)
        sfree(tmp);
}
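
/* Summary comment for the routines below: anal_ee() evaluates the analytical
 * block-average error curve described in the -ee help text,
 *   ee(t) = sqrt( 2 a tau1/T ((exp(-t/tau1) - 1) tau1/t + 1)
 *               + 2 (1-a) tau2/T ((exp(-t/tau2) - 1) tau2/t + 1) ),
 * with parm[0] = tau1, parm[1] = a, parm[2] = tau2 and parm[3] = 1-a, the
 * convention used for fitparm[] in estimate_error(). anal_ee_inf() is its
 * t -> infinity limit, sqrt(2/T (a tau1 + (1-a) tau2)); estimate_error()
 * multiplies it by sig[s] to obtain the reported error estimate.
 */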
static real anal_ee_inf(real *parm,real T)
{
    return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
}

static real anal_ee(real *parm,real T,real t)
{
    real e1,e2;

    if (parm[0])
        e1 = exp(-t/parm[0]);
    else
        e1 = 1;
    if (parm[2])
        e2 = exp(-t/parm[2]);
    else
        e2 = 1;

    return sqrt(parm[1]*2*parm[0]/T*((e1 - 1)*parm[0]/t + 1) +
                parm[3]*2*parm[2]/T*((e2 - 1)*parm[2]/t + 1));
}
static void estimate_error(const char *eefile,int nb_min,int resol,int n,
                           int nset, double *av,double *sig,real **val,real dt,
                           bool bFitAc,bool bSingleExpFit,bool bAllowNegLTCorr,
                           const output_env_t oenv)
{
    FILE   *fp;
    int    bs,prev_bs,nbs,nb;
    real   spacing,nbr;
    int    s,i,j;
    double blav,var;
    char   **leg;
    real   *tbs,*ybs,rtmp,dens,*fitsig,twooe,tau1_est,tau_sig;
    real   fitparm[4];
    real   ee,a,tau1,tau2;

    if (n < 4)
    {
        fprintf(stdout,"The number of points is smaller than 4, can not make an error estimate\n");

        return;
    }

    fp = xvgropen(eefile,"Error estimates",
                  "Block size (time)","Error estimate", oenv);
    if (get_print_xvgr_codes(oenv))
    {
        fprintf(fp,
                "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
                (n-1)*dt,n);
    }
    snew(leg,2*nset);
    xvgr_legend(fp,2*nset,leg,oenv);
    sfree(leg);

    spacing = pow(2,1.0/resol);
    snew(tbs,n);
    snew(ybs,n);
    snew(fitsig,n);
    for(s=0; s<nset; s++)
    {
        nbs = 0;
        prev_bs = 0;
        nbr = nb_min;
        while (nbr <= n)
        {
            bs = n/(int)nbr;
            if (bs != prev_bs)
            {
                nb = n/bs;
                var = 0;
                for(i=0; i<nb; i++)
                {
                    blav = 0;
                    for (j=0; j<bs; j++)
                    {
                        blav += val[s][bs*i+j];
                    }
                    var += sqr(av[s] - blav/bs);
                }
                tbs[nbs] = bs*dt;
                if (sig[s] == 0)
                {
                    ybs[nbs] = 0;
                }
                else
                {
                    ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
                }
                nbs++;
            }
            nbr *= spacing;
            nb = (int)(nbr+0.5);
            prev_bs = bs;
        }
        if (sig[s] == 0)
        {
            ee   = 0;
            a    = 1;
            tau1 = 0;
            tau2 = 0;
        }
        else
        {
            for(i=0; i<nbs/2; i++)
            {
                rtmp         = tbs[i];
                tbs[i]       = tbs[nbs-1-i];
                tbs[nbs-1-i] = rtmp;
                rtmp         = ybs[i];
                ybs[i]       = ybs[nbs-1-i];
                ybs[nbs-1-i] = rtmp;
            }
            /* The initial slope of the normalized ybs^2 is 1.
             * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
             * From this we take our initial guess for tau1.
             */
            twooe = 2/exp(1);
            i = -1;
            do
            {
                i++;
                tau1_est = tbs[i];
            } while (i < nbs - 1 &&
                     (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));

            if (ybs[0] > ybs[1])
            {
                fprintf(stdout,"Data set %d has strange time correlations:\n"
                        "the std. error using single points is larger than that of blocks of 2 points\n"
                        "The error estimate might be inaccurate, check the fit\n",
                        s+1);
                /* Use the total time as tau for the fitting weights */
                tau_sig = (n - 1)*dt;
            }
            else
            {
                tau_sig = tau1_est;
            }

            if (debug)
            {
                fprintf(debug,"set %d tau1 estimate %f\n",s+1,tau1_est);
            }

            /* Generate more or less appropriate sigma's,
             * also taking the density of points into account.
             */
            for(i=0; i<nbs; i++)
            {
                if (i == 0)
                {
                    dens = tbs[1]/tbs[0] - 1;
                }
                else if (i == nbs-1)
                {
                    dens = tbs[nbs-1]/tbs[nbs-2] - 1;
                }
                else
                {
                    dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
                }
                fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
            }

            if (!bSingleExpFit)
            {
                fitparm[0] = tau1_est;
                fitparm[1] = 0.95;
                /* We set the initial guess for tau2
                 * to halfway between tau1_est and the total time (on log scale).
                 */
                fitparm[2] = sqrt(tau1_est*(n-1)*dt);
                do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,
                         bDebugMode(),effnERREST,fitparm,0);
                fitparm[3] = 1-fitparm[1];
            }
            if (bSingleExpFit || fitparm[0]<0 || fitparm[2]<0 || fitparm[1]<0
                || (fitparm[1]>1 && !bAllowNegLTCorr) || fitparm[2]>(n-1)*dt)
            {
                if (!bSingleExpFit)
                {
                    if (fitparm[2] > (n-1)*dt)
                    {
                        fprintf(stdout,
                                "Warning: tau2 is longer than the length of the data (%g)\n"
                                "         the statistics might be bad\n",
                                (n-1)*dt);
                    }
                    else
                    {
                        fprintf(stdout,"a fitted parameter is negative\n");
                    }
                    fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
                            sig[s]*anal_ee_inf(fitparm,n*dt),
                            fitparm[1],fitparm[0],fitparm[2]);
                    /* Do a fit with tau2 fixed at the total time.
                     * One could also choose any other large value for tau2.
                     */
                    fitparm[0] = tau1_est;
                    fitparm[1] = 0.95;
                    fitparm[2] = (n-1)*dt;
                    fprintf(stderr,"Will fix tau2 at the total time: %g\n",fitparm[2]);
                    do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
                             effnERREST,fitparm,4);
                    fitparm[3] = 1-fitparm[1];
                }
                if (bSingleExpFit || fitparm[0]<0 || fitparm[1]<0
                    || (fitparm[1]>1 && !bAllowNegLTCorr))
                {
                    if (!bSingleExpFit) {
                        fprintf(stdout,"a fitted parameter is negative\n");
                        fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
                                sig[s]*anal_ee_inf(fitparm,n*dt),
                                fitparm[1],fitparm[0],fitparm[2]);
                    }
                    /* Do a single exponential fit */
                    fprintf(stderr,"Will use a single exponential fit for set %d\n",s+1);
                    fitparm[0] = tau1_est;
                    fitparm[1] = 1.0;
                    fitparm[2] = 0.0;
                    do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
                             effnERREST,fitparm,6);
                    fitparm[3] = 1-fitparm[1];
                }
            }
            ee   = sig[s]*anal_ee_inf(fitparm,n*dt);
            a    = fitparm[1];
            tau1 = fitparm[0];
            tau2 = fitparm[2];
        }
        fprintf(stdout,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
                s+1,ee,a,tau1,tau2);
        fprintf(fp,"@ legend string %d \"av %f\"\n",2*s,av[s]);
        fprintf(fp,"@ legend string %d \"ee %6g\"\n",
                2*s+1,sig[s]*anal_ee_inf(fitparm,n*dt));
        for(i=0; i<nbs; i++)
        {
            fprintf(fp,"%g %g %g\n",tbs[i],sig[s]*sqrt(ybs[i]/(n*dt)),
                    sig[s]*sqrt(fit_function(effnERREST,fitparm,tbs[i])/(n*dt)));
        }

        if (bFitAc)
        {
            int  fitlen;
            real *ac,acint,ac_fit[4];

            snew(ac,n);
            for(i=0; i<n; i++) {
                ac[i] = val[s][i] - av[s];
                if (i > 0)
                    fitsig[i] = sqrt(i);
                else
                    fitsig[i] = 1;
            }
            low_do_autocorr(NULL,oenv,NULL,n,1,-1,&ac,
                            dt,eacNormal,1,FALSE,TRUE,
                            FALSE,0,0,effnNONE,0);

            fitlen = n/nb_min;

            /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
            acint = 0.5*ac[0];
            for(i=1; i<=fitlen/2; i++)
            {
                acint += ac[i];
            }
            acint *= dt;

            /* Generate more or less appropriate sigma's */
            for(i=0; i<=fitlen; i++)
            {
                fitsig[i] = sqrt(acint + dt*i);
            }

            ac_fit[0] = 0.5*acint;
            ac_fit[1] = 0.95;
            ac_fit[2] = 10*acint;
            do_lmfit(n/nb_min,ac,fitsig,dt,0,0,fitlen*dt,oenv,
                     bDebugMode(),effnEXP3,ac_fit,0);
            ac_fit[3] = 1 - ac_fit[1];

            fprintf(stdout,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
                    s+1,sig[s]*anal_ee_inf(ac_fit,n*dt),
                    ac_fit[1],ac_fit[0],ac_fit[2]);

            fprintf(fp,"&\n");
            for(i=0; i<nbs; i++)
            {
                fprintf(fp,"%g %g\n",tbs[i],
                        sig[s]*sqrt(fit_function(effnERREST,ac_fit,tbs[i])/(n*dt)));
            }

            sfree(ac);
        }
        if (s < nset-1)
        {
            fprintf(fp,"&\n");
        }
    }
    sfree(fitsig);
    sfree(ybs);
    sfree(tbs);
    fclose(fp);
}
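
/* Summary comment for the routine below: luzar_correl() passes g_hbond output
 * on to analyse_corr() (the -luzar option). Without error columns it takes
 * c(t) from set 0 and n(t) from set 2 and computes k(t) = -dc(t)/dt
 * numerically; with -xydy it expects the six columns
 * c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t).
 */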
static void luzar_correl(int nn,real *time,int nset,real **val,real temp,
                         bool bError,real fit_start,real smooth_tail_start,
                         const output_env_t oenv)
{
    const real tol = 1e-8;
    real *kt;
    real weight,d2;
    int  j;

    please_cite(stdout,"Spoel2006b");

    /* Compute negative derivative k(t) = -dc(t)/dt */
    if (!bError) {
        snew(kt,nn);
        compute_derivative(nn,time,val[0],kt);
        for(j=0; (j<nn); j++)
            kt[j] = -kt[j];
        if (debug) {
            d2 = 0;
            for(j=0; (j<nn); j++)
                d2 += sqr(kt[j] - val[3][j]);
            fprintf(debug,"RMS difference in derivatives is %g\n",sqrt(d2/nn));
        }
        analyse_corr(nn,time,val[0],val[2],kt,NULL,NULL,NULL,fit_start,
                     temp,smooth_tail_start,oenv);
        sfree(kt);
    }
    else if (nset == 6) {
        analyse_corr(nn,time,val[0],val[2],val[4],
                     val[1],val[3],val[5],fit_start,temp,smooth_tail_start,oenv);
    }
    else {
        printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
        printf("Not doing anything. Sorry.\n");
    }
}
static void filter(real flen,int n,int nset,real **val,real dt,
                   const output_env_t oenv)
{
    int    f,s,i,j;
    double *filt,sum,vf,fluc,fluctot;

    f = (int)(flen/(2*dt));
    snew(filt,f+1);
    filt[0] = 1;
    sum = 1;
    for(i=1; i<=f; i++) {
        filt[i] = cos(M_PI*dt*i/flen);
        sum += 2*filt[i];
    }
    for(i=0; i<=f; i++)
        filt[i] /= sum;
    fprintf(stdout,"Will calculate the fluctuation over %d points\n",n-2*f);
    fprintf(stdout," using a filter of length %g of %d points\n",flen,2*f+1);
    fluctot = 0;
    for(s=0; s<nset; s++) {
        fluc = 0;
        for(i=f; i<n-f; i++) {
            vf = filt[0]*val[s][i];
            for(j=1; j<=f; j++)
                vf += filt[j]*(val[s][i-j]+val[s][i+j]);
            fluc += sqr(val[s][i] - vf);
        }
        fluc /= n - 2*f;
        fluctot += fluc;
        fprintf(stdout,"Set %3d filtered fluctuation: %12.6e\n",s+1,sqrt(fluc));
    }
    fprintf(stdout,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot/nset));
    fprintf(stdout,"\n");

    sfree(filt);
}
static void do_fit(FILE *out,int n,bool bYdy,int ny,real *x0,real **val,
                   int npargs,t_pargs *ppa,const output_env_t oenv)
{
    real *c1=NULL,*sig=NULL,*fitparm;
    real dt=0,tendfit,tbeginfit;
    int  i,efitfn,nparm;

    efitfn = get_acffitfn();
    nparm = nfp_ffn[efitfn];
    fprintf(out,"Will fit to the following function:\n");
    fprintf(out,"%s\n",longs_ffn[efitfn]);
    c1 = val[n];
    if (bYdy) {
        c1  = val[n];
        sig = val[n+1];
        fprintf(out,"Using two columns as y and sigma values\n");
    } else {
        snew(sig,ny);
    }
    if (opt2parg_bSet("-beginfit",npargs,ppa)) {
        tbeginfit = opt2parg_real("-beginfit",npargs,ppa);
    } else {
        tbeginfit = x0 ? x0[0] : 0;
    }
    if (opt2parg_bSet("-endfit",npargs,ppa)) {
        tendfit = opt2parg_real("-endfit",npargs,ppa);
    } else {
        tendfit = x0 ? x0[ny-1] : (ny-1)*dt;
    }

    snew(fitparm,nparm);
    switch(efitfn) {
    case effnEXP1:
        fitparm[0] = 0.5;
        break;
    case effnEXP2:
        fitparm[0] = 0.5;
        fitparm[1] = c1[0];
        break;
    case effnEXP3:
        fitparm[0] = 1.0;
        fitparm[1] = 0.5*c1[0];
        fitparm[2] = 10.0;
        break;
    case effnEXP5:
        fitparm[0] = fitparm[2] = 0.5*c1[0];
        fitparm[1] = 10;
        fitparm[3] = 40;
        fitparm[4] = 0;
        break;
    case effnEXP7:
        fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
        fitparm[1] = 1;
        fitparm[3] = 10;
        fitparm[5] = 100;
        fitparm[6] = 0;
        break;
    case effnEXP9:
        fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
        fitparm[1] = 0.1;
        fitparm[3] = 1;
        fitparm[5] = 10;
        fitparm[7] = 100;
        fitparm[8] = 0;
        break;
    default:
        fprintf(out,"Warning: don't know how to initialize the parameters\n");
        for(i=0; (i<nparm); i++)
            fitparm[i] = 1;
    }
    fprintf(out,"Starting parameters:\n");
    for(i=0; (i<nparm); i++)
        fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
    if (do_lmfit(ny,c1,sig,dt,x0,tbeginfit,tendfit,
                 oenv,bDebugMode(),efitfn,fitparm,0)) {
        for(i=0; (i<nparm); i++)
            fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
    }
    else {
        fprintf(out,"No solution was found\n");
    }
}
int gmx_analyze(int argc,char *argv[])
{
    static const char *desc[] = {
        "g_analyze reads an ascii file and analyzes data sets.",
        "A line in the input file may start with a time",
        "(see option [TT]-time[tt]) and any number of y values may follow.",
        "Multiple sets can also be",
        "read when they are separated by & (option [TT]-n[tt]),",
        "in this case only one y value is read from each line.",
        "All lines starting with # and @ are skipped.",
        "All analyses can also be done for the derivative of a set",
        "(option [TT]-d[tt]).[PAR]",

        "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
        "points are equidistant in time.[PAR]",

        "g_analyze always shows the average and standard deviation of each",
        "set. For each set it also shows the relative deviation of the third",
        "and fourth cumulant from those of a Gaussian distribution with the same",
        "standard deviation.[PAR]",

        "Option [TT]-ac[tt] produces the autocorrelation function(s).[PAR]",

        "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
        "i/2 periods. The formula is:[BR]"
        "2 (int0-T y(t) cos(i pi t) dt)^2 / int0-T y(t) y(t) dt[BR]",
        "This is useful for principal components obtained from covariance",
        "analysis, since the principal components of random diffusion are",
        "pure cosines.[PAR]",

        "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",

        "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",

        "Option [TT]-av[tt] produces the average over the sets.",
        "Error bars can be added with the option [TT]-errbar[tt].",
        "The errorbars can represent the standard deviation, the error",
        "(assuming the points are independent) or the interval containing",
        "90% of the points, by discarding 5% of the points at the top and",
        "the bottom.[PAR]",

        "Option [TT]-ee[tt] produces error estimates using block averaging.",
        "A set is divided in a number of blocks and averages are calculated for",
        "each block. The error for the total average is calculated from",
        "the variance between averages of the m blocks B_i as follows:",
        "error^2 = Sum (B_i - <B>)^2 / (m*(m-1)).",
        "These errors are plotted as a function of the block size.",
        "Also an analytical block average curve is plotted, assuming",
        "that the autocorrelation is a sum of two exponentials.",
        "The analytical curve for the block average is:[BR]",
        "f(t) = sigma sqrt(2/T (  a   (tau1 ((exp(-t/tau1) - 1) tau1/t + 1)) +[BR]",
        "                       (1-a) (tau2 ((exp(-t/tau2) - 1) tau2/t + 1)))),[BR]"
        "where T is the total time.",
        "a, tau1 and tau2 are obtained by fitting f^2(t) to error^2.",
        "When the actual block average is very close to the analytical curve,",
        "the error is sigma*sqrt(2/T ( a tau1 + (1-a) tau2)).",
        "The complete derivation is given in",
        "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",

        "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
        "of each set and over all sets with respect to a filtered average.",
        "The filter is proportional to cos(pi t/len) where t goes from -len/2",
        "to len/2. len is supplied with the option [TT]-filter[tt].",
        "This filter reduces oscillations with period len/2 and len by a factor",
        "of 0.79 and 0.33 respectively.[PAR]",

        "Option [TT]-g[tt] fits the data to the function given with option",
        "[TT]-fitfn[tt].[PAR]",

        "Option [TT]-power[tt] fits the data to b t^a, which is accomplished",
        "by fitting to a t + b on log-log scale. All points after the first",
        "zero or negative value are ignored.[PAR]"

        "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
        "on output from [TT]g_hbond[tt]. The input file can be taken directly",
        "from [TT]g_hbond -ac[tt], and then the same result should be produced."
    };
    static real tb=-1,te=-1,frac=0.5,filtlen=0,binwidth=0.1,aver_start=0;
    static bool bHaveT=TRUE,bDer=FALSE,bSubAv=TRUE,bAverCorr=FALSE,bXYdy=FALSE;
    static bool bEESEF=FALSE,bEENLC=FALSE,bEeFitAc=FALSE,bPower=FALSE;
    static bool bIntegrate=FALSE,bRegression=FALSE,bLuzar=FALSE,bLuzarError=FALSE;
    static int  nsets_in=1,d=1,nb_min=4,resol=10;
    static real temp=298.15,fit_start=1,smooth_tail_start=-1;

    /* must correspond to enum avbar* declared at beginning of file */
    static const char *avbar_opt[avbarNR+1] = {
        NULL, "none", "stddev", "error", "90", NULL
    };
    t_pargs pa[] = {
        { "-time", FALSE, etBOOL, {&bHaveT},
          "Expect a time in the input" },
        { "-b", FALSE, etREAL, {&tb},
          "First time to read from set" },
        { "-e", FALSE, etREAL, {&te},
          "Last time to read from set" },
        { "-n", FALSE, etINT, {&nsets_in},
          "Read # sets separated by &" },
        { "-d", FALSE, etBOOL, {&bDer},
          "Use the derivative" },
        { "-dp", FALSE, etINT, {&d},
          "HIDDENThe derivative is the difference over # points" },
        { "-bw", FALSE, etREAL, {&binwidth},
          "Binwidth for the distribution" },
        { "-errbar", FALSE, etENUM, {avbar_opt},
          "Error bars for -av" },
        { "-integrate",FALSE,etBOOL, {&bIntegrate},
          "Integrate data function(s) numerically using trapezium rule" },
        { "-aver_start",FALSE, etREAL, {&aver_start},
          "Start averaging the integral from here" },
        { "-xydy", FALSE, etBOOL, {&bXYdy},
          "Interpret second data set as error in the y values for integrating" },
        { "-regression",FALSE,etBOOL,{&bRegression},
          "Perform a linear regression analysis on the data" },
        { "-luzar", FALSE, etBOOL, {&bLuzar},
          "Do a Luzar and Chandler analysis on a correlation function and related data as produced by g_hbond. When in addition the -xydy flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
        { "-temp", FALSE, etREAL, {&temp},
          "Temperature for the Luzar hydrogen bonding kinetics analysis" },
        { "-fitstart", FALSE, etREAL, {&fit_start},
          "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
        { "-smooth",FALSE, etREAL, {&smooth_tail_start},
          "If >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: y = A exp(-x/tau)" },
        { "-nbmin", FALSE, etINT, {&nb_min},
          "HIDDENMinimum number of blocks for block averaging" },
        { "-resol", FALSE, etINT, {&resol},
          "HIDDENResolution for the block averaging, block size increases with"
          " a factor 2^(1/#)" },
        { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
          "HIDDENAlways use a single exponential fit for the error estimate" },
        { "-eenlc", FALSE, etBOOL, {&bEENLC},
          "HIDDENAllow a negative long-time correlation" },
        { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
          "HIDDENAlso plot analytical block average using an autocorrelation fit" },
        { "-filter", FALSE, etREAL, {&filtlen},
          "Print the high-frequency fluctuation after filtering with a cosine filter of length #" },
        { "-power", FALSE, etBOOL, {&bPower},
          "Fit data to: b t^a" },
        { "-subav", FALSE, etBOOL, {&bSubAv},
          "Subtract the average before autocorrelating" },
        { "-oneacf", FALSE, etBOOL, {&bAverCorr},
          "Calculate one ACF over all sets" }
    };
#define NPA asize(pa)
    FILE     *out,*out_fit;
    int      n,nlast,s,nset,i,j=0;
    real     **val,*t,dt,tot,error;
    double   *av,*sig,cum1,cum2,cum3,cum4,db;
    const char *acfile,*msdfile,*ccfile,*distfile,*avfile,*eefile,*fitfile;
    output_env_t oenv;

    t_filenm fnm[] = {
        { efXVG, "-f",    "graph",    ffREAD   },
        { efXVG, "-ac",   "autocorr", ffOPTWR  },
        { efXVG, "-msd",  "msd",      ffOPTWR  },
        { efXVG, "-cc",   "coscont",  ffOPTWR  },
        { efXVG, "-dist", "distr",    ffOPTWR  },
        { efXVG, "-av",   "average",  ffOPTWR  },
        { efXVG, "-ee",   "errest",   ffOPTWR  },
        { efLOG, "-g",    "fitlog",   ffOPTWR  }
    };
#define NFILE asize(fnm)

    int     npargs;
    t_pargs *ppa;

    npargs = asize(pa);
    ppa    = add_acf_pargs(&npargs,pa);

    CopyRight(stderr,argv[0]);
    parse_common_args(&argc,argv,PCA_CAN_VIEW,
                      NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);
    acfile   = opt2fn_null("-ac",NFILE,fnm);
    msdfile  = opt2fn_null("-msd",NFILE,fnm);
    ccfile   = opt2fn_null("-cc",NFILE,fnm);
    distfile = opt2fn_null("-dist",NFILE,fnm);
    avfile   = opt2fn_null("-av",NFILE,fnm);
    eefile   = opt2fn_null("-ee",NFILE,fnm);
    if (opt2parg_bSet("-fitfn",npargs,ppa))
        fitfile = opt2fn("-g",NFILE,fnm);
    else
        fitfile = opt2fn_null("-g",NFILE,fnm);

    val = read_xvg_time(opt2fn("-f",NFILE,fnm),bHaveT,
                        opt2parg_bSet("-b",npargs,ppa),tb,
                        opt2parg_bSet("-e",npargs,ppa),te,
                        nsets_in,&nset,&n,&dt,&t);
    printf("Read %d sets of %d points, dt = %g\n\n",nset,n,dt);

    if (bDer) {
        printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
               d,d);
        n -= d;
        for(s=0; s<nset; s++)
            for(i=0; (i<n); i++)
                val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
    }
    if (bIntegrate) {
        real sum,stddev;
        printf("Calculating the integral using the trapezium rule\n");

        if (bXYdy) {
            sum = evaluate_integral(n,t,val[0],val[1],aver_start,&stddev);
            printf("Integral %10.3f +/- %10.5f\n",sum,stddev);
        }
        else {
            for(s=0; s<nset; s++) {
                sum = evaluate_integral(n,t,val[s],NULL,aver_start,&stddev);
                printf("Integral %d %10.5f +/- %10.5f\n",s+1,sum,stddev);
            }
        }
    }
    if (fitfile) {
        out_fit = ffopen(fitfile,"w");
        if (bXYdy && nset>=2) {
            do_fit(out_fit,0,TRUE,n,t,val,npargs,ppa,oenv);
        } else {
            for(s=0; s<nset; s++)
                do_fit(out_fit,s,FALSE,n,t,val,npargs,ppa,oenv);
        }
        fclose(out_fit);
    }
    printf("                                 std. dev.    relative deviation of\n");
    printf("                  standard       ---------   cumulants from those of\n");
    printf("set     average   deviation      sqrt(n-1)   a Gaussian distribution\n");
    printf("                                               cum. 3   cum. 4\n");
    snew(av,nset);
    snew(sig,nset);
    for(s=0; (s<nset); s++) {
        cum1 = 0;
        cum2 = 0;
        cum3 = 0;
        cum4 = 0;
        for(i=0; (i<n); i++)
            cum1 += val[s][i];
        cum1 /= n;
        for(i=0; (i<n); i++) {
            db = val[s][i]-cum1;
            cum2 += db*db;
            cum3 += db*db*db;
            cum4 += db*db*db*db;
        }
        cum2 /= n;
        cum3 /= n;
        cum4 /= n;
        av[s]  = cum1;
        sig[s] = sqrt(cum2);
        if (n > 1)
            error = sqrt(cum2/(n-1));
        else
            error = 0;
        printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
               s+1,av[s],sig[s],error,
               sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
               sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
    }
    printf("\n");
    if (filtlen)
        filter(filtlen,n,nset,val,dt,oenv);

    if (msdfile) {
        out = xvgropen(msdfile,"Mean square displacement",
                       "time","MSD (nm\\S2\\N)",oenv);
        nlast = (int)(n*frac);
        for(s=0; s<nset; s++) {
            for(j=0; j<=nlast; j++) {
                if (j % 100 == 0)
                    fprintf(stderr,"\r%d",j);
                tot = 0;
                for(i=0; i<n-j; i++)
                    tot += sqr(val[s][i]-val[s][i+j]);
                tot /= (real)(n-j);
                fprintf(out," %g %8g\n",dt*j,tot);
            }
            if (s<nset-1)
                fprintf(out,"&\n");
        }
        fclose(out);
        fprintf(stderr,"\r%d, time=%g\n",j-1,(j-1)*dt);
    }
    if (ccfile)
        plot_coscont(ccfile,n,nset,val,oenv);

    if (distfile)
        histogram(distfile,binwidth,n,nset,val,oenv);
    if (avfile)
        average(avfile,nenum(avbar_opt),n,nset,val,t);
    if (eefile)
        estimate_error(eefile,nb_min,resol,n,nset,av,sig,val,dt,
                       bEeFitAc,bEESEF,bEENLC,oenv);
    if (bPower)
        power_fit(n,nset,val,t);
    if (acfile) {
        if (bSubAv)
            for(s=0; s<nset; s++)
                for(i=0; i<n; i++)
                    val[s][i] -= av[s];
        do_autocorr(acfile,oenv,"Autocorrelation",n,nset,val,dt,
                    eacNormal,bAverCorr);
    }
    if (bRegression)
        regression_analysis(n,bXYdy,t,val);

    if (bLuzar)
        luzar_correl(n,t,nset,val,temp,bXYdy,fit_start,smooth_tail_start,oenv);

    view_all(oenv,NFILE, fnm);

    thanx(stderr);

    return 0;
}