src/tools/gmx_analyze.c
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 *                        VERSION 3.2.0
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Green Red Orange Magenta Azure Cyan Skyblue
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <math.h>
#include <string.h>
#include "statutil.h"
#include "sysstuff.h"
#include "typedefs.h"
#include "smalloc.h"
#include "macros.h"
#include "gmx_fatal.h"
#include "vec.h"
#include "copyrite.h"
#include "futil.h"
#include "readinp.h"
#include "statutil.h"
#include "txtdump.h"
#include "gstat.h"
#include "gmx_statistics.h"
#include "xvgr.h"
/* must correspond to char *avbar_opt[] declared in main() */
enum { avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR };
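
/* Summary comment for the routine below: power_fit() fits each set to
 * y = b t^a by linear least squares on log-log data, ln y = a ln t + ln b,
 * so lsq_y_ax_b() returns the exponent a directly and exp(b) is printed as
 * the prefactor. When the first time is not positive, ln(i+1) of the point
 * index is used instead of ln(t), and only the leading points of each set
 * are fitted, as described in the -power help text.
 */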
static void power_fit(int n,int nset,real **val,real *t)
{
    real *x,*y,quality,a,b,r;
    int  s,i;

    snew(x,n);
    snew(y,n);

    if (t[0]>0) {
        for(i=0; i<n; i++)
            if (t[0]>0)
                x[i] = log(t[i]);
    } else {
        fprintf(stdout,"First time is not larger than 0, using index number as time for power fit\n");
        for(i=0; i<n; i++)
            x[i] = log(i+1);
    }

    for(s=0; s<nset; s++) {
        i=0;
        for(i=0; i<n && val[s][i]>=0; i++)
            y[i] = log(val[s][i]);
        if (i < n)
            fprintf(stdout,"Will power fit up to point %d, since it is not larger than 0\n",i);
        lsq_y_ax_b(i,x,y,&a,&b,&r,&quality);
        fprintf(stdout,"Power fit set %3d: error %.3f a %g b %g\n",
                s+1,quality,a,exp(b));
    }

    sfree(y);
    sfree(x);
}
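
/* Summary comment for the routine below: cosine_content() evaluates the
 * discrete analogue of
 *   2 ( int_0^T y(t) cos(nhp pi t/T) dt )^2 / ( T int_0^T y(t)^2 dt )
 * for nhp half periods on n equidistant points, i.e. the quantity plotted by
 * the -cc option. A value close to 1 means the set resembles a cosine with
 * nhp/2 periods, as expected for principal components of random diffusion.
 */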
static real cosine_content(int nhp,int n,real *y)
/* Assumes n equidistant points */
{
    double fac,cosyint,yyint;
    int    i;

    if (n < 2)
        return 0;

    fac = M_PI*nhp/(n-1);

    cosyint = 0;
    yyint = 0;
    for(i=0; i<n; i++) {
        cosyint += cos(fac*i)*y[i];
        yyint += y[i]*y[i];
    }

    return 2*cosyint*cosyint/(n*yyint);
}
static void plot_coscont(const char *ccfile,int n,int nset,real **val,
                         const output_env_t oenv)
{
    FILE *fp;
    int  s;
    real cc;

    fp = xvgropen(ccfile,"Cosine content","set / half periods","cosine content",
                  oenv);

    for(s=0; s<nset; s++) {
        cc = cosine_content(s+1,n,val[s]);
        fprintf(fp," %d %g\n",s+1,cc);
        fprintf(stdout,"Cosine content of set %d with %.1f periods: %g\n",
                s+1,0.5*(s+1),cc);
    }
    fprintf(stdout,"\n");

    fclose(fp);
}
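
/* Summary comment for the routine below: regression_analysis() fits
 * f(x) = ax + b to the first data set. With -xydy the second set is passed to
 * lsq_y_ax_b_error() as the sigma values referred to in the printed message,
 * which then also provides the error estimates da and db.
 */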
static void regression_analysis(int n,bool bXYdy,real *x,real **val)
{
    real S,chi2,a,b,da,db,r=0;

    printf("Fitting data to a function f(x) = ax + b\n");
    printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]^2\n");
    printf("Error estimates will be given if w_i (sigma) values are given\n");
    printf("(use option -xydy).\n\n");
    if (bXYdy)
        lsq_y_ax_b_error(n,x,val[0],val[1],&a,&b,&da,&db,&r,&S);
    else
        lsq_y_ax_b(n,x,val[0],&a,&b,&r,&S);
    chi2 = sqr((n-2)*S);
    printf("Chi2                    = %g\n",chi2);
    printf("S (Sqrt(Chi2/(n-2)))    = %g\n",S);
    printf("Correlation coefficient = %.1f%%\n",100*r);
    printf("\n");
    if (bXYdy) {
        printf("a = %g +/- %g\n",a,da);
        printf("b = %g +/- %g\n",b,db);
    }
    else {
        printf("a = %g\n",a);
        printf("b = %g\n",b);
    }
}
void histogram(const char *distfile,real binwidth,int n, int nset, real **val,
               const output_env_t oenv)
{
    FILE   *fp;
    int    i,s;
    double min,max;
    int    nbin;
#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
    long long int *histo;
#else
    double        *histo;
#endif

    min = val[0][0];
    max = val[0][0];
    for(s=0; s<nset; s++)
        for(i=0; i<n; i++)
            if (val[s][i] < min)
                min = val[s][i];
            else if (val[s][i] > max)
                max = val[s][i];

    min = binwidth*floor(min/binwidth);
    max = binwidth*ceil(max/binwidth);
    if (min != 0)
        min -= binwidth;
    max += binwidth;

    nbin = (int)((max - min)/binwidth + 0.5) + 1;
    fprintf(stderr,"Making distributions with %d bins\n",nbin);
    snew(histo,nbin);
    fp = xvgropen(distfile,"Distribution","","",oenv);
    for(s=0; s<nset; s++) {
        for(i=0; i<nbin; i++)
            histo[i] = 0;
        for(i=0; i<n; i++)
            histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
        for(i=0; i<nbin; i++)
            fprintf(fp," %g %g\n",min+i*binwidth,(double)histo[i]/(n*binwidth));
        if (s < nset-1)
            fprintf(fp,"&\n");
    }
    fclose(fp);
}
static int real_comp(const void *a,const void *b)
{
    real dif = *(real *)a - *(real *)b;

    if (dif < 0)
        return -1;
    else if (dif > 0)
        return 1;
    else
        return 0;
}
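
/* Summary comment for the routine below: average() writes the average over
 * the nset sets at each time (the -av option). Depending on -errbar, the
 * extra column(s) hold the standard deviation sqrt(var/nset), the error of
 * the mean sqrt(var/(nset*(nset-1))), or, for "90", the distances from the
 * average to the values bounding the central ~90% interval obtained by
 * discarding about 5% of the set values at each end.
 */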
static void average(const char *avfile,int avbar_opt,
                    int n, int nset,real **val,real *t)
{
    FILE   *fp;
    int    i,s,edge=0;
    double av,var,err;
    real   *tmp=NULL;

    fp = ffopen(avfile,"w");
    if ((avbar_opt == avbarERROR) && (nset == 1))
        avbar_opt = avbarNONE;
    if (avbar_opt != avbarNONE) {
        if (avbar_opt == avbar90) {
            snew(tmp,nset);
            fprintf(fp,"@TYPE xydydy\n");
            edge = (int)(nset*0.05+0.5);
            fprintf(stdout,"Errorbars: discarding %d points on both sides: %d%%"
                    " interval\n",edge,(int)(100*(nset-2*edge)/nset+0.5));
        } else
            fprintf(fp,"@TYPE xydy\n");
    }

    for(i=0; i<n; i++) {
        av = 0;
        for(s=0; s<nset; s++)
            av += val[s][i];
        av /= nset;
        fprintf(fp," %g %g",t[i],av);
        var = 0;
        if (avbar_opt != avbarNONE) {
            if (avbar_opt == avbar90) {
                for(s=0; s<nset; s++)
                    tmp[s] = val[s][i];
                qsort(tmp,nset,sizeof(tmp[0]),real_comp);
                fprintf(fp," %g %g",tmp[nset-1-edge]-av,av-tmp[edge]);
            } else {
                for(s=0; s<nset; s++)
                    var += sqr(val[s][i]-av);
                if (avbar_opt == avbarSTDDEV)
                    err = sqrt(var/nset);
                else
                    err = sqrt(var/(nset*(nset-1)));
                fprintf(fp," %g",err);
            }
        }
        fprintf(fp,"\n");
    }
    fclose(fp);

    if (avbar_opt == avbar90)
        sfree(tmp);
}
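
/* Summary comment for the routines below: anal_ee() evaluates the analytical
 * block-average error curve described in the -ee help text,
 *   ee(t) = sqrt( 2 a tau1/T ((exp(-t/tau1) - 1) tau1/t + 1)
 *               + 2 (1-a) tau2/T ((exp(-t/tau2) - 1) tau2/t + 1) ),
 * with parm[0] = tau1, parm[1] = a, parm[2] = tau2 and parm[3] = 1-a, the
 * convention used for fitparm[] in estimate_error(). anal_ee_inf() is its
 * t -> infinity limit, sqrt(2/T (a tau1 + (1-a) tau2)); estimate_error()
 * multiplies it by sig[s] to obtain the reported error estimate.
 */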
static real anal_ee_inf(real *parm,real T)
{
    return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
}

static real anal_ee(real *parm,real T,real t)
{
    real e1,e2;

    if (parm[0])
        e1 = exp(-t/parm[0]);
    else
        e1 = 1;
    if (parm[2])
        e2 = exp(-t/parm[2]);
    else
        e2 = 1;

    return sqrt(parm[1]*2*parm[0]/T*((e1 - 1)*parm[0]/t + 1) +
                parm[3]*2*parm[2]/T*((e2 - 1)*parm[2]/t + 1));
}
static void estimate_error(const char *eefile,int nb_min,int resol,int n,
                           int nset, double *av,double *sig,real **val,real dt,
                           bool bFitAc,bool bSingleExpFit,bool bAllowNegLTCorr,
                           const output_env_t oenv)
{
    FILE   *fp;
    int    bs,prev_bs,nbs,nb;
    real   spacing,nbr;
    int    s,i,j;
    double blav,var;
    char   **leg;
    real   *tbs,*ybs,rtmp,dens,*fitsig,twooe,tau1_est,tau_sig;
    real   fitparm[4];
    real   ee,a,tau1,tau2;

    if (n < 4)
    {
        fprintf(stdout,"The number of points is smaller than 4, can not make an error estimate\n");

        return;
    }

    fp = xvgropen(eefile,"Error estimates",
                  "Block size (time)","Error estimate", oenv);
    if (get_print_xvgr_codes(oenv))
    {
        fprintf(fp,
                "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
                (n-1)*dt,n);
    }
    snew(leg,2*nset);
    xvgr_legend(fp,2*nset,leg,oenv);
    sfree(leg);

    spacing = pow(2,1.0/resol);
    snew(tbs,n);
    snew(ybs,n);
    snew(fitsig,n);
    for(s=0; s<nset; s++)
    {
        nbs = 0;
        prev_bs = 0;
        nbr = nb_min;
        while (nbr <= n)
        {
            bs = n/(int)nbr;
            if (bs != prev_bs)
            {
                nb = n/bs;
                var = 0;
                for(i=0; i<nb; i++)
                {
                    blav = 0;
                    for (j=0; j<bs; j++)
                    {
                        blav += val[s][bs*i+j];
                    }
                    var += sqr(av[s] - blav/bs);
                }
                tbs[nbs] = bs*dt;
                if (sig[s] == 0)
                {
                    ybs[nbs] = 0;
                }
                else
                {
                    ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
                }
                nbs++;
            }
            nbr *= spacing;
            nb = (int)(nbr+0.5);
            prev_bs = bs;
        }
        if (sig[s] == 0)
        {
            ee   = 0;
            a    = 1;
            tau1 = 0;
            tau2 = 0;
        }
        else
        {
            for(i=0; i<nbs/2; i++)
            {
                rtmp         = tbs[i];
                tbs[i]       = tbs[nbs-1-i];
                tbs[nbs-1-i] = rtmp;
                rtmp         = ybs[i];
                ybs[i]       = ybs[nbs-1-i];
                ybs[nbs-1-i] = rtmp;
            }
            /* The initial slope of the normalized ybs^2 is 1.
             * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
             * From this we take our initial guess for tau1.
             */
            twooe = 2/exp(1);
            i = -1;
            do
            {
                i++;
                tau1_est = tbs[i];
            } while (i < nbs - 1 &&
                     (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));

            if (ybs[0] > ybs[1])
            {
                fprintf(stdout,"Data set %d has strange time correlations:\n"
                        "the std. error using single points is larger than that of blocks of 2 points\n"
                        "The error estimate might be inaccurate, check the fit\n",
                        s+1);
                /* Use the total time as tau for the fitting weights */
                tau_sig = (n - 1)*dt;
            }
            else
            {
                tau_sig = tau1_est;
            }

            if (debug)
            {
                fprintf(debug,"set %d tau1 estimate %f\n",s+1,tau1_est);
            }

            /* Generate more or less appropriate sigma's,
             * also taking the density of points into account.
             */
            for(i=0; i<nbs; i++)
            {
                if (i == 0)
                {
                    dens = tbs[1]/tbs[0] - 1;
                }
                else if (i == nbs-1)
                {
                    dens = tbs[nbs-1]/tbs[nbs-2] - 1;
                }
                else
                {
                    dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
                }
                fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
            }

            if (!bSingleExpFit)
            {
                fitparm[0] = tau1_est;
                fitparm[1] = 0.95;
                /* We set the initial guess for tau2
                 * to halfway between tau1_est and the total time (on log scale).
                 */
                fitparm[2] = sqrt(tau1_est*(n-1)*dt);
                do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,
                         bDebugMode(),effnERREST,fitparm,0);
                fitparm[3] = 1-fitparm[1];
            }
            if (bSingleExpFit || fitparm[0]<0 || fitparm[2]<0 || fitparm[1]<0
                || (fitparm[1]>1 && !bAllowNegLTCorr) || fitparm[2]>(n-1)*dt)
            {
                if (!bSingleExpFit)
                {
                    if (fitparm[2] > (n-1)*dt)
                    {
                        fprintf(stdout,
                                "Warning: tau2 is longer than the length of the data (%g)\n"
                                "         the statistics might be bad\n",
                                (n-1)*dt);
                    }
                    else
                    {
                        fprintf(stdout,"a fitted parameter is negative\n");
                    }
                    fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
                            sig[s]*anal_ee_inf(fitparm,n*dt),
                            fitparm[1],fitparm[0],fitparm[2]);
                    /* Do a fit with tau2 fixed at the total time.
                     * One could also choose any other large value for tau2.
                     */
                    fitparm[0] = tau1_est;
                    fitparm[1] = 0.95;
                    fitparm[2] = (n-1)*dt;
                    fprintf(stderr,"Will fix tau2 at the total time: %g\n",fitparm[2]);
                    do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
                             effnERREST,fitparm,4);
                    fitparm[3] = 1-fitparm[1];
                }
                if (bSingleExpFit || fitparm[0]<0 || fitparm[1]<0
                    || (fitparm[1]>1 && !bAllowNegLTCorr))
                {
                    if (!bSingleExpFit) {
                        fprintf(stdout,"a fitted parameter is negative\n");
                        fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
                                sig[s]*anal_ee_inf(fitparm,n*dt),
                                fitparm[1],fitparm[0],fitparm[2]);
                    }
                    /* Do a single exponential fit */
                    fprintf(stderr,"Will use a single exponential fit for set %d\n",s+1);
                    fitparm[0] = tau1_est;
                    fitparm[1] = 1.0;
                    fitparm[2] = 0.0;
                    do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
                             effnERREST,fitparm,6);
                    fitparm[3] = 1-fitparm[1];
                }
            }
            ee   = sig[s]*anal_ee_inf(fitparm,n*dt);
            a    = fitparm[1];
            tau1 = fitparm[0];
            tau2 = fitparm[2];
        }
        fprintf(stdout,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
                s+1,ee,a,tau1,tau2);
        fprintf(fp,"@ legend string %d \"av %f\"\n",2*s,av[s]);
        fprintf(fp,"@ legend string %d \"ee %6g\"\n",
                2*s+1,sig[s]*anal_ee_inf(fitparm,n*dt));
        for(i=0; i<nbs; i++)
        {
            fprintf(fp,"%g %g %g\n",tbs[i],sig[s]*sqrt(ybs[i]/(n*dt)),
                    sig[s]*sqrt(fit_function(effnERREST,fitparm,tbs[i])/(n*dt)));
        }

        if (bFitAc)
        {
            int  fitlen;
            real *ac,acint,ac_fit[4];

            snew(ac,n);
            for(i=0; i<n; i++) {
                ac[i] = val[s][i] - av[s];
                if (i > 0)
                    fitsig[i] = sqrt(i);
                else
                    fitsig[i] = 1;
            }
            low_do_autocorr(NULL,oenv,NULL,n,1,-1,&ac,
                            dt,eacNormal,1,FALSE,TRUE,
                            FALSE,0,0,effnNONE,0);

            fitlen = n/nb_min;

            /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
            acint = 0.5*ac[0];
            for(i=1; i<=fitlen/2; i++)
            {
                acint += ac[i];
            }
            acint *= dt;

            /* Generate more or less appropriate sigma's */
            for(i=0; i<=fitlen; i++)
            {
                fitsig[i] = sqrt(acint + dt*i);
            }

            ac_fit[0] = 0.5*acint;
            ac_fit[1] = 0.95;
            ac_fit[2] = 10*acint;
            do_lmfit(n/nb_min,ac,fitsig,dt,0,0,fitlen*dt,oenv,
                     bDebugMode(),effnEXP3,ac_fit,0);
            ac_fit[3] = 1 - ac_fit[1];

            fprintf(stdout,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
                    s+1,sig[s]*anal_ee_inf(ac_fit,n*dt),
                    ac_fit[1],ac_fit[0],ac_fit[2]);

            fprintf(fp,"&\n");
            for(i=0; i<nbs; i++)
            {
                fprintf(fp,"%g %g\n",tbs[i],
                        sig[s]*sqrt(fit_function(effnERREST,ac_fit,tbs[i])/(n*dt)));
            }

            sfree(ac);
        }
        if (s < nset-1)
        {
            fprintf(fp,"&\n");
        }
    }
    sfree(fitsig);
    sfree(ybs);
    sfree(tbs);
    fclose(fp);
}
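
/* Summary comment for the routine below: luzar_correl() passes g_hbond output
 * on to analyse_corr() (the -luzar option). Without error columns it takes
 * c(t) from set 0 and n(t) from set 2 and computes k(t) = -dc(t)/dt
 * numerically; with -xydy it expects the six columns
 * c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t).
 */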
static void luzar_correl(int nn,real *time,int nset,real **val,real temp,
                         bool bError,real fit_start,real smooth_tail_start,
                         const output_env_t oenv)
{
    const real tol = 1e-8;
    real *kt;
    real weight,d2;
    int  j;

    please_cite(stdout,"Spoel2006b");

    /* Compute negative derivative k(t) = -dc(t)/dt */
    if (!bError) {
        snew(kt,nn);
        compute_derivative(nn,time,val[0],kt);
        for(j=0; (j<nn); j++)
            kt[j] = -kt[j];
        if (debug) {
            d2 = 0;
            for(j=0; (j<nn); j++)
                d2 += sqr(kt[j] - val[3][j]);
            fprintf(debug,"RMS difference in derivatives is %g\n",sqrt(d2/nn));
        }
        analyse_corr(nn,time,val[0],val[2],kt,NULL,NULL,NULL,fit_start,
                     temp,smooth_tail_start,oenv);
        sfree(kt);
    }
    else if (nset == 6) {
        analyse_corr(nn,time,val[0],val[2],val[4],
                     val[1],val[3],val[5],fit_start,temp,smooth_tail_start,oenv);
    }
    else {
        printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
        printf("Not doing anything. Sorry.\n");
    }
}
static void filter(real flen,int n,int nset,real **val,real dt,
                   const output_env_t oenv)
{
    int    f,s,i,j;
    double *filt,sum,vf,fluc,fluctot;

    f = (int)(flen/(2*dt));
    snew(filt,f+1);
    filt[0] = 1;
    sum = 1;
    for(i=1; i<=f; i++) {
        filt[i] = cos(M_PI*dt*i/flen);
        sum += 2*filt[i];
    }
    for(i=0; i<=f; i++)
        filt[i] /= sum;
    fprintf(stdout,"Will calculate the fluctuation over %d points\n",n-2*f);
    fprintf(stdout," using a filter of length %g of %d points\n",flen,2*f+1);
    fluctot = 0;
    for(s=0; s<nset; s++) {
        fluc = 0;
        for(i=f; i<n-f; i++) {
            vf = filt[0]*val[s][i];
            for(j=1; j<=f; j++)
                vf += filt[j]*(val[s][i-j]+val[s][i+j]);
            fluc += sqr(val[s][i] - vf);
        }
        fluc /= n - 2*f;
        fluctot += fluc;
        fprintf(stdout,"Set %3d filtered fluctuation: %12.6e\n",s+1,sqrt(fluc));
    }
    fprintf(stdout,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot/nset));
    fprintf(stdout,"\n");

    sfree(filt);
}
static void do_fit(FILE *out,int n,bool bYdy,int ny,real *x0,real **val,
                   int npargs,t_pargs *ppa,const output_env_t oenv)
{
    real *c1=NULL,*sig=NULL,*fitparm;
    real dt=0,tendfit,tbeginfit;
    int  i,efitfn,nparm;

    efitfn = get_acffitfn();
    nparm = nfp_ffn[efitfn];
    fprintf(out,"Will fit to the following function:\n");
    fprintf(out,"%s\n",longs_ffn[efitfn]);
    c1 = val[n];
    if (bYdy) {
        c1  = val[n];
        sig = val[n+1];
        fprintf(out,"Using two columns as y and sigma values\n");
    } else {
        snew(sig,ny);
    }
    if (opt2parg_bSet("-beginfit",npargs,ppa)) {
        tbeginfit = opt2parg_real("-beginfit",npargs,ppa);
    } else {
        tbeginfit = x0 ? x0[0] : 0;
    }
    if (opt2parg_bSet("-endfit",npargs,ppa)) {
        tendfit = opt2parg_real("-endfit",npargs,ppa);
    } else {
        tendfit = x0 ? x0[ny-1] : (ny-1)*dt;
    }

    snew(fitparm,nparm);
    switch(efitfn) {
    case effnEXP1:
        fitparm[0] = 0.5;
        break;
    case effnEXP2:
        fitparm[0] = 0.5;
        fitparm[1] = c1[0];
        break;
    case effnEXP3:
        fitparm[0] = 1.0;
        fitparm[1] = 0.5*c1[0];
        fitparm[2] = 10.0;
        break;
    case effnEXP5:
        fitparm[0] = fitparm[2] = 0.5*c1[0];
        fitparm[1] = 10;
        fitparm[3] = 40;
        fitparm[4] = 0;
        break;
    case effnEXP7:
        fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
        fitparm[1] = 1;
        fitparm[3] = 10;
        fitparm[5] = 100;
        fitparm[6] = 0;
        break;
    case effnEXP9:
        fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
        fitparm[1] = 0.1;
        fitparm[3] = 1;
        fitparm[5] = 10;
        fitparm[7] = 100;
        fitparm[8] = 0;
        break;
    default:
        fprintf(out,"Warning: don't know how to initialize the parameters\n");
        for(i=0; (i<nparm); i++)
            fitparm[i] = 1;
    }
    fprintf(out,"Starting parameters:\n");
    for(i=0; (i<nparm); i++)
        fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
    if (do_lmfit(ny,c1,sig,dt,x0,tbeginfit,tendfit,
                 oenv,bDebugMode(),efitfn,fitparm,0)) {
        for(i=0; (i<nparm); i++)
            fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
    }
    else {
        fprintf(out,"No solution was found\n");
    }
}
int gmx_analyze(int argc,char *argv[])
{
    static const char *desc[] = {
        "g_analyze reads an ascii file and analyzes data sets.",
        "A line in the input file may start with a time",
        "(see option [TT]-time[tt]) and any number of y values may follow.",
        "Multiple sets can also be",
        "read when they are separated by & (option [TT]-n[tt]),",
        "in this case only one y value is read from each line.",
        "All lines starting with # and @ are skipped.",
        "All analyses can also be done for the derivative of a set",
        "(option [TT]-d[tt]).[PAR]",

        "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
        "points are equidistant in time.[PAR]",

        "g_analyze always shows the average and standard deviation of each",
        "set. For each set it also shows the relative deviation of the third",
        "and fourth cumulant from those of a Gaussian distribution with the same",
        "standard deviation.[PAR]",

        "Option [TT]-ac[tt] produces the autocorrelation function(s).[PAR]",

        "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
        "i/2 periods. The formula is:[BR]"
        "2 (int0-T y(t) cos(i pi t) dt)^2 / int0-T y(t) y(t) dt[BR]",
        "This is useful for principal components obtained from covariance",
        "analysis, since the principal components of random diffusion are",
        "pure cosines.[PAR]",

        "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",

        "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",

        "Option [TT]-av[tt] produces the average over the sets.",
        "Error bars can be added with the option [TT]-errbar[tt].",
        "The errorbars can represent the standard deviation, the error",
        "(assuming the points are independent) or the interval containing",
        "90% of the points, by discarding 5% of the points at the top and",
        "the bottom.[PAR]",

        "Option [TT]-ee[tt] produces error estimates using block averaging.",
        "A set is divided in a number of blocks and averages are calculated for",
        "each block. The error for the total average is calculated from",
        "the variance between averages of the m blocks B_i as follows:",
        "error^2 = Sum (B_i - <B>)^2 / (m*(m-1)).",
        "These errors are plotted as a function of the block size.",
        "Also an analytical block average curve is plotted, assuming",
        "that the autocorrelation is a sum of two exponentials.",
        "The analytical curve for the block average is:[BR]",
        "f(t) = sigma sqrt(2/T (  a   (tau1 ((exp(-t/tau1) - 1) tau1/t + 1)) +[BR]",
        "                       (1-a) (tau2 ((exp(-t/tau2) - 1) tau2/t + 1)))),[BR]"
        "where T is the total time.",
        "a, tau1 and tau2 are obtained by fitting f^2(t) to error^2.",
        "When the actual block average is very close to the analytical curve,",
        "the error is sigma*sqrt(2/T ( a tau1 + (1-a) tau2)).",
        "The complete derivation is given in",
        "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",

        "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
        "of each set and over all sets with respect to a filtered average.",
        "The filter is proportional to cos(pi t/len) where t goes from -len/2",
        "to len/2. len is supplied with the option [TT]-filter[tt].",
        "This filter reduces oscillations with period len/2 and len by a factor",
        "of 0.79 and 0.33 respectively.[PAR]",

        "Option [TT]-g[tt] fits the data to the function given with option",
        "[TT]-fitfn[tt].[PAR]",

        "Option [TT]-power[tt] fits the data to b t^a, which is accomplished",
        "by fitting to a t + b on log-log scale. All points after the first",
        "zero or negative value are ignored.[PAR]"

        "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
        "on output from [TT]g_hbond[tt]. The input file can be taken directly",
        "from [TT]g_hbond -ac[tt], and then the same result should be produced."
    };
    static real tb=-1,te=-1,frac=0.5,filtlen=0,binwidth=0.1,aver_start=0;
    static bool bHaveT=TRUE,bDer=FALSE,bSubAv=TRUE,bAverCorr=FALSE,bXYdy=FALSE;
    static bool bEESEF=FALSE,bEENLC=FALSE,bEeFitAc=FALSE,bPower=FALSE;
    static bool bIntegrate=FALSE,bRegression=FALSE,bLuzar=FALSE,bLuzarError=FALSE;
    static int  nsets_in=1,d=1,nb_min=4,resol=10;
    static real temp=298.15,fit_start=1,smooth_tail_start=-1;

    /* must correspond to enum avbar* declared at beginning of file */
    static const char *avbar_opt[avbarNR+1] = {
        NULL, "none", "stddev", "error", "90", NULL
    };
    t_pargs pa[] = {
        { "-time", FALSE, etBOOL, {&bHaveT},
          "Expect a time in the input" },
        { "-b", FALSE, etREAL, {&tb},
          "First time to read from set" },
        { "-e", FALSE, etREAL, {&te},
          "Last time to read from set" },
        { "-n", FALSE, etINT, {&nsets_in},
          "Read # sets separated by &" },
        { "-d", FALSE, etBOOL, {&bDer},
          "Use the derivative" },
        { "-dp", FALSE, etINT, {&d},
          "HIDDENThe derivative is the difference over # points" },
        { "-bw", FALSE, etREAL, {&binwidth},
          "Binwidth for the distribution" },
        { "-errbar", FALSE, etENUM, {avbar_opt},
          "Error bars for -av" },
        { "-integrate",FALSE,etBOOL, {&bIntegrate},
          "Integrate data function(s) numerically using trapezium rule" },
        { "-aver_start",FALSE, etREAL, {&aver_start},
          "Start averaging the integral from here" },
        { "-xydy", FALSE, etBOOL, {&bXYdy},
          "Interpret second data set as error in the y values for integrating" },
        { "-regression",FALSE,etBOOL,{&bRegression},
          "Perform a linear regression analysis on the data" },
        { "-luzar", FALSE, etBOOL, {&bLuzar},
          "Do a Luzar and Chandler analysis on a correlation function and related data as produced by g_hbond. When in addition the -xydy flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
        { "-temp", FALSE, etREAL, {&temp},
          "Temperature for the Luzar hydrogen bonding kinetics analysis" },
        { "-fitstart", FALSE, etREAL, {&fit_start},
          "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
        { "-smooth",FALSE, etREAL, {&smooth_tail_start},
          "If >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: y = A exp(-x/tau)" },
        { "-nbmin", FALSE, etINT, {&nb_min},
          "HIDDENMinimum number of blocks for block averaging" },
        { "-resol", FALSE, etINT, {&resol},
          "HIDDENResolution for the block averaging, block size increases with"
          " a factor 2^(1/#)" },
        { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
          "HIDDENAlways use a single exponential fit for the error estimate" },
        { "-eenlc", FALSE, etBOOL, {&bEENLC},
          "HIDDENAllow a negative long-time correlation" },
        { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
          "HIDDENAlso plot analytical block average using an autocorrelation fit" },
        { "-filter", FALSE, etREAL, {&filtlen},
          "Print the high-frequency fluctuation after filtering with a cosine filter of length #" },
        { "-power", FALSE, etBOOL, {&bPower},
          "Fit data to: b t^a" },
        { "-subav", FALSE, etBOOL, {&bSubAv},
          "Subtract the average before autocorrelating" },
        { "-oneacf", FALSE, etBOOL, {&bAverCorr},
          "Calculate one ACF over all sets" }
    };
#define NPA asize(pa)
    FILE     *out,*out_fit;
    int      n,nlast,s,nset,i,j=0;
    real     **val,*t,dt,tot,error;
    double   *av,*sig,cum1,cum2,cum3,cum4,db;
    const char *acfile,*msdfile,*ccfile,*distfile,*avfile,*eefile,*fitfile;
    output_env_t oenv;

    t_filenm fnm[] = {
        { efXVG, "-f",    "graph",    ffREAD   },
        { efXVG, "-ac",   "autocorr", ffOPTWR  },
        { efXVG, "-msd",  "msd",      ffOPTWR  },
        { efXVG, "-cc",   "coscont",  ffOPTWR  },
        { efXVG, "-dist", "distr",    ffOPTWR  },
        { efXVG, "-av",   "average",  ffOPTWR  },
        { efXVG, "-ee",   "errest",   ffOPTWR  },
        { efLOG, "-g",    "fitlog",   ffOPTWR  }
    };
#define NFILE asize(fnm)

    int     npargs;
    t_pargs *ppa;

    npargs = asize(pa);
    ppa    = add_acf_pargs(&npargs,pa);

    CopyRight(stderr,argv[0]);
    parse_common_args(&argc,argv,PCA_CAN_VIEW,
                      NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);
    acfile   = opt2fn_null("-ac",NFILE,fnm);
    msdfile  = opt2fn_null("-msd",NFILE,fnm);
    ccfile   = opt2fn_null("-cc",NFILE,fnm);
    distfile = opt2fn_null("-dist",NFILE,fnm);
    avfile   = opt2fn_null("-av",NFILE,fnm);
    eefile   = opt2fn_null("-ee",NFILE,fnm);
    if (opt2parg_bSet("-fitfn",npargs,ppa))
        fitfile = opt2fn("-g",NFILE,fnm);
    else
        fitfile = opt2fn_null("-g",NFILE,fnm);

    val = read_xvg_time(opt2fn("-f",NFILE,fnm),bHaveT,
                        opt2parg_bSet("-b",npargs,ppa),tb,
                        opt2parg_bSet("-e",npargs,ppa),te,
                        nsets_in,&nset,&n,&dt,&t);
    printf("Read %d sets of %d points, dt = %g\n\n",nset,n,dt);

    if (bDer) {
        printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
               d,d);
        n -= d;
        for(s=0; s<nset; s++)
            for(i=0; (i<n); i++)
                val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
    }
    if (bIntegrate) {
        real sum,stddev;
        printf("Calculating the integral using the trapezium rule\n");

        if (bXYdy) {
            sum = evaluate_integral(n,t,val[0],val[1],aver_start,&stddev);
            printf("Integral %10.3f +/- %10.5f\n",sum,stddev);
        }
        else {
            for(s=0; s<nset; s++) {
                sum = evaluate_integral(n,t,val[s],NULL,aver_start,&stddev);
                printf("Integral %d %10.5f +/- %10.5f\n",s+1,sum,stddev);
            }
        }
    }
    if (fitfile) {
        out_fit = ffopen(fitfile,"w");
        if (bXYdy && nset>=2) {
            do_fit(out_fit,0,TRUE,n,t,val,npargs,ppa,oenv);
        } else {
            for(s=0; s<nset; s++)
                do_fit(out_fit,s,FALSE,n,t,val,npargs,ppa,oenv);
        }
        fclose(out_fit);
    }
    printf("                                 std. dev.    relative deviation of\n");
    printf("                  standard       ---------   cumulants from those of\n");
    printf("set     average   deviation      sqrt(n-1)   a Gaussian distribution\n");
    printf("                                               cum. 3   cum. 4\n");
    snew(av,nset);
    snew(sig,nset);
    for(s=0; (s<nset); s++) {
        cum1 = 0;
        cum2 = 0;
        cum3 = 0;
        cum4 = 0;
        for(i=0; (i<n); i++)
            cum1 += val[s][i];
        cum1 /= n;
        for(i=0; (i<n); i++) {
            db = val[s][i]-cum1;
            cum2 += db*db;
            cum3 += db*db*db;
            cum4 += db*db*db*db;
        }
        cum2 /= n;
        cum3 /= n;
        cum4 /= n;
        av[s]  = cum1;
        sig[s] = sqrt(cum2);
        if (n > 1)
            error = sqrt(cum2/(n-1));
        else
            error = 0;
        printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
               s+1,av[s],sig[s],error,
               sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
               sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
    }
    printf("\n");
    if (filtlen)
        filter(filtlen,n,nset,val,dt,oenv);

    if (msdfile) {
        out = xvgropen(msdfile,"Mean square displacement",
                       "time","MSD (nm\\S2\\N)",oenv);
        nlast = (int)(n*frac);
        for(s=0; s<nset; s++) {
            for(j=0; j<=nlast; j++) {
                if (j % 100 == 0)
                    fprintf(stderr,"\r%d",j);
                tot = 0;
                for(i=0; i<n-j; i++)
                    tot += sqr(val[s][i]-val[s][i+j]);
                tot /= (real)(n-j);
                fprintf(out," %g %8g\n",dt*j,tot);
            }
            if (s<nset-1)
                fprintf(out,"&\n");
        }
        fclose(out);
        fprintf(stderr,"\r%d, time=%g\n",j-1,(j-1)*dt);
    }
    if (ccfile)
        plot_coscont(ccfile,n,nset,val,oenv);

    if (distfile)
        histogram(distfile,binwidth,n,nset,val,oenv);
    if (avfile)
        average(avfile,nenum(avbar_opt),n,nset,val,t);
    if (eefile)
        estimate_error(eefile,nb_min,resol,n,nset,av,sig,val,dt,
                       bEeFitAc,bEESEF,bEENLC,oenv);
    if (bPower)
        power_fit(n,nset,val,t);
    if (acfile) {
        if (bSubAv)
            for(s=0; s<nset; s++)
                for(i=0; i<n; i++)
                    val[s][i] -= av[s];
        do_autocorr(acfile,oenv,"Autocorrelation",n,nset,val,dt,
                    eacNormal,bAverCorr);
    }
    if (bRegression)
        regression_analysis(n,bXYdy,t,val);

    if (bLuzar)
        luzar_correl(n,t,nset,val,temp,bXYdy,fit_start,smooth_tail_start,oenv);

    view_all(oenv,NFILE, fnm);

    thanx(stderr);

    return 0;
}