Enforced rotation: fixed torque calculation for FLEX potential when using mass-weighting
[gromacs/adressmacs.git] / src / tools / gmx_analyze.c
blob4eaf3ea9a5d6c2ea9bf7bf7801a42996aec43866
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Green Red Orange Magenta Azure Cyan Skyblue
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 #include <math.h>
40 #include <string.h>
41 #include "statutil.h"
42 #include "sysstuff.h"
43 #include "typedefs.h"
44 #include "smalloc.h"
45 #include "macros.h"
46 #include "gmx_fatal.h"
47 #include "vec.h"
48 #include "copyrite.h"
49 #include "futil.h"
50 #include "readinp.h"
51 #include "statutil.h"
52 #include "txtdump.h"
53 #include "gstat.h"
54 #include "gmx_matrix.h"
55 #include "gmx_statistics.h"
56 #include "xvgr.h"
57 #include "gmx_ana.h"
58 #include "geminate.h"
60 /* must correspond to char *avbar_opt[] declared in main() */
61 enum { avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR };
63 static void power_fit(int n,int nset,real **val,real *t)
65 real *x,*y,quality,a,b,r;
66 int s,i;
68 snew(x,n);
69 snew(y,n);
71 if (t[0]>0) {
72 for(i=0; i<n; i++)
73 if (t[0]>0)
74 x[i] = log(t[i]);
75 } else {
76 fprintf(stdout,"First time is not larger than 0, using index number as time for power fit\n");
77 for(i=0; i<n; i++)
78 x[i] = log(i+1);
81 for(s=0; s<nset; s++) {
82 i=0;
83 for(i=0; i<n && val[s][i]>=0; i++)
84 y[i] = log(val[s][i]);
85 if (i < n)
86 fprintf(stdout,"Will power fit up to point %d, since it is not larger than 0\n",i);
87 lsq_y_ax_b(i,x,y,&a,&b,&r,&quality);
88 fprintf(stdout,"Power fit set %3d: error %.3f a %g b %g\n",
89 s+1,quality,a,exp(b));
92 sfree(y);
93 sfree(x);
96 static real cosine_content(int nhp,int n,real *y)
97 /* Assumes n equidistant points */
99 double fac,cosyint,yyint;
100 int i;
102 if (n < 2)
103 return 0;
105 fac = M_PI*nhp/(n-1);
107 cosyint = 0;
108 yyint = 0;
109 for(i=0; i<n; i++) {
110 cosyint += cos(fac*i)*y[i];
111 yyint += y[i]*y[i];
114 return 2*cosyint*cosyint/(n*yyint);
117 static void plot_coscont(const char *ccfile,int n,int nset,real **val,
118 const output_env_t oenv)
120 FILE *fp;
121 int s;
122 real cc;
124 fp = xvgropen(ccfile,"Cosine content","set / half periods","cosine content",
125 oenv);
127 for(s=0; s<nset; s++) {
128 cc = cosine_content(s+1,n,val[s]);
129 fprintf(fp," %d %g\n",s+1,cc);
130 fprintf(stdout,"Cosine content of set %d with %.1f periods: %g\n",
131 s+1,0.5*(s+1),cc);
133 fprintf(stdout,"\n");
135 ffclose(fp);
138 static void regression_analysis(int n,gmx_bool bXYdy,
139 real *x,int nset,real **val)
141 real S,chi2,a,b,da,db,r=0;
143 if (bXYdy || (nset == 1))
145 printf("Fitting data to a function f(x) = ax + b\n");
146 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
147 printf("Error estimates will be given if w_i (sigma) values are given\n");
148 printf("(use option -xydy).\n\n");
149 if (bXYdy)
150 lsq_y_ax_b_error(n,x,val[0],val[1],&a,&b,&da,&db,&r,&S);
151 else
152 lsq_y_ax_b(n,x,val[0],&a,&b,&r,&S);
153 chi2 = sqr((n-2)*S);
154 printf("Chi2 = %g\n",chi2);
155 printf("S (Sqrt(Chi2/(n-2)) = %g\n",S);
156 printf("Correlation coefficient = %.1f%%\n",100*r);
157 printf("\n");
158 if (bXYdy) {
159 printf("a = %g +/- %g\n",a,da);
160 printf("b = %g +/- %g\n",b,db);
162 else {
163 printf("a = %g\n",a);
164 printf("b = %g\n",b);
167 else
169 double chi2,*a,**xx,*y;
170 int i,j;
172 snew(y,n);
173 snew(xx,nset-1);
174 for(j=0; (j<nset-1); j++)
175 snew(xx[j],n);
176 for(i=0; (i<n); i++)
178 y[i] = val[0][i];
179 for(j=1; (j<nset); j++)
180 xx[j-1][i] = val[j][i];
182 snew(a,nset-1);
183 chi2 = multi_regression(NULL,n,y,nset-1,xx,a);
184 printf("Fitting %d data points in %d sets\n",n,nset-1);
185 printf("chi2 = %g\n",chi2);
186 printf("A =");
187 for(i=0; (i<nset-1); i++)
189 printf(" %g",a[i]);
190 sfree(xx[i]);
192 printf("\n");
193 sfree(xx);
194 sfree(y);
195 sfree(a);
199 void histogram(const char *distfile,real binwidth,int n, int nset, real **val,
200 const output_env_t oenv)
202 FILE *fp;
203 int i,s;
204 double min,max;
205 int nbin;
206 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
207 long long int *histo;
208 #else
209 double *histo;
210 #endif
212 min = val[0][0];
213 max = val[0][0];
214 for(s=0; s<nset; s++)
215 for(i=0; i<n; i++)
216 if (val[s][i] < min)
217 min = val[s][i];
218 else if (val[s][i] > max)
219 max = val[s][i];
221 min = binwidth*floor(min/binwidth);
222 max = binwidth*ceil(max/binwidth);
223 if (min != 0)
224 min -= binwidth;
225 max += binwidth;
227 nbin = (int)((max - min)/binwidth + 0.5) + 1;
228 fprintf(stderr,"Making distributions with %d bins\n",nbin);
229 snew(histo,nbin);
230 fp = xvgropen(distfile,"Distribution","","",oenv);
231 for(s=0; s<nset; s++) {
232 for(i=0; i<nbin; i++)
233 histo[i] = 0;
234 for(i=0; i<n; i++)
235 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
236 for(i=0; i<nbin; i++)
237 fprintf(fp," %g %g\n",min+i*binwidth,(double)histo[i]/(n*binwidth));
238 if (s < nset-1)
239 fprintf(fp,"&\n");
241 ffclose(fp);
244 static int real_comp(const void *a,const void *b)
246 real dif = *(real *)a - *(real *)b;
248 if (dif < 0)
249 return -1;
250 else if (dif > 0)
251 return 1;
252 else
253 return 0;
256 static void average(const char *avfile,int avbar_opt,
257 int n, int nset,real **val,real *t)
259 FILE *fp;
260 int i,s,edge=0;
261 double av,var,err;
262 real *tmp=NULL;
264 fp = ffopen(avfile,"w");
265 if ((avbar_opt == avbarERROR) && (nset == 1))
266 avbar_opt = avbarNONE;
267 if (avbar_opt != avbarNONE) {
268 if (avbar_opt == avbar90) {
269 snew(tmp,nset);
270 fprintf(fp,"@TYPE xydydy\n");
271 edge = (int)(nset*0.05+0.5);
272 fprintf(stdout,"Errorbars: discarding %d points on both sides: %d%%"
273 " interval\n",edge,(int)(100*(nset-2*edge)/nset+0.5));
274 } else
275 fprintf(fp,"@TYPE xydy\n");
278 for(i=0; i<n; i++) {
279 av = 0;
280 for(s=0; s<nset; s++)
281 av += val[s][i];
282 av /= nset;
283 fprintf(fp," %g %g",t[i],av);
284 var = 0;
285 if (avbar_opt != avbarNONE) {
286 if (avbar_opt == avbar90) {
287 for(s=0; s<nset; s++)
288 tmp[s] = val[s][i];
289 qsort(tmp,nset,sizeof(tmp[0]),real_comp);
290 fprintf(fp," %g %g",tmp[nset-1-edge]-av,av-tmp[edge]);
291 } else {
292 for(s=0; s<nset; s++)
293 var += sqr(val[s][i]-av);
294 if (avbar_opt == avbarSTDDEV)
295 err = sqrt(var/nset);
296 else
297 err = sqrt(var/(nset*(nset-1)));
298 fprintf(fp," %g",err);
301 fprintf(fp,"\n");
303 ffclose(fp);
305 if (avbar_opt == avbar90)
306 sfree(tmp);
309 static real anal_ee_inf(real *parm,real T)
311 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
314 static real anal_ee(real *parm,real T,real t)
316 real e1,e2;
318 if (parm[0])
319 e1 = exp(-t/parm[0]);
320 else
321 e1 = 1;
322 if (parm[2])
323 e2 = exp(-t/parm[2]);
324 else
325 e2 = 1;
327 return sqrt(parm[1]*2*parm[0]/T*((e1 - 1)*parm[0]/t + 1) +
328 parm[3]*2*parm[2]/T*((e2 - 1)*parm[2]/t + 1));
331 static void estimate_error(const char *eefile,int nb_min,int resol,int n,
332 int nset, double *av,double *sig,real **val,real dt,
333 gmx_bool bFitAc,gmx_bool bSingleExpFit,gmx_bool bAllowNegLTCorr,
334 const output_env_t oenv)
336 FILE *fp;
337 int bs,prev_bs,nbs,nb;
338 real spacing,nbr;
339 int s,i,j;
340 double blav,var;
341 char **leg;
342 real *tbs,*ybs,rtmp,dens,*fitsig,twooe,tau1_est,tau_sig;
343 real fitparm[4];
344 real ee,a,tau1,tau2;
346 if (n < 4)
348 fprintf(stdout,"The number of points is smaller than 4, can not make an error estimate\n");
350 return;
353 fp = xvgropen(eefile,"Error estimates",
354 "Block size (time)","Error estimate", oenv);
355 if (output_env_get_print_xvgr_codes(oenv))
357 fprintf(fp,
358 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
359 (n-1)*dt,n);
361 snew(leg,2*nset);
362 xvgr_legend(fp,2*nset,(const char**)leg,oenv);
363 sfree(leg);
365 spacing = pow(2,1.0/resol);
366 snew(tbs,n);
367 snew(ybs,n);
368 snew(fitsig,n);
369 for(s=0; s<nset; s++)
371 nbs = 0;
372 prev_bs = 0;
373 nbr = nb_min;
374 while (nbr <= n)
376 bs = n/(int)nbr;
377 if (bs != prev_bs)
379 nb = n/bs;
380 var = 0;
381 for(i=0; i<nb; i++)
383 blav=0;
384 for (j=0; j<bs; j++)
386 blav += val[s][bs*i+j];
388 var += sqr(av[s] - blav/bs);
390 tbs[nbs] = bs*dt;
391 if (sig[s] == 0)
393 ybs[nbs] = 0;
395 else
397 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
399 nbs++;
401 nbr *= spacing;
402 nb = (int)(nbr+0.5);
403 prev_bs = bs;
405 if (sig[s] == 0)
407 ee = 0;
408 a = 1;
409 tau1 = 0;
410 tau2 = 0;
412 else
414 for(i=0; i<nbs/2; i++)
416 rtmp = tbs[i];
417 tbs[i] = tbs[nbs-1-i];
418 tbs[nbs-1-i] = rtmp;
419 rtmp = ybs[i];
420 ybs[i] = ybs[nbs-1-i];
421 ybs[nbs-1-i] = rtmp;
423 /* The initial slope of the normalized ybs^2 is 1.
424 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
425 * From this we take our initial guess for tau1.
427 twooe = 2/exp(1);
428 i = -1;
431 i++;
432 tau1_est = tbs[i];
433 } while (i < nbs - 1 &&
434 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
436 if (ybs[0] > ybs[1])
438 fprintf(stdout,"Data set %d has strange time correlations:\n"
439 "the std. error using single points is larger than that of blocks of 2 points\n"
440 "The error estimate might be inaccurate, check the fit\n",
441 s+1);
442 /* Use the total time as tau for the fitting weights */
443 tau_sig = (n - 1)*dt;
445 else
447 tau_sig = tau1_est;
450 if (debug)
452 fprintf(debug,"set %d tau1 estimate %f\n",s+1,tau1_est);
455 /* Generate more or less appropriate sigma's,
456 * also taking the density of points into account.
458 for(i=0; i<nbs; i++)
460 if (i == 0)
462 dens = tbs[1]/tbs[0] - 1;
464 else if (i == nbs-1)
466 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
468 else
470 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
472 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
475 if (!bSingleExpFit)
477 fitparm[0] = tau1_est;
478 fitparm[1] = 0.95;
479 /* We set the initial guess for tau2
480 * to halfway between tau1_est and the total time (on log scale).
482 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
483 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,
484 bDebugMode(),effnERREST,fitparm,0);
485 fitparm[3] = 1-fitparm[1];
487 if (bSingleExpFit || fitparm[0]<0 || fitparm[2]<0 || fitparm[1]<0
488 || (fitparm[1]>1 && !bAllowNegLTCorr) || fitparm[2]>(n-1)*dt)
490 if (!bSingleExpFit)
492 if (fitparm[2] > (n-1)*dt)
494 fprintf(stdout,
495 "Warning: tau2 is longer than the length of the data (%g)\n"
496 " the statistics might be bad\n",
497 (n-1)*dt);
499 else
501 fprintf(stdout,"a fitted parameter is negative\n");
503 fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
504 sig[s]*anal_ee_inf(fitparm,n*dt),
505 fitparm[1],fitparm[0],fitparm[2]);
506 /* Do a fit with tau2 fixed at the total time.
507 * One could also choose any other large value for tau2.
509 fitparm[0] = tau1_est;
510 fitparm[1] = 0.95;
511 fitparm[2] = (n-1)*dt;
512 fprintf(stderr,"Will fix tau2 at the total time: %g\n",fitparm[2]);
513 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
514 effnERREST,fitparm,4);
515 fitparm[3] = 1-fitparm[1];
517 if (bSingleExpFit || fitparm[0]<0 || fitparm[1]<0
518 || (fitparm[1]>1 && !bAllowNegLTCorr))
520 if (!bSingleExpFit) {
521 fprintf(stdout,"a fitted parameter is negative\n");
522 fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
523 sig[s]*anal_ee_inf(fitparm,n*dt),
524 fitparm[1],fitparm[0],fitparm[2]);
526 /* Do a single exponential fit */
527 fprintf(stderr,"Will use a single exponential fit for set %d\n",s+1);
528 fitparm[0] = tau1_est;
529 fitparm[1] = 1.0;
530 fitparm[2] = 0.0;
531 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
532 effnERREST,fitparm,6);
533 fitparm[3] = 1-fitparm[1];
536 ee = sig[s]*anal_ee_inf(fitparm,n*dt);
537 a = fitparm[1];
538 tau1 = fitparm[0];
539 tau2 = fitparm[2];
541 fprintf(stdout,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
542 s+1,ee,a,tau1,tau2);
543 fprintf(fp,"@ legend string %d \"av %f\"\n",2*s,av[s]);
544 fprintf(fp,"@ legend string %d \"ee %6g\"\n",
545 2*s+1,sig[s]*anal_ee_inf(fitparm,n*dt));
546 for(i=0; i<nbs; i++)
548 fprintf(fp,"%g %g %g\n",tbs[i],sig[s]*sqrt(ybs[i]/(n*dt)),
549 sig[s]*sqrt(fit_function(effnERREST,fitparm,tbs[i])/(n*dt)));
552 if (bFitAc)
554 int fitlen;
555 real *ac,acint,ac_fit[4];
557 snew(ac,n);
558 for(i=0; i<n; i++) {
559 ac[i] = val[s][i] - av[s];
560 if (i > 0)
561 fitsig[i] = sqrt(i);
562 else
563 fitsig[i] = 1;
565 low_do_autocorr(NULL,oenv,NULL,n,1,-1,&ac,
566 dt,eacNormal,1,FALSE,TRUE,
567 FALSE,0,0,effnNONE,0);
569 fitlen = n/nb_min;
571 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
572 acint = 0.5*ac[0];
573 for(i=1; i<=fitlen/2; i++)
575 acint += ac[i];
577 acint *= dt;
579 /* Generate more or less appropriate sigma's */
580 for(i=0; i<=fitlen; i++)
582 fitsig[i] = sqrt(acint + dt*i);
585 ac_fit[0] = 0.5*acint;
586 ac_fit[1] = 0.95;
587 ac_fit[2] = 10*acint;
588 do_lmfit(n/nb_min,ac,fitsig,dt,0,0,fitlen*dt,oenv,
589 bDebugMode(),effnEXP3,ac_fit,0);
590 ac_fit[3] = 1 - ac_fit[1];
592 fprintf(stdout,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
593 s+1,sig[s]*anal_ee_inf(ac_fit,n*dt),
594 ac_fit[1],ac_fit[0],ac_fit[2]);
596 fprintf(fp,"&\n");
597 for(i=0; i<nbs; i++)
599 fprintf(fp,"%g %g\n",tbs[i],
600 sig[s]*sqrt(fit_function(effnERREST,ac_fit,tbs[i]))/(n*dt));
603 sfree(ac);
605 if (s < nset-1)
607 fprintf(fp,"&\n");
610 sfree(fitsig);
611 sfree(ybs);
612 sfree(tbs);
613 ffclose(fp);
616 static void luzar_correl(int nn,real *time,int nset,real **val,real temp,
617 gmx_bool bError,real fit_start,real smooth_tail_start,
618 const output_env_t oenv)
620 const real tol = 1e-8;
621 real *kt;
622 real weight,d2;
623 int j;
625 please_cite(stdout,"Spoel2006b");
627 /* Compute negative derivative k(t) = -dc(t)/dt */
628 if (!bError) {
629 snew(kt,nn);
630 compute_derivative(nn,time,val[0],kt);
631 for(j=0; (j<nn); j++)
632 kt[j] = -kt[j];
633 if (debug) {
634 d2 = 0;
635 for(j=0; (j<nn); j++)
636 d2 += sqr(kt[j] - val[3][j]);
637 fprintf(debug,"RMS difference in derivatives is %g\n",sqrt(d2/nn));
639 analyse_corr(nn,time,val[0],val[2],kt,NULL,NULL,NULL,fit_start,
640 temp,smooth_tail_start,oenv);
641 sfree(kt);
643 else if (nset == 6) {
644 analyse_corr(nn,time,val[0],val[2],val[4],
645 val[1],val[3],val[5],fit_start,temp,smooth_tail_start,oenv);
647 else {
648 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
649 printf("Not doing anything. Sorry.\n");
653 static void filter(real flen,int n,int nset,real **val,real dt,
654 const output_env_t oenv)
656 int f,s,i,j;
657 double *filt,sum,vf,fluc,fluctot;
659 f = (int)(flen/(2*dt));
660 snew(filt,f+1);
661 filt[0] = 1;
662 sum = 1;
663 for(i=1; i<=f; i++) {
664 filt[i] = cos(M_PI*dt*i/flen);
665 sum += 2*filt[i];
667 for(i=0; i<=f; i++)
668 filt[i] /= sum;
669 fprintf(stdout,"Will calculate the fluctuation over %d points\n",n-2*f);
670 fprintf(stdout," using a filter of length %g of %d points\n",flen,2*f+1);
671 fluctot = 0;
672 for(s=0; s<nset; s++) {
673 fluc = 0;
674 for(i=f; i<n-f; i++) {
675 vf = filt[0]*val[s][i];
676 for(j=1; j<=f; j++)
677 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
678 fluc += sqr(val[s][i] - vf);
680 fluc /= n - 2*f;
681 fluctot += fluc;
682 fprintf(stdout,"Set %3d filtered fluctuation: %12.6e\n",s+1,sqrt(fluc));
684 fprintf(stdout,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot/nset));
685 fprintf(stdout,"\n");
687 sfree(filt);
690 static void do_fit(FILE *out,int n,gmx_bool bYdy,
691 int ny,real *x0,real **val,
692 int npargs,t_pargs *ppa,const output_env_t oenv)
694 real *c1=NULL,*sig=NULL,*fitparm;
695 real tendfit,tbeginfit;
696 int i,efitfn,nparm;
698 efitfn = get_acffitfn();
699 nparm = nfp_ffn[efitfn];
700 fprintf(out,"Will fit to the following function:\n");
701 fprintf(out,"%s\n",longs_ffn[efitfn]);
702 c1 = val[n];
703 if (bYdy) {
704 c1 = val[n];
705 sig = val[n+1];
706 fprintf(out,"Using two columns as y and sigma values\n");
707 } else {
708 snew(sig,ny);
710 if (opt2parg_bSet("-beginfit",npargs,ppa)) {
711 tbeginfit = opt2parg_real("-beginfit",npargs,ppa);
712 } else {
713 tbeginfit = x0[0];
715 if (opt2parg_bSet("-endfit",npargs,ppa)) {
716 tendfit = opt2parg_real("-endfit",npargs,ppa);
717 } else {
718 tendfit = x0[ny-1];
721 snew(fitparm,nparm);
722 switch(efitfn) {
723 case effnEXP1:
724 fitparm[0] = 0.5;
725 break;
726 case effnEXP2:
727 fitparm[0] = 0.5;
728 fitparm[1] = c1[0];
729 break;
730 case effnEXP3:
731 fitparm[0] = 1.0;
732 fitparm[1] = 0.5*c1[0];
733 fitparm[2] = 10.0;
734 break;
735 case effnEXP5:
736 fitparm[0] = fitparm[2] = 0.5*c1[0];
737 fitparm[1] = 10;
738 fitparm[3] = 40;
739 fitparm[4] = 0;
740 break;
741 case effnEXP7:
742 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
743 fitparm[1] = 1;
744 fitparm[3] = 10;
745 fitparm[5] = 100;
746 fitparm[6] = 0;
747 break;
748 case effnEXP9:
749 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
750 fitparm[1] = 0.1;
751 fitparm[3] = 1;
752 fitparm[5] = 10;
753 fitparm[7] = 100;
754 fitparm[8] = 0;
755 break;
756 default:
757 fprintf(out,"Warning: don't know how to initialize the parameters\n");
758 for(i=0; (i<nparm); i++)
759 fitparm[i] = 1;
761 fprintf(out,"Starting parameters:\n");
762 for(i=0; (i<nparm); i++)
763 fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
764 if (do_lmfit(ny,c1,sig,0,x0,tbeginfit,tendfit,
765 oenv,bDebugMode(),efitfn,fitparm,0)) {
766 for(i=0; (i<nparm); i++)
767 fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
769 else {
770 fprintf(out,"No solution was found\n");
774 static void do_ballistic(const char *balFile, int nData,
775 real *t, real **val, int nSet,
776 real balTime, int nBalExp,
777 gmx_bool bDerivative,
778 const output_env_t oenv)
780 double **ctd=NULL, *td=NULL;
781 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp, bDerivative);
782 static char *leg[] = {"Ac'(t)"};
783 FILE *fp;
784 int i, set;
786 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
788 snew(ctd, nSet);
789 snew(td, nData);
791 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv);
792 xvgr_legend(fp,asize(leg),(const char**)leg,oenv);
794 for (set=0; set<nSet; set++)
796 snew(ctd[set], nData);
797 for (i=0; i<nData; i++) {
798 ctd[set][i] = (double)val[set][i];
799 if (set==0)
800 td[i] = (double)t[i];
803 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
806 for (i=0; i<nData; i++)
808 fprintf(fp, " %g",t[i]);
809 for (set=0; set<nSet; set++)
811 fprintf(fp, " %g", ctd[set][i]);
813 fprintf(fp, "\n");
817 for (set=0; set<nSet; set++)
818 sfree(ctd[set]);
819 sfree(ctd);
820 sfree(td);
822 else
823 printf("Number of data points is less than the number of parameters to fit\n."
824 "The system is underdetermined, hence no ballistic term can be found.\n\n");
827 static void do_geminate(const char *gemFile, int nData,
828 real *t, real **val, int nSet,
829 const real D, const real rcut, const real balTime,
830 const int nFitPoints, const real begFit, const real endFit,
831 const output_env_t oenv)
833 double **ctd=NULL, **ctdGem=NULL, *td=NULL;
834 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
835 begFit, endFit, balTime, 1, FALSE);
836 const char *leg[] = {"Ac\\sgem\\N(t)"};
837 FILE *fp;
838 int i, set;
840 snew(ctd, nSet);
841 snew(ctdGem, nSet);
842 snew(td, nData);
844 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv);
845 xvgr_legend(fp,asize(leg),leg,oenv);
847 for (set=0; set<nSet; set++)
849 snew(ctd[set], nData);
850 snew(ctdGem[set], nData);
851 for (i=0; i<nData; i++) {
852 ctd[set][i] = (double)val[set][i];
853 if (set==0)
854 td[i] = (double)t[i];
856 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
859 for (i=0; i<nData; i++)
861 fprintf(fp, " %g",t[i]);
862 for (set=0; set<nSet; set++)
864 fprintf(fp, " %g", ctdGem[set][i]);
866 fprintf(fp, "\n");
869 for (set=0; set<nSet; set++)
871 sfree(ctd[set]);
872 sfree(ctdGem[set]);
874 sfree(ctd);
875 sfree(ctdGem);
876 sfree(td);
879 int gmx_analyze(int argc,char *argv[])
881 static const char *desc[] = {
882 "g_analyze reads an ascii file and analyzes data sets.",
883 "A line in the input file may start with a time",
884 "(see option [TT]-time[tt]) and any number of y values may follow.",
885 "Multiple sets can also be",
886 "read when they are separated by & (option [TT]-n[tt]),",
887 "in this case only one y value is read from each line.",
888 "All lines starting with # and @ are skipped.",
889 "All analyses can also be done for the derivative of a set",
890 "(option [TT]-d[tt]).[PAR]",
892 "All options, except for [TT]-av[tt] and [TT]-power[tt] assume that the",
893 "points are equidistant in time.[PAR]",
895 "g_analyze always shows the average and standard deviation of each",
896 "set. For each set it also shows the relative deviation of the third",
897 "and fourth cumulant from those of a Gaussian distribution with the same",
898 "standard deviation.[PAR]",
900 "Option [TT]-ac[tt] produces the autocorrelation function(s).[PAR]",
902 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
903 "i/2 periods. The formula is:[BR]"
904 "2 (int0-T y(t) cos(i pi t) dt)^2 / int0-T y(t) y(t) dt[BR]",
905 "This is useful for principal components obtained from covariance",
906 "analysis, since the principal components of random diffusion are",
907 "pure cosines.[PAR]",
909 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
911 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
913 "Option [TT]-av[tt] produces the average over the sets.",
914 "Error bars can be added with the option [TT]-errbar[tt].",
915 "The errorbars can represent the standard deviation, the error",
916 "(assuming the points are independent) or the interval containing",
917 "90% of the points, by discarding 5% of the points at the top and",
918 "the bottom.[PAR]",
920 "Option [TT]-ee[tt] produces error estimates using block averaging.",
921 "A set is divided in a number of blocks and averages are calculated for",
922 "each block. The error for the total average is calculated from",
923 "the variance between averages of the m blocks B_i as follows:",
924 "error^2 = Sum (B_i - <B>)^2 / (m*(m-1)).",
925 "These errors are plotted as a function of the block size.",
926 "Also an analytical block average curve is plotted, assuming",
927 "that the autocorrelation is a sum of two exponentials.",
928 "The analytical curve for the block average is:[BR]",
929 "f(t) = sigma sqrt(2/T ( a (tau1 ((exp(-t/tau1) - 1) tau1/t + 1)) +[BR]",
930 " (1-a) (tau2 ((exp(-t/tau2) - 1) tau2/t + 1)))),[BR]"
931 "where T is the total time.",
932 "a, tau1 and tau2 are obtained by fitting f^2(t) to error^2.",
933 "When the actual block average is very close to the analytical curve,",
934 "the error is sigma*sqrt(2/T (a tau1 + (1-a) tau2)).",
935 "The complete derivation is given in",
936 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
938 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
939 "component from a hydrogen bond autocorrelation function by the fitting",
940 "of a sum of exponentials, as described in e.g.",
941 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
942 "is the one with the most negative coefficient in the exponential,",
943 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
944 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
946 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
947 "(and optionally kD) to the hydrogen bond autocorrelation function",
948 "according to the reversible geminate recombination model. Removal of",
949 "the ballistic component first is strongly adviced. The model is presented in",
950 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
952 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
953 "of each set and over all sets with respect to a filtered average.",
954 "The filter is proportional to cos(pi t/len) where t goes from -len/2",
955 "to len/2. len is supplied with the option [TT]-filter[tt].",
956 "This filter reduces oscillations with period len/2 and len by a factor",
957 "of 0.79 and 0.33 respectively.[PAR]",
959 "Option [TT]-g[tt] fits the data to the function given with option",
960 "[TT]-fitfn[tt].[PAR]",
962 "Option [TT]-power[tt] fits the data to b t^a, which is accomplished",
963 "by fitting to a t + b on log-log scale. All points after the first",
964 "zero or negative value are ignored.[PAR]"
966 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
967 "on output from [TT]g_hbond[tt]. The input file can be taken directly",
968 "from [TT]g_hbond -ac[tt], and then the same result should be produced."
970 static real tb=-1,te=-1,frac=0.5,filtlen=0,binwidth=0.1,aver_start=0;
971 static gmx_bool bHaveT=TRUE,bDer=FALSE,bSubAv=TRUE,bAverCorr=FALSE,bXYdy=FALSE;
972 static gmx_bool bEESEF=FALSE,bEENLC=FALSE,bEeFitAc=FALSE,bPower=FALSE;
973 static gmx_bool bIntegrate=FALSE,bRegression=FALSE,bLuzar=FALSE,bLuzarError=FALSE;
974 static int nsets_in=1,d=1,nb_min=4,resol=10, nBalExp=4, nFitPoints=100;
975 static real temp=298.15,fit_start=1, fit_end=60, smooth_tail_start=-1, balTime=0.2, diffusion=5e-5,rcut=0.35;
977 /* must correspond to enum avbar* declared at beginning of file */
978 static const char *avbar_opt[avbarNR+1] = {
979 NULL, "none", "stddev", "error", "90", NULL
982 t_pargs pa[] = {
983 { "-time", FALSE, etBOOL, {&bHaveT},
984 "Expect a time in the input" },
985 { "-b", FALSE, etREAL, {&tb},
986 "First time to read from set" },
987 { "-e", FALSE, etREAL, {&te},
988 "Last time to read from set" },
989 { "-n", FALSE, etINT, {&nsets_in},
990 "Read # sets separated by &" },
991 { "-d", FALSE, etBOOL, {&bDer},
992 "Use the derivative" },
993 { "-dp", FALSE, etINT, {&d},
994 "HIDDENThe derivative is the difference over # points" },
995 { "-bw", FALSE, etREAL, {&binwidth},
996 "Binwidth for the distribution" },
997 { "-errbar", FALSE, etENUM, {avbar_opt},
998 "Error bars for -av" },
999 { "-integrate",FALSE,etBOOL, {&bIntegrate},
1000 "Integrate data function(s) numerically using trapezium rule" },
1001 { "-aver_start",FALSE, etREAL, {&aver_start},
1002 "Start averaging the integral from here" },
1003 { "-xydy", FALSE, etBOOL, {&bXYdy},
1004 "Interpret second data set as error in the y values for integrating" },
1005 { "-regression",FALSE,etBOOL,{&bRegression},
1006 "Perform a linear regression analysis on the data. If -xydy is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize chi^2 = (y - A0 x0 - A1 x1 - ... - AN xN)^2 where now Y is the first data set in the input file and xi the others. Do read the information at the option [TT]-time[tt]." },
1007 { "-luzar", FALSE, etBOOL, {&bLuzar},
1008 "Do a Luzar and Chandler analysis on a correlation function and related as produced by g_hbond. When in addition the -xydy flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1009 { "-temp", FALSE, etREAL, {&temp},
1010 "Temperature for the Luzar hydrogen bonding kinetics analysis" },
1011 { "-fitstart", FALSE, etREAL, {&fit_start},
1012 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1013 { "-fitend", FALSE, etREAL, {&fit_end},
1014 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with -gem" },
1015 { "-smooth",FALSE, etREAL, {&smooth_tail_start},
1016 "If >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: y = A exp(-x/tau)" },
1017 { "-nbmin", FALSE, etINT, {&nb_min},
1018 "HIDDENMinimum number of blocks for block averaging" },
1019 { "-resol", FALSE, etINT, {&resol},
1020 "HIDDENResolution for the block averaging, block size increases with"
1021 " a factor 2^(1/#)" },
1022 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1023 "HIDDENAlways use a single exponential fit for the error estimate" },
1024 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1025 "HIDDENAllow a negative long-time correlation" },
1026 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1027 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1028 { "-filter", FALSE, etREAL, {&filtlen},
1029 "Print the high-frequency fluctuation after filtering with a cosine filter of length #" },
1030 { "-power", FALSE, etBOOL, {&bPower},
1031 "Fit data to: b t^a" },
1032 { "-subav", FALSE, etBOOL, {&bSubAv},
1033 "Subtract the average before autocorrelating" },
1034 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1035 "Calculate one ACF over all sets" },
1036 { "-nbalexp", FALSE, etINT, {&nBalExp},
1037 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1038 { "-baltime", FALSE, etREAL, {&balTime},
1039 "HIDDENTime up to which the ballistic component will be fitted" },
1040 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1041 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1042 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1043 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1044 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1045 /* "What type of gminate recombination to use"}, */
1046 /* { "-D", FALSE, etREAL, {&diffusion}, */
1047 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1049 #define NPA asize(pa)
1051 FILE *out,*out_fit;
1052 int n,nlast,s,nset,i,j=0;
1053 real **val,*t,dt,tot,error;
1054 double *av,*sig,cum1,cum2,cum3,cum4,db;
1055 const char *acfile,*msdfile,*ccfile,*distfile,*avfile,*eefile,*balfile,*gemfile,*fitfile;
1056 output_env_t oenv;
1058 t_filenm fnm[] = {
1059 { efXVG, "-f", "graph", ffREAD },
1060 { efXVG, "-ac", "autocorr", ffOPTWR },
1061 { efXVG, "-msd", "msd", ffOPTWR },
1062 { efXVG, "-cc", "coscont", ffOPTWR },
1063 { efXVG, "-dist", "distr", ffOPTWR },
1064 { efXVG, "-av", "average", ffOPTWR },
1065 { efXVG, "-ee", "errest", ffOPTWR },
1066 { efXVG, "-bal", "ballisitc",ffOPTWR },
1067 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1068 { efLOG, "-g", "fitlog", ffOPTWR }
1070 #define NFILE asize(fnm)
1072 int npargs;
1073 t_pargs *ppa;
1075 npargs = asize(pa);
1076 ppa = add_acf_pargs(&npargs,pa);
1078 CopyRight(stderr,argv[0]);
1079 parse_common_args(&argc,argv,PCA_CAN_VIEW,
1080 NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);
1082 acfile = opt2fn_null("-ac",NFILE,fnm);
1083 msdfile = opt2fn_null("-msd",NFILE,fnm);
1084 ccfile = opt2fn_null("-cc",NFILE,fnm);
1085 distfile = opt2fn_null("-dist",NFILE,fnm);
1086 avfile = opt2fn_null("-av",NFILE,fnm);
1087 eefile = opt2fn_null("-ee",NFILE,fnm);
1088 balfile = opt2fn_null("-bal",NFILE,fnm);
1089 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1090 /* When doing autocorrelation we don't want a fitlog for fitting
1091 * the function itself (not the acf) when the user did not ask for it.
1093 if (opt2parg_bSet("-fitfn",npargs,ppa) && acfile == NULL)
1095 fitfile = opt2fn("-g",NFILE,fnm);
1097 else
1099 fitfile = opt2fn_null("-g",NFILE,fnm);
1102 val = read_xvg_time(opt2fn("-f",NFILE,fnm),bHaveT,
1103 opt2parg_bSet("-b",npargs,ppa),tb,
1104 opt2parg_bSet("-e",npargs,ppa),te,
1105 nsets_in,&nset,&n,&dt,&t);
1106 printf("Read %d sets of %d points, dt = %g\n\n",nset,n,dt);
1108 if (bDer)
1110 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1111 d,d);
1112 n -= d;
1113 for(s=0; s<nset; s++)
1115 for(i=0; (i<n); i++)
1117 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1122 if (bIntegrate)
1124 real sum,stddev;
1126 printf("Calculating the integral using the trapezium rule\n");
1128 if (bXYdy)
1130 sum = evaluate_integral(n,t,val[0],val[1],aver_start,&stddev);
1131 printf("Integral %10.3f +/- %10.5f\n",sum,stddev);
1133 else
1135 for(s=0; s<nset; s++)
1137 sum = evaluate_integral(n,t,val[s],NULL,aver_start,&stddev);
1138 printf("Integral %d %10.5f +/- %10.5f\n",s+1,sum,stddev);
1143 if (fitfile != NULL)
1145 out_fit = ffopen(fitfile,"w");
1146 if (bXYdy && nset >= 2)
1148 do_fit(out_fit,0,TRUE,n,t,val,npargs,ppa,oenv);
1150 else
1152 for(s=0; s<nset; s++)
1154 do_fit(out_fit,s,FALSE,n,t,val,npargs,ppa,oenv);
1157 ffclose(out_fit);
1160 printf(" std. dev. relative deviation of\n");
1161 printf(" standard --------- cumulants from those of\n");
1162 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1163 printf(" cum. 3 cum. 4\n");
1164 snew(av,nset);
1165 snew(sig,nset);
1166 for(s=0; (s<nset); s++) {
1167 cum1 = 0;
1168 cum2 = 0;
1169 cum3 = 0;
1170 cum4 = 0;
1171 for(i=0; (i<n); i++)
1172 cum1 += val[s][i];
1173 cum1 /= n;
1174 for(i=0; (i<n); i++) {
1175 db = val[s][i]-cum1;
1176 cum2 += db*db;
1177 cum3 += db*db*db;
1178 cum4 += db*db*db*db;
1180 cum2 /= n;
1181 cum3 /= n;
1182 cum4 /= n;
1183 av[s] = cum1;
1184 sig[s] = sqrt(cum2);
1185 if (n > 1)
1186 error = sqrt(cum2/(n-1));
1187 else
1188 error = 0;
1189 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1190 s+1,av[s],sig[s],error,
1191 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1192 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1194 printf("\n");
1196 if (filtlen)
1197 filter(filtlen,n,nset,val,dt,oenv);
1199 if (msdfile) {
1200 out=xvgropen(msdfile,"Mean square displacement",
1201 "time","MSD (nm\\S2\\N)",oenv);
1202 nlast = (int)(n*frac);
1203 for(s=0; s<nset; s++) {
1204 for(j=0; j<=nlast; j++) {
1205 if (j % 100 == 0)
1206 fprintf(stderr,"\r%d",j);
1207 tot=0;
1208 for(i=0; i<n-j; i++)
1209 tot += sqr(val[s][i]-val[s][i+j]);
1210 tot /= (real)(n-j);
1211 fprintf(out," %g %8g\n",dt*j,tot);
1213 if (s<nset-1)
1214 fprintf(out,"&\n");
1216 ffclose(out);
1217 fprintf(stderr,"\r%d, time=%g\n",j-1,(j-1)*dt);
1219 if (ccfile)
1220 plot_coscont(ccfile,n,nset,val,oenv);
1222 if (distfile)
1223 histogram(distfile,binwidth,n,nset,val,oenv);
1224 if (avfile)
1225 average(avfile,nenum(avbar_opt),n,nset,val,t);
1226 if (eefile)
1227 estimate_error(eefile,nb_min,resol,n,nset,av,sig,val,dt,
1228 bEeFitAc,bEESEF,bEENLC,oenv);
1229 if (balfile)
1230 do_ballistic(balfile,n,t,val,nset,balTime,nBalExp,bDer,oenv);
1231 /* if (gemfile) */
1232 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1233 /* nFitPoints, fit_start, fit_end, oenv); */
1234 if (bPower)
1235 power_fit(n,nset,val,t);
1237 if (acfile != NULL)
1239 if (bSubAv)
1241 for(s=0; s<nset; s++)
1243 for(i=0; i<n; i++)
1245 val[s][i] -= av[s];
1249 do_autocorr(acfile,oenv,"Autocorrelation",n,nset,val,dt,
1250 eacNormal,bAverCorr);
1253 if (bRegression)
1254 regression_analysis(n,bXYdy,t,nset,val);
1256 if (bLuzar)
1257 luzar_correl(n,t,nset,val,temp,bXYdy,fit_start,smooth_tail_start,oenv);
1259 view_all(oenv,NFILE, fnm);
1261 thanx(stderr);
1263 return 0;