Remove unused function generate_excls and make clean_excls static
[gromacs.git] / src / gromacs / gmxana / gmx_analyze.cpp
blob66ea69f08338b148c0117c4c49c8df0fa3d8aae6
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include <cmath>
40 #include <cstdlib>
41 #include <cstring>
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/commandline/viewit.h"
45 #include "gromacs/correlationfunctions/autocorr.h"
46 #include "gromacs/correlationfunctions/expfit.h"
47 #include "gromacs/correlationfunctions/integrate.h"
48 #include "gromacs/fileio/xvgr.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/gmxana/gstat.h"
51 #include "gromacs/linearalgebra/matrix.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/utilities.h"
54 #include "gromacs/math/vec.h"
55 #include "gromacs/statistics/statistics.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/pleasecite.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/snprintf.h"
63 /* must correspond to char *avbar_opt[] declared in main() */
64 enum {
65 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
68 static void power_fit(int n, int nset, real **val, real *t)
70 real *x, *y, quality, a, b, r;
71 int s, i;
73 snew(x, n);
74 snew(y, n);
76 if (t[0] > 0)
78 for (i = 0; i < n; i++)
80 if (t[0] > 0)
82 x[i] = std::log(t[i]);
86 else
88 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
89 for (i = 0; i < n; i++)
91 x[i] = std::log1p(static_cast<real>(i));
95 for (s = 0; s < nset; s++)
97 for (i = 0; i < n && val[s][i] >= 0; i++)
99 y[i] = std::log(val[s][i]);
101 if (i < n)
103 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
105 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
106 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
107 s+1, quality, a, std::exp(b));
110 sfree(y);
111 sfree(x);
114 static real cosine_content(int nhp, int n, const real *y)
115 /* Assumes n equidistant points */
117 double fac, cosyint, yyint;
118 int i;
120 if (n < 2)
122 return 0;
125 fac = M_PI*nhp/(n-1);
127 cosyint = 0;
128 yyint = 0;
129 for (i = 0; i < n; i++)
131 cosyint += std::cos(fac*i)*y[i];
132 yyint += y[i]*y[i];
135 return 2*cosyint*cosyint/(n*yyint);
138 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
139 const gmx_output_env_t *oenv)
141 FILE *fp;
142 int s;
143 real cc;
145 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
146 oenv);
148 for (s = 0; s < nset; s++)
150 cc = cosine_content(s+1, n, val[s]);
151 fprintf(fp, " %d %g\n", s+1, cc);
152 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
153 s+1, 0.5*(s+1), cc);
155 fprintf(stdout, "\n");
157 xvgrclose(fp);
160 static void regression_analysis(int n, gmx_bool bXYdy,
161 real *x, int nset, real **val)
163 real S, chi2, a, b, da, db, r = 0;
164 int ok;
166 if (bXYdy || (nset == 1))
168 printf("Fitting data to a function f(x) = ax + b\n");
169 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
170 printf("Error estimates will be given if w_i (sigma) values are given\n");
171 printf("(use option -xydy).\n\n");
172 if (bXYdy)
174 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
176 gmx_fatal(FARGS, "Error fitting the data: %s",
177 gmx_stats_message(ok));
180 else
182 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
184 gmx_fatal(FARGS, "Error fitting the data: %s",
185 gmx_stats_message(ok));
188 chi2 = gmx::square((n-2)*S);
189 printf("Chi2 = %g\n", chi2);
190 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
191 printf("Correlation coefficient = %.1f%%\n", 100*r);
192 printf("\n");
193 if (bXYdy)
195 printf("a = %g +/- %g\n", a, da);
196 printf("b = %g +/- %g\n", b, db);
198 else
200 printf("a = %g\n", a);
201 printf("b = %g\n", b);
204 else
206 double chi2, *a, **xx, *y;
207 int i, j;
209 snew(y, n);
210 snew(xx, nset-1);
211 for (j = 0; (j < nset-1); j++)
213 snew(xx[j], n);
215 for (i = 0; (i < n); i++)
217 y[i] = val[0][i];
218 for (j = 1; (j < nset); j++)
220 xx[j-1][i] = val[j][i];
223 snew(a, nset-1);
224 chi2 = multi_regression(nullptr, n, y, nset-1, xx, a);
225 printf("Fitting %d data points in %d sets\n", n, nset-1);
226 printf("chi2 = %g\n", chi2);
227 printf("A =");
228 for (i = 0; (i < nset-1); i++)
230 printf(" %g", a[i]);
231 sfree(xx[i]);
233 printf("\n");
234 sfree(xx);
235 sfree(y);
236 sfree(a);
240 static void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
241 const gmx_output_env_t *oenv)
243 FILE *fp;
244 int i, s;
245 double minval, maxval;
246 int nbin;
247 int64_t *histo;
249 minval = val[0][0];
250 maxval = val[0][0];
251 for (s = 0; s < nset; s++)
253 for (i = 0; i < n; i++)
255 if (val[s][i] < minval)
257 minval = val[s][i];
259 else if (val[s][i] > maxval)
261 maxval = val[s][i];
266 minval = binwidth*std::floor(minval/binwidth);
267 maxval = binwidth*std::ceil(maxval/binwidth);
268 if (minval != 0)
270 minval -= binwidth;
272 maxval += binwidth;
274 nbin = gmx::roundToInt(((maxval - minval)/binwidth) + 1);
275 fprintf(stderr, "Making distributions with %d bins\n", nbin);
276 snew(histo, nbin);
277 fp = xvgropen(distfile, "Distribution", "", "", oenv);
278 for (s = 0; s < nset; s++)
280 for (i = 0; i < nbin; i++)
282 histo[i] = 0;
284 for (i = 0; i < n; i++)
286 histo[gmx::roundToInt((val[s][i] - minval)/binwidth)]++;
288 for (i = 0; i < nbin; i++)
290 fprintf(fp, " %g %g\n", minval+i*binwidth, static_cast<double>(histo[i])/(n*binwidth));
292 if (s < nset-1)
294 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
297 xvgrclose(fp);
300 static int real_comp(const void *a, const void *b)
302 real dif = *reinterpret_cast<const real*>(a) - *reinterpret_cast<const real*>(b);
304 if (dif < 0)
306 return -1;
308 else if (dif > 0)
310 return 1;
312 else
314 return 0;
318 static void average(const char *avfile, int avbar_opt,
319 int n, int nset, real **val, real *t)
321 FILE *fp;
322 int i, s, edge = 0;
323 double av, var, err;
324 real *tmp = nullptr;
326 fp = gmx_ffopen(avfile, "w");
327 if ((avbar_opt == avbarERROR) && (nset == 1))
329 avbar_opt = avbarNONE;
331 if (avbar_opt != avbarNONE)
333 if (avbar_opt == avbar90)
335 snew(tmp, nset);
336 fprintf(fp, "@TYPE xydydy\n");
337 edge = gmx::roundToInt(nset*0.05);
338 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
339 " interval\n", edge, gmx::roundToInt(100.*(nset-2*edge)/nset));
341 else
343 fprintf(fp, "@TYPE xydy\n");
347 for (i = 0; i < n; i++)
349 av = 0;
350 for (s = 0; s < nset; s++)
352 av += val[s][i];
354 av /= nset;
355 fprintf(fp, " %g %g", t[i], av);
356 var = 0;
357 if (avbar_opt != avbarNONE)
359 if (avbar_opt == avbar90)
361 for (s = 0; s < nset; s++)
363 tmp[s] = val[s][i];
365 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
366 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
368 else
370 for (s = 0; s < nset; s++)
372 var += gmx::square(val[s][i]-av);
374 if (avbar_opt == avbarSTDDEV)
376 err = std::sqrt(var/nset);
378 else
380 err = std::sqrt(var/(nset*(nset-1)));
382 fprintf(fp, " %g", err);
385 fprintf(fp, "\n");
387 gmx_ffclose(fp);
389 if (avbar_opt == avbar90)
391 sfree(tmp);
395 /*! \brief Compute final error estimate.
397 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
399 static real optimal_error_estimate(double sigma, const double fitparm[], real tTotal)
401 // When sigma is zero, the fitparm data can be uninitialized
402 if (sigma == 0.0)
404 return 0;
406 double ss = fitparm[1]*fitparm[0]+(1-fitparm[1])*fitparm[2];
407 if ((tTotal <= 0) || (ss <= 0))
409 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n",
410 tTotal, ss);
411 return 0;
414 return sigma*std::sqrt(2*ss/tTotal);
417 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
418 int nset, double *av, double *sig, real **val, real dt,
419 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
420 const gmx_output_env_t *oenv)
422 FILE *fp;
423 int bs, prev_bs, nbs, nb;
424 real spacing, nbr;
425 int s, i, j;
426 double blav, var;
427 char **leg;
428 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
429 double fitparm[3];
430 real ee, a, tau1, tau2;
432 if (n < 4)
434 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
436 return;
439 fp = xvgropen(eefile, "Error estimates",
440 "Block size (time)", "Error estimate", oenv);
441 if (output_env_get_print_xvgr_codes(oenv))
443 fprintf(fp,
444 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
445 (n-1)*dt, n);
447 snew(leg, 2*nset);
448 xvgr_legend(fp, 2*nset, leg, oenv);
449 sfree(leg);
451 spacing = std::pow(2.0, 1.0/resol);
452 snew(tbs, n);
453 snew(ybs, n);
454 snew(fitsig, n);
455 for (s = 0; s < nset; s++)
457 nbs = 0;
458 prev_bs = 0;
459 nbr = nb_min;
460 while (nbr <= n)
462 bs = n/static_cast<int>(nbr);
463 if (bs != prev_bs)
465 nb = n/bs;
466 var = 0;
467 for (i = 0; i < nb; i++)
469 blav = 0;
470 for (j = 0; j < bs; j++)
472 blav += val[s][bs*i+j];
474 var += gmx::square(av[s] - blav/bs);
476 tbs[nbs] = bs*dt;
477 if (sig[s] == 0)
479 ybs[nbs] = 0;
481 else
483 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
485 nbs++;
487 nbr *= spacing;
488 prev_bs = bs;
490 if (sig[s] == 0)
492 ee = 0;
493 a = 1;
494 tau1 = 0;
495 tau2 = 0;
496 fitparm[0] = 0;
497 fitparm[1] = 0;
498 fitparm[2] = 0;
500 else
502 for (i = 0; i < nbs/2; i++)
504 rtmp = tbs[i];
505 tbs[i] = tbs[nbs-1-i];
506 tbs[nbs-1-i] = rtmp;
507 rtmp = ybs[i];
508 ybs[i] = ybs[nbs-1-i];
509 ybs[nbs-1-i] = rtmp;
511 /* The initial slope of the normalized ybs^2 is 1.
512 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
513 * From this we take our initial guess for tau1.
515 twooe = 2.0/std::exp(1.0);
516 i = -1;
519 i++;
520 tau1_est = tbs[i];
522 while (i < nbs - 1 &&
523 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
525 if (ybs[0] > ybs[1])
527 fprintf(stdout, "Data set %d has strange time correlations:\n"
528 "the std. error using single points is larger than that of blocks of 2 points\n"
529 "The error estimate might be inaccurate, check the fit\n",
530 s+1);
531 /* Use the total time as tau for the fitting weights */
532 tau_sig = (n - 1)*dt;
534 else
536 tau_sig = tau1_est;
539 if (debug)
541 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
544 /* Generate more or less appropriate sigma's,
545 * also taking the density of points into account.
547 for (i = 0; i < nbs; i++)
549 if (i == 0)
551 dens = tbs[1]/tbs[0] - 1;
553 else if (i == nbs-1)
555 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
557 else
559 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
561 fitsig[i] = std::sqrt((tau_sig + tbs[i])/dens);
564 if (!bSingleExpFit)
566 fitparm[0] = tau1_est;
567 fitparm[1] = 0.95;
568 /* We set the initial guess for tau2
569 * to halfway between tau1_est and the total time (on log scale).
571 fitparm[2] = std::sqrt(tau1_est*(n-1)*dt);
572 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
573 bDebugMode(), effnERREST, fitparm, 0,
574 nullptr);
576 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
577 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
579 if (!bSingleExpFit)
581 if (fitparm[2] > (n-1)*dt)
583 fprintf(stdout,
584 "Warning: tau2 is longer than the length of the data (%g)\n"
585 " the statistics might be bad\n",
586 (n-1)*dt);
588 else
590 fprintf(stdout, "a fitted parameter is negative\n");
592 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
593 optimal_error_estimate(sig[s], fitparm, n*dt),
594 fitparm[1], fitparm[0], fitparm[2]);
595 /* Do a fit with tau2 fixed at the total time.
596 * One could also choose any other large value for tau2.
598 fitparm[0] = tau1_est;
599 fitparm[1] = 0.95;
600 fitparm[2] = (n-1)*dt;
601 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
602 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
603 effnERREST, fitparm, 4, nullptr);
605 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
606 || (fitparm[1] > 1 && !bAllowNegLTCorr))
608 if (!bSingleExpFit)
610 fprintf(stdout, "a fitted parameter is negative\n");
611 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
612 optimal_error_estimate(sig[s], fitparm, n*dt),
613 fitparm[1], fitparm[0], fitparm[2]);
615 /* Do a single exponential fit */
616 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
617 fitparm[0] = tau1_est;
618 fitparm[1] = 1.0;
619 fitparm[2] = 0.0;
620 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
621 effnERREST, fitparm, 6, nullptr);
624 ee = optimal_error_estimate(sig[s], fitparm, n*dt);
625 a = fitparm[1];
626 tau1 = fitparm[0];
627 tau2 = fitparm[2];
629 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
630 s+1, ee, a, tau1, tau2);
631 if (output_env_get_xvg_format(oenv) == exvgXMGR)
633 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
634 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1,
635 optimal_error_estimate(sig[s], fitparm, n*dt));
637 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
639 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
640 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1,
641 optimal_error_estimate(sig[s], fitparm, n*dt));
643 for (i = 0; i < nbs; i++)
645 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*std::sqrt(ybs[i]/(n*dt)),
646 sig[s]*std::sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
649 if (bFitAc)
651 int fitlen;
652 real *ac, acint;
653 double ac_fit[4];
655 snew(ac, n);
656 for (i = 0; i < n; i++)
658 ac[i] = val[s][i] - av[s];
659 if (i > 0)
661 fitsig[i] = std::sqrt(static_cast<real>(i));
663 else
665 fitsig[i] = 1;
668 low_do_autocorr(nullptr, oenv, nullptr, n, 1, -1, &ac,
669 dt, eacNormal, 1, FALSE, TRUE,
670 FALSE, 0, 0, effnNONE);
672 fitlen = n/nb_min;
674 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
675 acint = 0.5*ac[0];
676 for (i = 1; i <= fitlen/2; i++)
678 acint += ac[i];
680 acint *= dt;
682 /* Generate more or less appropriate sigma's */
683 for (i = 0; i <= fitlen; i++)
685 fitsig[i] = std::sqrt(acint + dt*i);
688 ac_fit[0] = 0.5*acint;
689 ac_fit[1] = 0.95;
690 ac_fit[2] = 10*acint;
691 do_lmfit(n/nb_min, ac, fitsig, dt, nullptr, 0, fitlen*dt, oenv,
692 bDebugMode(), effnEXPEXP, ac_fit, 0, nullptr);
693 ac_fit[3] = 1 - ac_fit[1];
695 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
696 s+1, optimal_error_estimate(sig[s], ac_fit, n*dt),
697 ac_fit[1], ac_fit[0], ac_fit[2]);
699 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
700 for (i = 0; i < nbs; i++)
702 fprintf(fp, "%g %g\n", tbs[i],
703 sig[s]*std::sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
706 sfree(ac);
708 if (s < nset-1)
710 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
713 sfree(fitsig);
714 sfree(ybs);
715 sfree(tbs);
716 xvgrclose(fp);
719 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
720 gmx_bool bError, real fit_start)
722 real *kt;
723 real d2;
724 int j;
726 please_cite(stdout, "Spoel2006b");
728 /* Compute negative derivative k(t) = -dc(t)/dt */
729 if (!bError)
731 snew(kt, nn);
732 compute_derivative(nn, time, val[0], kt);
733 for (j = 0; (j < nn); j++)
735 kt[j] = -kt[j];
737 if (debug)
739 d2 = 0;
740 for (j = 0; (j < nn); j++)
742 d2 += gmx::square(kt[j] - val[3][j]);
744 fprintf(debug, "RMS difference in derivatives is %g\n", std::sqrt(d2/nn));
746 analyse_corr(nn, time, val[0], val[2], kt, nullptr, nullptr, nullptr, fit_start,
747 temp);
748 sfree(kt);
750 else if (nset == 6)
752 analyse_corr(nn, time, val[0], val[2], val[4],
753 val[1], val[3], val[5], fit_start, temp);
755 else
757 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
758 printf("Not doing anything. Sorry.\n");
762 static void filter(real flen, int n, int nset, real **val, real dt)
764 int f, s, i, j;
765 double *filt, sum, vf, fluc, fluctot;
767 f = static_cast<int>(flen/(2*dt));
768 snew(filt, f+1);
769 filt[0] = 1;
770 sum = 1;
771 for (i = 1; i <= f; i++)
773 filt[i] = std::cos(M_PI*dt*i/flen);
774 sum += 2*filt[i];
776 for (i = 0; i <= f; i++)
778 filt[i] /= sum;
780 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
781 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
782 fluctot = 0;
783 for (s = 0; s < nset; s++)
785 fluc = 0;
786 for (i = f; i < n-f; i++)
788 vf = filt[0]*val[s][i];
789 for (j = 1; j <= f; j++)
791 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
793 fluc += gmx::square(val[s][i] - vf);
795 fluc /= n - 2*f;
796 fluctot += fluc;
797 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, std::sqrt(fluc));
799 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", std::sqrt(fluctot/nset));
800 fprintf(stdout, "\n");
802 sfree(filt);
805 static void do_fit(FILE *out, int n, gmx_bool bYdy,
806 int ny, real *x0, real **val,
807 int npargs, t_pargs *ppa, const gmx_output_env_t *oenv,
808 const char *fn_fitted)
810 real *c1 = nullptr, *sig = nullptr;
811 double *fitparm;
812 real tendfit, tbeginfit;
813 int i, efitfn, nparm;
815 efitfn = get_acffitfn();
816 nparm = effnNparams(efitfn);
817 fprintf(out, "Will fit to the following function:\n");
818 fprintf(out, "%s\n", effnDescription(efitfn));
819 c1 = val[n];
820 if (bYdy)
822 c1 = val[n];
823 sig = val[n+1];
824 fprintf(out, "Using two columns as y and sigma values\n");
826 else
828 snew(sig, ny);
830 if (opt2parg_bSet("-beginfit", npargs, ppa))
832 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
834 else
836 tbeginfit = x0[0];
838 if (opt2parg_bSet("-endfit", npargs, ppa))
840 tendfit = opt2parg_real("-endfit", npargs, ppa);
842 else
844 tendfit = x0[ny-1];
847 snew(fitparm, nparm);
848 switch (efitfn)
850 case effnEXP1:
851 fitparm[0] = 0.5;
852 break;
853 case effnEXP2:
854 fitparm[0] = 0.5;
855 fitparm[1] = c1[0];
856 break;
857 case effnEXPEXP:
858 fitparm[0] = 1.0;
859 fitparm[1] = 0.5*c1[0];
860 fitparm[2] = 10.0;
861 break;
862 case effnEXP5:
863 fitparm[0] = fitparm[2] = 0.5*c1[0];
864 fitparm[1] = 10;
865 fitparm[3] = 40;
866 fitparm[4] = 0;
867 break;
868 case effnEXP7:
869 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
870 fitparm[1] = 1;
871 fitparm[3] = 10;
872 fitparm[5] = 100;
873 fitparm[6] = 0;
874 break;
875 case effnEXP9:
876 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
877 fitparm[1] = 0.1;
878 fitparm[3] = 1;
879 fitparm[5] = 10;
880 fitparm[7] = 100;
881 fitparm[8] = 0;
882 break;
883 default:
884 fprintf(out, "Warning: don't know how to initialize the parameters\n");
885 for (i = 0; (i < nparm); i++)
887 fitparm[i] = 1;
890 fprintf(out, "Starting parameters:\n");
891 for (i = 0; (i < nparm); i++)
893 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
895 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
896 oenv, bDebugMode(), efitfn, fitparm, 0,
897 fn_fitted) > 0)
899 for (i = 0; (i < nparm); i++)
901 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
904 else
906 fprintf(out, "No solution was found\n");
910 static void print_fitted_function(const char *fitfile,
911 const char *fn_fitted,
912 gmx_bool bXYdy,
913 int nset,
914 int n,
915 real *t,
916 real **val,
917 int npargs,
918 t_pargs *ppa,
919 gmx_output_env_t *oenv)
921 FILE *out_fit = gmx_ffopen(fitfile, "w");
922 if (bXYdy && nset >= 2)
924 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv,
925 fn_fitted);
927 else
929 char *buf2 = nullptr;
930 int s, buflen = 0;
931 if (nullptr != fn_fitted)
933 buflen = std::strlen(fn_fitted)+32;
934 snew(buf2, buflen);
935 std::strncpy(buf2, fn_fitted, buflen);
936 buf2[std::strlen(buf2)-4] = '\0';
938 for (s = 0; s < nset; s++)
940 char *buf = nullptr;
941 if (nullptr != fn_fitted)
943 snew(buf, buflen);
944 snprintf(buf, n, "%s_%d.xvg", buf2, s);
946 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
947 sfree(buf);
949 sfree(buf2);
951 gmx_ffclose(out_fit);
954 int gmx_analyze(int argc, char *argv[])
956 static const char *desc[] = {
957 "[THISMODULE] reads an ASCII file and analyzes data sets.",
958 "A line in the input file may start with a time",
959 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
960 "Multiple sets can also be",
961 "read when they are separated by & (option [TT]-n[tt]);",
962 "in this case only one [IT]y[it]-value is read from each line.",
963 "All lines starting with # and @ are skipped.",
964 "All analyses can also be done for the derivative of a set",
965 "(option [TT]-d[tt]).[PAR]",
967 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
968 "points are equidistant in time.[PAR]",
970 "[THISMODULE] always shows the average and standard deviation of each",
971 "set, as well as the relative deviation of the third",
972 "and fourth cumulant from those of a Gaussian distribution with the same",
973 "standard deviation.[PAR]",
975 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
976 "Be sure that the time interval between data points is",
977 "much shorter than the time scale of the autocorrelation.[PAR]",
979 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
980 "i/2 periods. The formula is::",
982 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
984 "This is useful for principal components obtained from covariance",
985 "analysis, since the principal components of random diffusion are",
986 "pure cosines.[PAR]",
988 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
990 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
992 "Option [TT]-av[tt] produces the average over the sets.",
993 "Error bars can be added with the option [TT]-errbar[tt].",
994 "The errorbars can represent the standard deviation, the error",
995 "(assuming the points are independent) or the interval containing",
996 "90% of the points, by discarding 5% of the points at the top and",
997 "the bottom.[PAR]",
999 "Option [TT]-ee[tt] produces error estimates using block averaging.",
1000 "A set is divided in a number of blocks and averages are calculated for",
1001 "each block. The error for the total average is calculated from",
1002 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
1003 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
1004 "These errors are plotted as a function of the block size.",
1005 "Also an analytical block average curve is plotted, assuming",
1006 "that the autocorrelation is a sum of two exponentials.",
1007 "The analytical curve for the block average is::",
1009 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1010 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],",
1012 "where T is the total time.",
1013 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1014 "When the actual block average is very close to the analytical curve,",
1015 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1016 "The complete derivation is given in",
1017 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1019 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1020 "of each set and over all sets with respect to a filtered average.",
1021 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1022 "to len/2. len is supplied with the option [TT]-filter[tt].",
1023 "This filter reduces oscillations with period len/2 and len by a factor",
1024 "of 0.79 and 0.33 respectively.[PAR]",
1026 "Option [TT]-g[tt] fits the data to the function given with option",
1027 "[TT]-fitfn[tt].[PAR]",
1029 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1030 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1031 "zero or with a negative value are ignored.[PAR]",
1033 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1034 "on output from [gmx-hbond]. The input file can be taken directly",
1035 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1036 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1037 "curves that make sense in the context of molecular dynamics, mainly",
1038 "exponential curves. More information is in the manual. To check the output",
1039 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1040 "original data and the fitted function to a new data file. The fitting",
1041 "parameters are stored as comment in the output file."
1043 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1044 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1045 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1046 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE;
1047 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10;
1048 static real temp = 298.15, fit_start = 1, fit_end = 60;
1050 /* must correspond to enum avbar* declared at beginning of file */
1051 static const char *avbar_opt[avbarNR+1] = {
1052 nullptr, "none", "stddev", "error", "90", nullptr
1055 t_pargs pa[] = {
1056 { "-time", FALSE, etBOOL, {&bHaveT},
1057 "Expect a time in the input" },
1058 { "-b", FALSE, etREAL, {&tb},
1059 "First time to read from set" },
1060 { "-e", FALSE, etREAL, {&te},
1061 "Last time to read from set" },
1062 { "-n", FALSE, etINT, {&nsets_in},
1063 "Read this number of sets separated by &" },
1064 { "-d", FALSE, etBOOL, {&bDer},
1065 "Use the derivative" },
1066 { "-dp", FALSE, etINT, {&d},
1067 "HIDDENThe derivative is the difference over this number of points" },
1068 { "-bw", FALSE, etREAL, {&binwidth},
1069 "Binwidth for the distribution" },
1070 { "-errbar", FALSE, etENUM, {avbar_opt},
1071 "Error bars for [TT]-av[tt]" },
1072 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1073 "Integrate data function(s) numerically using trapezium rule" },
1074 { "-aver_start", FALSE, etREAL, {&aver_start},
1075 "Start averaging the integral from here" },
1076 { "-xydy", FALSE, etBOOL, {&bXYdy},
1077 "Interpret second data set as error in the y values for integrating" },
1078 { "-regression", FALSE, etBOOL, {&bRegression},
1079 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1080 { "-luzar", FALSE, etBOOL, {&bLuzar},
1081 "Do a Luzar and Chandler analysis on a correlation function and "
1082 "related as produced by [gmx-hbond]. When in addition the "
1083 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1084 "interpreted as errors in c(t) and n(t)." },
1085 { "-temp", FALSE, etREAL, {&temp},
1086 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1087 { "-fitstart", FALSE, etREAL, {&fit_start},
1088 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1089 { "-fitend", FALSE, etREAL, {&fit_end},
1090 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1091 { "-nbmin", FALSE, etINT, {&nb_min},
1092 "HIDDENMinimum number of blocks for block averaging" },
1093 { "-resol", FALSE, etINT, {&resol},
1094 "HIDDENResolution for the block averaging, block size increases with"
1095 " a factor 2^(1/resol)" },
1096 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1097 "HIDDENAlways use a single exponential fit for the error estimate" },
1098 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1099 "HIDDENAllow a negative long-time correlation" },
1100 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1101 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1102 { "-filter", FALSE, etREAL, {&filtlen},
1103 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1104 { "-power", FALSE, etBOOL, {&bPower},
1105 "Fit data to: b t^a" },
1106 { "-subav", FALSE, etBOOL, {&bSubAv},
1107 "Subtract the average before autocorrelating" },
1108 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1109 "Calculate one ACF over all sets" },
1111 #define NPA asize(pa)
1113 FILE *out;
1114 int n, nlast, s, nset, i, j = 0;
1115 real **val, *t, dt, tot, error;
1116 double *av, *sig, cum1, cum2, cum3, cum4, db;
1117 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *fitfile;
1118 gmx_output_env_t *oenv;
1120 t_filenm fnm[] = {
1121 { efXVG, "-f", "graph", ffREAD },
1122 { efXVG, "-ac", "autocorr", ffOPTWR },
1123 { efXVG, "-msd", "msd", ffOPTWR },
1124 { efXVG, "-cc", "coscont", ffOPTWR },
1125 { efXVG, "-dist", "distr", ffOPTWR },
1126 { efXVG, "-av", "average", ffOPTWR },
1127 { efXVG, "-ee", "errest", ffOPTWR },
1128 { efXVG, "-fitted", "fitted", ffOPTWR },
1129 { efLOG, "-g", "fitlog", ffOPTWR }
1131 #define NFILE asize(fnm)
1133 int npargs;
1134 t_pargs *ppa;
1136 npargs = asize(pa);
1137 ppa = add_acf_pargs(&npargs, pa);
1139 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1140 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, nullptr, &oenv))
1142 sfree(ppa);
1143 return 0;
1146 acfile = opt2fn_null("-ac", NFILE, fnm);
1147 msdfile = opt2fn_null("-msd", NFILE, fnm);
1148 ccfile = opt2fn_null("-cc", NFILE, fnm);
1149 distfile = opt2fn_null("-dist", NFILE, fnm);
1150 avfile = opt2fn_null("-av", NFILE, fnm);
1151 eefile = opt2fn_null("-ee", NFILE, fnm);
1152 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == nullptr)
1154 fitfile = opt2fn("-g", NFILE, fnm);
1156 else
1158 fitfile = opt2fn_null("-g", NFILE, fnm);
1161 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1162 opt2parg_bSet("-b", npargs, ppa), tb,
1163 opt2parg_bSet("-e", npargs, ppa), te,
1164 nsets_in, &nset, &n, &dt, &t);
1165 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1167 if (bDer)
1169 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1170 d, d);
1171 n -= d;
1172 for (s = 0; s < nset; s++)
1174 for (i = 0; (i < n); i++)
1176 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1181 if (bIntegrate)
1183 real sum, stddev;
1185 printf("Calculating the integral using the trapezium rule\n");
1187 if (bXYdy)
1189 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1190 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1192 else
1194 for (s = 0; s < nset; s++)
1196 sum = evaluate_integral(n, t, val[s], nullptr, aver_start, &stddev);
1197 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1202 if (fitfile != nullptr)
1204 print_fitted_function(fitfile,
1205 opt2fn_null("-fitted", NFILE, fnm),
1206 bXYdy, nset,
1207 n, t, val,
1208 npargs, ppa,
1209 oenv);
1212 printf(" std. dev. relative deviation of\n");
1213 printf(" standard --------- cumulants from those of\n");
1214 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1215 printf(" cum. 3 cum. 4\n");
1216 snew(av, nset);
1217 snew(sig, nset);
1218 for (s = 0; (s < nset); s++)
1220 cum1 = 0;
1221 cum2 = 0;
1222 cum3 = 0;
1223 cum4 = 0;
1224 for (i = 0; (i < n); i++)
1226 cum1 += val[s][i];
1228 cum1 /= n;
1229 for (i = 0; (i < n); i++)
1231 db = val[s][i]-cum1;
1232 cum2 += db*db;
1233 cum3 += db*db*db;
1234 cum4 += db*db*db*db;
1236 cum2 /= n;
1237 cum3 /= n;
1238 cum4 /= n;
1239 av[s] = cum1;
1240 sig[s] = std::sqrt(cum2);
1241 if (n > 1)
1243 error = std::sqrt(cum2/(n-1));
1245 else
1247 error = 0;
1249 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1250 s+1, av[s], sig[s], error,
1251 sig[s] != 0.0 ? cum3/(sig[s]*sig[s]*sig[s]*std::sqrt(8/M_PI)) : 0,
1252 sig[s] != 0.0 ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1254 printf("\n");
1256 if (filtlen != 0.0F)
1258 filter(filtlen, n, nset, val, dt);
1261 if (msdfile)
1263 out = xvgropen(msdfile, "Mean square displacement",
1264 "time", "MSD (nm\\S2\\N)", oenv);
1265 nlast = static_cast<int>(n*frac);
1266 for (s = 0; s < nset; s++)
1268 for (j = 0; j <= nlast; j++)
1270 if (j % 100 == 0)
1272 fprintf(stderr, "\r%d", j);
1273 fflush(stderr);
1275 tot = 0;
1276 for (i = 0; i < n-j; i++)
1278 tot += gmx::square(val[s][i]-val[s][i+j]);
1280 tot /= (n-j);
1281 fprintf(out, " %g %8g\n", dt*j, tot);
1283 if (s < nset-1)
1285 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1288 xvgrclose(out);
1289 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1290 fflush(stderr);
1292 if (ccfile)
1294 plot_coscont(ccfile, n, nset, val, oenv);
1297 if (distfile)
1299 histogram(distfile, binwidth, n, nset, val, oenv);
1301 if (avfile)
1303 average(avfile, nenum(avbar_opt), n, nset, val, t);
1305 if (eefile)
1307 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1308 bEeFitAc, bEESEF, bEENLC, oenv);
1310 if (bPower)
1312 power_fit(n, nset, val, t);
1315 if (acfile != nullptr)
1317 if (bSubAv)
1319 for (s = 0; s < nset; s++)
1321 for (i = 0; i < n; i++)
1323 val[s][i] -= av[s];
1327 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1328 eacNormal, bAverCorr);
1331 if (bRegression)
1333 regression_analysis(n, bXYdy, t, nset, val);
1336 if (bLuzar)
1338 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1341 view_all(oenv, NFILE, fnm);
1343 return 0;