Introduce SimulatorBuilder
[gromacs.git] / src / gromacs / gmxana / gmx_analyze.cpp
blob4e98ff3d98b57c353ed4e8de60eae11f4599ee0b
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include <cmath>
40 #include <cstdlib>
41 #include <cstring>
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/commandline/viewit.h"
45 #include "gromacs/correlationfunctions/autocorr.h"
46 #include "gromacs/correlationfunctions/expfit.h"
47 #include "gromacs/correlationfunctions/integrate.h"
48 #include "gromacs/fileio/xvgr.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/gmxana/gstat.h"
51 #include "gromacs/linearalgebra/matrix.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/utilities.h"
54 #include "gromacs/math/vec.h"
55 #include "gromacs/statistics/statistics.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/pleasecite.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/snprintf.h"
63 /* must correspond to char *avbar_opt[] declared in main() */
64 enum {
65 avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR
68 static void power_fit(int n, int nset, real **val, real *t)
70 real *x, *y, quality, a, b, r;
71 int s, i;
73 snew(x, n);
74 snew(y, n);
76 if (t[0] > 0)
78 for (i = 0; i < n; i++)
80 if (t[0] > 0)
82 x[i] = std::log(t[i]);
86 else
88 fprintf(stdout, "First time is not larger than 0, using index number as time for power fit\n");
89 for (i = 0; i < n; i++)
91 x[i] = std::log1p(static_cast<real>(i));
95 for (s = 0; s < nset; s++)
97 for (i = 0; i < n && val[s][i] >= 0; i++)
99 y[i] = std::log(val[s][i]);
101 if (i < n)
103 fprintf(stdout, "Will power fit up to point %d, since it is not larger than 0\n", i);
105 lsq_y_ax_b(i, x, y, &a, &b, &r, &quality);
106 fprintf(stdout, "Power fit set %3d: error %.3f a %g b %g\n",
107 s+1, quality, a, std::exp(b));
110 sfree(y);
111 sfree(x);
114 static real cosine_content(int nhp, int n, const real *y)
115 /* Assumes n equidistant points */
117 double fac, cosyint, yyint;
118 int i;
120 if (n < 2)
122 return 0;
125 fac = M_PI*nhp/(n-1);
127 cosyint = 0;
128 yyint = 0;
129 for (i = 0; i < n; i++)
131 cosyint += std::cos(fac*i)*y[i];
132 yyint += y[i]*y[i];
135 return 2*cosyint*cosyint/(n*yyint);
138 static void plot_coscont(const char *ccfile, int n, int nset, real **val,
139 const gmx_output_env_t *oenv)
141 FILE *fp;
142 int s;
143 real cc;
145 fp = xvgropen(ccfile, "Cosine content", "set / half periods", "cosine content",
146 oenv);
148 for (s = 0; s < nset; s++)
150 cc = cosine_content(s+1, n, val[s]);
151 fprintf(fp, " %d %g\n", s+1, cc);
152 fprintf(stdout, "Cosine content of set %d with %.1f periods: %g\n",
153 s+1, 0.5*(s+1), cc);
155 fprintf(stdout, "\n");
157 xvgrclose(fp);
160 static void regression_analysis(int n, gmx_bool bXYdy,
161 real *x, int nset, real **val)
163 real S, chi2, a, b, da, db, r = 0;
164 int ok;
166 if (bXYdy || (nset == 1))
168 printf("Fitting data to a function f(x) = ax + b\n");
169 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
170 printf("Error estimates will be given if w_i (sigma) values are given\n");
171 printf("(use option -xydy).\n\n");
172 if (bXYdy)
174 if ((ok = lsq_y_ax_b_error(n, x, val[0], val[1], &a, &b, &da, &db, &r, &S)) != estatsOK)
176 gmx_fatal(FARGS, "Error fitting the data: %s",
177 gmx_stats_message(ok));
180 else
182 if ((ok = lsq_y_ax_b(n, x, val[0], &a, &b, &r, &S)) != estatsOK)
184 gmx_fatal(FARGS, "Error fitting the data: %s",
185 gmx_stats_message(ok));
188 chi2 = gmx::square((n-2)*S);
189 printf("Chi2 = %g\n", chi2);
190 printf("S (Sqrt(Chi2/(n-2)) = %g\n", S);
191 printf("Correlation coefficient = %.1f%%\n", 100*r);
192 printf("\n");
193 if (bXYdy)
195 printf("a = %g +/- %g\n", a, da);
196 printf("b = %g +/- %g\n", b, db);
198 else
200 printf("a = %g\n", a);
201 printf("b = %g\n", b);
204 else
206 double chi2, *a, **xx, *y;
207 int i, j;
209 snew(y, n);
210 snew(xx, nset-1);
211 for (j = 0; (j < nset-1); j++)
213 snew(xx[j], n);
215 for (i = 0; (i < n); i++)
217 y[i] = val[0][i];
218 for (j = 1; (j < nset); j++)
220 xx[j-1][i] = val[j][i];
223 snew(a, nset-1);
224 chi2 = multi_regression(nullptr, n, y, nset-1, xx, a);
225 printf("Fitting %d data points in %d sets\n", n, nset-1);
226 printf("chi2 = %g\n", chi2);
227 printf("A =");
228 for (i = 0; (i < nset-1); i++)
230 printf(" %g", a[i]);
231 sfree(xx[i]);
233 printf("\n");
234 sfree(xx);
235 sfree(y);
236 sfree(a);
240 static void histogram(const char *distfile, real binwidth, int n, int nset, real **val,
241 const gmx_output_env_t *oenv)
243 FILE *fp;
244 int i, s;
245 double minval, maxval;
246 int nbin;
247 int64_t *histo;
249 minval = val[0][0];
250 maxval = val[0][0];
251 for (s = 0; s < nset; s++)
253 for (i = 0; i < n; i++)
255 if (val[s][i] < minval)
257 minval = val[s][i];
259 else if (val[s][i] > maxval)
261 maxval = val[s][i];
266 minval = binwidth*std::floor(minval/binwidth);
267 maxval = binwidth*std::ceil(maxval/binwidth);
268 if (minval != 0)
270 minval -= binwidth;
272 maxval += binwidth;
274 nbin = gmx::roundToInt(((maxval - minval)/binwidth) + 1);
275 fprintf(stderr, "Making distributions with %d bins\n", nbin);
276 snew(histo, nbin);
277 fp = xvgropen(distfile, "Distribution", "", "", oenv);
278 for (s = 0; s < nset; s++)
280 for (i = 0; i < nbin; i++)
282 histo[i] = 0;
284 for (i = 0; i < n; i++)
286 histo[gmx::roundToInt((val[s][i] - minval)/binwidth)]++;
288 for (i = 0; i < nbin; i++)
290 fprintf(fp, " %g %g\n", minval+i*binwidth, static_cast<double>(histo[i])/(n*binwidth));
292 if (s < nset-1)
294 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
297 xvgrclose(fp);
300 static int real_comp(const void *a, const void *b)
302 real dif = *reinterpret_cast<const real*>(a) - *reinterpret_cast<const real*>(b);
304 if (dif < 0)
306 return -1;
308 else if (dif > 0)
310 return 1;
312 else
314 return 0;
318 static void average(const char *avfile, int avbar_opt,
319 int n, int nset, real **val, real *t)
321 FILE *fp;
322 int i, s, edge = 0;
323 double av, var, err;
324 real *tmp = nullptr;
326 fp = gmx_ffopen(avfile, "w");
327 if ((avbar_opt == avbarERROR) && (nset == 1))
329 avbar_opt = avbarNONE;
331 if (avbar_opt != avbarNONE)
333 if (avbar_opt == avbar90)
335 snew(tmp, nset);
336 fprintf(fp, "@TYPE xydydy\n");
337 edge = gmx::roundToInt(nset*0.05);
338 fprintf(stdout, "Errorbars: discarding %d points on both sides: %d%%"
339 " interval\n", edge, gmx::roundToInt(100.*(nset-2*edge)/nset));
341 else
343 fprintf(fp, "@TYPE xydy\n");
347 for (i = 0; i < n; i++)
349 av = 0;
350 for (s = 0; s < nset; s++)
352 av += val[s][i];
354 av /= nset;
355 fprintf(fp, " %g %g", t[i], av);
356 var = 0;
357 if (avbar_opt != avbarNONE)
359 if (avbar_opt == avbar90)
361 for (s = 0; s < nset; s++)
363 tmp[s] = val[s][i];
365 qsort(tmp, nset, sizeof(tmp[0]), real_comp);
366 fprintf(fp, " %g %g", tmp[nset-1-edge]-av, av-tmp[edge]);
368 else
370 for (s = 0; s < nset; s++)
372 var += gmx::square(val[s][i]-av);
374 if (avbar_opt == avbarSTDDEV)
376 err = std::sqrt(var/nset);
378 else
380 err = std::sqrt(var/(nset*(nset-1)));
382 fprintf(fp, " %g", err);
385 fprintf(fp, "\n");
387 gmx_ffclose(fp);
389 if (avbar_opt == avbar90)
391 sfree(tmp);
395 /*! \brief Compute final error estimate.
397 * See Eqn A17, Hess, JCP 116 (2002) 209-217 for details.
399 static real optimal_error_estimate(double sigma, const double fitparm[], real tTotal)
401 double ss = fitparm[1]*fitparm[0]+(1-fitparm[1])*fitparm[2];
402 if ((tTotal <= 0) || (ss <= 0))
404 fprintf(stderr, "Problem in error estimate: T = %g, ss = %g\n",
405 tTotal, ss);
406 return 0;
409 return sigma*std::sqrt(2*ss/tTotal);
412 static void estimate_error(const char *eefile, int nb_min, int resol, int n,
413 int nset, double *av, double *sig, real **val, real dt,
414 gmx_bool bFitAc, gmx_bool bSingleExpFit, gmx_bool bAllowNegLTCorr,
415 const gmx_output_env_t *oenv)
417 FILE *fp;
418 int bs, prev_bs, nbs, nb;
419 real spacing, nbr;
420 int s, i, j;
421 double blav, var;
422 char **leg;
423 real *tbs, *ybs, rtmp, dens, *fitsig, twooe, tau1_est, tau_sig;
424 double fitparm[3];
425 real ee, a, tau1, tau2;
427 if (n < 4)
429 fprintf(stdout, "The number of points is smaller than 4, can not make an error estimate\n");
431 return;
434 fp = xvgropen(eefile, "Error estimates",
435 "Block size (time)", "Error estimate", oenv);
436 if (output_env_get_print_xvgr_codes(oenv))
438 fprintf(fp,
439 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
440 (n-1)*dt, n);
442 snew(leg, 2*nset);
443 xvgr_legend(fp, 2*nset, leg, oenv);
444 sfree(leg);
446 spacing = std::pow(2.0, 1.0/resol);
447 snew(tbs, n);
448 snew(ybs, n);
449 snew(fitsig, n);
450 for (s = 0; s < nset; s++)
452 nbs = 0;
453 prev_bs = 0;
454 nbr = nb_min;
455 while (nbr <= n)
457 bs = n/static_cast<int>(nbr);
458 if (bs != prev_bs)
460 nb = n/bs;
461 var = 0;
462 for (i = 0; i < nb; i++)
464 blav = 0;
465 for (j = 0; j < bs; j++)
467 blav += val[s][bs*i+j];
469 var += gmx::square(av[s] - blav/bs);
471 tbs[nbs] = bs*dt;
472 if (sig[s] == 0)
474 ybs[nbs] = 0;
476 else
478 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
480 nbs++;
482 nbr *= spacing;
483 prev_bs = bs;
485 if (sig[s] == 0)
487 ee = 0;
488 a = 1;
489 tau1 = 0;
490 tau2 = 0;
492 else
494 for (i = 0; i < nbs/2; i++)
496 rtmp = tbs[i];
497 tbs[i] = tbs[nbs-1-i];
498 tbs[nbs-1-i] = rtmp;
499 rtmp = ybs[i];
500 ybs[i] = ybs[nbs-1-i];
501 ybs[nbs-1-i] = rtmp;
503 /* The initial slope of the normalized ybs^2 is 1.
504 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
505 * From this we take our initial guess for tau1.
507 twooe = 2.0/std::exp(1.0);
508 i = -1;
511 i++;
512 tau1_est = tbs[i];
514 while (i < nbs - 1 &&
515 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
517 if (ybs[0] > ybs[1])
519 fprintf(stdout, "Data set %d has strange time correlations:\n"
520 "the std. error using single points is larger than that of blocks of 2 points\n"
521 "The error estimate might be inaccurate, check the fit\n",
522 s+1);
523 /* Use the total time as tau for the fitting weights */
524 tau_sig = (n - 1)*dt;
526 else
528 tau_sig = tau1_est;
531 if (debug)
533 fprintf(debug, "set %d tau1 estimate %f\n", s+1, tau1_est);
536 /* Generate more or less appropriate sigma's,
537 * also taking the density of points into account.
539 for (i = 0; i < nbs; i++)
541 if (i == 0)
543 dens = tbs[1]/tbs[0] - 1;
545 else if (i == nbs-1)
547 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
549 else
551 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
553 fitsig[i] = std::sqrt((tau_sig + tbs[i])/dens);
556 if (!bSingleExpFit)
558 fitparm[0] = tau1_est;
559 fitparm[1] = 0.95;
560 /* We set the initial guess for tau2
561 * to halfway between tau1_est and the total time (on log scale).
563 fitparm[2] = std::sqrt(tau1_est*(n-1)*dt);
564 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv,
565 bDebugMode(), effnERREST, fitparm, 0,
566 nullptr);
568 if (bSingleExpFit || fitparm[0] < 0 || fitparm[2] < 0 || fitparm[1] < 0
569 || (fitparm[1] > 1 && !bAllowNegLTCorr) || fitparm[2] > (n-1)*dt)
571 if (!bSingleExpFit)
573 if (fitparm[2] > (n-1)*dt)
575 fprintf(stdout,
576 "Warning: tau2 is longer than the length of the data (%g)\n"
577 " the statistics might be bad\n",
578 (n-1)*dt);
580 else
582 fprintf(stdout, "a fitted parameter is negative\n");
584 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
585 optimal_error_estimate(sig[s], fitparm, n*dt),
586 fitparm[1], fitparm[0], fitparm[2]);
587 /* Do a fit with tau2 fixed at the total time.
588 * One could also choose any other large value for tau2.
590 fitparm[0] = tau1_est;
591 fitparm[1] = 0.95;
592 fitparm[2] = (n-1)*dt;
593 fprintf(stdout, "Will fix tau2 at the total time: %g\n", fitparm[2]);
594 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
595 effnERREST, fitparm, 4, nullptr);
597 if (bSingleExpFit || fitparm[0] < 0 || fitparm[1] < 0
598 || (fitparm[1] > 1 && !bAllowNegLTCorr))
600 if (!bSingleExpFit)
602 fprintf(stdout, "a fitted parameter is negative\n");
603 fprintf(stdout, "invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
604 optimal_error_estimate(sig[s], fitparm, n*dt),
605 fitparm[1], fitparm[0], fitparm[2]);
607 /* Do a single exponential fit */
608 fprintf(stderr, "Will use a single exponential fit for set %d\n", s+1);
609 fitparm[0] = tau1_est;
610 fitparm[1] = 1.0;
611 fitparm[2] = 0.0;
612 do_lmfit(nbs, ybs, fitsig, 0, tbs, 0, dt*n, oenv, bDebugMode(),
613 effnERREST, fitparm, 6, nullptr);
616 ee = optimal_error_estimate(sig[s], fitparm, n*dt);
617 a = fitparm[1];
618 tau1 = fitparm[0];
619 tau2 = fitparm[2];
621 fprintf(stdout, "Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
622 s+1, ee, a, tau1, tau2);
623 if (output_env_get_xvg_format(oenv) == exvgXMGR)
625 fprintf(fp, "@ legend string %d \"av %f\"\n", 2*s, av[s]);
626 fprintf(fp, "@ legend string %d \"ee %6g\"\n", 2*s+1,
627 optimal_error_estimate(sig[s], fitparm, n*dt));
629 else if (output_env_get_xvg_format(oenv) == exvgXMGRACE)
631 fprintf(fp, "@ s%d legend \"av %f\"\n", 2*s, av[s]);
632 fprintf(fp, "@ s%d legend \"ee %6g\"\n", 2*s+1,
633 optimal_error_estimate(sig[s], fitparm, n*dt));
635 for (i = 0; i < nbs; i++)
637 fprintf(fp, "%g %g %g\n", tbs[i], sig[s]*std::sqrt(ybs[i]/(n*dt)),
638 sig[s]*std::sqrt(fit_function(effnERREST, fitparm, tbs[i])/(n*dt)));
641 if (bFitAc)
643 int fitlen;
644 real *ac, acint;
645 double ac_fit[4];
647 snew(ac, n);
648 for (i = 0; i < n; i++)
650 ac[i] = val[s][i] - av[s];
651 if (i > 0)
653 fitsig[i] = std::sqrt(static_cast<real>(i));
655 else
657 fitsig[i] = 1;
660 low_do_autocorr(nullptr, oenv, nullptr, n, 1, -1, &ac,
661 dt, eacNormal, 1, FALSE, TRUE,
662 FALSE, 0, 0, effnNONE);
664 fitlen = n/nb_min;
666 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
667 acint = 0.5*ac[0];
668 for (i = 1; i <= fitlen/2; i++)
670 acint += ac[i];
672 acint *= dt;
674 /* Generate more or less appropriate sigma's */
675 for (i = 0; i <= fitlen; i++)
677 fitsig[i] = std::sqrt(acint + dt*i);
680 ac_fit[0] = 0.5*acint;
681 ac_fit[1] = 0.95;
682 ac_fit[2] = 10*acint;
683 do_lmfit(n/nb_min, ac, fitsig, dt, nullptr, 0, fitlen*dt, oenv,
684 bDebugMode(), effnEXPEXP, ac_fit, 0, nullptr);
685 ac_fit[3] = 1 - ac_fit[1];
687 fprintf(stdout, "Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
688 s+1, optimal_error_estimate(sig[s], ac_fit, n*dt),
689 ac_fit[1], ac_fit[0], ac_fit[2]);
691 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
692 for (i = 0; i < nbs; i++)
694 fprintf(fp, "%g %g\n", tbs[i],
695 sig[s]*std::sqrt(fit_function(effnERREST, ac_fit, tbs[i]))/(n*dt));
698 sfree(ac);
700 if (s < nset-1)
702 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
705 sfree(fitsig);
706 sfree(ybs);
707 sfree(tbs);
708 xvgrclose(fp);
711 static void luzar_correl(int nn, real *time, int nset, real **val, real temp,
712 gmx_bool bError, real fit_start)
714 real *kt;
715 real d2;
716 int j;
718 please_cite(stdout, "Spoel2006b");
720 /* Compute negative derivative k(t) = -dc(t)/dt */
721 if (!bError)
723 snew(kt, nn);
724 compute_derivative(nn, time, val[0], kt);
725 for (j = 0; (j < nn); j++)
727 kt[j] = -kt[j];
729 if (debug)
731 d2 = 0;
732 for (j = 0; (j < nn); j++)
734 d2 += gmx::square(kt[j] - val[3][j]);
736 fprintf(debug, "RMS difference in derivatives is %g\n", std::sqrt(d2/nn));
738 analyse_corr(nn, time, val[0], val[2], kt, nullptr, nullptr, nullptr, fit_start,
739 temp);
740 sfree(kt);
742 else if (nset == 6)
744 analyse_corr(nn, time, val[0], val[2], val[4],
745 val[1], val[3], val[5], fit_start, temp);
747 else
749 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
750 printf("Not doing anything. Sorry.\n");
754 static void filter(real flen, int n, int nset, real **val, real dt)
756 int f, s, i, j;
757 double *filt, sum, vf, fluc, fluctot;
759 f = static_cast<int>(flen/(2*dt));
760 snew(filt, f+1);
761 filt[0] = 1;
762 sum = 1;
763 for (i = 1; i <= f; i++)
765 filt[i] = std::cos(M_PI*dt*i/flen);
766 sum += 2*filt[i];
768 for (i = 0; i <= f; i++)
770 filt[i] /= sum;
772 fprintf(stdout, "Will calculate the fluctuation over %d points\n", n-2*f);
773 fprintf(stdout, " using a filter of length %g of %d points\n", flen, 2*f+1);
774 fluctot = 0;
775 for (s = 0; s < nset; s++)
777 fluc = 0;
778 for (i = f; i < n-f; i++)
780 vf = filt[0]*val[s][i];
781 for (j = 1; j <= f; j++)
783 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
785 fluc += gmx::square(val[s][i] - vf);
787 fluc /= n - 2*f;
788 fluctot += fluc;
789 fprintf(stdout, "Set %3d filtered fluctuation: %12.6e\n", s+1, std::sqrt(fluc));
791 fprintf(stdout, "Overall filtered fluctuation: %12.6e\n", std::sqrt(fluctot/nset));
792 fprintf(stdout, "\n");
794 sfree(filt);
797 static void do_fit(FILE *out, int n, gmx_bool bYdy,
798 int ny, real *x0, real **val,
799 int npargs, t_pargs *ppa, const gmx_output_env_t *oenv,
800 const char *fn_fitted)
802 real *c1 = nullptr, *sig = nullptr;
803 double *fitparm;
804 real tendfit, tbeginfit;
805 int i, efitfn, nparm;
807 efitfn = get_acffitfn();
808 nparm = effnNparams(efitfn);
809 fprintf(out, "Will fit to the following function:\n");
810 fprintf(out, "%s\n", effnDescription(efitfn));
811 c1 = val[n];
812 if (bYdy)
814 c1 = val[n];
815 sig = val[n+1];
816 fprintf(out, "Using two columns as y and sigma values\n");
818 else
820 snew(sig, ny);
822 if (opt2parg_bSet("-beginfit", npargs, ppa))
824 tbeginfit = opt2parg_real("-beginfit", npargs, ppa);
826 else
828 tbeginfit = x0[0];
830 if (opt2parg_bSet("-endfit", npargs, ppa))
832 tendfit = opt2parg_real("-endfit", npargs, ppa);
834 else
836 tendfit = x0[ny-1];
839 snew(fitparm, nparm);
840 switch (efitfn)
842 case effnEXP1:
843 fitparm[0] = 0.5;
844 break;
845 case effnEXP2:
846 fitparm[0] = 0.5;
847 fitparm[1] = c1[0];
848 break;
849 case effnEXPEXP:
850 fitparm[0] = 1.0;
851 fitparm[1] = 0.5*c1[0];
852 fitparm[2] = 10.0;
853 break;
854 case effnEXP5:
855 fitparm[0] = fitparm[2] = 0.5*c1[0];
856 fitparm[1] = 10;
857 fitparm[3] = 40;
858 fitparm[4] = 0;
859 break;
860 case effnEXP7:
861 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
862 fitparm[1] = 1;
863 fitparm[3] = 10;
864 fitparm[5] = 100;
865 fitparm[6] = 0;
866 break;
867 case effnEXP9:
868 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
869 fitparm[1] = 0.1;
870 fitparm[3] = 1;
871 fitparm[5] = 10;
872 fitparm[7] = 100;
873 fitparm[8] = 0;
874 break;
875 default:
876 fprintf(out, "Warning: don't know how to initialize the parameters\n");
877 for (i = 0; (i < nparm); i++)
879 fitparm[i] = 1;
882 fprintf(out, "Starting parameters:\n");
883 for (i = 0; (i < nparm); i++)
885 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
887 if (do_lmfit(ny, c1, sig, 0, x0, tbeginfit, tendfit,
888 oenv, bDebugMode(), efitfn, fitparm, 0,
889 fn_fitted) > 0)
891 for (i = 0; (i < nparm); i++)
893 fprintf(out, "a%-2d = %12.5e\n", i+1, fitparm[i]);
896 else
898 fprintf(out, "No solution was found\n");
902 static void print_fitted_function(const char *fitfile,
903 const char *fn_fitted,
904 gmx_bool bXYdy,
905 int nset,
906 int n,
907 real *t,
908 real **val,
909 int npargs,
910 t_pargs *ppa,
911 gmx_output_env_t *oenv)
913 FILE *out_fit = gmx_ffopen(fitfile, "w");
914 if (bXYdy && nset >= 2)
916 do_fit(out_fit, 0, TRUE, n, t, val, npargs, ppa, oenv,
917 fn_fitted);
919 else
921 char *buf2 = nullptr;
922 int s, buflen = 0;
923 if (nullptr != fn_fitted)
925 buflen = std::strlen(fn_fitted)+32;
926 snew(buf2, buflen);
927 std::strncpy(buf2, fn_fitted, buflen);
928 buf2[std::strlen(buf2)-4] = '\0';
930 for (s = 0; s < nset; s++)
932 char *buf = nullptr;
933 if (nullptr != fn_fitted)
935 snew(buf, buflen);
936 snprintf(buf, n, "%s_%d.xvg", buf2, s);
938 do_fit(out_fit, s, FALSE, n, t, val, npargs, ppa, oenv, buf);
939 sfree(buf);
941 sfree(buf2);
943 gmx_ffclose(out_fit);
946 int gmx_analyze(int argc, char *argv[])
948 static const char *desc[] = {
949 "[THISMODULE] reads an ASCII file and analyzes data sets.",
950 "A line in the input file may start with a time",
951 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
952 "Multiple sets can also be",
953 "read when they are separated by & (option [TT]-n[tt]);",
954 "in this case only one [IT]y[it]-value is read from each line.",
955 "All lines starting with # and @ are skipped.",
956 "All analyses can also be done for the derivative of a set",
957 "(option [TT]-d[tt]).[PAR]",
959 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
960 "points are equidistant in time.[PAR]",
962 "[THISMODULE] always shows the average and standard deviation of each",
963 "set, as well as the relative deviation of the third",
964 "and fourth cumulant from those of a Gaussian distribution with the same",
965 "standard deviation.[PAR]",
967 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
968 "Be sure that the time interval between data points is",
969 "much shorter than the time scale of the autocorrelation.[PAR]",
971 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
972 "i/2 periods. The formula is::",
974 " [MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math]",
976 "This is useful for principal components obtained from covariance",
977 "analysis, since the principal components of random diffusion are",
978 "pure cosines.[PAR]",
980 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
982 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
984 "Option [TT]-av[tt] produces the average over the sets.",
985 "Error bars can be added with the option [TT]-errbar[tt].",
986 "The errorbars can represent the standard deviation, the error",
987 "(assuming the points are independent) or the interval containing",
988 "90% of the points, by discarding 5% of the points at the top and",
989 "the bottom.[PAR]",
991 "Option [TT]-ee[tt] produces error estimates using block averaging.",
992 "A set is divided in a number of blocks and averages are calculated for",
993 "each block. The error for the total average is calculated from",
994 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
995 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
996 "These errors are plotted as a function of the block size.",
997 "Also an analytical block average curve is plotted, assuming",
998 "that the autocorrelation is a sum of two exponentials.",
999 "The analytical curve for the block average is::",
1001 " [MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +",
1002 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],",
1004 "where T is the total time.",
1005 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
1006 "When the actual block average is very close to the analytical curve,",
1007 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
1008 "The complete derivation is given in",
1009 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
1011 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
1012 "of each set and over all sets with respect to a filtered average.",
1013 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
1014 "to len/2. len is supplied with the option [TT]-filter[tt].",
1015 "This filter reduces oscillations with period len/2 and len by a factor",
1016 "of 0.79 and 0.33 respectively.[PAR]",
1018 "Option [TT]-g[tt] fits the data to the function given with option",
1019 "[TT]-fitfn[tt].[PAR]",
1021 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
1022 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
1023 "zero or with a negative value are ignored.[PAR]",
1025 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
1026 "on output from [gmx-hbond]. The input file can be taken directly",
1027 "from [TT]gmx hbond -ac[tt], and then the same result should be produced.[PAR]",
1028 "Option [TT]-fitfn[tt] performs curve fitting to a number of different",
1029 "curves that make sense in the context of molecular dynamics, mainly",
1030 "exponential curves. More information is in the manual. To check the output",
1031 "of the fitting procedure the option [TT]-fitted[tt] will print both the",
1032 "original data and the fitted function to a new data file. The fitting",
1033 "parameters are stored as comment in the output file."
1035 static real tb = -1, te = -1, frac = 0.5, filtlen = 0, binwidth = 0.1, aver_start = 0;
1036 static gmx_bool bHaveT = TRUE, bDer = FALSE, bSubAv = TRUE, bAverCorr = FALSE, bXYdy = FALSE;
1037 static gmx_bool bEESEF = FALSE, bEENLC = FALSE, bEeFitAc = FALSE, bPower = FALSE;
1038 static gmx_bool bIntegrate = FALSE, bRegression = FALSE, bLuzar = FALSE;
1039 static int nsets_in = 1, d = 1, nb_min = 4, resol = 10;
1040 static real temp = 298.15, fit_start = 1, fit_end = 60;
1042 /* must correspond to enum avbar* declared at beginning of file */
1043 static const char *avbar_opt[avbarNR+1] = {
1044 nullptr, "none", "stddev", "error", "90", nullptr
1047 t_pargs pa[] = {
1048 { "-time", FALSE, etBOOL, {&bHaveT},
1049 "Expect a time in the input" },
1050 { "-b", FALSE, etREAL, {&tb},
1051 "First time to read from set" },
1052 { "-e", FALSE, etREAL, {&te},
1053 "Last time to read from set" },
1054 { "-n", FALSE, etINT, {&nsets_in},
1055 "Read this number of sets separated by &" },
1056 { "-d", FALSE, etBOOL, {&bDer},
1057 "Use the derivative" },
1058 { "-dp", FALSE, etINT, {&d},
1059 "HIDDENThe derivative is the difference over this number of points" },
1060 { "-bw", FALSE, etREAL, {&binwidth},
1061 "Binwidth for the distribution" },
1062 { "-errbar", FALSE, etENUM, {avbar_opt},
1063 "Error bars for [TT]-av[tt]" },
1064 { "-integrate", FALSE, etBOOL, {&bIntegrate},
1065 "Integrate data function(s) numerically using trapezium rule" },
1066 { "-aver_start", FALSE, etREAL, {&aver_start},
1067 "Start averaging the integral from here" },
1068 { "-xydy", FALSE, etBOOL, {&bXYdy},
1069 "Interpret second data set as error in the y values for integrating" },
1070 { "-regression", FALSE, etBOOL, {&bRegression},
1071 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1072 { "-luzar", FALSE, etBOOL, {&bLuzar},
1073 "Do a Luzar and Chandler analysis on a correlation function and "
1074 "related as produced by [gmx-hbond]. When in addition the "
1075 "[TT]-xydy[tt] flag is given the second and fourth column will be "
1076 "interpreted as errors in c(t) and n(t)." },
1077 { "-temp", FALSE, etREAL, {&temp},
1078 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1079 { "-fitstart", FALSE, etREAL, {&fit_start},
1080 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1081 { "-fitend", FALSE, etREAL, {&fit_end},
1082 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1083 { "-nbmin", FALSE, etINT, {&nb_min},
1084 "HIDDENMinimum number of blocks for block averaging" },
1085 { "-resol", FALSE, etINT, {&resol},
1086 "HIDDENResolution for the block averaging, block size increases with"
1087 " a factor 2^(1/resol)" },
1088 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1089 "HIDDENAlways use a single exponential fit for the error estimate" },
1090 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1091 "HIDDENAllow a negative long-time correlation" },
1092 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1093 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1094 { "-filter", FALSE, etREAL, {&filtlen},
1095 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1096 { "-power", FALSE, etBOOL, {&bPower},
1097 "Fit data to: b t^a" },
1098 { "-subav", FALSE, etBOOL, {&bSubAv},
1099 "Subtract the average before autocorrelating" },
1100 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1101 "Calculate one ACF over all sets" },
1103 #define NPA asize(pa)
1105 FILE *out;
1106 int n, nlast, s, nset, i, j = 0;
1107 real **val, *t, dt, tot, error;
1108 double *av, *sig, cum1, cum2, cum3, cum4, db;
1109 const char *acfile, *msdfile, *ccfile, *distfile, *avfile, *eefile, *fitfile;
1110 gmx_output_env_t *oenv;
1112 t_filenm fnm[] = {
1113 { efXVG, "-f", "graph", ffREAD },
1114 { efXVG, "-ac", "autocorr", ffOPTWR },
1115 { efXVG, "-msd", "msd", ffOPTWR },
1116 { efXVG, "-cc", "coscont", ffOPTWR },
1117 { efXVG, "-dist", "distr", ffOPTWR },
1118 { efXVG, "-av", "average", ffOPTWR },
1119 { efXVG, "-ee", "errest", ffOPTWR },
1120 { efXVG, "-fitted", "fitted", ffOPTWR },
1121 { efLOG, "-g", "fitlog", ffOPTWR }
1123 #define NFILE asize(fnm)
1125 int npargs;
1126 t_pargs *ppa;
1128 npargs = asize(pa);
1129 ppa = add_acf_pargs(&npargs, pa);
1131 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
1132 NFILE, fnm, npargs, ppa, asize(desc), desc, 0, nullptr, &oenv))
1134 sfree(ppa);
1135 return 0;
1138 acfile = opt2fn_null("-ac", NFILE, fnm);
1139 msdfile = opt2fn_null("-msd", NFILE, fnm);
1140 ccfile = opt2fn_null("-cc", NFILE, fnm);
1141 distfile = opt2fn_null("-dist", NFILE, fnm);
1142 avfile = opt2fn_null("-av", NFILE, fnm);
1143 eefile = opt2fn_null("-ee", NFILE, fnm);
1144 if (opt2parg_bSet("-fitfn", npargs, ppa) && acfile == nullptr)
1146 fitfile = opt2fn("-g", NFILE, fnm);
1148 else
1150 fitfile = opt2fn_null("-g", NFILE, fnm);
1153 val = read_xvg_time(opt2fn("-f", NFILE, fnm), bHaveT,
1154 opt2parg_bSet("-b", npargs, ppa), tb,
1155 opt2parg_bSet("-e", npargs, ppa), te,
1156 nsets_in, &nset, &n, &dt, &t);
1157 printf("Read %d sets of %d points, dt = %g\n\n", nset, n, dt);
1159 if (bDer)
1161 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1162 d, d);
1163 n -= d;
1164 for (s = 0; s < nset; s++)
1166 for (i = 0; (i < n); i++)
1168 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1173 if (bIntegrate)
1175 real sum, stddev;
1177 printf("Calculating the integral using the trapezium rule\n");
1179 if (bXYdy)
1181 sum = evaluate_integral(n, t, val[0], val[1], aver_start, &stddev);
1182 printf("Integral %10.3f +/- %10.5f\n", sum, stddev);
1184 else
1186 for (s = 0; s < nset; s++)
1188 sum = evaluate_integral(n, t, val[s], nullptr, aver_start, &stddev);
1189 printf("Integral %d %10.5f +/- %10.5f\n", s+1, sum, stddev);
1194 if (fitfile != nullptr)
1196 print_fitted_function(fitfile,
1197 opt2fn_null("-fitted", NFILE, fnm),
1198 bXYdy, nset,
1199 n, t, val,
1200 npargs, ppa,
1201 oenv);
1204 printf(" std. dev. relative deviation of\n");
1205 printf(" standard --------- cumulants from those of\n");
1206 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1207 printf(" cum. 3 cum. 4\n");
1208 snew(av, nset);
1209 snew(sig, nset);
1210 for (s = 0; (s < nset); s++)
1212 cum1 = 0;
1213 cum2 = 0;
1214 cum3 = 0;
1215 cum4 = 0;
1216 for (i = 0; (i < n); i++)
1218 cum1 += val[s][i];
1220 cum1 /= n;
1221 for (i = 0; (i < n); i++)
1223 db = val[s][i]-cum1;
1224 cum2 += db*db;
1225 cum3 += db*db*db;
1226 cum4 += db*db*db*db;
1228 cum2 /= n;
1229 cum3 /= n;
1230 cum4 /= n;
1231 av[s] = cum1;
1232 sig[s] = std::sqrt(cum2);
1233 if (n > 1)
1235 error = std::sqrt(cum2/(n-1));
1237 else
1239 error = 0;
1241 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1242 s+1, av[s], sig[s], error,
1243 sig[s] != 0.0 ? cum3/(sig[s]*sig[s]*sig[s]*std::sqrt(8/M_PI)) : 0,
1244 sig[s] != 0.0 ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1246 printf("\n");
1248 if (filtlen != 0.0f)
1250 filter(filtlen, n, nset, val, dt);
1253 if (msdfile)
1255 out = xvgropen(msdfile, "Mean square displacement",
1256 "time", "MSD (nm\\S2\\N)", oenv);
1257 nlast = static_cast<int>(n*frac);
1258 for (s = 0; s < nset; s++)
1260 for (j = 0; j <= nlast; j++)
1262 if (j % 100 == 0)
1264 fprintf(stderr, "\r%d", j);
1265 fflush(stderr);
1267 tot = 0;
1268 for (i = 0; i < n-j; i++)
1270 tot += gmx::square(val[s][i]-val[s][i+j]);
1272 tot /= (n-j);
1273 fprintf(out, " %g %8g\n", dt*j, tot);
1275 if (s < nset-1)
1277 fprintf(out, "%s\n", output_env_get_print_xvgr_codes(oenv) ? "&" : "");
1280 xvgrclose(out);
1281 fprintf(stderr, "\r%d, time=%g\n", j-1, (j-1)*dt);
1282 fflush(stderr);
1284 if (ccfile)
1286 plot_coscont(ccfile, n, nset, val, oenv);
1289 if (distfile)
1291 histogram(distfile, binwidth, n, nset, val, oenv);
1293 if (avfile)
1295 average(avfile, nenum(avbar_opt), n, nset, val, t);
1297 if (eefile)
1299 estimate_error(eefile, nb_min, resol, n, nset, av, sig, val, dt,
1300 bEeFitAc, bEESEF, bEENLC, oenv);
1302 if (bPower)
1304 power_fit(n, nset, val, t);
1307 if (acfile != nullptr)
1309 if (bSubAv)
1311 for (s = 0; s < nset; s++)
1313 for (i = 0; i < n; i++)
1315 val[s][i] -= av[s];
1319 do_autocorr(acfile, oenv, "Autocorrelation", n, nset, val, dt,
1320 eacNormal, bAverCorr);
1323 if (bRegression)
1325 regression_analysis(n, bXYdy, t, nset, val);
1328 if (bLuzar)
1330 luzar_correl(n, t, nset, val, temp, bXYdy, fit_start);
1333 view_all(oenv, NFILE, fnm);
1335 return 0;