Remove unused function generate_excls and make clean_excls static
[gromacs.git] / src / gromacs / gmxana / gmx_wham.cpp
blob79900c51a2f439a6b09863ba2ae9ee434215b697
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /*! \internal \file
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
43 #include "gmxpre.h"
45 #include "config.h"
47 #include <cassert>
48 #include <cctype>
49 #include <cmath>
50 #include <cstdio>
51 #include <cstdlib>
52 #include <cstring>
54 #include <algorithm>
55 #include <sstream>
57 #include "gromacs/commandline/pargs.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/functions.h"
62 #include "gromacs/math/units.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/pull_params.h"
67 #include "gromacs/mdtypes/state.h"
68 #include "gromacs/pulling/pull.h"
69 #include "gromacs/random/tabulatednormaldistribution.h"
70 #include "gromacs/random/threefry.h"
71 #include "gromacs/random/uniformintdistribution.h"
72 #include "gromacs/random/uniformrealdistribution.h"
73 #include "gromacs/utility/arraysize.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/futil.h"
78 #include "gromacs/utility/gmxomp.h"
79 #include "gromacs/utility/path.h"
80 #include "gromacs/utility/pleasecite.h"
81 #include "gromacs/utility/smalloc.h"
83 //! longest file names allowed in input files
84 #define WHAM_MAXFILELEN 2048
86 /*! \brief
87 * enum for energy units
89 enum {
90 enSel, en_kJ, en_kCal, en_kT, enNr
92 /*! \brief
93 * enum for type of input files (pdos, tpr, or pullf)
95 enum {
96 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
99 /*! \brief enum for bootstrapping method
101 * These bootstrap methods are supported:
102 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
103 * (bsMethod_BayesianHist)
104 * - bootstrap complete histograms (bsMethod_hist)
105 * - bootstrap trajectories from given umbrella histograms. This generates new
106 * "synthetic" histograms (bsMethod_traj)
107 * - bootstrap trajectories from Gaussian with mu/sigma computed from
108 * the respective histogram (bsMethod_trajGauss). This gives very similar
109 * results compared to bsMethod_traj.
111 * ********************************************************************
112 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
113 * JS Hub, BL de Groot, D van der Spoel
114 * g_wham - A free weighted histogram analysis implementation including
115 * robust error and autocorrelation estimates,
116 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
117 * ********************************************************************
119 enum {
120 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
121 bsMethod_traj, bsMethod_trajGauss
124 //! Parameters of one pull coodinate
125 typedef struct
127 int pull_type; //!< such as constraint, umbrella, ...
128 int geometry; //!< such as distance, direction, cylinder
129 int ngroup; //!< the number of pull groups involved
130 ivec dim; //!< pull dimension with geometry distance
131 int ndim; //!< nr of pull_dim != 0
132 real k; //!< force constants in tpr file
133 real init_dist; //!< reference displacement
134 char coord_unit[256]; //!< unit of the displacement
135 } t_pullcoord;
137 //! Parameters of the umbrella potentials
138 typedef struct
141 * \name Using umbrella pull code since gromacs 4.x
143 /*!\{*/
144 int npullcrds; //!< nr of umbrella pull coordinates for reading
145 t_pullcoord *pcrd; //!< the pull coordinates
146 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
147 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
148 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
150 /*!\}*/
152 * \name Using PDO files common until gromacs 3.x
154 /*!\{*/
155 int nSkip;
156 char Reference[256];
157 int nPull;
158 int nDim;
159 ivec Dims;
160 char PullName[4][256];
161 double UmbPos[4][3];
162 double UmbCons[4][3];
163 /*!\}*/
164 } t_UmbrellaHeader;
166 //! Data in the umbrella histograms
167 typedef struct
169 int nPull; //!< nr of pull groups in this pdo or pullf/x file
170 double **Histo; //!< nPull histograms
171 double **cum; //!< nPull cumulative distribution functions
172 int nBin; //!< nr of bins. identical to opt->bins
173 double *k; //!< force constants for the nPull coords
174 double *pos; //!< umbrella positions for the nPull coords
175 double *z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
176 int *N; //!< nr of data points in nPull histograms.
177 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
179 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
181 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
182 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
184 double *g;
185 double *tau; //!< intetrated autocorrelation time (IACT)
186 double *tausmooth; //!< smoothed IACT
188 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
190 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
191 gmx_bool **bContrib;
192 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
194 /*! \brief average force estimated from average displacement, fAv=dzAv*k
196 * Used for integration to guess the potential.
198 real *forceAv;
199 real *aver; //!< average of histograms
200 real *sigma; //!< stddev of histograms
201 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
202 } t_UmbrellaWindow;
204 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
205 typedef struct
207 int n; //!< total nr of pull coords in this tpr file
208 int nUse; //!< nr of pull coords used
209 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
210 } t_coordselection;
212 //! Parameters of WHAM
213 typedef struct // NOLINT(clang-analyzer-optin.performance.Padding)
216 * \name Input stuff
218 /*!\{*/
219 const char *fnTpr, *fnPullf, *fnCoordSel;
220 const char *fnPdo, *fnPullx; //!< file names of input
221 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
222 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
224 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
225 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
226 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
227 t_coordselection *coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
228 /*!\}*/
230 * \name Basic WHAM options
232 /*!\{*/
233 int bins; //!< nr of bins, min, max, and dz of profile
234 real min, max, dz;
235 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
236 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
237 /*!\}*/
239 * \name Output control
241 /*!\{*/
242 gmx_bool bLog; //!< energy output (instead of probability) for profile
243 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
244 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
245 /*! \brief after wham, set prof to zero at this z-position.
246 * When bootstrapping, set zProf0 to a "stable" reference position.
248 real zProf0;
249 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
251 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
252 gmx_bool bAuto; //!< determine min and max automatically but do not exit
254 gmx_bool verbose; //!< more noisy wham mode
255 int stepchange; //!< print maximum change in prof after how many interations
256 gmx_output_env_t *oenv; //!< xvgr options
257 /*!\}*/
259 * \name Autocorrelation stuff
261 /*!\{*/
262 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
263 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
264 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
265 real acTrestart; //!< when computing ACT, time between restarting points
267 /* \brief Enforce the same weight for each umbella window, that is
268 * calculate with the same number of data points for
269 * each window. That can be reasonable, if the histograms
270 * have different length, but due to autocorrelation,
271 * a longer simulation should not have larger weightin wham.
273 gmx_bool bHistEq;
274 /*!\}*/
277 * \name Bootstrapping stuff
279 /*!\{*/
280 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
282 /* \brief bootstrap method
284 * if == bsMethod_hist, consider complete histograms as independent
285 * data points and, hence, only mix complete histograms.
286 * if == bsMethod_BayesianHist, consider complete histograms
287 * as independent data points, but assign random weights
288 * to the histograms during the bootstrapping ("Bayesian bootstrap")
289 * In case of long correlations (e.g., inside a channel), these
290 * will yield a more realistic error.
291 * if == bsMethod_traj(Gauss), generate synthetic histograms
292 * for each given
293 * histogram by generating an autocorrelated random sequence
294 * that is distributed according to the respective given
295 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
296 * (instead of from the umbrella histogram) to generate a new
297 * histogram.
299 int bsMethod;
301 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
302 real tauBootStrap;
304 /* \brief when mixing histograms, mix only histograms withing blocks
305 long the reaction coordinate xi. Avoids gaps along xi. */
306 int histBootStrapBlockLength;
308 int bsSeed; //!< random seed for bootstrapping
310 /* \brief Write cumulative distribution functions (CDFs) of histograms
311 and write the generated histograms for each bootstrap */
312 gmx_bool bs_verbose;
313 /*!\}*/
315 * \name tabulated umbrella potential stuff
317 /*!\{*/
318 gmx_bool bTab;
319 double *tabX, *tabY, tabMin, tabMax, tabDz;
320 int tabNbins;
321 /*!\}*/
322 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
323 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
324 } t_UmbrellaOptions;
326 //! Make an umbrella window (may contain several histograms)
327 static t_UmbrellaWindow * initUmbrellaWindows(int nwin)
329 t_UmbrellaWindow *win;
330 int i;
331 snew(win, nwin);
332 for (i = 0; i < nwin; i++)
334 win[i].Histo = win[i].cum = nullptr;
335 win[i].k = win[i].pos = win[i].z = nullptr;
336 win[i].N = win[i].Ntot = nullptr;
337 win[i].g = win[i].tau = win[i].tausmooth = nullptr;
338 win[i].bContrib = nullptr;
339 win[i].ztime = nullptr;
340 win[i].forceAv = nullptr;
341 win[i].aver = win[i].sigma = nullptr;
342 win[i].bsWeight = nullptr;
344 return win;
347 //! Delete an umbrella window (may contain several histograms)
348 static void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
350 int i, j;
351 for (i = 0; i < nwin; i++)
353 if (win[i].Histo)
355 for (j = 0; j < win[i].nPull; j++)
357 sfree(win[i].Histo[j]);
360 if (win[i].cum)
362 for (j = 0; j < win[i].nPull; j++)
364 sfree(win[i].cum[j]);
367 if (win[i].bContrib)
369 for (j = 0; j < win[i].nPull; j++)
371 sfree(win[i].bContrib[j]);
374 sfree(win[i].Histo);
375 sfree(win[i].cum);
376 sfree(win[i].k);
377 sfree(win[i].pos);
378 sfree(win[i].z);
379 sfree(win[i].N);
380 sfree(win[i].Ntot);
381 sfree(win[i].g);
382 sfree(win[i].tau);
383 sfree(win[i].tausmooth);
384 sfree(win[i].bContrib);
385 sfree(win[i].ztime);
386 sfree(win[i].forceAv);
387 sfree(win[i].aver);
388 sfree(win[i].sigma);
389 sfree(win[i].bsWeight);
391 sfree(win);
394 /*! \brief
395 * Read and setup tabulated umbrella potential
397 static void setup_tab(const char *fn, t_UmbrellaOptions *opt)
399 int i, ny, nl;
400 double **y;
402 printf("Setting up tabulated potential from file %s\n", fn);
403 nl = read_xvg(fn, &y, &ny);
404 opt->tabNbins = nl;
405 if (ny != 2)
407 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
409 opt->tabMin = y[0][0];
410 opt->tabMax = y[0][nl-1];
411 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
412 if (opt->tabDz <= 0)
414 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
415 "ascending z-direction", fn);
417 for (i = 0; i < nl-1; i++)
419 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
421 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", fn);
424 snew(opt->tabY, nl);
425 snew(opt->tabX, nl);
426 for (i = 0; i < nl; i++)
428 opt->tabX[i] = y[0][i];
429 opt->tabY[i] = y[1][i];
431 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
432 opt->tabMin, opt->tabMax, opt->tabDz);
435 //! Read the header of an PDO file (position, force const, nr of groups)
436 static void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
438 char line[2048];
439 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
440 int i;
441 std::istringstream ist;
443 /* line 1 */
444 if (fgets(line, 2048, file) == nullptr)
446 gmx_fatal(FARGS, "Error reading header from pdo file\n");
448 ist.str(line);
449 ist >> Buffer0 >> Buffer1 >> Buffer2;
450 if (std::strcmp(Buffer1, "UMBRELLA") != 0)
452 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
453 "(Found in first line: `%s')\n",
454 Buffer1, "UMBRELLA", line);
456 if (std::strcmp(Buffer2, "3.0") != 0)
458 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
461 /* line 2 */
462 if (fgets(line, 2048, file) == nullptr)
464 gmx_fatal(FARGS, "Error reading header from pdo file\n");
466 ist.str(line);
467 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
468 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
470 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
471 if (header->nDim != 1)
473 gmx_fatal(FARGS, "Currently only supports one dimension");
476 /* line3 */
477 if (fgets(line, 2048, file) == nullptr)
479 gmx_fatal(FARGS, "Error reading header from pdo file\n");
481 ist.str(line);
482 ist >> Buffer0 >> Buffer1 >> header->nSkip;
484 /* line 4 */
485 if (fgets(line, 2048, file) == nullptr)
487 gmx_fatal(FARGS, "Error reading header from pdo file\n");
489 ist.str(line);
490 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
492 /* line 5 */
493 if (fgets(line, 2048, file) == nullptr)
495 gmx_fatal(FARGS, "Error reading header from pdo file\n");
497 ist.str(line);
498 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
500 if (opt->verbose)
502 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
503 header->Reference);
506 for (i = 0; i < header->nPull; ++i)
508 if (fgets(line, 2048, file) == nullptr)
510 gmx_fatal(FARGS, "Error reading header from pdo file\n");
512 ist.str(line);
513 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
514 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
515 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
517 if (opt->verbose)
519 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
520 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
524 if (fgets(line, 2048, file) == nullptr)
526 gmx_fatal(FARGS, "Cannot read from file\n");
528 ist.str(line);
529 ist >> Buffer3;
530 if (std::strcmp(Buffer3, "#####") != 0)
532 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
536 //! smarter fgets
537 static char *fgets3(FILE *fp, char ptr[], int *len)
539 char *p;
540 int slen;
542 if (fgets(ptr, *len-1, fp) == nullptr)
544 return nullptr;
546 p = ptr;
547 while ((std::strchr(ptr, '\n') == nullptr) && (!feof(fp)))
549 /* This line is longer than len characters, let's increase len! */
550 *len += STRLEN;
551 p += STRLEN;
552 srenew(ptr, *len);
553 if (fgets(p-1, STRLEN, fp) == nullptr)
555 break;
558 slen = std::strlen(ptr);
559 if (ptr[slen-1] == '\n')
561 ptr[slen-1] = '\0';
564 return ptr;
567 /*! \brief Read the data columns of and PDO file.
569 * TO DO: Get rid of the scanf function to avoid the clang warning.
570 * At the moment, this warning is avoided by hiding the format string
571 * the variable fmtlf.
573 static void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
574 int fileno, t_UmbrellaWindow * win,
575 t_UmbrellaOptions *opt,
576 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
578 int i, inttemp, bins, count, ntot;
579 real minval, maxval, minfound = 1e20, maxfound = -1e20;
580 double temp, time, time0 = 0, dt;
581 char *ptr = nullptr;
582 t_UmbrellaWindow * window = nullptr;
583 gmx_bool timeok, dt_ok = true;
584 char *tmpbuf = nullptr, fmt[256], fmtign[256], fmtlf[5] = "%lf";
585 int len = STRLEN, dstep = 1;
586 const int blocklen = 4096;
587 int *lennow = nullptr;
589 if (!bGetMinMax)
591 bins = opt->bins;
592 minval = opt->min;
593 maxval = opt->max;
595 window = win+fileno;
596 /* Need to alocate memory and set up structure */
597 window->nPull = header->nPull;
598 window->nBin = bins;
600 snew(window->Histo, window->nPull);
601 snew(window->z, window->nPull);
602 snew(window->k, window->nPull);
603 snew(window->pos, window->nPull);
604 snew(window->N, window->nPull);
605 snew(window->Ntot, window->nPull);
606 snew(window->g, window->nPull);
607 snew(window->bsWeight, window->nPull);
609 window->bContrib = nullptr;
611 if (opt->bCalcTauInt)
613 snew(window->ztime, window->nPull);
615 else
617 window->ztime = nullptr;
619 snew(lennow, window->nPull);
621 for (i = 0; i < window->nPull; ++i)
623 window->z[i] = 1;
624 window->bsWeight[i] = 1.;
625 snew(window->Histo[i], bins);
626 window->k[i] = header->UmbCons[i][0];
627 window->pos[i] = header->UmbPos[i][0];
628 window->N[i] = 0;
629 window->Ntot[i] = 0;
630 window->g[i] = 1.;
631 if (opt->bCalcTauInt)
633 window->ztime[i] = nullptr;
637 /* Done with setup */
639 else
641 minfound = 1e20;
642 maxfound = -1e20;
643 minval = maxval = bins = 0; /* Get rid of warnings */
646 count = 0;
647 snew(tmpbuf, len);
648 while ( (ptr = fgets3(file, tmpbuf, &len)) != nullptr)
650 trim(ptr);
652 if (ptr[0] == '#' || std::strlen(ptr) < 2)
654 continue;
657 /* Initiate format string */
658 fmtign[0] = '\0';
659 std::strcat(fmtign, "%*s");
661 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
662 /* Round time to fs */
663 time = 1.0/1000*( gmx::roundToInt64(time*1000) );
665 /* get time step of pdo file */
666 if (count == 0)
668 time0 = time;
670 else if (count == 1)
672 dt = time-time0;
673 if (opt->dt > 0.0)
675 dstep = gmx::roundToInt(opt->dt/dt);
676 if (dstep == 0)
678 dstep = 1;
681 if (!bGetMinMax)
683 window->dt = dt*dstep;
686 count++;
688 dt_ok = ((count-1)%dstep == 0);
689 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
690 /* if (opt->verbose)
691 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
692 time,opt->tmin, opt->tmax, dt_ok,timeok); */
694 if (timeok)
696 for (i = 0; i < header->nPull; ++i)
698 std::strcpy(fmt, fmtign);
699 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
700 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
701 if (sscanf(ptr, fmt, &temp))
703 temp += header->UmbPos[i][0];
704 if (bGetMinMax)
706 if (temp < minfound)
708 minfound = temp;
710 if (temp > maxfound)
712 maxfound = temp;
715 else
717 if (opt->bCalcTauInt)
719 /* save time series for autocorrelation analysis */
720 ntot = window->Ntot[i];
721 if (ntot >= lennow[i])
723 lennow[i] += blocklen;
724 srenew(window->ztime[i], lennow[i]);
726 window->ztime[i][ntot] = temp;
729 temp -= minval;
730 temp /= (maxval-minval);
731 temp *= bins;
732 temp = std::floor(temp);
734 inttemp = static_cast<int> (temp);
735 if (opt->bCycl)
737 if (inttemp < 0)
739 inttemp += bins;
741 else if (inttemp >= bins)
743 inttemp -= bins;
747 if (inttemp >= 0 && inttemp < bins)
749 window->Histo[i][inttemp] += 1.;
750 window->N[i]++;
752 window->Ntot[i]++;
757 if (time > opt->tmax)
759 if (opt->verbose)
761 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
763 break;
767 if (bGetMinMax)
769 *mintmp = minfound;
770 *maxtmp = maxfound;
773 sfree(lennow);
774 sfree(tmpbuf);
777 /*! \brief Set identical weights for all histograms
779 * Normally, the weight is given by the number data points in each
780 * histogram, together with the autocorrelation time. This can be overwritten
781 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
782 * an appropriate autocorrelation times instead of using this function.
784 static void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
786 int i, k, j, NEnforced;
787 double ratio;
789 NEnforced = window[0].Ntot[0];
790 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
791 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
792 /* enforce all histograms to have the same weight as the very first histogram */
794 for (j = 0; j < nWindows; ++j)
796 for (k = 0; k < window[j].nPull; ++k)
798 ratio = 1.0*NEnforced/window[j].Ntot[k];
799 for (i = 0; i < window[0].nBin; ++i)
801 window[j].Histo[k][i] *= ratio;
803 window[j].N[k] = gmx::roundToInt(ratio*window[j].N[k]);
808 /*! \brief Simple linear interpolation between two given tabulated points
810 static double tabulated_pot(double dist, t_UmbrellaOptions *opt)
812 int jl, ju;
813 double pl, pu, dz, dp;
815 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
816 ju = jl+1;
817 if (jl < 0 || ju >= opt->tabNbins)
819 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
820 "Provide an extended table.", dist, jl, ju);
822 pl = opt->tabY[jl];
823 pu = opt->tabY[ju];
824 dz = dist-opt->tabX[jl];
825 dp = (pu-pl)*dz/opt->tabDz;
826 return pl+dp;
830 /*! \brief
831 * Check which bins substiantially contribute (accelerates WHAM)
833 * Don't worry, that routine does not mean we compute the PMF in limited precision.
834 * After rapid convergence (using only substiantal contributions), we always switch to
835 * full precision.
837 static void setup_acc_wham(const double *profile, t_UmbrellaWindow * window, int nWindows,
838 t_UmbrellaOptions *opt)
840 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
841 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
842 gmx_bool bAnyContrib;
843 static int bFirst = 1;
844 static double wham_contrib_lim;
846 if (bFirst)
848 for (i = 0; i < nWindows; ++i)
850 nGrptot += window[i].nPull;
852 wham_contrib_lim = opt->Tolerance/nGrptot;
855 ztot = opt->max-opt->min;
856 ztot_half = ztot/2;
858 for (i = 0; i < nWindows; ++i)
860 if (!window[i].bContrib)
862 snew(window[i].bContrib, window[i].nPull);
864 for (j = 0; j < window[i].nPull; ++j)
866 if (!window[i].bContrib[j])
868 snew(window[i].bContrib[j], opt->bins);
870 bAnyContrib = FALSE;
871 for (k = 0; k < opt->bins; ++k)
873 temp = (1.0*k+0.5)*dz+min;
874 distance = temp - window[i].pos[j]; /* distance to umbrella center */
875 if (opt->bCycl)
876 { /* in cyclic wham: */
877 if (distance > ztot_half) /* |distance| < ztot_half */
879 distance -= ztot;
881 else if (distance < -ztot_half)
883 distance += ztot;
886 /* Note: there are two contributions to bin k in the wham equations:
887 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
888 ii) exp(- U/(BOLTZ*opt->Temperature))
889 where U is the umbrella potential
890 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
893 if (!opt->bTab)
895 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
897 else
899 U = tabulated_pot(distance, opt); /* Use tabulated potential */
901 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
902 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
903 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
904 bAnyContrib = bAnyContrib || window[i].bContrib[j][k];
905 if (window[i].bContrib[j][k])
907 nContrib++;
909 nTot++;
911 /* If this histo is far outside min and max all bContrib may be FALSE,
912 causing a floating point exception later on. To avoid that, switch
913 them all to true.*/
914 if (!bAnyContrib)
916 for (k = 0; k < opt->bins; ++k)
918 window[i].bContrib[j][k] = TRUE;
923 if (bFirst)
925 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
926 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
929 if (opt->verbose)
931 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
932 nContrib, nTot);
934 bFirst = 0;
937 //! Compute the PMF (one of the two main WHAM routines)
938 static void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
939 t_UmbrellaOptions *opt, gmx_bool bExact)
941 double ztot_half, ztot, min = opt->min, dz = opt->dz;
943 ztot = opt->max-opt->min;
944 ztot_half = ztot/2;
946 #pragma omp parallel
950 int nthreads = gmx_omp_get_max_threads();
951 int thread_id = gmx_omp_get_thread_num();
952 int i;
953 int i0 = thread_id*opt->bins/nthreads;
954 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
956 for (i = i0; i < i1; ++i)
958 int j, k;
959 double num, denom, invg, temp = 0, distance, U = 0;
960 num = denom = 0.;
961 for (j = 0; j < nWindows; ++j)
963 for (k = 0; k < window[j].nPull; ++k)
965 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
966 temp = (1.0*i+0.5)*dz+min;
967 num += invg*window[j].Histo[k][i];
969 if (!(bExact || window[j].bContrib[k][i]))
971 continue;
973 distance = temp - window[j].pos[k]; /* distance to umbrella center */
974 if (opt->bCycl)
975 { /* in cyclic wham: */
976 if (distance > ztot_half) /* |distance| < ztot_half */
978 distance -= ztot;
980 else if (distance < -ztot_half)
982 distance += ztot;
986 if (!opt->bTab)
988 U = 0.5*window[j].k[k]*gmx::square(distance); /* harmonic potential assumed. */
990 else
992 U = tabulated_pot(distance, opt); /* Use tabulated potential */
994 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
997 profile[i] = num/denom;
1000 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1004 //! Compute the free energy offsets z (one of the two main WHAM routines)
1005 static double calc_z(const double * profile, t_UmbrellaWindow * window, int nWindows,
1006 t_UmbrellaOptions *opt, gmx_bool bExact)
1008 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1009 double maxglob = -1e20;
1011 ztot = opt->max-opt->min;
1012 ztot_half = ztot/2;
1014 #pragma omp parallel
1018 int nthreads = gmx_omp_get_max_threads();
1019 int thread_id = gmx_omp_get_thread_num();
1020 int i;
1021 int i0 = thread_id*nWindows/nthreads;
1022 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1023 double maxloc = -1e20;
1025 for (i = i0; i < i1; ++i)
1027 double total = 0, temp, distance, U = 0;
1028 int j, k;
1030 for (j = 0; j < window[i].nPull; ++j)
1032 total = 0;
1033 for (k = 0; k < window[i].nBin; ++k)
1035 if (!(bExact || window[i].bContrib[j][k]))
1037 continue;
1039 temp = (1.0*k+0.5)*dz+min;
1040 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1041 if (opt->bCycl)
1042 { /* in cyclic wham: */
1043 if (distance > ztot_half) /* |distance| < ztot_half */
1045 distance -= ztot;
1047 else if (distance < -ztot_half)
1049 distance += ztot;
1053 if (!opt->bTab)
1055 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
1057 else
1059 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1061 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1063 /* Avoid floating point exception if window is far outside min and max */
1064 if (total != 0.0)
1066 total = -std::log(total);
1068 else
1070 total = 1000.0;
1072 temp = std::abs(total - window[i].z[j]);
1073 if (temp > maxloc)
1075 maxloc = temp;
1077 window[i].z[j] = total;
1080 /* Now get maximum maxloc from the threads and put in maxglob */
1081 if (maxloc > maxglob)
1083 #pragma omp critical
1085 if (maxloc > maxglob)
1087 maxglob = maxloc;
1092 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1095 return maxglob;
1098 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1099 static void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1101 int i, j, bins = opt->bins;
1102 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1103 double z, z1;
1105 if (min > 0. || max < 0.)
1107 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1108 opt->min, opt->max);
1111 snew(prof2, bins);
1113 for (i = 0; i < bins; i++)
1115 z = min+(i+0.5)*dz;
1116 zsym = -z;
1117 /* bin left of zsym */
1118 j = gmx::roundToInt((zsym-min)/dz)-1;
1119 if (j >= 0 && (j+1) < bins)
1121 /* interpolate profile linearly between bins j and j+1 */
1122 z1 = min+(j+0.5)*dz;
1123 deltaz = zsym-z1;
1124 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1125 /* average between left and right */
1126 prof2[i] = 0.5*(profsym+profile[i]);
1128 else
1130 prof2[i] = profile[i];
1134 std::memcpy(profile, prof2, bins*sizeof(double));
1135 sfree(prof2);
1138 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1139 static void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1141 int i, bins, imin;
1142 double unit_factor = 1., diff;
1144 bins = opt->bins;
1146 /* Not log? Nothing to do! */
1147 if (!opt->bLog)
1149 return;
1152 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1153 if (opt->unit == en_kT)
1155 unit_factor = 1.0;
1157 else if (opt->unit == en_kJ)
1159 unit_factor = BOLTZ*opt->Temperature;
1161 else if (opt->unit == en_kCal)
1163 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1165 else
1167 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1170 for (i = 0; i < bins; i++)
1172 if (profile[i] > 0.0)
1174 profile[i] = -std::log(profile[i])*unit_factor;
1178 /* shift to zero at z=opt->zProf0 */
1179 if (!opt->bProf0Set)
1181 diff = profile[0];
1183 else
1185 /* Get bin with shortest distance to opt->zProf0
1186 (-0.5 from bin position and +0.5 from rounding cancel) */
1187 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1188 if (imin < 0)
1190 imin = 0;
1192 else if (imin >= bins)
1194 imin = bins-1;
1196 diff = profile[imin];
1199 /* Shift to zero */
1200 for (i = 0; i < bins; i++)
1202 profile[i] -= diff;
1206 //! Make an array of random integers (used for bootstrapping)
1207 static void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine * rng)
1209 gmx::UniformIntDistribution<int> dist(0, blockLength-1);
1211 int ipull, blockBase, nr, ipullRandom;
1213 if (blockLength == 0)
1215 blockLength = nPull;
1218 for (ipull = 0; ipull < nPull; ipull++)
1220 blockBase = (ipull/blockLength)*blockLength;
1222 { /* make sure nothing bad happens in the last block */
1223 nr = dist(*rng); // [0,blockLength-1]
1224 ipullRandom = blockBase + nr;
1226 while (ipullRandom >= nPull);
1227 if (ipullRandom < 0 || ipullRandom >= nPull)
1229 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1230 "blockLength = %d, blockBase = %d\n",
1231 ipullRandom, nPull, nr, blockLength, blockBase);
1233 randomArray[ipull] = ipullRandom;
1237 /*! \brief Set pull group information of a synthetic histogram
1239 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1240 * but it is not required if we bootstrap complete histograms.
1242 static void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1243 t_UmbrellaWindow *thisWindow, int pullid)
1245 synthWindow->N [0] = thisWindow->N [pullid];
1246 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1247 synthWindow->pos [0] = thisWindow->pos [pullid];
1248 synthWindow->z [0] = thisWindow->z [pullid];
1249 synthWindow->k [0] = thisWindow->k [pullid];
1250 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1251 synthWindow->g [0] = thisWindow->g [pullid];
1252 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1255 /*! \brief Calculate cumulative distribution function of of all histograms.
1257 * This allow to create random number sequences
1258 * which are distributed according to the histograms. Required to generate
1259 * the "synthetic" histograms for the Bootstrap method
1261 static void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1262 t_UmbrellaOptions *opt, const char *fnhist, const char *xlabel)
1264 int i, j, k, nbin;
1265 double last;
1266 std::string fn;
1267 FILE *fp = nullptr;
1269 if (opt->bs_verbose)
1271 fn = gmx::Path::concatenateBeforeExtension(fnhist, "_cumul");
1272 fp = xvgropen(fn.c_str(), "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1275 nbin = opt->bins;
1276 for (i = 0; i < nWindows; i++)
1278 snew(window[i].cum, window[i].nPull);
1279 for (j = 0; j < window[i].nPull; j++)
1281 snew(window[i].cum[j], nbin+1);
1282 window[i].cum[j][0] = 0.;
1283 for (k = 1; k <= nbin; k++)
1285 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1288 /* normalize CDFs. Ensure cum[nbin]==1 */
1289 last = window[i].cum[j][nbin];
1290 for (k = 0; k <= nbin; k++)
1292 window[i].cum[j][k] /= last;
1297 printf("Cumulative distribution functions of all histograms created.\n");
1298 if (opt->bs_verbose)
1300 for (k = 0; k <= nbin; k++)
1302 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1303 for (i = 0; i < nWindows; i++)
1305 for (j = 0; j < window[i].nPull; j++)
1307 fprintf(fp, "%g\t", window[i].cum[j][k]);
1310 fprintf(fp, "\n");
1312 printf("Wrote cumulative distribution functions to %s\n", fn.c_str());
1313 xvgrclose(fp);
1318 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1320 * This is used to generate a random sequence distributed according to a histogram
1322 static void searchCumulative(const double xx[], int n, double x, int *j)
1324 int ju, jm, jl;
1326 jl = -1;
1327 ju = n;
1328 while (ju-jl > 1)
1330 jm = (ju+jl) >> 1;
1331 if (x >= xx[jm])
1333 jl = jm;
1335 else
1337 ju = jm;
1340 if (x == xx[0])
1342 *j = 0;
1344 else if (x == xx[n-1])
1346 *j = n-2;
1348 else
1350 *j = jl;
1354 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1355 static void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1356 int pullid, t_UmbrellaOptions *opt)
1358 int N, i, nbins, r_index, ibin;
1359 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1360 char errstr[1024];
1362 N = thisWindow->N[pullid];
1363 dt = thisWindow->dt;
1364 nbins = thisWindow->nBin;
1366 /* tau = autocorrelation time */
1367 if (opt->tauBootStrap > 0.0)
1369 tausteps = opt->tauBootStrap/dt;
1371 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1373 /* calc tausteps from g=1+2tausteps */
1374 g = thisWindow->g[pullid];
1375 tausteps = (g-1)/2;
1377 else
1379 sprintf(errstr,
1380 "When generating hypothetical trajectories from given umbrella histograms,\n"
1381 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1382 "cannot be predicted. You have 3 options:\n"
1383 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1384 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1385 std::strcat(errstr,
1386 " with option -iiact for all umbrella windows.\n"
1387 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1388 " Use option (3) only if you are sure what you're doing, you may severely\n"
1389 " underestimate the error if a too small ACT is given.\n");
1390 gmx_fatal(FARGS, "%s", errstr);
1393 synthWindow->N [0] = N;
1394 synthWindow->pos [0] = thisWindow->pos[pullid];
1395 synthWindow->z [0] = thisWindow->z[pullid];
1396 synthWindow->k [0] = thisWindow->k[pullid];
1397 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1398 synthWindow->g [0] = thisWindow->g [pullid];
1399 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1401 for (i = 0; i < nbins; i++)
1403 synthWindow->Histo[0][i] = 0.;
1406 if (opt->bsMethod == bsMethod_trajGauss)
1408 sig = thisWindow->sigma [pullid];
1409 mu = thisWindow->aver [pullid];
1412 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1413 Use the following:
1414 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1415 then
1416 z = a*x + sqrt(1-a^2)*y
1417 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1418 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1419 function
1420 C(t) = exp(-t/tau) with tau=-1/ln(a)
1422 Then, use error function to turn the Gaussian random variable into a uniformly
1423 distributed one in [0,1]. Eventually, use cumulative distribution function of
1424 histogram to get random variables distributed according to histogram.
1425 Note: The ACT of the flat distribution and of the generated histogram is not
1426 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1428 a = std::exp(-1.0/tausteps);
1429 ap = std::sqrt(1.0-a*a);
1430 invsqrt2 = 1.0/std::sqrt(2.0);
1432 /* init random sequence */
1433 x = opt->normalDistribution(opt->rng);
1435 if (opt->bsMethod == bsMethod_traj)
1437 /* bootstrap points from the umbrella histograms */
1438 for (i = 0; i < N; i++)
1440 y = opt->normalDistribution(opt->rng);
1441 x = a*x+ap*y;
1442 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1443 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1445 r = 0.5*(1+std::erf(x*invsqrt2));
1446 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1447 synthWindow->Histo[0][r_index] += 1.;
1450 else if (opt->bsMethod == bsMethod_trajGauss)
1452 /* bootstrap points from a Gaussian with the same average and sigma
1453 as the respective umbrella histogram. The idea was, that -given
1454 limited sampling- the bootstrapped histograms are otherwise biased
1455 from the limited sampling of the US histos. However, bootstrapping from
1456 the Gaussian seems to yield a similar estimate. */
1457 i = 0;
1458 while (i < N)
1460 y = opt->normalDistribution(opt->rng);
1461 x = a*x+ap*y;
1462 z = x*sig+mu;
1463 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1464 if (opt->bCycl)
1466 if (ibin < 0)
1468 while ( (ibin += nbins) < 0)
1473 else if (ibin >= nbins)
1475 while ( (ibin -= nbins) >= nbins)
1482 if (ibin >= 0 && ibin < nbins)
1484 synthWindow->Histo[0][ibin] += 1.;
1485 i++;
1489 else
1491 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1495 /*! \brief Write all histograms to a file
1497 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1498 * sets of bootstrapped histograms.
1500 static void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1501 int bs_index, t_UmbrellaOptions *opt, const char *xlabel)
1503 std::string fn, title;
1504 FILE *fp;
1505 int bins, l, i, j;
1507 if (bs_index >= 0)
1509 fn = gmx::Path::concatenateBeforeExtension(fnhist, gmx::formatString("_bs%d", bs_index));
1510 title = gmx::formatString("Umbrella histograms. Bootstrap #%d", bs_index);
1512 else
1514 fn = gmx_strdup(fnhist);
1515 title = gmx::formatString("Umbrella histograms");
1518 fp = xvgropen(fn.c_str(), title.c_str(), xlabel, "count", opt->oenv);
1519 bins = opt->bins;
1521 /* Write histograms */
1522 for (l = 0; l < bins; ++l)
1524 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1525 for (i = 0; i < nWindows; ++i)
1527 for (j = 0; j < window[i].nPull; ++j)
1529 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1532 fprintf(fp, "\n");
1535 xvgrclose(fp);
1536 printf("Wrote %s\n", fn.c_str());
1539 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1540 static void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1542 int i;
1543 double *r;
1544 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1546 snew(r, nAllPull);
1548 /* generate ordered random numbers between 0 and nAllPull */
1549 for (i = 0; i < nAllPull-1; i++)
1551 r[i] = dist(opt->rng);
1553 std::sort(r, r+nAllPull-1);
1554 r[nAllPull-1] = 1.0*nAllPull;
1556 synthwin[0].bsWeight[0] = r[0];
1557 for (i = 1; i < nAllPull; i++)
1559 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1562 /* avoid to have zero weight by adding a tiny value */
1563 for (i = 0; i < nAllPull; i++)
1565 if (synthwin[i].bsWeight[0] < 1e-5)
1567 synthwin[i].bsWeight[0] = 1e-5;
1571 sfree(r);
1574 //! The main bootstrapping routine
1575 static void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1576 const char *xlabel, char* ylabel, double *profile,
1577 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1579 t_UmbrellaWindow * synthWindow;
1580 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1581 int i, j, *randomArray = nullptr, winid, pullid, ib;
1582 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1583 FILE *fp;
1584 gmx_bool bExact = FALSE;
1586 /* init random generator */
1587 if (opt->bsSeed == 0)
1589 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1591 opt->rng.seed(opt->bsSeed);
1593 snew(bsProfile, opt->bins);
1594 snew(bsProfiles_av, opt->bins);
1595 snew(bsProfiles_av2, opt->bins);
1597 /* Create array of all pull groups. Note that different windows
1598 may have different nr of pull groups
1599 First: Get total nr of pull groups */
1600 nAllPull = 0;
1601 for (i = 0; i < nWindows; i++)
1603 nAllPull += window[i].nPull;
1605 snew(allPull_winId, nAllPull);
1606 snew(allPull_pullId, nAllPull);
1607 iAllPull = 0;
1608 /* Setup one array of all pull groups */
1609 for (i = 0; i < nWindows; i++)
1611 for (j = 0; j < window[i].nPull; j++)
1613 allPull_winId[iAllPull] = i;
1614 allPull_pullId[iAllPull] = j;
1615 iAllPull++;
1619 /* setup stuff for synthetic windows */
1620 snew(synthWindow, nAllPull);
1621 for (i = 0; i < nAllPull; i++)
1623 synthWindow[i].nPull = 1;
1624 synthWindow[i].nBin = opt->bins;
1625 snew(synthWindow[i].Histo, 1);
1626 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1628 snew(synthWindow[i].Histo[0], opt->bins);
1630 snew(synthWindow[i].N, 1);
1631 snew(synthWindow[i].pos, 1);
1632 snew(synthWindow[i].z, 1);
1633 snew(synthWindow[i].k, 1);
1634 snew(synthWindow[i].bContrib, 1);
1635 snew(synthWindow[i].g, 1);
1636 snew(synthWindow[i].bsWeight, 1);
1639 switch (opt->bsMethod)
1641 case bsMethod_hist:
1642 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1643 please_cite(stdout, "Hub2006");
1644 break;
1645 case bsMethod_BayesianHist:
1646 /* just copy all histogams into synthWindow array */
1647 for (i = 0; i < nAllPull; i++)
1649 winid = allPull_winId [i];
1650 pullid = allPull_pullId[i];
1651 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1653 break;
1654 case bsMethod_traj:
1655 case bsMethod_trajGauss:
1656 calc_cumulatives(window, nWindows, opt, fnhist, xlabel);
1657 break;
1658 default:
1659 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1662 /* do bootstrapping */
1663 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1664 for (ib = 0; ib < opt->nBootStrap; ib++)
1666 printf(" *******************************************\n"
1667 " ******** Start bootstrap nr %d ************\n"
1668 " *******************************************\n", ib+1);
1670 switch (opt->bsMethod)
1672 case bsMethod_hist:
1673 /* bootstrap complete histograms from given histograms */
1674 srenew(randomArray, nAllPull);
1675 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1676 for (i = 0; i < nAllPull; i++)
1678 winid = allPull_winId [randomArray[i]];
1679 pullid = allPull_pullId[randomArray[i]];
1680 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1682 break;
1683 case bsMethod_BayesianHist:
1684 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1685 setRandomBsWeights(synthWindow, nAllPull, opt);
1686 break;
1687 case bsMethod_traj:
1688 case bsMethod_trajGauss:
1689 /* create new histos from given histos, that is generate new hypothetical
1690 trajectories */
1691 for (i = 0; i < nAllPull; i++)
1693 winid = allPull_winId[i];
1694 pullid = allPull_pullId[i];
1695 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1697 break;
1700 /* write histos in case of verbose output */
1701 if (opt->bs_verbose)
1703 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1706 /* do wham */
1707 i = 0;
1708 bExact = FALSE;
1709 maxchange = 1e20;
1710 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1713 if ( (i%opt->stepUpdateContrib) == 0)
1715 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1717 if (maxchange < opt->Tolerance)
1719 bExact = TRUE;
1721 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1723 printf("\t%4d) Maximum change %e\n", i, maxchange);
1725 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1726 i++;
1728 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1729 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1731 if (opt->bLog)
1733 prof_normalization_and_unit(bsProfile, opt);
1736 /* symmetrize profile around z=0 */
1737 if (opt->bSym)
1739 symmetrizeProfile(bsProfile, opt);
1742 /* save stuff to get average and stddev */
1743 for (i = 0; i < opt->bins; i++)
1745 tmp = bsProfile[i];
1746 bsProfiles_av[i] += tmp;
1747 bsProfiles_av2[i] += tmp*tmp;
1748 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1750 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1752 xvgrclose(fp);
1754 /* write average and stddev */
1755 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1756 if (output_env_get_print_xvgr_codes(opt->oenv))
1758 fprintf(fp, "@TYPE xydy\n");
1760 for (i = 0; i < opt->bins; i++)
1762 bsProfiles_av [i] /= opt->nBootStrap;
1763 bsProfiles_av2[i] /= opt->nBootStrap;
1764 tmp = bsProfiles_av2[i]-gmx::square(bsProfiles_av[i]);
1765 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1766 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1768 xvgrclose(fp);
1769 printf("Wrote boot strap result to %s\n", fnres);
1772 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1773 static int whaminFileType(char *fn)
1775 int len;
1776 len = std::strlen(fn);
1777 if (std::strcmp(fn+len-3, "tpr") == 0)
1779 return whamin_tpr;
1781 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1783 return whamin_pullxf;
1785 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1787 return whamin_pdo;
1789 else
1791 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1795 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1796 static void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1797 t_UmbrellaOptions *opt)
1799 char **filename = nullptr, tmp[WHAM_MAXFILELEN+2];
1800 int nread, sizenow, i, block = 1;
1801 FILE *fp;
1803 fp = gmx_ffopen(fn, "r");
1804 nread = 0;
1805 sizenow = 0;
1806 while (fgets(tmp, sizeof(tmp), fp) != nullptr)
1808 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1810 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1812 if (nread >= sizenow)
1814 sizenow += block;
1815 srenew(filename, sizenow);
1816 for (i = sizenow-block; i < sizenow; i++)
1818 snew(filename[i], WHAM_MAXFILELEN);
1821 /* remove newline if there is one */
1822 if (tmp[std::strlen(tmp)-1] == '\n')
1824 tmp[std::strlen(tmp)-1] = '\0';
1826 std::strcpy(filename[nread], tmp);
1827 if (opt->verbose)
1829 printf("Found file %s in %s\n", filename[nread], fn);
1831 nread++;
1833 *filenamesRet = filename;
1834 *nfilesRet = nread;
1837 //! Open a file or a pipe to a gzipped file
1838 static FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1840 char Buffer[2048], gunzip[1024], *Path = nullptr;
1841 FILE *pipe = nullptr;
1842 static gmx_bool bFirst = true;
1844 /* gzipped pdo file? */
1845 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1847 /* search gunzip executable */
1848 if (!(Path = getenv("GMX_PATH_GZIP")))
1850 if (gmx_fexist("/bin/gunzip"))
1852 sprintf(gunzip, "%s", "/bin/gunzip");
1854 else if (gmx_fexist("/usr/bin/gunzip"))
1856 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1858 else
1860 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1861 "You may want to define the path to gunzip "
1862 "with the environment variable GMX_PATH_GZIP.");
1865 else
1867 sprintf(gunzip, "%s/gunzip", Path);
1868 if (!gmx_fexist(gunzip))
1870 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1871 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1874 if (bFirst)
1876 printf("Using gunzip executable %s\n", gunzip);
1877 bFirst = false;
1879 if (!gmx_fexist(fn))
1881 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1883 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1884 if (opt->verbose)
1886 printf("Executing command '%s'\n", Buffer);
1888 #if HAVE_PIPES
1889 if ((pipe = popen(Buffer, "r")) == nullptr)
1891 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1893 #else
1894 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1895 #endif
1896 *bPipeOpen = TRUE;
1898 else
1900 pipe = gmx_ffopen(fn, "r");
1901 *bPipeOpen = FALSE;
1904 return pipe;
1907 //! Close file or pipe
1908 static void pdo_close_file(FILE *fp)
1910 #if HAVE_PIPES
1911 pclose(fp);
1912 #else
1913 gmx_ffclose(fp);
1914 #endif
1917 //! Reading all pdo files
1918 static void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1919 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1921 FILE *file;
1922 real mintmp, maxtmp, done = 0.;
1923 int i;
1924 gmx_bool bPipeOpen;
1925 /* char Buffer0[1000]; */
1927 if (nfiles < 1)
1929 gmx_fatal(FARGS, "No files found. Hick.");
1932 /* if min and max are not given, get min and max from the input files */
1933 if (opt->bAuto)
1935 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1936 opt->min = 1e20;
1937 opt->max = -1e20;
1938 for (i = 0; i < nfiles; ++i)
1940 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1941 /*fgets(Buffer0,999,file);
1942 fprintf(stderr,"First line '%s'\n",Buffer0); */
1943 done = 100.0*(i+1)/nfiles;
1944 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1945 if (opt->verbose)
1947 printf("\n");
1949 read_pdo_header(file, header, opt);
1950 /* here only determine min and max of this window */
1951 read_pdo_data(file, header, i, nullptr, opt, TRUE, &mintmp, &maxtmp);
1952 if (maxtmp > opt->max)
1954 opt->max = maxtmp;
1956 if (mintmp < opt->min)
1958 opt->min = mintmp;
1960 if (bPipeOpen)
1962 pdo_close_file(file);
1964 else
1966 gmx_ffclose(file);
1969 printf("\n");
1970 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1971 if (opt->bBoundsOnly)
1973 printf("Found option -boundsonly, now exiting.\n");
1974 exit (0);
1977 /* store stepsize in profile */
1978 opt->dz = (opt->max-opt->min)/opt->bins;
1980 /* Having min and max, we read in all files */
1981 /* Loop over all files */
1982 for (i = 0; i < nfiles; ++i)
1984 done = 100.0*(i+1)/nfiles;
1985 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1986 if (opt->verbose)
1988 printf("\n");
1990 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1991 read_pdo_header(file, header, opt);
1992 /* load data into window */
1993 read_pdo_data(file, header, i, window, opt, FALSE, nullptr, nullptr);
1994 if ((window+i)->Ntot[0] == 0)
1996 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1998 if (bPipeOpen)
2000 pdo_close_file(file);
2002 else
2004 gmx_ffclose(file);
2007 printf("\n");
2008 for (i = 0; i < nfiles; ++i)
2010 sfree(fn[i]);
2012 sfree(fn);
2015 //! translate 0/1 to N/Y to write pull dimensions
2016 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2018 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2019 static void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt, t_coordselection *coordsel)
2021 t_inputrec irInstance;
2022 t_inputrec *ir = &irInstance;
2023 t_state state;
2024 static int first = 1;
2026 /* printf("Reading %s \n",fn); */
2027 read_tpx_state(fn, ir, &state, nullptr);
2029 if (!ir->bPull)
2031 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2033 if (ir->pull->ncoord == 0)
2035 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2038 /* Read overall pull info */
2039 header->npullcrds = ir->pull->ncoord;
2040 header->bPrintCOM = ir->pull->bPrintCOM;
2041 header->bPrintRefValue = ir->pull->bPrintRefValue;
2042 header->bPrintComp = ir->pull->bPrintComp;
2044 /* Read pull coordinates */
2045 snew(header->pcrd, header->npullcrds);
2046 for (int i = 0; i < ir->pull->ncoord; i++)
2048 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2049 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2050 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
2051 /* Angle type coordinates are handled fully in degrees in gmx wham */
2052 header->pcrd[i].k = ir->pull->coord[i].k*pull_conversion_factor_internal2userinput(&ir->pull->coord[i]);
2053 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2055 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2056 header->pcrd[i].ndim = header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2058 std::strcpy(header->pcrd[i].coord_unit,
2059 pull_coordinate_units(&ir->pull->coord[i]));
2061 if (ir->efep != efepNO && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2063 gmx_fatal(FARGS, "Seems like you did free-energy perturbation, and you perturbed the force constant."
2064 " This is not supported.\n");
2066 if (coordsel && (coordsel->n != ir->pull->ncoord))
2068 gmx_fatal(FARGS, "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2069 "coordinate selection file (option -is)\n", ir->pull->ncoord, fn, coordsel->n);
2073 /* Check pull coords for consistency */
2074 int geom = -1;
2075 ivec thedim = { 0, 0, 0 };
2076 bool geometryIsSet = false;
2077 for (int i = 0; i < ir->pull->ncoord; i++)
2079 if (coordsel == nullptr || coordsel->bUse[i])
2081 if (header->pcrd[i].pull_type != epullUMBRELLA)
2083 gmx_fatal(FARGS, "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only umbrella coodinates can enter WHAM.\n"
2084 "If you have umrella and non-umbrella coordinates, you can select the umbrella coordinates with gmx wham -is\n",
2085 fn, i+1, epull_names[header->pcrd[i].pull_type]);
2087 if (!geometryIsSet)
2089 geom = header->pcrd[i].geometry;
2090 copy_ivec(header->pcrd[i].dim, thedim);
2091 geometryIsSet = true;
2093 if (geom != header->pcrd[i].geometry)
2095 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull geometry (coordinate 1: %s, coordinate %d: %s)\n"
2096 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2097 fn, epullg_names[geom], i+1, epullg_names[header->pcrd[i].geometry]);
2099 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY] || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2101 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull dimensions (coordinate 1: %s %s %s, coordinate %d: %s %s %s)\n"
2102 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2103 fn, int2YN(thedim[XX]), int2YN(thedim[YY]), int2YN(thedim[ZZ]), i+1,
2104 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]));
2106 if (header->pcrd[i].geometry == epullgCYL)
2108 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2110 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2111 "However, found dimensions [%s %s %s]\n",
2112 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2113 int2YN(header->pcrd[i].dim[ZZ]));
2116 if (header->pcrd[i].k <= 0.0)
2118 gmx_fatal(FARGS, "%s: Pull coordinate %d has force constant of of %g.\n"
2119 "That doesn't seem to be an Umbrella tpr.\n",
2120 fn, i+1, header->pcrd[i].k);
2125 if (opt->verbose || first)
2127 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2128 int maxlen = 0;
2129 for (int i = 0; i < ir->pull->ncoord; i++)
2131 int lentmp = strlen(epullg_names[header->pcrd[i].geometry]);
2132 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2134 char fmt[STRLEN];
2135 sprintf(fmt, "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d dimensions). Used: %%s\n",
2136 maxlen+1);
2137 for (int i = 0; i < ir->pull->ncoord; i++)
2139 bool use = (coordsel == nullptr || coordsel->bUse[i]);
2140 printf(fmt,
2141 epullg_names[header->pcrd[i].geometry], header->pcrd[i].k, header->pcrd[i].init_dist,
2142 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]),
2143 header->pcrd[i].ndim, use ? "Yes" : "No");
2144 printf("\tPull group coordinates of %d groups expected in pullx files.\n", ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
2146 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2147 header->bPrintRefValue ? "" : " not");
2149 if (!opt->verbose && first)
2151 printf("\tUse option -v to see this output for all input tpr files\n\n");
2154 first = 0;
2157 //! Read pullx.xvg or pullf.xvg
2158 static void read_pull_xf(const char *fn, t_UmbrellaHeader * header,
2159 t_UmbrellaWindow * window,
2160 t_UmbrellaOptions *opt,
2161 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2162 t_coordselection *coordsel)
2164 double **y = nullptr, pos = 0., t, force, time0 = 0., dt;
2165 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2166 int nColExpect, ntot, column;
2167 real min, max, minfound = 1e20, maxfound = -1e20;
2168 gmx_bool dt_ok, timeok;
2169 const char *quantity;
2170 const int blocklen = 4096;
2171 int *lennow = nullptr;
2172 static gmx_bool bFirst = TRUE;
2175 * Data columns in pull output:
2176 * - in force output pullf.xvg:
2177 * No reference columns, one column per pull coordinate
2179 * - in position output pullx.xvg:
2180 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
2181 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2182 * be written separately into pullx file, but this is not supported and throws an error below;
2183 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2186 if (header->bPrintComp && opt->bPullx)
2188 gmx_fatal(FARGS, "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2189 "(mdp option pull-print-components). Provide the pull force files instead (with option -if).\n");
2192 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2193 snew(nColThisCrd, header->npullcrds);
2194 snew(nColCOMCrd, header->npullcrds);
2195 snew(nColRefCrd, header->npullcrds);
2197 if (!opt->bPullx)
2199 /* pullf reading: simply one column per coordinate */
2200 for (g = 0; g < header->npullcrds; g++)
2202 nColThisCrd[g] = 1;
2203 nColCOMCrd[g] = 0;
2204 nColRefCrd[g] = 0;
2207 else
2209 /* pullx reading. Note explanation above. */
2210 for (g = 0; g < header->npullcrds; g++)
2212 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2213 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim*header->pcrd[g].ngroup : 0);
2214 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2218 nColExpect = 1; /* time column */
2219 for (g = 0; g < header->npullcrds; g++)
2221 nColExpect += nColThisCrd[g];
2224 /* read pullf or pullx. Could be optimized if min and max are given. */
2225 nt = read_xvg(fn, &y, &ny);
2227 /* Check consistency */
2228 quantity = opt->bPullx ? "position" : "force";
2229 if (nt < 1)
2231 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2233 if (bFirst || opt->verbose)
2235 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2236 for (i = 0; i < header->npullcrds; i++)
2238 printf("\tColumns for pull coordinate %d\n", i+1);
2239 printf("\t\treaction coordinate: %d\n"
2240 "\t\tcenter-of-mass of groups: %d\n"
2241 "\t\treference position column: %s\n",
2242 1, nColCOMCrd[i], (header->bPrintRefValue ? "Yes" : "No"));
2244 printf("\tFound %d times in %s\n", nt, fn);
2245 bFirst = FALSE;
2247 if (nColExpect != ny)
2249 gmx_fatal(FARGS, "Expected %d columns (including time column) in %s, but found %d."
2250 " Maybe you confused options -if and -ix ?", nColExpect, fn, ny);
2253 if (!bGetMinMax)
2255 assert(window);
2256 bins = opt->bins;
2257 min = opt->min;
2258 max = opt->max;
2259 if (nt > 1)
2261 window->dt = y[0][1]-y[0][0];
2263 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2265 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2268 /* Need to alocate memory and set up structure for windows */
2269 if (coordsel)
2271 /* Use only groups selected with option -is file */
2272 if (header->npullcrds != coordsel->n)
2274 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2275 header->npullcrds, coordsel->n);
2277 window->nPull = coordsel->nUse;
2279 else
2281 window->nPull = header->npullcrds;
2284 window->nBin = bins;
2285 snew(window->Histo, window->nPull);
2286 snew(window->z, window->nPull);
2287 snew(window->k, window->nPull);
2288 snew(window->pos, window->nPull);
2289 snew(window->N, window->nPull);
2290 snew(window->Ntot, window->nPull);
2291 snew(window->g, window->nPull);
2292 snew(window->bsWeight, window->nPull);
2293 window->bContrib = nullptr;
2295 if (opt->bCalcTauInt)
2297 snew(window->ztime, window->nPull);
2299 else
2301 window->ztime = nullptr;
2303 snew(lennow, window->nPull);
2305 for (g = 0; g < window->nPull; ++g)
2307 window->z [g] = 1;
2308 window->bsWeight [g] = 1.;
2309 window->N [g] = 0;
2310 window->Ntot [g] = 0;
2311 window->g [g] = 1.;
2312 snew(window->Histo[g], bins);
2314 if (opt->bCalcTauInt)
2316 window->ztime[g] = nullptr;
2320 /* Copying umbrella center and force const is more involved since not
2321 all pull groups from header (tpr file) may be used in window variable */
2322 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2324 if (coordsel && !coordsel->bUse[g])
2326 continue;
2328 window->k [gUsed] = header->pcrd[g].k;
2329 window->pos[gUsed] = header->pcrd[g].init_dist;
2330 gUsed++;
2333 else
2334 { /* only determine min and max */
2335 minfound = 1e20;
2336 maxfound = -1e20;
2337 min = max = bins = 0; /* Get rid of warnings */
2341 for (i = 0; i < nt; i++)
2343 /* Do you want that time frame? */
2344 t = 1.0/1000*( gmx::roundToInt64((y[0][i]*1000))); /* round time to fs */
2346 /* get time step of pdo file and get dstep from opt->dt */
2347 if (i == 0)
2349 time0 = t;
2351 else if (i == 1)
2353 dt = t-time0;
2354 if (opt->dt > 0.0)
2356 dstep = gmx::roundToInt(opt->dt/dt);
2357 if (dstep == 0)
2359 dstep = 1;
2362 if (!bGetMinMax)
2364 window->dt = dt*dstep;
2368 dt_ok = (i%dstep == 0);
2369 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2370 /*if (opt->verbose)
2371 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2372 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2373 if (timeok)
2375 /* Note: if coordsel == NULL:
2376 * all groups in pullf/x file are stored in this window, and gUsed == g
2377 * if coordsel != NULL:
2378 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2380 gUsed = -1;
2381 for (g = 0; g < header->npullcrds; ++g)
2383 /* was this group selected for application in WHAM? */
2384 if (coordsel && !coordsel->bUse[g])
2386 continue;
2388 gUsed++;
2390 if (opt->bPullf)
2392 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2393 force = y[g+1][i];
2394 pos = -force/header->pcrd[g].k + header->pcrd[g].init_dist;
2396 else
2398 /* Pick the correct column index.
2399 Note that there is always exactly one displacement column.
2401 column = 1;
2402 for (int j = 0; j < g; j++)
2404 column += nColThisCrd[j];
2406 pos = y[column][i];
2409 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2410 if (bGetMinMax)
2412 if (pos < minfound)
2414 minfound = pos;
2416 if (pos > maxfound)
2418 maxfound = pos;
2421 else
2423 if (gUsed >= window->nPull)
2425 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been caught before.\n",
2426 gUsed, window->nPull);
2429 if (opt->bCalcTauInt && !bGetMinMax)
2431 /* save time series for autocorrelation analysis */
2432 ntot = window->Ntot[gUsed];
2433 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2434 if (ntot >= lennow[gUsed])
2436 lennow[gUsed] += blocklen;
2437 srenew(window->ztime[gUsed], lennow[gUsed]);
2439 window->ztime[gUsed][ntot] = pos;
2442 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2443 if (opt->bCycl)
2445 if (ibin < 0)
2447 while ( (ibin += bins) < 0)
2452 else if (ibin >= bins)
2454 while ( (ibin -= bins) >= bins)
2460 if (ibin >= 0 && ibin < bins)
2462 window->Histo[gUsed][ibin] += 1.;
2463 window->N[gUsed]++;
2465 window->Ntot[gUsed]++;
2469 else if (t > opt->tmax)
2471 if (opt->verbose)
2473 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2475 break;
2479 if (bGetMinMax)
2481 *mintmp = minfound;
2482 *maxtmp = maxfound;
2484 sfree(lennow);
2485 for (i = 0; i < ny; i++)
2487 sfree(y[i]);
2491 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2492 static void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2493 t_UmbrellaHeader* header,
2494 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2496 int i;
2497 real mintmp, maxtmp;
2499 printf("Reading %d tpr and pullf files\n", nfiles);
2501 /* min and max not given? */
2502 if (opt->bAuto)
2504 printf("Automatic determination of boundaries...\n");
2505 opt->min = 1e20;
2506 opt->max = -1e20;
2507 for (i = 0; i < nfiles; i++)
2509 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2511 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2513 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2514 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2516 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2518 read_pull_xf(fnPull[i], header, nullptr, opt, TRUE, &mintmp, &maxtmp,
2519 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2520 if (maxtmp > opt->max)
2522 opt->max = maxtmp;
2524 if (mintmp < opt->min)
2526 opt->min = mintmp;
2529 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2530 if (opt->bBoundsOnly)
2532 printf("Found option -boundsonly, now exiting.\n");
2533 exit (0);
2536 /* store stepsize in profile */
2537 opt->dz = (opt->max-opt->min)/opt->bins;
2539 bool foundData = false;
2540 for (i = 0; i < nfiles; i++)
2542 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2544 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2546 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2547 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2549 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2551 read_pull_xf(fnPull[i], header, window+i, opt, FALSE, nullptr, nullptr,
2552 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2553 if (window[i].Ntot[0] == 0)
2555 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2557 else
2559 foundData = true;
2562 if (!foundData)
2564 gmx_fatal(FARGS, "No data points were found in pullf/pullx files. Maybe you need to specify the -b option?\n");
2567 for (i = 0; i < nfiles; i++)
2569 sfree(fnTprs[i]);
2570 sfree(fnPull[i]);
2572 sfree(fnTprs);
2573 sfree(fnPull);
2576 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2578 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2579 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2581 static void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2583 int nlines, ny, i, ig;
2584 double **iact;
2586 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2587 nlines = read_xvg(fn, &iact, &ny);
2588 if (nlines != nwins)
2590 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2591 nlines, fn, nwins);
2593 for (i = 0; i < nlines; i++)
2595 if (window[i].nPull != ny)
2597 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2598 "number of pull groups is different in different simulations. That is not\n"
2599 "supported yet. Sorry.\n");
2601 for (ig = 0; ig < window[i].nPull; ig++)
2603 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2604 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2606 if (iact[ig][i] <= 0.0)
2608 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2615 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2617 * This is useful
2618 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2619 * that -in case of limited sampling- the ACT may be severely underestimated.
2620 * Note: the g=1+2tau are overwritten.
2621 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2622 * by the smoothing
2624 static void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2626 int i, ig, j, jg;
2627 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2629 /* only evaluate within +- 3sigma of the Gausian */
2630 siglim = 3.0*opt->sigSmoothIact;
2631 siglim2 = gmx::square(siglim);
2632 /* pre-factor of Gaussian */
2633 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2634 invtwosig2 = 0.5/gmx::square(opt->sigSmoothIact);
2636 for (i = 0; i < nwins; i++)
2638 snew(window[i].tausmooth, window[i].nPull);
2639 for (ig = 0; ig < window[i].nPull; ig++)
2641 tausm = 0.;
2642 weight = 0;
2643 pos = window[i].pos[ig];
2644 for (j = 0; j < nwins; j++)
2646 for (jg = 0; jg < window[j].nPull; jg++)
2648 dpos2 = gmx::square(window[j].pos[jg]-pos);
2649 if (dpos2 < siglim2)
2651 w = gaufact*std::exp(-dpos2*invtwosig2);
2652 weight += w;
2653 tausm += w*window[j].tau[jg];
2654 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2655 w,dpos2,pos,gaufact,invtwosig2); */
2659 tausm /= weight;
2660 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2662 window[i].tausmooth[ig] = tausm;
2664 else
2666 window[i].tausmooth[ig] = window[i].tau[ig];
2668 window[i].g[ig] = 1+2*tausm/window[i].dt;
2673 //! Stop integrating autoccorelation function when ACF drops under this value
2674 #define WHAM_AC_ZERO_LIMIT 0.05
2676 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2678 static void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2679 t_UmbrellaOptions *opt, const char *fn, const char *xlabel)
2681 int i, ig, ncorr, ntot, j, k, *count, restart;
2682 real *corr, c0, dt, tmp;
2683 real *ztime, av, tausteps;
2684 FILE *fp, *fpcorr = nullptr;
2686 if (opt->verbose)
2688 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2689 "time [ps]", "autocorrelation function", opt->oenv);
2692 printf("\n");
2693 for (i = 0; i < nwins; i++)
2695 fprintf(stdout, "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2696 fflush(stdout);
2697 ntot = window[i].Ntot[0];
2699 /* using half the maximum time as length of autocorrelation function */
2700 ncorr = ntot/2;
2701 if (ntot < 10)
2703 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2704 " points. Provide more pull data!", ntot);
2706 snew(corr, ncorr);
2707 /* snew(corrSq,ncorr); */
2708 snew(count, ncorr);
2709 dt = window[i].dt;
2710 snew(window[i].tau, window[i].nPull);
2711 restart = gmx::roundToInt(opt->acTrestart/dt);
2712 if (restart == 0)
2714 restart = 1;
2717 for (ig = 0; ig < window[i].nPull; ig++)
2719 if (ntot != window[i].Ntot[ig])
2721 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2722 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2724 ztime = window[i].ztime[ig];
2726 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2727 for (j = 0, av = 0; (j < ntot); j++)
2729 av += ztime[j];
2731 av /= ntot;
2732 for (k = 0; (k < ncorr); k++)
2734 corr[k] = 0.;
2735 count[k] = 0;
2737 for (j = 0; (j < ntot); j += restart)
2739 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2741 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2742 corr [k] += tmp;
2743 /* corrSq[k] += tmp*tmp; */
2744 count[k]++;
2747 /* divide by nr of frames for each time displacement */
2748 for (k = 0; (k < ncorr); k++)
2750 /* count probably = (ncorr-k+(restart-1))/restart; */
2751 corr[k] = corr[k]/count[k];
2752 /* variance of autocorrelation function */
2753 /* corrSq[k]=corrSq[k]/count[k]; */
2755 /* normalize such that corr[0] == 0 */
2756 c0 = 1./corr[0];
2757 for (k = 0; (k < ncorr); k++)
2759 corr[k] *= c0;
2760 /* corrSq[k]*=c0*c0; */
2763 /* write ACFs in verbose mode */
2764 if (fpcorr)
2766 for (k = 0; (k < ncorr); k++)
2768 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2770 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2773 /* esimate integrated correlation time, fitting is too unstable */
2774 tausteps = 0.5*corr[0];
2775 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2776 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2778 tausteps += corr[j];
2781 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2782 Kumar et al, eq. 28 ff. */
2783 window[i].tau[ig] = tausteps*dt;
2784 window[i].g[ig] = 1+2*tausteps;
2785 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2786 } /* ig loop */
2787 sfree(corr);
2788 sfree(count);
2790 printf(" done\n");
2791 if (fpcorr)
2793 xvgrclose(fpcorr);
2796 /* plot IACT along reaction coordinate */
2797 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2798 if (output_env_get_print_xvgr_codes(opt->oenv))
2800 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2801 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2802 for (i = 0; i < nwins; i++)
2804 fprintf(fp, "# %3d ", i);
2805 for (ig = 0; ig < window[i].nPull; ig++)
2807 fprintf(fp, " %11g", window[i].tau[ig]);
2809 fprintf(fp, "\n");
2812 for (i = 0; i < nwins; i++)
2814 for (ig = 0; ig < window[i].nPull; ig++)
2816 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2819 if (opt->sigSmoothIact > 0.0)
2821 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2822 opt->sigSmoothIact);
2823 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2824 smoothIact(window, nwins, opt);
2825 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2826 if (output_env_get_print_xvgr_codes(opt->oenv))
2828 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2829 fprintf(fp, "@ s1 symbol color 2\n");
2831 for (i = 0; i < nwins; i++)
2833 for (ig = 0; ig < window[i].nPull; ig++)
2835 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2839 xvgrclose(fp);
2840 printf("Wrote %s\n", fn);
2843 /*! \brief
2844 * compute average and sigma of each umbrella histogram
2846 static void averageSigma(t_UmbrellaWindow *window, int nwins)
2848 int i, ig, ntot, k;
2849 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2851 for (i = 0; i < nwins; i++)
2853 snew(window[i].aver, window[i].nPull);
2854 snew(window[i].sigma, window[i].nPull);
2856 ntot = window[i].Ntot[0];
2857 for (ig = 0; ig < window[i].nPull; ig++)
2859 ztime = window[i].ztime[ig];
2860 for (k = 0, av = 0.; k < ntot; k++)
2862 av += ztime[k];
2864 av /= ntot;
2865 for (k = 0, sum2 = 0.; k < ntot; k++)
2867 diff = ztime[k]-av;
2868 sum2 += diff*diff;
2870 sig = std::sqrt(sum2/ntot);
2871 window[i].aver[ig] = av;
2873 /* Note: This estimate for sigma is biased from the limited sampling.
2874 Correct sigma by n/(n-1) where n = number of independent
2875 samples. Only possible if IACT is known.
2877 if (window[i].tau)
2879 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2880 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2882 else
2884 window[i].sigma[ig] = sig;
2886 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2892 /*! \brief
2893 * Use histograms to compute average force on pull group.
2895 static void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2897 int i, j, bins = opt->bins, k;
2898 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2899 double posmirrored;
2901 dz = (max-min)/bins;
2902 ztot = opt->max-min;
2903 ztot_half = ztot/2;
2905 /* Compute average displacement from histograms */
2906 for (j = 0; j < nWindows; ++j)
2908 snew(window[j].forceAv, window[j].nPull);
2909 for (k = 0; k < window[j].nPull; ++k)
2911 displAv = 0.0;
2912 weight = 0.0;
2913 for (i = 0; i < opt->bins; ++i)
2915 temp = (1.0*i+0.5)*dz+min;
2916 distance = temp - window[j].pos[k];
2917 if (opt->bCycl)
2918 { /* in cyclic wham: */
2919 if (distance > ztot_half) /* |distance| < ztot_half */
2921 distance -= ztot;
2923 else if (distance < -ztot_half)
2925 distance += ztot;
2928 w = window[j].Histo[k][i]/window[j].g[k];
2929 displAv += w*distance;
2930 weight += w;
2931 /* Are we near min or max? We are getting wrong forces from the histgrams since
2932 the histograms are zero outside [min,max). Therefore, assume that the position
2933 on the other side of the histomgram center is equally likely. */
2934 if (!opt->bCycl)
2936 posmirrored = window[j].pos[k]-distance;
2937 if (posmirrored >= max || posmirrored < min)
2939 displAv += -w*distance;
2940 weight += w;
2944 displAv /= weight;
2946 /* average force from average displacement */
2947 window[j].forceAv[k] = displAv*window[j].k[k];
2948 /* sigma from average square displacement */
2949 /* window[j].sigma [k] = sqrt(displAv2); */
2950 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2955 /*! \brief
2956 * Check if the complete reaction coordinate is covered by the histograms
2958 static void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2959 t_UmbrellaOptions *opt)
2961 int i, ig, j, bins = opt->bins;
2962 bool bBoundary;
2963 real avcount = 0, z, relcount, *count;
2964 snew(count, opt->bins);
2966 for (j = 0; j < opt->bins; ++j)
2968 for (i = 0; i < nwins; i++)
2970 for (ig = 0; ig < window[i].nPull; ig++)
2972 count[j] += window[i].Histo[ig][j];
2975 avcount += 1.0*count[j];
2977 avcount /= bins;
2978 for (j = 0; j < bins; ++j)
2980 relcount = count[j]/avcount;
2981 z = (j+0.5)*opt->dz+opt->min;
2982 bBoundary = j<bins/20 || (bins-j)>bins/20;
2983 /* check for bins with no data */
2984 if (count[j] == 0)
2986 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2987 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2989 /* and check for poor sampling */
2990 else if (relcount < 0.005 && !bBoundary)
2992 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2995 sfree(count);
2998 /*! \brief Compute initial potential by integrating the average force
3000 * This speeds up the convergence by roughly a factor of 2
3002 static void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt, const char *xlabel)
3004 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3005 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3006 FILE *fp;
3008 dz = (opt->max-min)/bins;
3010 printf("Getting initial potential by integration.\n");
3012 /* Compute average displacement from histograms */
3013 computeAverageForce(window, nWindows, opt);
3015 /* Get force for each bin from all histograms in this bin, or, alternatively,
3016 if no histograms are inside this bin, from the closest histogram */
3017 snew(pot, bins);
3018 snew(f, bins);
3019 for (j = 0; j < opt->bins; ++j)
3021 pos = (1.0*j+0.5)*dz+min;
3022 nHist = 0;
3023 fAv = 0.;
3024 distmin = 1e20;
3025 groupmin = winmin = 0;
3026 for (i = 0; i < nWindows; i++)
3028 for (ig = 0; ig < window[i].nPull; ig++)
3030 hispos = window[i].pos[ig];
3031 dist = std::abs(hispos-pos);
3032 /* average force within bin */
3033 if (dist < dz/2)
3035 nHist++;
3036 fAv += window[i].forceAv[ig];
3038 /* at the same time, remember closest histogram */
3039 if (dist < distmin)
3041 winmin = i;
3042 groupmin = ig;
3043 distmin = dist;
3047 /* if no histogram found in this bin, use closest histogram */
3048 if (nHist > 0)
3050 fAv = fAv/nHist;
3052 else
3054 fAv = window[winmin].forceAv[groupmin];
3056 f[j] = fAv;
3058 for (j = 1; j < opt->bins; ++j)
3060 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3063 /* cyclic wham: linearly correct possible offset */
3064 if (opt->bCycl)
3066 diff = (pot[bins-1]-pot[0])/(bins-1);
3067 for (j = 1; j < opt->bins; ++j)
3069 pot[j] -= j*diff;
3072 if (opt->verbose)
3074 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3075 for (j = 0; j < opt->bins; ++j)
3077 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3079 xvgrclose(fp);
3080 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3083 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3084 energy offsets which are usually determined by wham
3085 First: turn pot into probabilities:
3087 for (j = 0; j < opt->bins; ++j)
3089 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3091 calc_z(pot, window, nWindows, opt, TRUE);
3093 sfree(pot);
3094 sfree(f);
3097 //! Count number of words in an line
3098 static int wordcount(char *ptr)
3100 int i, n, is[2];
3101 int cur = 0;
3103 if (std::strlen(ptr) == 0)
3105 return 0;
3107 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3108 n = 1;
3109 for (i = 0; (ptr[i] != '\0'); i++)
3111 is[cur] = isspace(ptr[i]);
3112 if ((i > 0) && (is[cur] && !is[1-cur]))
3114 n++;
3116 cur = 1-cur;
3118 return n;
3121 /*! \brief Read input file for pull group selection (option -is)
3123 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3125 static void readPullCoordSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3127 FILE *fp;
3128 int i, iline, n, len = STRLEN, temp;
3129 char *ptr = nullptr, *tmpbuf = nullptr;
3130 char fmt[1024], fmtign[1024];
3131 int block = 1, sizenow;
3133 fp = gmx_ffopen(opt->fnCoordSel, "r");
3134 opt->coordsel = nullptr;
3136 snew(tmpbuf, len);
3137 sizenow = 0;
3138 iline = 0;
3139 while ( (ptr = fgets3(fp, tmpbuf, &len)) != nullptr)
3141 trim(ptr);
3142 n = wordcount(ptr);
3144 if (iline >= sizenow)
3146 sizenow += block;
3147 srenew(opt->coordsel, sizenow);
3149 opt->coordsel[iline].n = n;
3150 opt->coordsel[iline].nUse = 0;
3151 snew(opt->coordsel[iline].bUse, n);
3153 fmtign[0] = '\0';
3154 for (i = 0; i < n; i++)
3156 std::strcpy(fmt, fmtign);
3157 std::strcat(fmt, "%d");
3158 if (sscanf(ptr, fmt, &temp))
3160 opt->coordsel[iline].bUse[i] = (temp > 0);
3161 if (opt->coordsel[iline].bUse[i])
3163 opt->coordsel[iline].nUse++;
3166 std::strcat(fmtign, "%*s");
3168 iline++;
3170 opt->nCoordsel = iline;
3171 if (nTpr != opt->nCoordsel)
3173 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel,
3174 opt->fnCoordSel);
3177 printf("\nUse only these pull coordinates:\n");
3178 for (iline = 0; iline < nTpr; iline++)
3180 printf("%s (%d of %d coordinates):", fnTpr[iline], opt->coordsel[iline].nUse, opt->coordsel[iline].n);
3181 for (i = 0; i < opt->coordsel[iline].n; i++)
3183 if (opt->coordsel[iline].bUse[i])
3185 printf(" %d", i+1);
3188 printf("\n");
3190 printf("\n");
3192 sfree(tmpbuf);
3195 //! Boolean XOR
3196 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3198 //! Number of elements in fnm (used for command line parsing)
3199 #define NFILE asize(fnm)
3201 //! The main gmx wham routine
3202 int gmx_wham(int argc, char *argv[])
3204 const char *desc[] = {
3205 "[THISMODULE] is an analysis program that implements the Weighted",
3206 "Histogram Analysis Method (WHAM). It is intended to analyze",
3207 "output files generated by umbrella sampling simulations to ",
3208 "compute a potential of mean force (PMF).[PAR]",
3210 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3211 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3212 "and, if multiple coordinates need to be analyzed, all used the same",
3213 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3214 "At present, three input modes are supported.",
3216 "* With option [TT]-it[tt], the user provides a file which contains the",
3217 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3218 " AND, with option [TT]-ix[tt], a file which contains file names of",
3219 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3220 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3221 " first pullx, etc.",
3222 "* Same as the previous input mode, except that the the user",
3223 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3224 " From the pull force the position in the umbrella potential is",
3225 " computed. This does not work with tabulated umbrella potentials.",
3226 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3227 " the GROMACS 3.3 umbrella output files. If you have some unusual",
3228 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3229 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3230 " must be similar to the following::",
3232 " # UMBRELLA 3.0",
3233 " # Component selection: 0 0 1",
3234 " # nSkip 1",
3235 " # Ref. Group 'TestAtom'",
3236 " # Nr. of pull groups 2",
3237 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3238 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3239 " #####",
3241 " The number of pull groups, umbrella positions, force constants, and names ",
3242 " may (of course) differ. Following the header, a time column and ",
3243 " a data column for each pull group follows (i.e. the displacement",
3244 " with respect to the umbrella center). Up to four pull groups are possible ",
3245 " per [REF].pdo[ref] file at present.[PAR]",
3246 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If only ",
3247 "some of the pull coordinates should be used, a pull coordinate selection file (option [TT]-is[tt]) can ",
3248 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3249 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr file. ",
3250 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. Example:",
3251 "If you have three tpr files, each containing 4 pull coordinates, but only pull coordinates 1 and 2 should be ",
3252 "used, coordsel.dat looks like this::",
3254 " 1 1 0 0",
3255 " 1 1 0 0",
3256 " 1 1 0 0",
3258 "By default, the output files are::",
3260 " [TT]-o[tt] PMF output file",
3261 " [TT]-hist[tt] Histograms output file",
3263 "Always check whether the histograms sufficiently overlap.[PAR]",
3264 "The umbrella potential is assumed to be harmonic and the force constants are ",
3265 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3266 "a tabulated potential can be provided with [TT]-tab[tt].",
3268 "WHAM options",
3269 "^^^^^^^^^^^^",
3271 "* [TT]-bins[tt] Number of bins used in analysis",
3272 "* [TT]-temp[tt] Temperature in the simulations",
3273 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3274 "* [TT]-auto[tt] Automatic determination of boundaries",
3275 "* [TT]-min,-max[tt] Boundaries of the profile",
3277 "The data points that are used to compute the profile",
3278 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3279 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3280 "umbrella window.[PAR]",
3281 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3282 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3283 "With energy output, the energy in the first bin is defined to be zero. ",
3284 "If you want the free energy at a different ",
3285 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3286 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3287 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3288 "[THISMODULE] will make use of the",
3289 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3290 "reaction coordinate will assumed be be neighbors.[PAR]",
3291 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3292 "which may be useful for, e.g. membranes.",
3294 "Parallelization",
3295 "^^^^^^^^^^^^^^^",
3297 "If available, the number of OpenMP threads used by gmx wham can be controlled by setting",
3298 "the [TT]OMP_NUM_THREADS[tt] environment variable.",
3300 "Autocorrelations",
3301 "^^^^^^^^^^^^^^^^",
3303 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3304 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3305 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3306 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3307 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3308 "Because the IACTs can be severely underestimated in case of limited ",
3309 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3310 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3311 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3312 "integration of the ACFs while the ACFs are larger 0.05.",
3313 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3314 "less robust) method such as fitting to a double exponential, you can ",
3315 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3316 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3317 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the respective file.",
3319 "Error analysis",
3320 "^^^^^^^^^^^^^^",
3322 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3323 "otherwise the statistical error may be substantially underestimated. ",
3324 "More background and examples for the bootstrap technique can be found in ",
3325 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3326 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3327 "Four bootstrapping methods are supported and ",
3328 "selected with [TT]-bs-method[tt].",
3330 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3331 " data points, and the bootstrap is carried out by assigning random weights to the ",
3332 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3333 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3334 " statistical error is underestimated.",
3335 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3336 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3337 " (allowing duplication, i.e. sampling with replacement).",
3338 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3339 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3340 " divided into blocks and only histograms within each block are mixed. Note that ",
3341 " the histograms within each block must be representative for all possible histograms, ",
3342 " otherwise the statistical error is underestimated.",
3343 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3344 " such that the generated data points are distributed according the given histograms ",
3345 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3346 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3347 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3348 " Note that this method may severely underestimate the error in case of limited sampling, ",
3349 " that is if individual histograms do not represent the complete phase space at ",
3350 " the respective positions.",
3351 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3352 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3353 " and width of the umbrella histograms. That method yields similar error estimates ",
3354 " like method [TT]traj[tt].",
3356 "Bootstrapping output:",
3358 "* [TT]-bsres[tt] Average profile and standard deviations",
3359 "* [TT]-bsprof[tt] All bootstrapping profiles",
3361 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3362 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3363 "the histograms."
3366 const char *en_unit[] = {nullptr, "kJ", "kCal", "kT", nullptr};
3367 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", nullptr};
3368 const char *en_bsMethod[] = { nullptr, "b-hist", "hist", "traj", "traj-gauss", nullptr };
3369 static t_UmbrellaOptions opt;
3371 t_pargs pa[] = {
3372 { "-min", FALSE, etREAL, {&opt.min},
3373 "Minimum coordinate in profile"},
3374 { "-max", FALSE, etREAL, {&opt.max},
3375 "Maximum coordinate in profile"},
3376 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3377 "Determine min and max automatically"},
3378 { "-bins", FALSE, etINT, {&opt.bins},
3379 "Number of bins in profile"},
3380 { "-temp", FALSE, etREAL, {&opt.Temperature},
3381 "Temperature"},
3382 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3383 "Tolerance"},
3384 { "-v", FALSE, etBOOL, {&opt.verbose},
3385 "Verbose mode"},
3386 { "-b", FALSE, etREAL, {&opt.tmin},
3387 "First time to analyse (ps)"},
3388 { "-e", FALSE, etREAL, {&opt.tmax},
3389 "Last time to analyse (ps)"},
3390 { "-dt", FALSE, etREAL, {&opt.dt},
3391 "Analyse only every dt ps"},
3392 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3393 "Write histograms and exit"},
3394 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3395 "Determine min and max and exit (with [TT]-auto[tt])"},
3396 { "-log", FALSE, etBOOL, {&opt.bLog},
3397 "Calculate the log of the profile before printing"},
3398 { "-unit", FALSE, etENUM, {en_unit},
3399 "Energy unit in case of log output" },
3400 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3401 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3402 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3403 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3404 { "-sym", FALSE, etBOOL, {&opt.bSym},
3405 "Symmetrize profile around z=0"},
3406 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3407 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3408 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3409 "Calculate integrated autocorrelation times and use in wham"},
3410 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3411 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3412 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3413 "When computing autocorrelation functions, restart computing every .. (ps)"},
3414 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3415 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3416 "during smoothing"},
3417 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3418 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3419 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3420 "Bootstrap method" },
3421 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3422 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3423 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3424 "Seed for bootstrapping. (-1 = use time)"},
3425 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3426 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3427 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3428 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3429 { "-stepout", FALSE, etINT, {&opt.stepchange},
3430 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3431 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3432 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3435 t_filenm fnm[] = {
3436 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3437 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3438 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3439 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3440 { efDAT, "-is", "coordsel", ffOPTRD}, /* input: select pull coords to use */
3441 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3442 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3443 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3444 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3445 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3446 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3447 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3450 int i, j, l, nfiles, nwins, nfiles2;
3451 t_UmbrellaHeader header;
3452 t_UmbrellaWindow * window = nullptr;
3453 double *profile, maxchange = 1e20;
3454 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3455 char **fninTpr, **fninPull, **fninPdo;
3456 const char *fnPull;
3457 FILE *histout, *profout;
3458 char xlabel[STRLEN], ylabel[256], title[256];
3460 opt.bins = 200;
3461 opt.verbose = FALSE;
3462 opt.bHistOnly = FALSE;
3463 opt.bCycl = FALSE;
3464 opt.tmin = 50;
3465 opt.tmax = 1e20;
3466 opt.dt = 0.0;
3467 opt.min = 0;
3468 opt.max = 0;
3469 opt.bAuto = TRUE;
3470 opt.nCoordsel = 0;
3471 opt.coordsel = nullptr;
3473 /* bootstrapping stuff */
3474 opt.nBootStrap = 0;
3475 opt.bsMethod = bsMethod_hist;
3476 opt.tauBootStrap = 0.0;
3477 opt.bsSeed = -1;
3478 opt.histBootStrapBlockLength = 8;
3479 opt.bs_verbose = FALSE;
3481 opt.bLog = TRUE;
3482 opt.unit = en_kJ;
3483 opt.zProf0 = 0.;
3484 opt.Temperature = 298;
3485 opt.Tolerance = 1e-6;
3486 opt.bBoundsOnly = FALSE;
3487 opt.bSym = FALSE;
3488 opt.bCalcTauInt = FALSE;
3489 opt.sigSmoothIact = 0.0;
3490 opt.bAllowReduceIact = TRUE;
3491 opt.bInitPotByIntegration = TRUE;
3492 opt.acTrestart = 1.0;
3493 opt.stepchange = 100;
3494 opt.stepUpdateContrib = 100;
3496 if (!parse_common_args(&argc, argv, 0,
3497 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &opt.oenv))
3499 return 0;
3502 opt.unit = nenum(en_unit);
3503 opt.bsMethod = nenum(en_bsMethod);
3505 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3507 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3508 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3509 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3510 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3511 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3512 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3513 if (opt.bTab && opt.bPullf)
3515 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3516 "Provide pullx.xvg or pdo files!");
3519 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3521 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3523 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3525 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3526 "\n\n Check gmx wham -h !");
3529 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3530 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3531 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3532 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3533 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3535 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3536 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3537 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3538 if ( (bMinSet || bMaxSet) && bAutoSet)
3540 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3543 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3545 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3548 if (bMinSet && opt.bAuto)
3550 printf("Note: min and max given, switching off -auto.\n");
3551 opt.bAuto = FALSE;
3554 if (opt.bTauIntGiven && opt.bCalcTauInt)
3556 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3557 "the autocorrelation times. Not both.");
3560 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3562 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3563 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3565 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3567 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3568 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3571 /* Reading gmx4/gmx5 pull output and tpr files */
3572 if (opt.bTpr || opt.bPullf || opt.bPullx)
3574 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3576 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3577 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3578 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3579 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3580 if (nfiles != nfiles2)
3582 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3583 opt.fnTpr, nfiles2, fnPull);
3586 /* Read file that selects the pull group to be used */
3587 if (opt.fnCoordSel != nullptr)
3589 readPullCoordSelection(&opt, fninTpr, nfiles);
3592 window = initUmbrellaWindows(nfiles);
3593 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3595 else
3596 { /* reading pdo files */
3597 if (opt.fnCoordSel != nullptr)
3599 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3600 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3602 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3603 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3604 window = initUmbrellaWindows(nfiles);
3605 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3608 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3609 We can therefore get the units for the xlabel from the first coordinate. */
3610 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3612 nwins = nfiles;
3614 /* enforce equal weight for all histograms? */
3615 if (opt.bHistEq)
3617 enforceEqualWeights(window, nwins);
3620 /* write histograms */
3621 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3622 xlabel, "count", opt.oenv);
3623 for (l = 0; l < opt.bins; ++l)
3625 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3626 for (i = 0; i < nwins; ++i)
3628 for (j = 0; j < window[i].nPull; ++j)
3630 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3633 fprintf(histout, "\n");
3635 xvgrclose(histout);
3636 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3637 if (opt.bHistOnly)
3639 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3640 return 0;
3643 /* Using tabulated umbrella potential */
3644 if (opt.bTab)
3646 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3649 /* Integrated autocorrelation times provided ? */
3650 if (opt.bTauIntGiven)
3652 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3655 /* Compute integrated autocorrelation times */
3656 if (opt.bCalcTauInt)
3658 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3661 /* calc average and sigma for each histogram
3662 (maybe required for bootstrapping. If not, this is fast anyhow) */
3663 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3665 averageSigma(window, nwins);
3668 /* Get initial potential by simple integration */
3669 if (opt.bInitPotByIntegration)
3671 guessPotByIntegration(window, nwins, &opt, xlabel);
3674 /* Check if complete reaction coordinate is covered */
3675 checkReactionCoordinateCovered(window, nwins, &opt);
3677 /* Calculate profile */
3678 snew(profile, opt.bins);
3679 if (opt.verbose)
3681 opt.stepchange = 1;
3683 i = 0;
3686 if ( (i%opt.stepUpdateContrib) == 0)
3688 setup_acc_wham(profile, window, nwins, &opt);
3690 if (maxchange < opt.Tolerance)
3692 bExact = TRUE;
3693 /* if (opt.verbose) */
3694 printf("Switched to exact iteration in iteration %d\n", i);
3696 calc_profile(profile, window, nwins, &opt, bExact);
3697 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3699 printf("\t%4d) Maximum change %e\n", i, maxchange);
3701 i++;
3703 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3704 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3706 /* calc error from Kumar's formula */
3707 /* Unclear how the error propagates along reaction coordinate, therefore
3708 commented out */
3709 /* calc_error_kumar(profile,window, nwins,&opt); */
3711 /* Write profile in energy units? */
3712 if (opt.bLog)
3714 prof_normalization_and_unit(profile, &opt);
3715 std::strcpy(ylabel, en_unit_label[opt.unit]);
3716 std::strcpy(title, "Umbrella potential");
3718 else
3720 std::strcpy(ylabel, "Density of states");
3721 std::strcpy(title, "Density of states");
3724 /* symmetrize profile around z=0? */
3725 if (opt.bSym)
3727 symmetrizeProfile(profile, &opt);
3730 /* write profile or density of states */
3731 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3732 for (i = 0; i < opt.bins; ++i)
3734 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3736 xvgrclose(profout);
3737 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3739 /* Bootstrap Method */
3740 if (opt.nBootStrap)
3742 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3743 opt2fn("-hist", NFILE, fnm),
3744 xlabel, ylabel, profile, window, nwins, &opt);
3747 sfree(profile);
3748 freeUmbrellaWindows(window, nfiles);
3750 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3751 please_cite(stdout, "Hub2010");
3753 return 0;