Introduce SimulatorBuilder
[gromacs.git] / src / gromacs / gmxana / gmx_wham.cpp
blobc8175ac1b880f57dd79b21d1d33b2ecd54c7d87b
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /*! \internal \file
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
43 #include "gmxpre.h"
45 #include "config.h"
47 #include <cassert>
48 #include <cctype>
49 #include <cmath>
50 #include <cstdio>
51 #include <cstdlib>
52 #include <cstring>
54 #include <algorithm>
55 #include <sstream>
57 #include "gromacs/commandline/pargs.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/functions.h"
62 #include "gromacs/math/units.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/pull_params.h"
67 #include "gromacs/mdtypes/state.h"
68 #include "gromacs/pulling/pull.h"
69 #include "gromacs/random/tabulatednormaldistribution.h"
70 #include "gromacs/random/threefry.h"
71 #include "gromacs/random/uniformintdistribution.h"
72 #include "gromacs/random/uniformrealdistribution.h"
73 #include "gromacs/utility/arraysize.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/futil.h"
78 #include "gromacs/utility/gmxomp.h"
79 #include "gromacs/utility/path.h"
80 #include "gromacs/utility/pleasecite.h"
81 #include "gromacs/utility/smalloc.h"
83 //! longest file names allowed in input files
84 #define WHAM_MAXFILELEN 2048
86 /*! \brief
87 * enum for energy units
89 enum {
90 enSel, en_kJ, en_kCal, en_kT, enNr
92 /*! \brief
93 * enum for type of input files (pdos, tpr, or pullf)
95 enum {
96 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
99 /*! \brief enum for bootstrapping method
101 * These bootstrap methods are supported:
102 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
103 * (bsMethod_BayesianHist)
104 * - bootstrap complete histograms (bsMethod_hist)
105 * - bootstrap trajectories from given umbrella histograms. This generates new
106 * "synthetic" histograms (bsMethod_traj)
107 * - bootstrap trajectories from Gaussian with mu/sigma computed from
108 * the respective histogram (bsMethod_trajGauss). This gives very similar
109 * results compared to bsMethod_traj.
111 * ********************************************************************
112 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
113 * JS Hub, BL de Groot, D van der Spoel
114 * g_wham - A free weighted histogram analysis implementation including
115 * robust error and autocorrelation estimates,
116 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
117 * ********************************************************************
119 enum {
120 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
121 bsMethod_traj, bsMethod_trajGauss
124 //! Parameters of one pull coodinate
125 typedef struct
127 int pull_type; //!< such as constraint, umbrella, ...
128 int geometry; //!< such as distance, direction, cylinder
129 int ngroup; //!< the number of pull groups involved
130 ivec dim; //!< pull dimension with geometry distance
131 int ndim; //!< nr of pull_dim != 0
132 real k; //!< force constants in tpr file
133 real init_dist; //!< reference displacement
134 char coord_unit[256]; //!< unit of the displacement
135 } t_pullcoord;
137 //! Parameters of the umbrella potentials
138 typedef struct
141 * \name Using umbrella pull code since gromacs 4.x
143 /*!\{*/
144 int npullcrds; //!< nr of umbrella pull coordinates for reading
145 t_pullcoord *pcrd; //!< the pull coordinates
146 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
147 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
148 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
150 /*!\}*/
152 * \name Using PDO files common until gromacs 3.x
154 /*!\{*/
155 int nSkip;
156 char Reference[256];
157 int nPull;
158 int nDim;
159 ivec Dims;
160 char PullName[4][256];
161 double UmbPos[4][3];
162 double UmbCons[4][3];
163 /*!\}*/
164 } t_UmbrellaHeader;
166 //! Data in the umbrella histograms
167 typedef struct
169 int nPull; //!< nr of pull groups in this pdo or pullf/x file
170 double **Histo; //!< nPull histograms
171 double **cum; //!< nPull cumulative distribution functions
172 int nBin; //!< nr of bins. identical to opt->bins
173 double *k; //!< force constants for the nPull coords
174 double *pos; //!< umbrella positions for the nPull coords
175 double *z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
176 int *N; //!< nr of data points in nPull histograms.
177 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
179 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
181 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
182 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
184 double *g;
185 double *tau; //!< intetrated autocorrelation time (IACT)
186 double *tausmooth; //!< smoothed IACT
188 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
190 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
191 gmx_bool **bContrib;
192 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
194 /*! \brief average force estimated from average displacement, fAv=dzAv*k
196 * Used for integration to guess the potential.
198 real *forceAv;
199 real *aver; //!< average of histograms
200 real *sigma; //!< stddev of histograms
201 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
202 } t_UmbrellaWindow;
204 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
205 typedef struct
207 int n; //!< total nr of pull coords in this tpr file
208 int nUse; //!< nr of pull coords used
209 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
210 } t_coordselection;
212 //! Parameters of WHAM
213 typedef struct // NOLINT(clang-analyzer-optin.performance.Padding)
216 * \name Input stuff
218 /*!\{*/
219 const char *fnTpr, *fnPullf, *fnCoordSel;
220 const char *fnPdo, *fnPullx; //!< file names of input
221 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
222 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
224 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
225 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
226 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
227 t_coordselection *coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
228 /*!\}*/
230 * \name Basic WHAM options
232 /*!\{*/
233 int bins; //!< nr of bins, min, max, and dz of profile
234 real min, max, dz;
235 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
236 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
237 /*!\}*/
239 * \name Output control
241 /*!\{*/
242 gmx_bool bLog; //!< energy output (instead of probability) for profile
243 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
244 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
245 /*! \brief after wham, set prof to zero at this z-position.
246 * When bootstrapping, set zProf0 to a "stable" reference position.
248 real zProf0;
249 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
251 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
252 gmx_bool bAuto; //!< determine min and max automatically but do not exit
254 gmx_bool verbose; //!< more noisy wham mode
255 int stepchange; //!< print maximum change in prof after how many interations
256 gmx_output_env_t *oenv; //!< xvgr options
257 /*!\}*/
259 * \name Autocorrelation stuff
261 /*!\{*/
262 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
263 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
264 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
265 real acTrestart; //!< when computing ACT, time between restarting points
267 /* \brief Enforce the same weight for each umbella window, that is
268 * calculate with the same number of data points for
269 * each window. That can be reasonable, if the histograms
270 * have different length, but due to autocorrelation,
271 * a longer simulation should not have larger weightin wham.
273 gmx_bool bHistEq;
274 /*!\}*/
277 * \name Bootstrapping stuff
279 /*!\{*/
280 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
282 /* \brief bootstrap method
284 * if == bsMethod_hist, consider complete histograms as independent
285 * data points and, hence, only mix complete histograms.
286 * if == bsMethod_BayesianHist, consider complete histograms
287 * as independent data points, but assign random weights
288 * to the histograms during the bootstrapping ("Bayesian bootstrap")
289 * In case of long correlations (e.g., inside a channel), these
290 * will yield a more realistic error.
291 * if == bsMethod_traj(Gauss), generate synthetic histograms
292 * for each given
293 * histogram by generating an autocorrelated random sequence
294 * that is distributed according to the respective given
295 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
296 * (instead of from the umbrella histogram) to generate a new
297 * histogram.
299 int bsMethod;
301 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
302 real tauBootStrap;
304 /* \brief when mixing histograms, mix only histograms withing blocks
305 long the reaction coordinate xi. Avoids gaps along xi. */
306 int histBootStrapBlockLength;
308 int bsSeed; //!< random seed for bootstrapping
310 /* \brief Write cumulative distribution functions (CDFs) of histograms
311 and write the generated histograms for each bootstrap */
312 gmx_bool bs_verbose;
313 /*!\}*/
315 * \name tabulated umbrella potential stuff
317 /*!\{*/
318 gmx_bool bTab;
319 double *tabX, *tabY, tabMin, tabMax, tabDz;
320 int tabNbins;
321 /*!\}*/
322 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
323 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
324 } t_UmbrellaOptions;
326 //! Make an umbrella window (may contain several histograms)
327 static t_UmbrellaWindow * initUmbrellaWindows(int nwin)
329 t_UmbrellaWindow *win;
330 int i;
331 snew(win, nwin);
332 for (i = 0; i < nwin; i++)
334 win[i].Histo = win[i].cum = nullptr;
335 win[i].k = win[i].pos = win[i].z = nullptr;
336 win[i].N = win[i].Ntot = nullptr;
337 win[i].g = win[i].tau = win[i].tausmooth = nullptr;
338 win[i].bContrib = nullptr;
339 win[i].ztime = nullptr;
340 win[i].forceAv = nullptr;
341 win[i].aver = win[i].sigma = nullptr;
342 win[i].bsWeight = nullptr;
344 return win;
347 //! Delete an umbrella window (may contain several histograms)
348 static void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
350 int i, j;
351 for (i = 0; i < nwin; i++)
353 if (win[i].Histo)
355 for (j = 0; j < win[i].nPull; j++)
357 sfree(win[i].Histo[j]);
360 if (win[i].cum)
362 for (j = 0; j < win[i].nPull; j++)
364 sfree(win[i].cum[j]);
367 if (win[i].bContrib)
369 for (j = 0; j < win[i].nPull; j++)
371 sfree(win[i].bContrib[j]);
374 sfree(win[i].Histo);
375 sfree(win[i].cum);
376 sfree(win[i].k);
377 sfree(win[i].pos);
378 sfree(win[i].z);
379 sfree(win[i].N);
380 sfree(win[i].Ntot);
381 sfree(win[i].g);
382 sfree(win[i].tau);
383 sfree(win[i].tausmooth);
384 sfree(win[i].bContrib);
385 sfree(win[i].ztime);
386 sfree(win[i].forceAv);
387 sfree(win[i].aver);
388 sfree(win[i].sigma);
389 sfree(win[i].bsWeight);
391 sfree(win);
394 /*! \brief
395 * Read and setup tabulated umbrella potential
397 static void setup_tab(const char *fn, t_UmbrellaOptions *opt)
399 int i, ny, nl;
400 double **y;
402 printf("Setting up tabulated potential from file %s\n", fn);
403 nl = read_xvg(fn, &y, &ny);
404 opt->tabNbins = nl;
405 if (ny != 2)
407 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
409 opt->tabMin = y[0][0];
410 opt->tabMax = y[0][nl-1];
411 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
412 if (opt->tabDz <= 0)
414 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
415 "ascending z-direction", fn);
417 for (i = 0; i < nl-1; i++)
419 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
421 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", fn);
424 snew(opt->tabY, nl);
425 snew(opt->tabX, nl);
426 for (i = 0; i < nl; i++)
428 opt->tabX[i] = y[0][i];
429 opt->tabY[i] = y[1][i];
431 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
432 opt->tabMin, opt->tabMax, opt->tabDz);
435 //! Read the header of an PDO file (position, force const, nr of groups)
436 static void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
438 char line[2048];
439 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
440 int i;
441 std::istringstream ist;
443 /* line 1 */
444 if (fgets(line, 2048, file) == nullptr)
446 gmx_fatal(FARGS, "Error reading header from pdo file\n");
448 ist.str(line);
449 ist >> Buffer0 >> Buffer1 >> Buffer2;
450 if (std::strcmp(Buffer1, "UMBRELLA") != 0)
452 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
453 "(Found in first line: `%s')\n",
454 Buffer1, "UMBRELLA", line);
456 if (std::strcmp(Buffer2, "3.0") != 0)
458 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
461 /* line 2 */
462 if (fgets(line, 2048, file) == nullptr)
464 gmx_fatal(FARGS, "Error reading header from pdo file\n");
466 ist.str(line);
467 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
468 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
470 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
471 if (header->nDim != 1)
473 gmx_fatal(FARGS, "Currently only supports one dimension");
476 /* line3 */
477 if (fgets(line, 2048, file) == nullptr)
479 gmx_fatal(FARGS, "Error reading header from pdo file\n");
481 ist.str(line);
482 ist >> Buffer0 >> Buffer1 >> header->nSkip;
484 /* line 4 */
485 if (fgets(line, 2048, file) == nullptr)
487 gmx_fatal(FARGS, "Error reading header from pdo file\n");
489 ist.str(line);
490 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
492 /* line 5 */
493 if (fgets(line, 2048, file) == nullptr)
495 gmx_fatal(FARGS, "Error reading header from pdo file\n");
497 ist.str(line);
498 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
500 if (opt->verbose)
502 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
503 header->Reference);
506 for (i = 0; i < header->nPull; ++i)
508 if (fgets(line, 2048, file) == nullptr)
510 gmx_fatal(FARGS, "Error reading header from pdo file\n");
512 ist.str(line);
513 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
514 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
515 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
517 if (opt->verbose)
519 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
520 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
524 if (fgets(line, 2048, file) == nullptr)
526 gmx_fatal(FARGS, "Cannot read from file\n");
528 ist.str(line);
529 ist >> Buffer3;
530 if (std::strcmp(Buffer3, "#####") != 0)
532 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
536 //! smarter fgets
537 static char *fgets3(FILE *fp, char ptr[], int *len)
539 char *p;
540 int slen;
542 if (fgets(ptr, *len-1, fp) == nullptr)
544 return nullptr;
546 p = ptr;
547 while ((std::strchr(ptr, '\n') == nullptr) && (!feof(fp)))
549 /* This line is longer than len characters, let's increase len! */
550 *len += STRLEN;
551 p += STRLEN;
552 srenew(ptr, *len);
553 if (fgets(p-1, STRLEN, fp) == nullptr)
555 break;
558 slen = std::strlen(ptr);
559 if (ptr[slen-1] == '\n')
561 ptr[slen-1] = '\0';
564 return ptr;
567 /*! \brief Read the data columns of and PDO file.
569 * TO DO: Get rid of the scanf function to avoid the clang warning.
570 * At the moment, this warning is avoided by hiding the format string
571 * the variable fmtlf.
573 static void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
574 int fileno, t_UmbrellaWindow * win,
575 t_UmbrellaOptions *opt,
576 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
578 int i, inttemp, bins, count, ntot;
579 real minval, maxval, minfound = 1e20, maxfound = -1e20;
580 double temp, time, time0 = 0, dt;
581 char *ptr = nullptr;
582 t_UmbrellaWindow * window = nullptr;
583 gmx_bool timeok, dt_ok = true;
584 char *tmpbuf = nullptr, fmt[256], fmtign[256], fmtlf[5] = "%lf";
585 int len = STRLEN, dstep = 1;
586 const int blocklen = 4096;
587 int *lennow = nullptr;
589 if (!bGetMinMax)
591 bins = opt->bins;
592 minval = opt->min;
593 maxval = opt->max;
595 window = win+fileno;
596 /* Need to alocate memory and set up structure */
597 window->nPull = header->nPull;
598 window->nBin = bins;
600 snew(window->Histo, window->nPull);
601 snew(window->z, window->nPull);
602 snew(window->k, window->nPull);
603 snew(window->pos, window->nPull);
604 snew(window->N, window->nPull);
605 snew(window->Ntot, window->nPull);
606 snew(window->g, window->nPull);
607 snew(window->bsWeight, window->nPull);
609 window->bContrib = nullptr;
611 if (opt->bCalcTauInt)
613 snew(window->ztime, window->nPull);
615 else
617 window->ztime = nullptr;
619 snew(lennow, window->nPull);
621 for (i = 0; i < window->nPull; ++i)
623 window->z[i] = 1;
624 window->bsWeight[i] = 1.;
625 snew(window->Histo[i], bins);
626 window->k[i] = header->UmbCons[i][0];
627 window->pos[i] = header->UmbPos[i][0];
628 window->N[i] = 0;
629 window->Ntot[i] = 0;
630 window->g[i] = 1.;
631 if (opt->bCalcTauInt)
633 window->ztime[i] = nullptr;
637 /* Done with setup */
639 else
641 minfound = 1e20;
642 maxfound = -1e20;
643 minval = maxval = bins = 0; /* Get rid of warnings */
646 count = 0;
647 snew(tmpbuf, len);
648 while ( (ptr = fgets3(file, tmpbuf, &len)) != nullptr)
650 trim(ptr);
652 if (ptr[0] == '#' || std::strlen(ptr) < 2)
654 continue;
657 /* Initiate format string */
658 fmtign[0] = '\0';
659 std::strcat(fmtign, "%*s");
661 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
662 /* Round time to fs */
663 time = 1.0/1000*( gmx::roundToInt64(time*1000) );
665 /* get time step of pdo file */
666 if (count == 0)
668 time0 = time;
670 else if (count == 1)
672 dt = time-time0;
673 if (opt->dt > 0.0)
675 dstep = gmx::roundToInt(opt->dt/dt);
676 if (dstep == 0)
678 dstep = 1;
681 if (!bGetMinMax)
683 window->dt = dt*dstep;
686 count++;
688 dt_ok = ((count-1)%dstep == 0);
689 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
690 /* if (opt->verbose)
691 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
692 time,opt->tmin, opt->tmax, dt_ok,timeok); */
694 if (timeok)
696 for (i = 0; i < header->nPull; ++i)
698 std::strcpy(fmt, fmtign);
699 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
700 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
701 if (sscanf(ptr, fmt, &temp))
703 temp += header->UmbPos[i][0];
704 if (bGetMinMax)
706 if (temp < minfound)
708 minfound = temp;
710 if (temp > maxfound)
712 maxfound = temp;
715 else
717 if (opt->bCalcTauInt)
719 /* save time series for autocorrelation analysis */
720 ntot = window->Ntot[i];
721 if (ntot >= lennow[i])
723 lennow[i] += blocklen;
724 srenew(window->ztime[i], lennow[i]);
726 window->ztime[i][ntot] = temp;
729 temp -= minval;
730 temp /= (maxval-minval);
731 temp *= bins;
732 temp = std::floor(temp);
734 inttemp = static_cast<int> (temp);
735 if (opt->bCycl)
737 if (inttemp < 0)
739 inttemp += bins;
741 else if (inttemp >= bins)
743 inttemp -= bins;
747 if (inttemp >= 0 && inttemp < bins)
749 window->Histo[i][inttemp] += 1.;
750 window->N[i]++;
752 window->Ntot[i]++;
757 if (time > opt->tmax)
759 if (opt->verbose)
761 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
763 break;
767 if (bGetMinMax)
769 *mintmp = minfound;
770 *maxtmp = maxfound;
773 sfree(lennow);
774 sfree(tmpbuf);
777 /*! \brief Set identical weights for all histograms
779 * Normally, the weight is given by the number data points in each
780 * histogram, together with the autocorrelation time. This can be overwritten
781 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
782 * an appropriate autocorrelation times instead of using this function.
784 static void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
786 int i, k, j, NEnforced;
787 double ratio;
789 NEnforced = window[0].Ntot[0];
790 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
791 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
792 /* enforce all histograms to have the same weight as the very first histogram */
794 for (j = 0; j < nWindows; ++j)
796 for (k = 0; k < window[j].nPull; ++k)
798 ratio = 1.0*NEnforced/window[j].Ntot[k];
799 for (i = 0; i < window[0].nBin; ++i)
801 window[j].Histo[k][i] *= ratio;
803 window[j].N[k] = gmx::roundToInt(ratio*window[j].N[k]);
808 /*! \brief Simple linear interpolation between two given tabulated points
810 static double tabulated_pot(double dist, t_UmbrellaOptions *opt)
812 int jl, ju;
813 double pl, pu, dz, dp;
815 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
816 ju = jl+1;
817 if (jl < 0 || ju >= opt->tabNbins)
819 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
820 "Provide an extended table.", dist, jl, ju);
822 pl = opt->tabY[jl];
823 pu = opt->tabY[ju];
824 dz = dist-opt->tabX[jl];
825 dp = (pu-pl)*dz/opt->tabDz;
826 return pl+dp;
830 /*! \brief
831 * Check which bins substiantially contribute (accelerates WHAM)
833 * Don't worry, that routine does not mean we compute the PMF in limited precision.
834 * After rapid convergence (using only substiantal contributions), we always switch to
835 * full precision.
837 static void setup_acc_wham(const double *profile, t_UmbrellaWindow * window, int nWindows,
838 t_UmbrellaOptions *opt)
840 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
841 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
842 gmx_bool bAnyContrib;
843 static int bFirst = 1;
844 static double wham_contrib_lim;
846 if (bFirst)
848 for (i = 0; i < nWindows; ++i)
850 nGrptot += window[i].nPull;
852 wham_contrib_lim = opt->Tolerance/nGrptot;
855 ztot = opt->max-opt->min;
856 ztot_half = ztot/2;
858 for (i = 0; i < nWindows; ++i)
860 if (!window[i].bContrib)
862 snew(window[i].bContrib, window[i].nPull);
864 for (j = 0; j < window[i].nPull; ++j)
866 if (!window[i].bContrib[j])
868 snew(window[i].bContrib[j], opt->bins);
870 bAnyContrib = FALSE;
871 for (k = 0; k < opt->bins; ++k)
873 temp = (1.0*k+0.5)*dz+min;
874 distance = temp - window[i].pos[j]; /* distance to umbrella center */
875 if (opt->bCycl)
876 { /* in cyclic wham: */
877 if (distance > ztot_half) /* |distance| < ztot_half */
879 distance -= ztot;
881 else if (distance < -ztot_half)
883 distance += ztot;
886 /* Note: there are two contributions to bin k in the wham equations:
887 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
888 ii) exp(- U/(BOLTZ*opt->Temperature))
889 where U is the umbrella potential
890 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
893 if (!opt->bTab)
895 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
897 else
899 U = tabulated_pot(distance, opt); /* Use tabulated potential */
901 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
902 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
903 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
904 bAnyContrib = bAnyContrib || window[i].bContrib[j][k];
905 if (window[i].bContrib[j][k])
907 nContrib++;
909 nTot++;
911 /* If this histo is far outside min and max all bContrib may be FALSE,
912 causing a floating point exception later on. To avoid that, switch
913 them all to true.*/
914 if (!bAnyContrib)
916 for (k = 0; k < opt->bins; ++k)
918 window[i].bContrib[j][k] = TRUE;
923 if (bFirst)
925 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
926 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
929 if (opt->verbose)
931 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
932 nContrib, nTot);
934 bFirst = 0;
937 //! Compute the PMF (one of the two main WHAM routines)
938 static void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
939 t_UmbrellaOptions *opt, gmx_bool bExact)
941 double ztot_half, ztot, min = opt->min, dz = opt->dz;
943 ztot = opt->max-opt->min;
944 ztot_half = ztot/2;
946 #pragma omp parallel
950 int nthreads = gmx_omp_get_max_threads();
951 int thread_id = gmx_omp_get_thread_num();
952 int i;
953 int i0 = thread_id*opt->bins/nthreads;
954 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
956 for (i = i0; i < i1; ++i)
958 int j, k;
959 double num, denom, invg, temp = 0, distance, U = 0;
960 num = denom = 0.;
961 for (j = 0; j < nWindows; ++j)
963 for (k = 0; k < window[j].nPull; ++k)
965 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
966 temp = (1.0*i+0.5)*dz+min;
967 num += invg*window[j].Histo[k][i];
969 if (!(bExact || window[j].bContrib[k][i]))
971 continue;
973 distance = temp - window[j].pos[k]; /* distance to umbrella center */
974 if (opt->bCycl)
975 { /* in cyclic wham: */
976 if (distance > ztot_half) /* |distance| < ztot_half */
978 distance -= ztot;
980 else if (distance < -ztot_half)
982 distance += ztot;
986 if (!opt->bTab)
988 U = 0.5*window[j].k[k]*gmx::square(distance); /* harmonic potential assumed. */
990 else
992 U = tabulated_pot(distance, opt); /* Use tabulated potential */
994 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
997 profile[i] = num/denom;
1000 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1004 //! Compute the free energy offsets z (one of the two main WHAM routines)
1005 static double calc_z(const double * profile, t_UmbrellaWindow * window, int nWindows,
1006 t_UmbrellaOptions *opt, gmx_bool bExact)
1008 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1009 double maxglob = -1e20;
1011 ztot = opt->max-opt->min;
1012 ztot_half = ztot/2;
1014 #pragma omp parallel
1018 int nthreads = gmx_omp_get_max_threads();
1019 int thread_id = gmx_omp_get_thread_num();
1020 int i;
1021 int i0 = thread_id*nWindows/nthreads;
1022 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1023 double maxloc = -1e20;
1025 for (i = i0; i < i1; ++i)
1027 double total = 0, temp, distance, U = 0;
1028 int j, k;
1030 for (j = 0; j < window[i].nPull; ++j)
1032 total = 0;
1033 for (k = 0; k < window[i].nBin; ++k)
1035 if (!(bExact || window[i].bContrib[j][k]))
1037 continue;
1039 temp = (1.0*k+0.5)*dz+min;
1040 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1041 if (opt->bCycl)
1042 { /* in cyclic wham: */
1043 if (distance > ztot_half) /* |distance| < ztot_half */
1045 distance -= ztot;
1047 else if (distance < -ztot_half)
1049 distance += ztot;
1053 if (!opt->bTab)
1055 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
1057 else
1059 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1061 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1063 /* Avoid floating point exception if window is far outside min and max */
1064 if (total != 0.0)
1066 total = -std::log(total);
1068 else
1070 total = 1000.0;
1072 temp = std::abs(total - window[i].z[j]);
1073 if (temp > maxloc)
1075 maxloc = temp;
1077 window[i].z[j] = total;
1080 /* Now get maximum maxloc from the threads and put in maxglob */
1081 if (maxloc > maxglob)
1083 #pragma omp critical
1085 if (maxloc > maxglob)
1087 maxglob = maxloc;
1092 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1095 return maxglob;
1098 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1099 static void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1101 int i, j, bins = opt->bins;
1102 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1103 double z, z1;
1105 if (min > 0. || max < 0.)
1107 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1108 opt->min, opt->max);
1111 snew(prof2, bins);
1113 for (i = 0; i < bins; i++)
1115 z = min+(i+0.5)*dz;
1116 zsym = -z;
1117 /* bin left of zsym */
1118 j = gmx::roundToInt((zsym-min)/dz)-1;
1119 if (j >= 0 && (j+1) < bins)
1121 /* interpolate profile linearly between bins j and j+1 */
1122 z1 = min+(j+0.5)*dz;
1123 deltaz = zsym-z1;
1124 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1125 /* average between left and right */
1126 prof2[i] = 0.5*(profsym+profile[i]);
1128 else
1130 prof2[i] = profile[i];
1134 std::memcpy(profile, prof2, bins*sizeof(double));
1135 sfree(prof2);
1138 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1139 static void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1141 int i, bins, imin;
1142 double unit_factor = 1., diff;
1144 bins = opt->bins;
1146 /* Not log? Nothing to do! */
1147 if (!opt->bLog)
1149 return;
1152 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1153 if (opt->unit == en_kT)
1155 unit_factor = 1.0;
1157 else if (opt->unit == en_kJ)
1159 unit_factor = BOLTZ*opt->Temperature;
1161 else if (opt->unit == en_kCal)
1163 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1165 else
1167 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1170 for (i = 0; i < bins; i++)
1172 if (profile[i] > 0.0)
1174 profile[i] = -std::log(profile[i])*unit_factor;
1178 /* shift to zero at z=opt->zProf0 */
1179 if (!opt->bProf0Set)
1181 diff = profile[0];
1183 else
1185 /* Get bin with shortest distance to opt->zProf0
1186 (-0.5 from bin position and +0.5 from rounding cancel) */
1187 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1188 if (imin < 0)
1190 imin = 0;
1192 else if (imin >= bins)
1194 imin = bins-1;
1196 diff = profile[imin];
1199 /* Shift to zero */
1200 for (i = 0; i < bins; i++)
1202 profile[i] -= diff;
1206 //! Make an array of random integers (used for bootstrapping)
1207 static void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine * rng)
1209 gmx::UniformIntDistribution<int> dist(0, blockLength-1);
1211 int ipull, blockBase, nr, ipullRandom;
1213 if (blockLength == 0)
1215 blockLength = nPull;
1218 for (ipull = 0; ipull < nPull; ipull++)
1220 blockBase = (ipull/blockLength)*blockLength;
1222 { /* make sure nothing bad happens in the last block */
1223 nr = dist(*rng); // [0,blockLength-1]
1224 ipullRandom = blockBase + nr;
1226 while (ipullRandom >= nPull);
1227 if (ipullRandom < 0 || ipullRandom >= nPull)
1229 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1230 "blockLength = %d, blockBase = %d\n",
1231 ipullRandom, nPull, nr, blockLength, blockBase);
1233 randomArray[ipull] = ipullRandom;
1235 /*for (ipull=0; ipull<nPull; ipull++)
1236 printf("%d ",randomArray[ipull]); printf("\n"); */
1239 /*! \brief Set pull group information of a synthetic histogram
1241 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1242 * but it is not required if we bootstrap complete histograms.
1244 static void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1245 t_UmbrellaWindow *thisWindow, int pullid)
1247 synthWindow->N [0] = thisWindow->N [pullid];
1248 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1249 synthWindow->pos [0] = thisWindow->pos [pullid];
1250 synthWindow->z [0] = thisWindow->z [pullid];
1251 synthWindow->k [0] = thisWindow->k [pullid];
1252 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1253 synthWindow->g [0] = thisWindow->g [pullid];
1254 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1257 /*! \brief Calculate cumulative distribution function of of all histograms.
1259 * This allow to create random number sequences
1260 * which are distributed according to the histograms. Required to generate
1261 * the "synthetic" histograms for the Bootstrap method
1263 static void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1264 t_UmbrellaOptions *opt, const char *fnhist, const char *xlabel)
1266 int i, j, k, nbin;
1267 double last;
1268 std::string fn;
1269 FILE *fp = nullptr;
1271 if (opt->bs_verbose)
1273 fn = gmx::Path::concatenateBeforeExtension(fnhist, "_cumul");
1274 fp = xvgropen(fn.c_str(), "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1277 nbin = opt->bins;
1278 for (i = 0; i < nWindows; i++)
1280 snew(window[i].cum, window[i].nPull);
1281 for (j = 0; j < window[i].nPull; j++)
1283 snew(window[i].cum[j], nbin+1);
1284 window[i].cum[j][0] = 0.;
1285 for (k = 1; k <= nbin; k++)
1287 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1290 /* normalize CDFs. Ensure cum[nbin]==1 */
1291 last = window[i].cum[j][nbin];
1292 for (k = 0; k <= nbin; k++)
1294 window[i].cum[j][k] /= last;
1299 printf("Cumulative distribution functions of all histograms created.\n");
1300 if (opt->bs_verbose)
1302 for (k = 0; k <= nbin; k++)
1304 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1305 for (i = 0; i < nWindows; i++)
1307 for (j = 0; j < window[i].nPull; j++)
1309 fprintf(fp, "%g\t", window[i].cum[j][k]);
1312 fprintf(fp, "\n");
1314 printf("Wrote cumulative distribution functions to %s\n", fn.c_str());
1315 xvgrclose(fp);
1320 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1322 * This is used to generate a random sequence distributed according to a histogram
1324 static void searchCumulative(const double xx[], int n, double x, int *j)
1326 int ju, jm, jl;
1328 jl = -1;
1329 ju = n;
1330 while (ju-jl > 1)
1332 jm = (ju+jl) >> 1;
1333 if (x >= xx[jm])
1335 jl = jm;
1337 else
1339 ju = jm;
1342 if (x == xx[0])
1344 *j = 0;
1346 else if (x == xx[n-1])
1348 *j = n-2;
1350 else
1352 *j = jl;
1356 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1357 static void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1358 int pullid, t_UmbrellaOptions *opt)
1360 int N, i, nbins, r_index, ibin;
1361 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1362 char errstr[1024];
1364 N = thisWindow->N[pullid];
1365 dt = thisWindow->dt;
1366 nbins = thisWindow->nBin;
1368 /* tau = autocorrelation time */
1369 if (opt->tauBootStrap > 0.0)
1371 tausteps = opt->tauBootStrap/dt;
1373 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1375 /* calc tausteps from g=1+2tausteps */
1376 g = thisWindow->g[pullid];
1377 tausteps = (g-1)/2;
1379 else
1381 sprintf(errstr,
1382 "When generating hypothetical trajectories from given umbrella histograms,\n"
1383 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1384 "cannot be predicted. You have 3 options:\n"
1385 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1386 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1387 std::strcat(errstr,
1388 " with option -iiact for all umbrella windows.\n"
1389 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1390 " Use option (3) only if you are sure what you're doing, you may severely\n"
1391 " underestimate the error if a too small ACT is given.\n");
1392 gmx_fatal(FARGS, "%s", errstr);
1395 synthWindow->N [0] = N;
1396 synthWindow->pos [0] = thisWindow->pos[pullid];
1397 synthWindow->z [0] = thisWindow->z[pullid];
1398 synthWindow->k [0] = thisWindow->k[pullid];
1399 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1400 synthWindow->g [0] = thisWindow->g [pullid];
1401 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1403 for (i = 0; i < nbins; i++)
1405 synthWindow->Histo[0][i] = 0.;
1408 if (opt->bsMethod == bsMethod_trajGauss)
1410 sig = thisWindow->sigma [pullid];
1411 mu = thisWindow->aver [pullid];
1414 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1415 Use the following:
1416 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1417 then
1418 z = a*x + sqrt(1-a^2)*y
1419 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1420 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1421 function
1422 C(t) = exp(-t/tau) with tau=-1/ln(a)
1424 Then, use error function to turn the Gaussian random variable into a uniformly
1425 distributed one in [0,1]. Eventually, use cumulative distribution function of
1426 histogram to get random variables distributed according to histogram.
1427 Note: The ACT of the flat distribution and of the generated histogram is not
1428 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1430 a = std::exp(-1.0/tausteps);
1431 ap = std::sqrt(1.0-a*a);
1432 invsqrt2 = 1.0/std::sqrt(2.0);
1434 /* init random sequence */
1435 x = opt->normalDistribution(opt->rng);
1437 if (opt->bsMethod == bsMethod_traj)
1439 /* bootstrap points from the umbrella histograms */
1440 for (i = 0; i < N; i++)
1442 y = opt->normalDistribution(opt->rng);
1443 x = a*x+ap*y;
1444 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1445 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1447 r = 0.5*(1+std::erf(x*invsqrt2));
1448 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1449 synthWindow->Histo[0][r_index] += 1.;
1452 else if (opt->bsMethod == bsMethod_trajGauss)
1454 /* bootstrap points from a Gaussian with the same average and sigma
1455 as the respective umbrella histogram. The idea was, that -given
1456 limited sampling- the bootstrapped histograms are otherwise biased
1457 from the limited sampling of the US histos. However, bootstrapping from
1458 the Gaussian seems to yield a similar estimate. */
1459 i = 0;
1460 while (i < N)
1462 y = opt->normalDistribution(opt->rng);
1463 x = a*x+ap*y;
1464 z = x*sig+mu;
1465 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1466 if (opt->bCycl)
1468 if (ibin < 0)
1470 while ( (ibin += nbins) < 0)
1475 else if (ibin >= nbins)
1477 while ( (ibin -= nbins) >= nbins)
1484 if (ibin >= 0 && ibin < nbins)
1486 synthWindow->Histo[0][ibin] += 1.;
1487 i++;
1491 else
1493 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1497 /*! \brief Write all histograms to a file
1499 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1500 * sets of bootstrapped histograms.
1502 static void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1503 int bs_index, t_UmbrellaOptions *opt, const char *xlabel)
1505 std::string fn, title;
1506 FILE *fp;
1507 int bins, l, i, j;
1509 if (bs_index >= 0)
1511 fn = gmx::Path::concatenateBeforeExtension(fnhist, gmx::formatString("_bs%d", bs_index));
1512 title = gmx::formatString("Umbrella histograms. Bootstrap #%d", bs_index);
1514 else
1516 fn = gmx_strdup(fnhist);
1517 title = gmx::formatString("Umbrella histograms");
1520 fp = xvgropen(fn.c_str(), title.c_str(), xlabel, "count", opt->oenv);
1521 bins = opt->bins;
1523 /* Write histograms */
1524 for (l = 0; l < bins; ++l)
1526 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1527 for (i = 0; i < nWindows; ++i)
1529 for (j = 0; j < window[i].nPull; ++j)
1531 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1534 fprintf(fp, "\n");
1537 xvgrclose(fp);
1538 printf("Wrote %s\n", fn.c_str());
1541 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1542 static void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1544 int i;
1545 double *r;
1546 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1548 snew(r, nAllPull);
1550 /* generate ordered random numbers between 0 and nAllPull */
1551 for (i = 0; i < nAllPull-1; i++)
1553 r[i] = dist(opt->rng);
1555 std::sort(r, r+nAllPull-1);
1556 r[nAllPull-1] = 1.0*nAllPull;
1558 synthwin[0].bsWeight[0] = r[0];
1559 for (i = 1; i < nAllPull; i++)
1561 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1564 /* avoid to have zero weight by adding a tiny value */
1565 for (i = 0; i < nAllPull; i++)
1567 if (synthwin[i].bsWeight[0] < 1e-5)
1569 synthwin[i].bsWeight[0] = 1e-5;
1573 sfree(r);
1576 //! The main bootstrapping routine
1577 static void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1578 const char *xlabel, char* ylabel, double *profile,
1579 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1581 t_UmbrellaWindow * synthWindow;
1582 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1583 int i, j, *randomArray = nullptr, winid, pullid, ib;
1584 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1585 FILE *fp;
1586 gmx_bool bExact = FALSE;
1588 /* init random generator */
1589 if (opt->bsSeed == 0)
1591 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1593 opt->rng.seed(opt->bsSeed);
1595 snew(bsProfile, opt->bins);
1596 snew(bsProfiles_av, opt->bins);
1597 snew(bsProfiles_av2, opt->bins);
1599 /* Create array of all pull groups. Note that different windows
1600 may have different nr of pull groups
1601 First: Get total nr of pull groups */
1602 nAllPull = 0;
1603 for (i = 0; i < nWindows; i++)
1605 nAllPull += window[i].nPull;
1607 snew(allPull_winId, nAllPull);
1608 snew(allPull_pullId, nAllPull);
1609 iAllPull = 0;
1610 /* Setup one array of all pull groups */
1611 for (i = 0; i < nWindows; i++)
1613 for (j = 0; j < window[i].nPull; j++)
1615 allPull_winId[iAllPull] = i;
1616 allPull_pullId[iAllPull] = j;
1617 iAllPull++;
1621 /* setup stuff for synthetic windows */
1622 snew(synthWindow, nAllPull);
1623 for (i = 0; i < nAllPull; i++)
1625 synthWindow[i].nPull = 1;
1626 synthWindow[i].nBin = opt->bins;
1627 snew(synthWindow[i].Histo, 1);
1628 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1630 snew(synthWindow[i].Histo[0], opt->bins);
1632 snew(synthWindow[i].N, 1);
1633 snew(synthWindow[i].pos, 1);
1634 snew(synthWindow[i].z, 1);
1635 snew(synthWindow[i].k, 1);
1636 snew(synthWindow[i].bContrib, 1);
1637 snew(synthWindow[i].g, 1);
1638 snew(synthWindow[i].bsWeight, 1);
1641 switch (opt->bsMethod)
1643 case bsMethod_hist:
1644 snew(randomArray, nAllPull);
1645 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1646 please_cite(stdout, "Hub2006");
1647 break;
1648 case bsMethod_BayesianHist:
1649 /* just copy all histogams into synthWindow array */
1650 for (i = 0; i < nAllPull; i++)
1652 winid = allPull_winId [i];
1653 pullid = allPull_pullId[i];
1654 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1656 break;
1657 case bsMethod_traj:
1658 case bsMethod_trajGauss:
1659 calc_cumulatives(window, nWindows, opt, fnhist, xlabel);
1660 break;
1661 default:
1662 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1665 /* do bootstrapping */
1666 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1667 for (ib = 0; ib < opt->nBootStrap; ib++)
1669 printf(" *******************************************\n"
1670 " ******** Start bootstrap nr %d ************\n"
1671 " *******************************************\n", ib+1);
1673 switch (opt->bsMethod)
1675 case bsMethod_hist:
1676 /* bootstrap complete histograms from given histograms */
1677 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1678 for (i = 0; i < nAllPull; i++)
1680 winid = allPull_winId [randomArray[i]];
1681 pullid = allPull_pullId[randomArray[i]];
1682 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1684 break;
1685 case bsMethod_BayesianHist:
1686 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1687 setRandomBsWeights(synthWindow, nAllPull, opt);
1688 break;
1689 case bsMethod_traj:
1690 case bsMethod_trajGauss:
1691 /* create new histos from given histos, that is generate new hypothetical
1692 trajectories */
1693 for (i = 0; i < nAllPull; i++)
1695 winid = allPull_winId[i];
1696 pullid = allPull_pullId[i];
1697 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1699 break;
1702 /* write histos in case of verbose output */
1703 if (opt->bs_verbose)
1705 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1708 /* do wham */
1709 i = 0;
1710 bExact = FALSE;
1711 maxchange = 1e20;
1712 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1715 if ( (i%opt->stepUpdateContrib) == 0)
1717 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1719 if (maxchange < opt->Tolerance)
1721 bExact = TRUE;
1723 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1725 printf("\t%4d) Maximum change %e\n", i, maxchange);
1727 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1728 i++;
1730 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1731 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1733 if (opt->bLog)
1735 prof_normalization_and_unit(bsProfile, opt);
1738 /* symmetrize profile around z=0 */
1739 if (opt->bSym)
1741 symmetrizeProfile(bsProfile, opt);
1744 /* save stuff to get average and stddev */
1745 for (i = 0; i < opt->bins; i++)
1747 tmp = bsProfile[i];
1748 bsProfiles_av[i] += tmp;
1749 bsProfiles_av2[i] += tmp*tmp;
1750 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1752 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1754 xvgrclose(fp);
1756 /* write average and stddev */
1757 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1758 if (output_env_get_print_xvgr_codes(opt->oenv))
1760 fprintf(fp, "@TYPE xydy\n");
1762 for (i = 0; i < opt->bins; i++)
1764 bsProfiles_av [i] /= opt->nBootStrap;
1765 bsProfiles_av2[i] /= opt->nBootStrap;
1766 tmp = bsProfiles_av2[i]-gmx::square(bsProfiles_av[i]);
1767 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1768 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1770 xvgrclose(fp);
1771 printf("Wrote boot strap result to %s\n", fnres);
1774 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1775 static int whaminFileType(char *fn)
1777 int len;
1778 len = std::strlen(fn);
1779 if (std::strcmp(fn+len-3, "tpr") == 0)
1781 return whamin_tpr;
1783 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1785 return whamin_pullxf;
1787 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1789 return whamin_pdo;
1791 else
1793 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1797 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1798 static void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1799 t_UmbrellaOptions *opt)
1801 char **filename = nullptr, tmp[WHAM_MAXFILELEN+2];
1802 int nread, sizenow, i, block = 1;
1803 FILE *fp;
1805 fp = gmx_ffopen(fn, "r");
1806 nread = 0;
1807 sizenow = 0;
1808 while (fgets(tmp, sizeof(tmp), fp) != nullptr)
1810 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1812 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1814 if (nread >= sizenow)
1816 sizenow += block;
1817 srenew(filename, sizenow);
1818 for (i = sizenow-block; i < sizenow; i++)
1820 snew(filename[i], WHAM_MAXFILELEN);
1823 /* remove newline if there is one */
1824 if (tmp[std::strlen(tmp)-1] == '\n')
1826 tmp[std::strlen(tmp)-1] = '\0';
1828 std::strcpy(filename[nread], tmp);
1829 if (opt->verbose)
1831 printf("Found file %s in %s\n", filename[nread], fn);
1833 nread++;
1835 *filenamesRet = filename;
1836 *nfilesRet = nread;
1839 //! Open a file or a pipe to a gzipped file
1840 static FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1842 char Buffer[2048], gunzip[1024], *Path = nullptr;
1843 FILE *pipe = nullptr;
1844 static gmx_bool bFirst = true;
1846 /* gzipped pdo file? */
1847 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1849 /* search gunzip executable */
1850 if (!(Path = getenv("GMX_PATH_GZIP")))
1852 if (gmx_fexist("/bin/gunzip"))
1854 sprintf(gunzip, "%s", "/bin/gunzip");
1856 else if (gmx_fexist("/usr/bin/gunzip"))
1858 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1860 else
1862 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1863 "You may want to define the path to gunzip "
1864 "with the environment variable GMX_PATH_GZIP.");
1867 else
1869 sprintf(gunzip, "%s/gunzip", Path);
1870 if (!gmx_fexist(gunzip))
1872 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1873 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1876 if (bFirst)
1878 printf("Using gunzip executable %s\n", gunzip);
1879 bFirst = false;
1881 if (!gmx_fexist(fn))
1883 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1885 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1886 if (opt->verbose)
1888 printf("Executing command '%s'\n", Buffer);
1890 #if HAVE_PIPES
1891 if ((pipe = popen(Buffer, "r")) == nullptr)
1893 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1895 #else
1896 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1897 #endif
1898 *bPipeOpen = TRUE;
1900 else
1902 pipe = gmx_ffopen(fn, "r");
1903 *bPipeOpen = FALSE;
1906 return pipe;
1909 //! Close file or pipe
1910 static void pdo_close_file(FILE *fp)
1912 #if HAVE_PIPES
1913 pclose(fp);
1914 #else
1915 gmx_ffclose(fp);
1916 #endif
1919 //! Reading all pdo files
1920 static void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1921 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1923 FILE *file;
1924 real mintmp, maxtmp, done = 0.;
1925 int i;
1926 gmx_bool bPipeOpen;
1927 /* char Buffer0[1000]; */
1929 if (nfiles < 1)
1931 gmx_fatal(FARGS, "No files found. Hick.");
1934 /* if min and max are not given, get min and max from the input files */
1935 if (opt->bAuto)
1937 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1938 opt->min = 1e20;
1939 opt->max = -1e20;
1940 for (i = 0; i < nfiles; ++i)
1942 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1943 /*fgets(Buffer0,999,file);
1944 fprintf(stderr,"First line '%s'\n",Buffer0); */
1945 done = 100.0*(i+1)/nfiles;
1946 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1947 if (opt->verbose)
1949 printf("\n");
1951 read_pdo_header(file, header, opt);
1952 /* here only determine min and max of this window */
1953 read_pdo_data(file, header, i, nullptr, opt, TRUE, &mintmp, &maxtmp);
1954 if (maxtmp > opt->max)
1956 opt->max = maxtmp;
1958 if (mintmp < opt->min)
1960 opt->min = mintmp;
1962 if (bPipeOpen)
1964 pdo_close_file(file);
1966 else
1968 gmx_ffclose(file);
1971 printf("\n");
1972 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1973 if (opt->bBoundsOnly)
1975 printf("Found option -boundsonly, now exiting.\n");
1976 exit (0);
1979 /* store stepsize in profile */
1980 opt->dz = (opt->max-opt->min)/opt->bins;
1982 /* Having min and max, we read in all files */
1983 /* Loop over all files */
1984 for (i = 0; i < nfiles; ++i)
1986 done = 100.0*(i+1)/nfiles;
1987 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1988 if (opt->verbose)
1990 printf("\n");
1992 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1993 read_pdo_header(file, header, opt);
1994 /* load data into window */
1995 read_pdo_data(file, header, i, window, opt, FALSE, nullptr, nullptr);
1996 if ((window+i)->Ntot[0] == 0)
1998 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2000 if (bPipeOpen)
2002 pdo_close_file(file);
2004 else
2006 gmx_ffclose(file);
2009 printf("\n");
2010 for (i = 0; i < nfiles; ++i)
2012 sfree(fn[i]);
2014 sfree(fn);
2017 //! translate 0/1 to N/Y to write pull dimensions
2018 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2020 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2021 static void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt, t_coordselection *coordsel)
2023 t_inputrec irInstance;
2024 t_inputrec *ir = &irInstance;
2025 t_state state;
2026 static int first = 1;
2028 /* printf("Reading %s \n",fn); */
2029 read_tpx_state(fn, ir, &state, nullptr);
2031 if (!ir->bPull)
2033 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2035 if (ir->pull->ncoord == 0)
2037 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2040 /* Read overall pull info */
2041 header->npullcrds = ir->pull->ncoord;
2042 header->bPrintCOM = ir->pull->bPrintCOM;
2043 header->bPrintRefValue = ir->pull->bPrintRefValue;
2044 header->bPrintComp = ir->pull->bPrintComp;
2046 /* Read pull coordinates */
2047 snew(header->pcrd, header->npullcrds);
2048 for (int i = 0; i < ir->pull->ncoord; i++)
2050 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2051 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2052 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
2053 /* Angle type coordinates are handled fully in degrees in gmx wham */
2054 header->pcrd[i].k = ir->pull->coord[i].k*pull_conversion_factor_internal2userinput(&ir->pull->coord[i]);
2055 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2057 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2058 header->pcrd[i].ndim = header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2060 std::strcpy(header->pcrd[i].coord_unit,
2061 pull_coordinate_units(&ir->pull->coord[i]));
2063 if (ir->efep != efepNO && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2065 gmx_fatal(FARGS, "Seems like you did free-energy perturbation, and you perturbed the force constant."
2066 " This is not supported.\n");
2068 if (coordsel && (coordsel->n != ir->pull->ncoord))
2070 gmx_fatal(FARGS, "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2071 "coordinate selection file (option -is)\n", ir->pull->ncoord, fn, coordsel->n);
2075 /* Check pull coords for consistency */
2076 int geom = -1;
2077 ivec thedim = { 0, 0, 0 };
2078 bool geometryIsSet = false;
2079 for (int i = 0; i < ir->pull->ncoord; i++)
2081 if (coordsel == nullptr || coordsel->bUse[i])
2083 if (header->pcrd[i].pull_type != epullUMBRELLA)
2085 gmx_fatal(FARGS, "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only umbrella coodinates can enter WHAM.\n"
2086 "If you have umrella and non-umbrella coordinates, you can select the umbrella coordinates with gmx wham -is\n",
2087 fn, i+1, epull_names[header->pcrd[i].pull_type]);
2089 if (!geometryIsSet)
2091 geom = header->pcrd[i].geometry;
2092 copy_ivec(header->pcrd[i].dim, thedim);
2093 geometryIsSet = true;
2095 if (geom != header->pcrd[i].geometry)
2097 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull geometry (coordinate 1: %s, coordinate %d: %s)\n"
2098 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2099 fn, epullg_names[geom], i+1, epullg_names[header->pcrd[i].geometry]);
2101 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY] || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2103 gmx_fatal(FARGS, "%s: Your pull coordinates have different pull dimensions (coordinate 1: %s %s %s, coordinate %d: %s %s %s)\n"
2104 "If you want to use only some pull coordinates in WHAM, please select them with option gmx wham -is\n",
2105 fn, int2YN(thedim[XX]), int2YN(thedim[YY]), int2YN(thedim[ZZ]), i+1,
2106 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]));
2108 if (header->pcrd[i].geometry == epullgCYL)
2110 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2112 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2113 "However, found dimensions [%s %s %s]\n",
2114 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2115 int2YN(header->pcrd[i].dim[ZZ]));
2118 if (header->pcrd[i].k <= 0.0)
2120 gmx_fatal(FARGS, "%s: Pull coordinate %d has force constant of of %g.\n"
2121 "That doesn't seem to be an Umbrella tpr.\n",
2122 fn, i+1, header->pcrd[i].k);
2127 if (opt->verbose || first)
2129 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2130 int maxlen = 0;
2131 for (int i = 0; i < ir->pull->ncoord; i++)
2133 int lentmp = strlen(epullg_names[header->pcrd[i].geometry]);
2134 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2136 char fmt[STRLEN];
2137 sprintf(fmt, "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d dimensions). Used: %%s\n",
2138 maxlen+1);
2139 for (int i = 0; i < ir->pull->ncoord; i++)
2141 bool use = (coordsel == nullptr || coordsel->bUse[i]);
2142 printf(fmt,
2143 epullg_names[header->pcrd[i].geometry], header->pcrd[i].k, header->pcrd[i].init_dist,
2144 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]), int2YN(header->pcrd[i].dim[ZZ]),
2145 header->pcrd[i].ndim, use ? "Yes" : "No");
2146 printf("\tPull group coordinates of %d groups expected in pullx files.\n", ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
2148 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2149 header->bPrintRefValue ? "" : " not");
2151 if (!opt->verbose && first)
2153 printf("\tUse option -v to see this output for all input tpr files\n\n");
2156 first = 0;
2159 //! Read pullx.xvg or pullf.xvg
2160 static void read_pull_xf(const char *fn, t_UmbrellaHeader * header,
2161 t_UmbrellaWindow * window,
2162 t_UmbrellaOptions *opt,
2163 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2164 t_coordselection *coordsel)
2166 double **y = nullptr, pos = 0., t, force, time0 = 0., dt;
2167 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2168 int nColExpect, ntot, column;
2169 real min, max, minfound = 1e20, maxfound = -1e20;
2170 gmx_bool dt_ok, timeok;
2171 const char *quantity;
2172 const int blocklen = 4096;
2173 int *lennow = nullptr;
2174 static gmx_bool bFirst = TRUE;
2177 * Data columns in pull output:
2178 * - in force output pullf.xvg:
2179 * No reference columns, one column per pull coordinate
2181 * - in position output pullx.xvg:
2182 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
2183 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2184 * be written separately into pullx file, but this is not supported and throws an error below;
2185 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2188 if (header->bPrintComp && opt->bPullx)
2190 gmx_fatal(FARGS, "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2191 "(mdp option pull-print-components). Provide the pull force files instead (with option -if).\n");
2194 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2195 snew(nColThisCrd, header->npullcrds);
2196 snew(nColCOMCrd, header->npullcrds);
2197 snew(nColRefCrd, header->npullcrds);
2199 if (!opt->bPullx)
2201 /* pullf reading: simply one column per coordinate */
2202 for (g = 0; g < header->npullcrds; g++)
2204 nColThisCrd[g] = 1;
2205 nColCOMCrd[g] = 0;
2206 nColRefCrd[g] = 0;
2209 else
2211 /* pullx reading. Note explanation above. */
2212 for (g = 0; g < header->npullcrds; g++)
2214 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2215 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim*header->pcrd[g].ngroup : 0);
2216 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2220 nColExpect = 1; /* time column */
2221 for (g = 0; g < header->npullcrds; g++)
2223 nColExpect += nColThisCrd[g];
2226 /* read pullf or pullx. Could be optimized if min and max are given. */
2227 nt = read_xvg(fn, &y, &ny);
2229 /* Check consistency */
2230 quantity = opt->bPullx ? "position" : "force";
2231 if (nt < 1)
2233 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2235 if (bFirst || opt->verbose)
2237 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2238 for (i = 0; i < header->npullcrds; i++)
2240 printf("\tColumns for pull coordinate %d\n", i+1);
2241 printf("\t\treaction coordinate: %d\n"
2242 "\t\tcenter-of-mass of groups: %d\n"
2243 "\t\treference position column: %s\n",
2244 1, nColCOMCrd[i], (header->bPrintRefValue ? "Yes" : "No"));
2246 printf("\tFound %d times in %s\n", nt, fn);
2247 bFirst = FALSE;
2249 if (nColExpect != ny)
2251 gmx_fatal(FARGS, "Expected %d columns (including time column) in %s, but found %d."
2252 " Maybe you confused options -if and -ix ?", nColExpect, fn, ny);
2255 if (!bGetMinMax)
2257 assert(window);
2258 bins = opt->bins;
2259 min = opt->min;
2260 max = opt->max;
2261 if (nt > 1)
2263 window->dt = y[0][1]-y[0][0];
2265 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2267 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2270 /* Need to alocate memory and set up structure for windows */
2271 if (coordsel)
2273 /* Use only groups selected with option -is file */
2274 if (header->npullcrds != coordsel->n)
2276 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2277 header->npullcrds, coordsel->n);
2279 window->nPull = coordsel->nUse;
2281 else
2283 window->nPull = header->npullcrds;
2286 window->nBin = bins;
2287 snew(window->Histo, window->nPull);
2288 snew(window->z, window->nPull);
2289 snew(window->k, window->nPull);
2290 snew(window->pos, window->nPull);
2291 snew(window->N, window->nPull);
2292 snew(window->Ntot, window->nPull);
2293 snew(window->g, window->nPull);
2294 snew(window->bsWeight, window->nPull);
2295 window->bContrib = nullptr;
2297 if (opt->bCalcTauInt)
2299 snew(window->ztime, window->nPull);
2301 else
2303 window->ztime = nullptr;
2305 snew(lennow, window->nPull);
2307 for (g = 0; g < window->nPull; ++g)
2309 window->z [g] = 1;
2310 window->bsWeight [g] = 1.;
2311 window->N [g] = 0;
2312 window->Ntot [g] = 0;
2313 window->g [g] = 1.;
2314 snew(window->Histo[g], bins);
2316 if (opt->bCalcTauInt)
2318 window->ztime[g] = nullptr;
2322 /* Copying umbrella center and force const is more involved since not
2323 all pull groups from header (tpr file) may be used in window variable */
2324 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2326 if (coordsel && !coordsel->bUse[g])
2328 continue;
2330 window->k [gUsed] = header->pcrd[g].k;
2331 window->pos[gUsed] = header->pcrd[g].init_dist;
2332 gUsed++;
2335 else
2336 { /* only determine min and max */
2337 minfound = 1e20;
2338 maxfound = -1e20;
2339 min = max = bins = 0; /* Get rid of warnings */
2343 for (i = 0; i < nt; i++)
2345 /* Do you want that time frame? */
2346 t = 1.0/1000*( gmx::roundToInt64((y[0][i]*1000))); /* round time to fs */
2348 /* get time step of pdo file and get dstep from opt->dt */
2349 if (i == 0)
2351 time0 = t;
2353 else if (i == 1)
2355 dt = t-time0;
2356 if (opt->dt > 0.0)
2358 dstep = gmx::roundToInt(opt->dt/dt);
2359 if (dstep == 0)
2361 dstep = 1;
2364 if (!bGetMinMax)
2366 window->dt = dt*dstep;
2370 dt_ok = (i%dstep == 0);
2371 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2372 /*if (opt->verbose)
2373 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2374 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2375 if (timeok)
2377 /* Note: if coordsel == NULL:
2378 * all groups in pullf/x file are stored in this window, and gUsed == g
2379 * if coordsel != NULL:
2380 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2382 gUsed = -1;
2383 for (g = 0; g < header->npullcrds; ++g)
2385 /* was this group selected for application in WHAM? */
2386 if (coordsel && !coordsel->bUse[g])
2388 continue;
2390 gUsed++;
2392 if (opt->bPullf)
2394 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2395 force = y[g+1][i];
2396 pos = -force/header->pcrd[g].k + header->pcrd[g].init_dist;
2398 else
2400 /* Pick the correct column index.
2401 Note that there is always exactly one displacement column.
2403 column = 1;
2404 for (int j = 0; j < g; j++)
2406 column += nColThisCrd[j];
2408 pos = y[column][i];
2411 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2412 if (bGetMinMax)
2414 if (pos < minfound)
2416 minfound = pos;
2418 if (pos > maxfound)
2420 maxfound = pos;
2423 else
2425 if (gUsed >= window->nPull)
2427 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been caught before.\n",
2428 gUsed, window->nPull);
2431 if (opt->bCalcTauInt && !bGetMinMax)
2433 /* save time series for autocorrelation analysis */
2434 ntot = window->Ntot[gUsed];
2435 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2436 if (ntot >= lennow[gUsed])
2438 lennow[gUsed] += blocklen;
2439 srenew(window->ztime[gUsed], lennow[gUsed]);
2441 window->ztime[gUsed][ntot] = pos;
2444 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2445 if (opt->bCycl)
2447 if (ibin < 0)
2449 while ( (ibin += bins) < 0)
2454 else if (ibin >= bins)
2456 while ( (ibin -= bins) >= bins)
2462 if (ibin >= 0 && ibin < bins)
2464 window->Histo[gUsed][ibin] += 1.;
2465 window->N[gUsed]++;
2467 window->Ntot[gUsed]++;
2471 else if (t > opt->tmax)
2473 if (opt->verbose)
2475 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2477 break;
2481 if (bGetMinMax)
2483 *mintmp = minfound;
2484 *maxtmp = maxfound;
2486 sfree(lennow);
2487 for (i = 0; i < ny; i++)
2489 sfree(y[i]);
2493 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2494 static void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2495 t_UmbrellaHeader* header,
2496 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2498 int i;
2499 real mintmp, maxtmp;
2501 printf("Reading %d tpr and pullf files\n", nfiles);
2503 /* min and max not given? */
2504 if (opt->bAuto)
2506 printf("Automatic determination of boundaries...\n");
2507 opt->min = 1e20;
2508 opt->max = -1e20;
2509 for (i = 0; i < nfiles; i++)
2511 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2513 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2515 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2516 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2518 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2520 read_pull_xf(fnPull[i], header, nullptr, opt, TRUE, &mintmp, &maxtmp,
2521 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2522 if (maxtmp > opt->max)
2524 opt->max = maxtmp;
2526 if (mintmp < opt->min)
2528 opt->min = mintmp;
2531 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2532 if (opt->bBoundsOnly)
2534 printf("Found option -boundsonly, now exiting.\n");
2535 exit (0);
2538 /* store stepsize in profile */
2539 opt->dz = (opt->max-opt->min)/opt->bins;
2541 bool foundData = false;
2542 for (i = 0; i < nfiles; i++)
2544 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2546 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2548 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2549 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2551 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2553 read_pull_xf(fnPull[i], header, window+i, opt, FALSE, nullptr, nullptr,
2554 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2555 if (window[i].Ntot[0] == 0)
2557 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2559 else
2561 foundData = true;
2564 if (!foundData)
2566 gmx_fatal(FARGS, "No data points were found in pullf/pullx files. Maybe you need to specify the -b option?\n");
2569 for (i = 0; i < nfiles; i++)
2571 sfree(fnTprs[i]);
2572 sfree(fnPull[i]);
2574 sfree(fnTprs);
2575 sfree(fnPull);
2578 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2580 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2581 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2583 static void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2585 int nlines, ny, i, ig;
2586 double **iact;
2588 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2589 nlines = read_xvg(fn, &iact, &ny);
2590 if (nlines != nwins)
2592 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2593 nlines, fn, nwins);
2595 for (i = 0; i < nlines; i++)
2597 if (window[i].nPull != ny)
2599 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2600 "number of pull groups is different in different simulations. That is not\n"
2601 "supported yet. Sorry.\n");
2603 for (ig = 0; ig < window[i].nPull; ig++)
2605 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2606 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2608 if (iact[ig][i] <= 0.0)
2610 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2617 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2619 * This is useful
2620 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2621 * that -in case of limited sampling- the ACT may be severely underestimated.
2622 * Note: the g=1+2tau are overwritten.
2623 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2624 * by the smoothing
2626 static void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2628 int i, ig, j, jg;
2629 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2631 /* only evaluate within +- 3sigma of the Gausian */
2632 siglim = 3.0*opt->sigSmoothIact;
2633 siglim2 = gmx::square(siglim);
2634 /* pre-factor of Gaussian */
2635 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2636 invtwosig2 = 0.5/gmx::square(opt->sigSmoothIact);
2638 for (i = 0; i < nwins; i++)
2640 snew(window[i].tausmooth, window[i].nPull);
2641 for (ig = 0; ig < window[i].nPull; ig++)
2643 tausm = 0.;
2644 weight = 0;
2645 pos = window[i].pos[ig];
2646 for (j = 0; j < nwins; j++)
2648 for (jg = 0; jg < window[j].nPull; jg++)
2650 dpos2 = gmx::square(window[j].pos[jg]-pos);
2651 if (dpos2 < siglim2)
2653 w = gaufact*std::exp(-dpos2*invtwosig2);
2654 weight += w;
2655 tausm += w*window[j].tau[jg];
2656 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2657 w,dpos2,pos,gaufact,invtwosig2); */
2661 tausm /= weight;
2662 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2664 window[i].tausmooth[ig] = tausm;
2666 else
2668 window[i].tausmooth[ig] = window[i].tau[ig];
2670 window[i].g[ig] = 1+2*tausm/window[i].dt;
2675 //! Stop integrating autoccorelation function when ACF drops under this value
2676 #define WHAM_AC_ZERO_LIMIT 0.05
2678 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2680 static void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2681 t_UmbrellaOptions *opt, const char *fn, const char *xlabel)
2683 int i, ig, ncorr, ntot, j, k, *count, restart;
2684 real *corr, c0, dt, tmp;
2685 real *ztime, av, tausteps;
2686 FILE *fp, *fpcorr = nullptr;
2688 if (opt->verbose)
2690 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2691 "time [ps]", "autocorrelation function", opt->oenv);
2694 printf("\n");
2695 for (i = 0; i < nwins; i++)
2697 fprintf(stdout, "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2698 fflush(stdout);
2699 ntot = window[i].Ntot[0];
2701 /* using half the maximum time as length of autocorrelation function */
2702 ncorr = ntot/2;
2703 if (ntot < 10)
2705 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2706 " points. Provide more pull data!", ntot);
2708 snew(corr, ncorr);
2709 /* snew(corrSq,ncorr); */
2710 snew(count, ncorr);
2711 dt = window[i].dt;
2712 snew(window[i].tau, window[i].nPull);
2713 restart = gmx::roundToInt(opt->acTrestart/dt);
2714 if (restart == 0)
2716 restart = 1;
2719 for (ig = 0; ig < window[i].nPull; ig++)
2721 if (ntot != window[i].Ntot[ig])
2723 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2724 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2726 ztime = window[i].ztime[ig];
2728 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2729 for (j = 0, av = 0; (j < ntot); j++)
2731 av += ztime[j];
2733 av /= ntot;
2734 for (k = 0; (k < ncorr); k++)
2736 corr[k] = 0.;
2737 count[k] = 0;
2739 for (j = 0; (j < ntot); j += restart)
2741 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2743 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2744 corr [k] += tmp;
2745 /* corrSq[k] += tmp*tmp; */
2746 count[k]++;
2749 /* divide by nr of frames for each time displacement */
2750 for (k = 0; (k < ncorr); k++)
2752 /* count probably = (ncorr-k+(restart-1))/restart; */
2753 corr[k] = corr[k]/count[k];
2754 /* variance of autocorrelation function */
2755 /* corrSq[k]=corrSq[k]/count[k]; */
2757 /* normalize such that corr[0] == 0 */
2758 c0 = 1./corr[0];
2759 for (k = 0; (k < ncorr); k++)
2761 corr[k] *= c0;
2762 /* corrSq[k]*=c0*c0; */
2765 /* write ACFs in verbose mode */
2766 if (fpcorr)
2768 for (k = 0; (k < ncorr); k++)
2770 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2772 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2775 /* esimate integrated correlation time, fitting is too unstable */
2776 tausteps = 0.5*corr[0];
2777 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2778 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2780 tausteps += corr[j];
2783 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2784 Kumar et al, eq. 28 ff. */
2785 window[i].tau[ig] = tausteps*dt;
2786 window[i].g[ig] = 1+2*tausteps;
2787 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2788 } /* ig loop */
2789 sfree(corr);
2790 sfree(count);
2792 printf(" done\n");
2793 if (fpcorr)
2795 xvgrclose(fpcorr);
2798 /* plot IACT along reaction coordinate */
2799 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2800 if (output_env_get_print_xvgr_codes(opt->oenv))
2802 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2803 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2804 for (i = 0; i < nwins; i++)
2806 fprintf(fp, "# %3d ", i);
2807 for (ig = 0; ig < window[i].nPull; ig++)
2809 fprintf(fp, " %11g", window[i].tau[ig]);
2811 fprintf(fp, "\n");
2814 for (i = 0; i < nwins; i++)
2816 for (ig = 0; ig < window[i].nPull; ig++)
2818 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2821 if (opt->sigSmoothIact > 0.0)
2823 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2824 opt->sigSmoothIact);
2825 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2826 smoothIact(window, nwins, opt);
2827 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2828 if (output_env_get_print_xvgr_codes(opt->oenv))
2830 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2831 fprintf(fp, "@ s1 symbol color 2\n");
2833 for (i = 0; i < nwins; i++)
2835 for (ig = 0; ig < window[i].nPull; ig++)
2837 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2841 xvgrclose(fp);
2842 printf("Wrote %s\n", fn);
2845 /*! \brief
2846 * compute average and sigma of each umbrella histogram
2848 static void averageSigma(t_UmbrellaWindow *window, int nwins)
2850 int i, ig, ntot, k;
2851 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2853 for (i = 0; i < nwins; i++)
2855 snew(window[i].aver, window[i].nPull);
2856 snew(window[i].sigma, window[i].nPull);
2858 ntot = window[i].Ntot[0];
2859 for (ig = 0; ig < window[i].nPull; ig++)
2861 ztime = window[i].ztime[ig];
2862 for (k = 0, av = 0.; k < ntot; k++)
2864 av += ztime[k];
2866 av /= ntot;
2867 for (k = 0, sum2 = 0.; k < ntot; k++)
2869 diff = ztime[k]-av;
2870 sum2 += diff*diff;
2872 sig = std::sqrt(sum2/ntot);
2873 window[i].aver[ig] = av;
2875 /* Note: This estimate for sigma is biased from the limited sampling.
2876 Correct sigma by n/(n-1) where n = number of independent
2877 samples. Only possible if IACT is known.
2879 if (window[i].tau)
2881 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2882 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2884 else
2886 window[i].sigma[ig] = sig;
2888 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2894 /*! \brief
2895 * Use histograms to compute average force on pull group.
2897 static void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2899 int i, j, bins = opt->bins, k;
2900 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2901 double posmirrored;
2903 dz = (max-min)/bins;
2904 ztot = opt->max-min;
2905 ztot_half = ztot/2;
2907 /* Compute average displacement from histograms */
2908 for (j = 0; j < nWindows; ++j)
2910 snew(window[j].forceAv, window[j].nPull);
2911 for (k = 0; k < window[j].nPull; ++k)
2913 displAv = 0.0;
2914 weight = 0.0;
2915 for (i = 0; i < opt->bins; ++i)
2917 temp = (1.0*i+0.5)*dz+min;
2918 distance = temp - window[j].pos[k];
2919 if (opt->bCycl)
2920 { /* in cyclic wham: */
2921 if (distance > ztot_half) /* |distance| < ztot_half */
2923 distance -= ztot;
2925 else if (distance < -ztot_half)
2927 distance += ztot;
2930 w = window[j].Histo[k][i]/window[j].g[k];
2931 displAv += w*distance;
2932 weight += w;
2933 /* Are we near min or max? We are getting wrong forces from the histgrams since
2934 the histograms are zero outside [min,max). Therefore, assume that the position
2935 on the other side of the histomgram center is equally likely. */
2936 if (!opt->bCycl)
2938 posmirrored = window[j].pos[k]-distance;
2939 if (posmirrored >= max || posmirrored < min)
2941 displAv += -w*distance;
2942 weight += w;
2946 displAv /= weight;
2948 /* average force from average displacement */
2949 window[j].forceAv[k] = displAv*window[j].k[k];
2950 /* sigma from average square displacement */
2951 /* window[j].sigma [k] = sqrt(displAv2); */
2952 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2957 /*! \brief
2958 * Check if the complete reaction coordinate is covered by the histograms
2960 static void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2961 t_UmbrellaOptions *opt)
2963 int i, ig, j, bins = opt->bins;
2964 bool bBoundary;
2965 real avcount = 0, z, relcount, *count;
2966 snew(count, opt->bins);
2968 for (j = 0; j < opt->bins; ++j)
2970 for (i = 0; i < nwins; i++)
2972 for (ig = 0; ig < window[i].nPull; ig++)
2974 count[j] += window[i].Histo[ig][j];
2977 avcount += 1.0*count[j];
2979 avcount /= bins;
2980 for (j = 0; j < bins; ++j)
2982 relcount = count[j]/avcount;
2983 z = (j+0.5)*opt->dz+opt->min;
2984 bBoundary = j<bins/20 || (bins-j)>bins/20;
2985 /* check for bins with no data */
2986 if (count[j] == 0)
2988 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2989 "You may not get a reasonable profile. Check your histograms!\n", j, z);
2991 /* and check for poor sampling */
2992 else if (relcount < 0.005 && !bBoundary)
2994 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
2997 sfree(count);
3000 /*! \brief Compute initial potential by integrating the average force
3002 * This speeds up the convergence by roughly a factor of 2
3004 static void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt, const char *xlabel)
3006 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3007 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3008 FILE *fp;
3010 dz = (opt->max-min)/bins;
3012 printf("Getting initial potential by integration.\n");
3014 /* Compute average displacement from histograms */
3015 computeAverageForce(window, nWindows, opt);
3017 /* Get force for each bin from all histograms in this bin, or, alternatively,
3018 if no histograms are inside this bin, from the closest histogram */
3019 snew(pot, bins);
3020 snew(f, bins);
3021 for (j = 0; j < opt->bins; ++j)
3023 pos = (1.0*j+0.5)*dz+min;
3024 nHist = 0;
3025 fAv = 0.;
3026 distmin = 1e20;
3027 groupmin = winmin = 0;
3028 for (i = 0; i < nWindows; i++)
3030 for (ig = 0; ig < window[i].nPull; ig++)
3032 hispos = window[i].pos[ig];
3033 dist = std::abs(hispos-pos);
3034 /* average force within bin */
3035 if (dist < dz/2)
3037 nHist++;
3038 fAv += window[i].forceAv[ig];
3040 /* at the same time, remember closest histogram */
3041 if (dist < distmin)
3043 winmin = i;
3044 groupmin = ig;
3045 distmin = dist;
3049 /* if no histogram found in this bin, use closest histogram */
3050 if (nHist > 0)
3052 fAv = fAv/nHist;
3054 else
3056 fAv = window[winmin].forceAv[groupmin];
3058 f[j] = fAv;
3060 for (j = 1; j < opt->bins; ++j)
3062 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3065 /* cyclic wham: linearly correct possible offset */
3066 if (opt->bCycl)
3068 diff = (pot[bins-1]-pot[0])/(bins-1);
3069 for (j = 1; j < opt->bins; ++j)
3071 pot[j] -= j*diff;
3074 if (opt->verbose)
3076 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3077 for (j = 0; j < opt->bins; ++j)
3079 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3081 xvgrclose(fp);
3082 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3085 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3086 energy offsets which are usually determined by wham
3087 First: turn pot into probabilities:
3089 for (j = 0; j < opt->bins; ++j)
3091 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3093 calc_z(pot, window, nWindows, opt, TRUE);
3095 sfree(pot);
3096 sfree(f);
3099 //! Count number of words in an line
3100 static int wordcount(char *ptr)
3102 int i, n, is[2];
3103 int cur = 0;
3105 if (std::strlen(ptr) == 0)
3107 return 0;
3109 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3110 n = 1;
3111 for (i = 0; (ptr[i] != '\0'); i++)
3113 is[cur] = isspace(ptr[i]);
3114 if ((i > 0) && (is[cur] && !is[1-cur]))
3116 n++;
3118 cur = 1-cur;
3120 return n;
3123 /*! \brief Read input file for pull group selection (option -is)
3125 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3127 static void readPullCoordSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3129 FILE *fp;
3130 int i, iline, n, len = STRLEN, temp;
3131 char *ptr = nullptr, *tmpbuf = nullptr;
3132 char fmt[1024], fmtign[1024];
3133 int block = 1, sizenow;
3135 fp = gmx_ffopen(opt->fnCoordSel, "r");
3136 opt->coordsel = nullptr;
3138 snew(tmpbuf, len);
3139 sizenow = 0;
3140 iline = 0;
3141 while ( (ptr = fgets3(fp, tmpbuf, &len)) != nullptr)
3143 trim(ptr);
3144 n = wordcount(ptr);
3146 if (iline >= sizenow)
3148 sizenow += block;
3149 srenew(opt->coordsel, sizenow);
3151 opt->coordsel[iline].n = n;
3152 opt->coordsel[iline].nUse = 0;
3153 snew(opt->coordsel[iline].bUse, n);
3155 fmtign[0] = '\0';
3156 for (i = 0; i < n; i++)
3158 std::strcpy(fmt, fmtign);
3159 std::strcat(fmt, "%d");
3160 if (sscanf(ptr, fmt, &temp))
3162 opt->coordsel[iline].bUse[i] = (temp > 0);
3163 if (opt->coordsel[iline].bUse[i])
3165 opt->coordsel[iline].nUse++;
3168 std::strcat(fmtign, "%*s");
3170 iline++;
3172 opt->nCoordsel = iline;
3173 if (nTpr != opt->nCoordsel)
3175 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel,
3176 opt->fnCoordSel);
3179 printf("\nUse only these pull coordinates:\n");
3180 for (iline = 0; iline < nTpr; iline++)
3182 printf("%s (%d of %d coordinates):", fnTpr[iline], opt->coordsel[iline].nUse, opt->coordsel[iline].n);
3183 for (i = 0; i < opt->coordsel[iline].n; i++)
3185 if (opt->coordsel[iline].bUse[i])
3187 printf(" %d", i+1);
3190 printf("\n");
3192 printf("\n");
3194 sfree(tmpbuf);
3197 //! Boolean XOR
3198 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3200 //! Number of elements in fnm (used for command line parsing)
3201 #define NFILE asize(fnm)
3203 //! The main gmx wham routine
3204 int gmx_wham(int argc, char *argv[])
3206 const char *desc[] = {
3207 "[THISMODULE] is an analysis program that implements the Weighted",
3208 "Histogram Analysis Method (WHAM). It is intended to analyze",
3209 "output files generated by umbrella sampling simulations to ",
3210 "compute a potential of mean force (PMF).[PAR]",
3212 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3213 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3214 "and, if multiple coordinates need to be analyzed, all used the same",
3215 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3216 "At present, three input modes are supported.",
3218 "* With option [TT]-it[tt], the user provides a file which contains the",
3219 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3220 " AND, with option [TT]-ix[tt], a file which contains file names of",
3221 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3222 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3223 " first pullx, etc.",
3224 "* Same as the previous input mode, except that the the user",
3225 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3226 " From the pull force the position in the umbrella potential is",
3227 " computed. This does not work with tabulated umbrella potentials.",
3228 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3229 " the GROMACS 3.3 umbrella output files. If you have some unusual",
3230 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3231 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3232 " must be similar to the following::",
3234 " # UMBRELLA 3.0",
3235 " # Component selection: 0 0 1",
3236 " # nSkip 1",
3237 " # Ref. Group 'TestAtom'",
3238 " # Nr. of pull groups 2",
3239 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3240 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3241 " #####",
3243 " The number of pull groups, umbrella positions, force constants, and names ",
3244 " may (of course) differ. Following the header, a time column and ",
3245 " a data column for each pull group follows (i.e. the displacement",
3246 " with respect to the umbrella center). Up to four pull groups are possible ",
3247 " per [REF].pdo[ref] file at present.[PAR]",
3248 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If only ",
3249 "some of the pull coordinates should be used, a pull coordinate selection file (option [TT]-is[tt]) can ",
3250 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3251 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr file. ",
3252 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. Example:",
3253 "If you have three tpr files, each containing 4 pull coordinates, but only pull coordinates 1 and 2 should be ",
3254 "used, coordsel.dat looks like this::",
3256 " 1 1 0 0",
3257 " 1 1 0 0",
3258 " 1 1 0 0",
3260 "By default, the output files are::",
3262 " [TT]-o[tt] PMF output file",
3263 " [TT]-hist[tt] Histograms output file",
3265 "Always check whether the histograms sufficiently overlap.[PAR]",
3266 "The umbrella potential is assumed to be harmonic and the force constants are ",
3267 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3268 "a tabulated potential can be provided with [TT]-tab[tt].",
3270 "WHAM options",
3271 "^^^^^^^^^^^^",
3273 "* [TT]-bins[tt] Number of bins used in analysis",
3274 "* [TT]-temp[tt] Temperature in the simulations",
3275 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3276 "* [TT]-auto[tt] Automatic determination of boundaries",
3277 "* [TT]-min,-max[tt] Boundaries of the profile",
3279 "The data points that are used to compute the profile",
3280 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3281 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3282 "umbrella window.[PAR]",
3283 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3284 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3285 "With energy output, the energy in the first bin is defined to be zero. ",
3286 "If you want the free energy at a different ",
3287 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3288 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3289 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3290 "[THISMODULE] will make use of the",
3291 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3292 "reaction coordinate will assumed be be neighbors.[PAR]",
3293 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3294 "which may be useful for, e.g. membranes.",
3296 "Parallelization",
3297 "^^^^^^^^^^^^^^^",
3299 "If available, the number of OpenMP threads used by gmx wham can be controlled by setting",
3300 "the [TT]OMP_NUM_THREADS[tt] environment variable.",
3302 "Autocorrelations",
3303 "^^^^^^^^^^^^^^^^",
3305 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3306 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3307 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3308 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3309 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3310 "Because the IACTs can be severely underestimated in case of limited ",
3311 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3312 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3313 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3314 "integration of the ACFs while the ACFs are larger 0.05.",
3315 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3316 "less robust) method such as fitting to a double exponential, you can ",
3317 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3318 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3319 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the respective file.",
3321 "Error analysis",
3322 "^^^^^^^^^^^^^^",
3324 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3325 "otherwise the statistical error may be substantially underestimated. ",
3326 "More background and examples for the bootstrap technique can be found in ",
3327 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3328 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3329 "Four bootstrapping methods are supported and ",
3330 "selected with [TT]-bs-method[tt].",
3332 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3333 " data points, and the bootstrap is carried out by assigning random weights to the ",
3334 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3335 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3336 " statistical error is underestimated.",
3337 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3338 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3339 " (allowing duplication, i.e. sampling with replacement).",
3340 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3341 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3342 " divided into blocks and only histograms within each block are mixed. Note that ",
3343 " the histograms within each block must be representative for all possible histograms, ",
3344 " otherwise the statistical error is underestimated.",
3345 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3346 " such that the generated data points are distributed according the given histograms ",
3347 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3348 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3349 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3350 " Note that this method may severely underestimate the error in case of limited sampling, ",
3351 " that is if individual histograms do not represent the complete phase space at ",
3352 " the respective positions.",
3353 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3354 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3355 " and width of the umbrella histograms. That method yields similar error estimates ",
3356 " like method [TT]traj[tt].",
3358 "Bootstrapping output:",
3360 "* [TT]-bsres[tt] Average profile and standard deviations",
3361 "* [TT]-bsprof[tt] All bootstrapping profiles",
3363 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3364 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3365 "the histograms."
3368 const char *en_unit[] = {nullptr, "kJ", "kCal", "kT", nullptr};
3369 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", nullptr};
3370 const char *en_bsMethod[] = { nullptr, "b-hist", "hist", "traj", "traj-gauss", nullptr };
3371 static t_UmbrellaOptions opt;
3373 t_pargs pa[] = {
3374 { "-min", FALSE, etREAL, {&opt.min},
3375 "Minimum coordinate in profile"},
3376 { "-max", FALSE, etREAL, {&opt.max},
3377 "Maximum coordinate in profile"},
3378 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3379 "Determine min and max automatically"},
3380 { "-bins", FALSE, etINT, {&opt.bins},
3381 "Number of bins in profile"},
3382 { "-temp", FALSE, etREAL, {&opt.Temperature},
3383 "Temperature"},
3384 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3385 "Tolerance"},
3386 { "-v", FALSE, etBOOL, {&opt.verbose},
3387 "Verbose mode"},
3388 { "-b", FALSE, etREAL, {&opt.tmin},
3389 "First time to analyse (ps)"},
3390 { "-e", FALSE, etREAL, {&opt.tmax},
3391 "Last time to analyse (ps)"},
3392 { "-dt", FALSE, etREAL, {&opt.dt},
3393 "Analyse only every dt ps"},
3394 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3395 "Write histograms and exit"},
3396 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3397 "Determine min and max and exit (with [TT]-auto[tt])"},
3398 { "-log", FALSE, etBOOL, {&opt.bLog},
3399 "Calculate the log of the profile before printing"},
3400 { "-unit", FALSE, etENUM, {en_unit},
3401 "Energy unit in case of log output" },
3402 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3403 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3404 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3405 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3406 { "-sym", FALSE, etBOOL, {&opt.bSym},
3407 "Symmetrize profile around z=0"},
3408 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3409 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3410 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3411 "Calculate integrated autocorrelation times and use in wham"},
3412 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3413 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3414 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3415 "When computing autocorrelation functions, restart computing every .. (ps)"},
3416 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3417 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3418 "during smoothing"},
3419 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3420 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3421 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3422 "Bootstrap method" },
3423 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3424 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3425 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3426 "Seed for bootstrapping. (-1 = use time)"},
3427 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3428 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3429 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3430 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3431 { "-stepout", FALSE, etINT, {&opt.stepchange},
3432 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3433 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3434 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3437 t_filenm fnm[] = {
3438 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3439 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3440 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3441 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3442 { efDAT, "-is", "coordsel", ffOPTRD}, /* input: select pull coords to use */
3443 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3444 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3445 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3446 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3447 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3448 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3449 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3452 int i, j, l, nfiles, nwins, nfiles2;
3453 t_UmbrellaHeader header;
3454 t_UmbrellaWindow * window = nullptr;
3455 double *profile, maxchange = 1e20;
3456 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3457 char **fninTpr, **fninPull, **fninPdo;
3458 const char *fnPull;
3459 FILE *histout, *profout;
3460 char xlabel[STRLEN], ylabel[256], title[256];
3462 opt.bins = 200;
3463 opt.verbose = FALSE;
3464 opt.bHistOnly = FALSE;
3465 opt.bCycl = FALSE;
3466 opt.tmin = 50;
3467 opt.tmax = 1e20;
3468 opt.dt = 0.0;
3469 opt.min = 0;
3470 opt.max = 0;
3471 opt.bAuto = TRUE;
3472 opt.nCoordsel = 0;
3473 opt.coordsel = nullptr;
3475 /* bootstrapping stuff */
3476 opt.nBootStrap = 0;
3477 opt.bsMethod = bsMethod_hist;
3478 opt.tauBootStrap = 0.0;
3479 opt.bsSeed = -1;
3480 opt.histBootStrapBlockLength = 8;
3481 opt.bs_verbose = FALSE;
3483 opt.bLog = TRUE;
3484 opt.unit = en_kJ;
3485 opt.zProf0 = 0.;
3486 opt.Temperature = 298;
3487 opt.Tolerance = 1e-6;
3488 opt.bBoundsOnly = FALSE;
3489 opt.bSym = FALSE;
3490 opt.bCalcTauInt = FALSE;
3491 opt.sigSmoothIact = 0.0;
3492 opt.bAllowReduceIact = TRUE;
3493 opt.bInitPotByIntegration = TRUE;
3494 opt.acTrestart = 1.0;
3495 opt.stepchange = 100;
3496 opt.stepUpdateContrib = 100;
3498 if (!parse_common_args(&argc, argv, 0,
3499 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &opt.oenv))
3501 return 0;
3504 opt.unit = nenum(en_unit);
3505 opt.bsMethod = nenum(en_bsMethod);
3507 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3509 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3510 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3511 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3512 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3513 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3514 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3515 if (opt.bTab && opt.bPullf)
3517 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3518 "Provide pullx.xvg or pdo files!");
3521 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3523 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3525 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3527 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3528 "\n\n Check gmx wham -h !");
3531 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3532 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3533 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3534 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3535 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3537 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3538 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3539 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3540 if ( (bMinSet || bMaxSet) && bAutoSet)
3542 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3545 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3547 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3550 if (bMinSet && opt.bAuto)
3552 printf("Note: min and max given, switching off -auto.\n");
3553 opt.bAuto = FALSE;
3556 if (opt.bTauIntGiven && opt.bCalcTauInt)
3558 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3559 "the autocorrelation times. Not both.");
3562 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3564 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3565 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3567 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3569 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3570 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3573 /* Reading gmx4/gmx5 pull output and tpr files */
3574 if (opt.bTpr || opt.bPullf || opt.bPullx)
3576 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3578 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3579 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3580 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3581 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3582 if (nfiles != nfiles2)
3584 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3585 opt.fnTpr, nfiles2, fnPull);
3588 /* Read file that selects the pull group to be used */
3589 if (opt.fnCoordSel != nullptr)
3591 readPullCoordSelection(&opt, fninTpr, nfiles);
3594 window = initUmbrellaWindows(nfiles);
3595 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3597 else
3598 { /* reading pdo files */
3599 if (opt.fnCoordSel != nullptr)
3601 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3602 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3604 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3605 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3606 window = initUmbrellaWindows(nfiles);
3607 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3610 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3611 We can therefore get the units for the xlabel from the first coordinate. */
3612 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3614 nwins = nfiles;
3616 /* enforce equal weight for all histograms? */
3617 if (opt.bHistEq)
3619 enforceEqualWeights(window, nwins);
3622 /* write histograms */
3623 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3624 xlabel, "count", opt.oenv);
3625 for (l = 0; l < opt.bins; ++l)
3627 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3628 for (i = 0; i < nwins; ++i)
3630 for (j = 0; j < window[i].nPull; ++j)
3632 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3635 fprintf(histout, "\n");
3637 xvgrclose(histout);
3638 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3639 if (opt.bHistOnly)
3641 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3642 return 0;
3645 /* Using tabulated umbrella potential */
3646 if (opt.bTab)
3648 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3651 /* Integrated autocorrelation times provided ? */
3652 if (opt.bTauIntGiven)
3654 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3657 /* Compute integrated autocorrelation times */
3658 if (opt.bCalcTauInt)
3660 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3663 /* calc average and sigma for each histogram
3664 (maybe required for bootstrapping. If not, this is fast anyhow) */
3665 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3667 averageSigma(window, nwins);
3670 /* Get initial potential by simple integration */
3671 if (opt.bInitPotByIntegration)
3673 guessPotByIntegration(window, nwins, &opt, xlabel);
3676 /* Check if complete reaction coordinate is covered */
3677 checkReactionCoordinateCovered(window, nwins, &opt);
3679 /* Calculate profile */
3680 snew(profile, opt.bins);
3681 if (opt.verbose)
3683 opt.stepchange = 1;
3685 i = 0;
3688 if ( (i%opt.stepUpdateContrib) == 0)
3690 setup_acc_wham(profile, window, nwins, &opt);
3692 if (maxchange < opt.Tolerance)
3694 bExact = TRUE;
3695 /* if (opt.verbose) */
3696 printf("Switched to exact iteration in iteration %d\n", i);
3698 calc_profile(profile, window, nwins, &opt, bExact);
3699 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3701 printf("\t%4d) Maximum change %e\n", i, maxchange);
3703 i++;
3705 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3706 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3708 /* calc error from Kumar's formula */
3709 /* Unclear how the error propagates along reaction coordinate, therefore
3710 commented out */
3711 /* calc_error_kumar(profile,window, nwins,&opt); */
3713 /* Write profile in energy units? */
3714 if (opt.bLog)
3716 prof_normalization_and_unit(profile, &opt);
3717 std::strcpy(ylabel, en_unit_label[opt.unit]);
3718 std::strcpy(title, "Umbrella potential");
3720 else
3722 std::strcpy(ylabel, "Density of states");
3723 std::strcpy(title, "Density of states");
3726 /* symmetrize profile around z=0? */
3727 if (opt.bSym)
3729 symmetrizeProfile(profile, &opt);
3732 /* write profile or density of states */
3733 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3734 for (i = 0; i < opt.bins; ++i)
3736 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3738 xvgrclose(profout);
3739 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3741 /* Bootstrap Method */
3742 if (opt.nBootStrap)
3744 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3745 opt2fn("-hist", NFILE, fnm),
3746 xlabel, ylabel, profile, window, nwins, &opt);
3749 sfree(profile);
3750 freeUmbrellaWindows(window, nfiles);
3752 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3753 please_cite(stdout, "Hub2010");
3755 return 0;