Update instructions in containers.rst
[gromacs.git] / src / gromacs / gmxana / gmx_wham.cpp
blob9162bbcd4e261732652a3293f19cd3f69ff4be6f
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
39 /*! \internal \file
40 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
42 * \author Jochen Hub <jhub@gwdg.de>
44 #include "gmxpre.h"
46 #include "config.h"
48 #include <cassert>
49 #include <cctype>
50 #include <cmath>
51 #include <cstdio>
52 #include <cstdlib>
53 #include <cstring>
55 #include <algorithm>
56 #include <sstream>
58 #include "gromacs/commandline/pargs.h"
59 #include "gromacs/fileio/tpxio.h"
60 #include "gromacs/fileio/xvgr.h"
61 #include "gromacs/gmxana/gmx_ana.h"
62 #include "gromacs/math/functions.h"
63 #include "gromacs/math/units.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/mdtypes/inputrec.h"
66 #include "gromacs/mdtypes/md_enums.h"
67 #include "gromacs/mdtypes/pull_params.h"
68 #include "gromacs/mdtypes/state.h"
69 #include "gromacs/pulling/pull.h"
70 #include "gromacs/random/tabulatednormaldistribution.h"
71 #include "gromacs/random/threefry.h"
72 #include "gromacs/random/uniformintdistribution.h"
73 #include "gromacs/random/uniformrealdistribution.h"
74 #include "gromacs/utility/arraysize.h"
75 #include "gromacs/utility/cstringutil.h"
76 #include "gromacs/utility/exceptions.h"
77 #include "gromacs/utility/fatalerror.h"
78 #include "gromacs/utility/futil.h"
79 #include "gromacs/utility/gmxomp.h"
80 #include "gromacs/utility/path.h"
81 #include "gromacs/utility/pleasecite.h"
82 #include "gromacs/utility/smalloc.h"
84 //! longest file names allowed in input files
85 #define WHAM_MAXFILELEN 2048
87 /*! \brief
88 * enum for energy units
90 enum
92 enSel,
93 en_kJ,
94 en_kCal,
95 en_kT,
96 enNr
98 /*! \brief
99 * enum for type of input files (pdos, tpr, or pullf)
101 enum
103 whamin_unknown,
104 whamin_tpr,
105 whamin_pullxf,
106 whamin_pdo
109 /*! \brief enum for bootstrapping method
111 * These bootstrap methods are supported:
112 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
113 * (bsMethod_BayesianHist)
114 * - bootstrap complete histograms (bsMethod_hist)
115 * - bootstrap trajectories from given umbrella histograms. This generates new
116 * "synthetic" histograms (bsMethod_traj)
117 * - bootstrap trajectories from Gaussian with mu/sigma computed from
118 * the respective histogram (bsMethod_trajGauss). This gives very similar
119 * results compared to bsMethod_traj.
121 * ********************************************************************
122 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
123 * JS Hub, BL de Groot, D van der Spoel
124 * g_wham - A free weighted histogram analysis implementation including
125 * robust error and autocorrelation estimates,
126 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
127 * ********************************************************************
129 enum
131 bsMethod_unknown,
132 bsMethod_BayesianHist,
133 bsMethod_hist,
134 bsMethod_traj,
135 bsMethod_trajGauss
138 //! Parameters of one pull coodinate
139 typedef struct
141 int pull_type; //!< such as constraint, umbrella, ...
142 int geometry; //!< such as distance, direction, cylinder
143 int ngroup; //!< the number of pull groups involved
144 ivec dim; //!< pull dimension with geometry distance
145 int ndim; //!< nr of pull_dim != 0
146 real k; //!< force constants in tpr file
147 real init_dist; //!< reference displacement
148 char coord_unit[256]; //!< unit of the displacement
149 } t_pullcoord;
151 //! Parameters of the umbrella potentials
152 typedef struct
155 * \name Using umbrella pull code since gromacs 4.x
157 /*!\{*/
158 int npullcrds; //!< nr of umbrella pull coordinates for reading
159 t_pullcoord* pcrd; //!< the pull coordinates
160 gmx_bool bPrintCOM; //!< COMs of pull groups writtn in pullx.xvg
161 gmx_bool bPrintRefValue; //!< Reference value for the coordinate written in pullx.xvg
162 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
164 /*!\}*/
166 * \name Using PDO files common until gromacs 3.x
168 /*!\{*/
169 int nSkip;
170 char Reference[256];
171 int nPull;
172 int nDim;
173 ivec Dims;
174 char PullName[4][256];
175 double UmbPos[4][3];
176 double UmbCons[4][3];
177 /*!\}*/
178 } t_UmbrellaHeader;
180 //! Data in the umbrella histograms
181 typedef struct
183 int nPull; //!< nr of pull groups in this pdo or pullf/x file
184 double** Histo; //!< nPull histograms
185 double** cum; //!< nPull cumulative distribution functions
186 int nBin; //!< nr of bins. identical to opt->bins
187 double* k; //!< force constants for the nPull coords
188 double* pos; //!< umbrella positions for the nPull coords
189 double* z; //!< z=(-Fi/kT) for the nPull coords. These values are iteratively computed during wham
190 int* N; //!< nr of data points in nPull histograms.
191 int* Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
193 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
195 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
196 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
198 double* g;
199 double* tau; //!< intetrated autocorrelation time (IACT)
200 double* tausmooth; //!< smoothed IACT
202 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
204 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
205 gmx_bool** bContrib;
206 real** ztime; //!< input data z(t) as a function of time. Required to compute ACTs
208 /*! \brief average force estimated from average displacement, fAv=dzAv*k
210 * Used for integration to guess the potential.
212 real* forceAv;
213 real* aver; //!< average of histograms
214 real* sigma; //!< stddev of histograms
215 double* bsWeight; //!< for bootstrapping complete histograms with continuous weights
216 } t_UmbrellaWindow;
218 //! Selection of pull coordinates to be used in WHAM (one structure for each tpr file)
219 typedef struct
221 int n; //!< total nr of pull coords in this tpr file
222 int nUse; //!< nr of pull coords used
223 gmx_bool* bUse; //!< boolean array of size n. =1 if used, =0 if not
224 } t_coordselection;
226 //! Parameters of WHAM
227 typedef struct UmbrellaOptions // NOLINT(clang-analyzer-optin.performance.Padding)
230 * \name Input stuff
232 /*!\{*/
233 const char *fnTpr, *fnPullf, *fnCoordSel;
234 const char *fnPdo, *fnPullx; //!< file names of input
235 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
236 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
238 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
239 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
240 int nCoordsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
241 t_coordselection* coordsel; //!< for each tpr file: which pull coordinates to use in WHAM?
242 /*!\}*/
244 * \name Basic WHAM options
246 /*!\{*/
247 int bins; //!< nr of bins, min, max, and dz of profile
248 real min, max, dz;
249 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
250 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
251 /*!\}*/
253 * \name Output control
255 /*!\{*/
256 gmx_bool bLog; //!< energy output (instead of probability) for profile
257 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
258 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
259 /*! \brief after wham, set prof to zero at this z-position.
260 * When bootstrapping, set zProf0 to a "stable" reference position.
262 real zProf0;
263 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
265 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
266 gmx_bool bAuto; //!< determine min and max automatically but do not exit
268 gmx_bool verbose; //!< more noisy wham mode
269 int stepchange; //!< print maximum change in prof after how many interations
270 gmx_output_env_t* oenv; //!< xvgr options
271 /*!\}*/
273 * \name Autocorrelation stuff
275 /*!\{*/
276 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
277 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
278 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
279 real acTrestart; //!< when computing ACT, time between restarting points
281 /* \brief Enforce the same weight for each umbella window, that is
282 * calculate with the same number of data points for
283 * each window. That can be reasonable, if the histograms
284 * have different length, but due to autocorrelation,
285 * a longer simulation should not have larger weightin wham.
287 gmx_bool bHistEq;
288 /*!\}*/
291 * \name Bootstrapping stuff
293 /*!\{*/
294 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
296 /* \brief bootstrap method
298 * if == bsMethod_hist, consider complete histograms as independent
299 * data points and, hence, only mix complete histograms.
300 * if == bsMethod_BayesianHist, consider complete histograms
301 * as independent data points, but assign random weights
302 * to the histograms during the bootstrapping ("Bayesian bootstrap")
303 * In case of long correlations (e.g., inside a channel), these
304 * will yield a more realistic error.
305 * if == bsMethod_traj(Gauss), generate synthetic histograms
306 * for each given
307 * histogram by generating an autocorrelated random sequence
308 * that is distributed according to the respective given
309 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
310 * (instead of from the umbrella histogram) to generate a new
311 * histogram.
313 int bsMethod;
315 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
316 real tauBootStrap;
318 /* \brief when mixing histograms, mix only histograms withing blocks
319 long the reaction coordinate xi. Avoids gaps along xi. */
320 int histBootStrapBlockLength;
322 int bsSeed; //!< random seed for bootstrapping
324 /* \brief Write cumulative distribution functions (CDFs) of histograms
325 and write the generated histograms for each bootstrap */
326 gmx_bool bs_verbose;
327 /*!\}*/
329 * \name tabulated umbrella potential stuff
331 /*!\{*/
332 gmx_bool bTab;
333 double * tabX, *tabY, tabMin, tabMax, tabDz;
334 int tabNbins;
335 /*!\}*/
336 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
337 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
338 } t_UmbrellaOptions;
340 //! Make an umbrella window (may contain several histograms)
341 static t_UmbrellaWindow* initUmbrellaWindows(int nwin)
343 t_UmbrellaWindow* win;
344 int i;
345 snew(win, nwin);
346 for (i = 0; i < nwin; i++)
348 win[i].Histo = win[i].cum = nullptr;
349 win[i].k = win[i].pos = win[i].z = nullptr;
350 win[i].N = win[i].Ntot = nullptr;
351 win[i].g = win[i].tau = win[i].tausmooth = nullptr;
352 win[i].bContrib = nullptr;
353 win[i].ztime = nullptr;
354 win[i].forceAv = nullptr;
355 win[i].aver = win[i].sigma = nullptr;
356 win[i].bsWeight = nullptr;
358 return win;
361 //! Delete an umbrella window (may contain several histograms)
362 static void freeUmbrellaWindows(t_UmbrellaWindow* win, int nwin)
364 int i, j;
365 for (i = 0; i < nwin; i++)
367 if (win[i].Histo)
369 for (j = 0; j < win[i].nPull; j++)
371 sfree(win[i].Histo[j]);
374 if (win[i].cum)
376 for (j = 0; j < win[i].nPull; j++)
378 sfree(win[i].cum[j]);
381 if (win[i].bContrib)
383 for (j = 0; j < win[i].nPull; j++)
385 sfree(win[i].bContrib[j]);
388 sfree(win[i].Histo);
389 sfree(win[i].cum);
390 sfree(win[i].k);
391 sfree(win[i].pos);
392 sfree(win[i].z);
393 sfree(win[i].N);
394 sfree(win[i].Ntot);
395 sfree(win[i].g);
396 sfree(win[i].tau);
397 sfree(win[i].tausmooth);
398 sfree(win[i].bContrib);
399 sfree(win[i].ztime);
400 sfree(win[i].forceAv);
401 sfree(win[i].aver);
402 sfree(win[i].sigma);
403 sfree(win[i].bsWeight);
405 sfree(win);
408 /*! \brief
409 * Read and setup tabulated umbrella potential
411 static void setup_tab(const char* fn, t_UmbrellaOptions* opt)
413 int i, ny, nl;
414 double** y;
416 printf("Setting up tabulated potential from file %s\n", fn);
417 nl = read_xvg(fn, &y, &ny);
418 opt->tabNbins = nl;
419 if (ny != 2)
421 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
423 opt->tabMin = y[0][0];
424 opt->tabMax = y[0][nl - 1];
425 opt->tabDz = (opt->tabMax - opt->tabMin) / (nl - 1);
426 if (opt->tabDz <= 0)
428 gmx_fatal(FARGS,
429 "The tabulated potential in %s must be provided in \n"
430 "ascending z-direction",
431 fn);
433 for (i = 0; i < nl - 1; i++)
435 if (std::abs(y[0][i + 1] - y[0][i] - opt->tabDz) > opt->tabDz / 1e6)
437 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", fn);
440 snew(opt->tabY, nl);
441 snew(opt->tabX, nl);
442 for (i = 0; i < nl; i++)
444 opt->tabX[i] = y[0][i];
445 opt->tabY[i] = y[1][i];
447 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n", opt->tabMin,
448 opt->tabMax, opt->tabDz);
451 //! Read the header of an PDO file (position, force const, nr of groups)
452 static void read_pdo_header(FILE* file, t_UmbrellaHeader* header, t_UmbrellaOptions* opt)
454 char line[2048];
455 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
456 int i;
457 std::istringstream ist;
459 /* line 1 */
460 if (fgets(line, 2048, file) == nullptr)
462 gmx_fatal(FARGS, "Error reading header from pdo file\n");
464 ist.str(line);
465 ist >> Buffer0 >> Buffer1 >> Buffer2;
466 if (std::strcmp(Buffer1, "UMBRELLA") != 0)
468 gmx_fatal(FARGS,
469 "This does not appear to be a valid pdo file. Found %s, expected %s\n"
470 "(Found in first line: `%s')\n",
471 Buffer1, "UMBRELLA", line);
473 if (std::strcmp(Buffer2, "3.0") != 0)
475 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
478 /* line 2 */
479 if (fgets(line, 2048, file) == nullptr)
481 gmx_fatal(FARGS, "Error reading header from pdo file\n");
483 ist.str(line);
484 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
485 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
487 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
488 if (header->nDim != 1)
490 gmx_fatal(FARGS, "Currently only supports one dimension");
493 /* line3 */
494 if (fgets(line, 2048, file) == nullptr)
496 gmx_fatal(FARGS, "Error reading header from pdo file\n");
498 ist.str(line);
499 ist >> Buffer0 >> Buffer1 >> header->nSkip;
501 /* line 4 */
502 if (fgets(line, 2048, file) == nullptr)
504 gmx_fatal(FARGS, "Error reading header from pdo file\n");
506 ist.str(line);
507 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
509 /* line 5 */
510 if (fgets(line, 2048, file) == nullptr)
512 gmx_fatal(FARGS, "Error reading header from pdo file\n");
514 ist.str(line);
515 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
517 if (opt->verbose)
519 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip, header->Reference);
522 for (i = 0; i < header->nPull; ++i)
524 if (fgets(line, 2048, file) == nullptr)
526 gmx_fatal(FARGS, "Error reading header from pdo file\n");
528 ist.str(line);
529 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
530 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
531 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
533 if (opt->verbose)
535 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n", i,
536 header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
540 if (fgets(line, 2048, file) == nullptr)
542 gmx_fatal(FARGS, "Cannot read from file\n");
544 ist.str(line);
545 ist >> Buffer3;
546 if (std::strcmp(Buffer3, "#####") != 0)
548 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
552 //! smarter fgets
553 static char* fgets3(FILE* fp, char ptr[], int* len)
555 char* p;
556 int slen;
558 if (fgets(ptr, *len - 1, fp) == nullptr)
560 return nullptr;
562 p = ptr;
563 while ((std::strchr(ptr, '\n') == nullptr) && (!feof(fp)))
565 /* This line is longer than len characters, let's increase len! */
566 *len += STRLEN;
567 p += STRLEN;
568 srenew(ptr, *len);
569 if (fgets(p - 1, STRLEN, fp) == nullptr)
571 break;
574 slen = std::strlen(ptr);
575 if (ptr[slen - 1] == '\n')
577 ptr[slen - 1] = '\0';
580 return ptr;
583 /*! \brief Read the data columns of and PDO file.
585 * TO DO: Get rid of the scanf function to avoid the clang warning.
586 * At the moment, this warning is avoided by hiding the format string
587 * the variable fmtlf.
589 static void read_pdo_data(FILE* file,
590 t_UmbrellaHeader* header,
591 int fileno,
592 t_UmbrellaWindow* win,
593 t_UmbrellaOptions* opt,
594 gmx_bool bGetMinMax,
595 real* mintmp,
596 real* maxtmp)
598 int i, inttemp, bins, count, ntot;
599 real minval, maxval, minfound = 1e20, maxfound = -1e20;
600 double temp, time, time0 = 0, dt;
601 char* ptr = nullptr;
602 t_UmbrellaWindow* window = nullptr;
603 gmx_bool timeok, dt_ok = true;
604 char * tmpbuf = nullptr, fmt[256], fmtign[256], fmtlf[5] = "%lf";
605 int len = STRLEN, dstep = 1;
606 const int blocklen = 4096;
607 int* lennow = nullptr;
609 if (!bGetMinMax)
611 bins = opt->bins;
612 minval = opt->min;
613 maxval = opt->max;
615 window = win + fileno;
616 /* Need to alocate memory and set up structure */
617 window->nPull = header->nPull;
618 window->nBin = bins;
620 snew(window->Histo, window->nPull);
621 snew(window->z, window->nPull);
622 snew(window->k, window->nPull);
623 snew(window->pos, window->nPull);
624 snew(window->N, window->nPull);
625 snew(window->Ntot, window->nPull);
626 snew(window->g, window->nPull);
627 snew(window->bsWeight, window->nPull);
629 window->bContrib = nullptr;
631 if (opt->bCalcTauInt)
633 snew(window->ztime, window->nPull);
635 else
637 window->ztime = nullptr;
639 snew(lennow, window->nPull);
641 for (i = 0; i < window->nPull; ++i)
643 window->z[i] = 1;
644 window->bsWeight[i] = 1.;
645 snew(window->Histo[i], bins);
646 window->k[i] = header->UmbCons[i][0];
647 window->pos[i] = header->UmbPos[i][0];
648 window->N[i] = 0;
649 window->Ntot[i] = 0;
650 window->g[i] = 1.;
651 if (opt->bCalcTauInt)
653 window->ztime[i] = nullptr;
657 /* Done with setup */
659 else
661 minfound = 1e20;
662 maxfound = -1e20;
663 minval = maxval = bins = 0; /* Get rid of warnings */
666 count = 0;
667 snew(tmpbuf, len);
668 while ((ptr = fgets3(file, tmpbuf, &len)) != nullptr)
670 trim(ptr);
672 if (ptr[0] == '#' || std::strlen(ptr) < 2)
674 continue;
677 /* Initiate format string */
678 fmtign[0] = '\0';
679 std::strcat(fmtign, "%*s");
681 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
682 /* Round time to fs */
683 time = 1.0 / 1000 * (gmx::roundToInt64(time * 1000));
685 /* get time step of pdo file */
686 if (count == 0)
688 time0 = time;
690 else if (count == 1)
692 dt = time - time0;
693 if (opt->dt > 0.0)
695 dstep = gmx::roundToInt(opt->dt / dt);
696 if (dstep == 0)
698 dstep = 1;
701 if (!bGetMinMax)
703 window->dt = dt * dstep;
706 count++;
708 dt_ok = ((count - 1) % dstep == 0);
709 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
710 /* if (opt->verbose)
711 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
712 time,opt->tmin, opt->tmax, dt_ok,timeok); */
714 if (timeok)
716 for (i = 0; i < header->nPull; ++i)
718 std::strcpy(fmt, fmtign);
719 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
720 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
721 if (sscanf(ptr, fmt, &temp))
723 temp += header->UmbPos[i][0];
724 if (bGetMinMax)
726 if (temp < minfound)
728 minfound = temp;
730 if (temp > maxfound)
732 maxfound = temp;
735 else
737 if (opt->bCalcTauInt)
739 /* save time series for autocorrelation analysis */
740 ntot = window->Ntot[i];
741 if (ntot >= lennow[i])
743 lennow[i] += blocklen;
744 srenew(window->ztime[i], lennow[i]);
746 window->ztime[i][ntot] = temp;
749 temp -= minval;
750 temp /= (maxval - minval);
751 temp *= bins;
752 temp = std::floor(temp);
754 inttemp = static_cast<int>(temp);
755 if (opt->bCycl)
757 if (inttemp < 0)
759 inttemp += bins;
761 else if (inttemp >= bins)
763 inttemp -= bins;
767 if (inttemp >= 0 && inttemp < bins)
769 window->Histo[i][inttemp] += 1.;
770 window->N[i]++;
772 window->Ntot[i]++;
777 if (time > opt->tmax)
779 if (opt->verbose)
781 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
783 break;
787 if (bGetMinMax)
789 *mintmp = minfound;
790 *maxtmp = maxfound;
793 sfree(lennow);
794 sfree(tmpbuf);
797 /*! \brief Set identical weights for all histograms
799 * Normally, the weight is given by the number data points in each
800 * histogram, together with the autocorrelation time. This can be overwritten
801 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
802 * an appropriate autocorrelation times instead of using this function.
804 static void enforceEqualWeights(t_UmbrellaWindow* window, int nWindows)
806 int i, k, j, NEnforced;
807 double ratio;
809 NEnforced = window[0].Ntot[0];
810 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
811 "non-weighted histogram analysis method. Ndata = %d\n",
812 NEnforced);
813 /* enforce all histograms to have the same weight as the very first histogram */
815 for (j = 0; j < nWindows; ++j)
817 for (k = 0; k < window[j].nPull; ++k)
819 ratio = 1.0 * NEnforced / window[j].Ntot[k];
820 for (i = 0; i < window[0].nBin; ++i)
822 window[j].Histo[k][i] *= ratio;
824 window[j].N[k] = gmx::roundToInt(ratio * window[j].N[k]);
829 /*! \brief Simple linear interpolation between two given tabulated points
831 static double tabulated_pot(double dist, t_UmbrellaOptions* opt)
833 int jl, ju;
834 double pl, pu, dz, dp;
836 jl = static_cast<int>(std::floor((dist - opt->tabMin) / opt->tabDz));
837 ju = jl + 1;
838 if (jl < 0 || ju >= opt->tabNbins)
840 gmx_fatal(FARGS,
841 "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
842 "Provide an extended table.",
843 dist, jl, ju);
845 pl = opt->tabY[jl];
846 pu = opt->tabY[ju];
847 dz = dist - opt->tabX[jl];
848 dp = (pu - pl) * dz / opt->tabDz;
849 return pl + dp;
853 /*! \brief
854 * Check which bins substiantially contribute (accelerates WHAM)
856 * Don't worry, that routine does not mean we compute the PMF in limited precision.
857 * After rapid convergence (using only substiantal contributions), we always switch to
858 * full precision.
860 static void setup_acc_wham(const double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt)
862 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
863 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
864 gmx_bool bAnyContrib;
865 static int bFirst = 1;
866 static double wham_contrib_lim;
868 if (bFirst)
870 for (i = 0; i < nWindows; ++i)
872 nGrptot += window[i].nPull;
874 wham_contrib_lim = opt->Tolerance / nGrptot;
877 ztot = opt->max - opt->min;
878 ztot_half = ztot / 2;
880 for (i = 0; i < nWindows; ++i)
882 if (!window[i].bContrib)
884 snew(window[i].bContrib, window[i].nPull);
886 for (j = 0; j < window[i].nPull; ++j)
888 if (!window[i].bContrib[j])
890 snew(window[i].bContrib[j], opt->bins);
892 bAnyContrib = FALSE;
893 for (k = 0; k < opt->bins; ++k)
895 temp = (1.0 * k + 0.5) * dz + min;
896 distance = temp - window[i].pos[j]; /* distance to umbrella center */
897 if (opt->bCycl)
898 { /* in cyclic wham: */
899 if (distance > ztot_half) /* |distance| < ztot_half */
901 distance -= ztot;
903 else if (distance < -ztot_half)
905 distance += ztot;
908 /* Note: there are two contributions to bin k in the wham equations:
909 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
910 ii) exp(- U/(BOLTZ*opt->Temperature))
911 where U is the umbrella potential
912 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
915 if (!opt->bTab)
917 U = 0.5 * window[i].k[j] * gmx::square(distance); /* harmonic potential assumed. */
919 else
921 U = tabulated_pot(distance, opt); /* Use tabulated potential */
923 contrib1 = profile[k] * std::exp(-U / (BOLTZ * opt->Temperature));
924 contrib2 = window[i].N[j] * std::exp(-U / (BOLTZ * opt->Temperature) + window[i].z[j]);
925 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
926 bAnyContrib = bAnyContrib || window[i].bContrib[j][k];
927 if (window[i].bContrib[j][k])
929 nContrib++;
931 nTot++;
933 /* If this histo is far outside min and max all bContrib may be FALSE,
934 causing a floating point exception later on. To avoid that, switch
935 them all to true.*/
936 if (!bAnyContrib)
938 for (k = 0; k < opt->bins; ++k)
940 window[i].bContrib[j][k] = TRUE;
945 if (bFirst)
947 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
948 "Evaluating only %d of %d expressions.\n\n",
949 wham_contrib_lim, nContrib, nTot);
952 if (opt->verbose)
954 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n", nContrib, nTot);
956 bFirst = 0;
959 //! Compute the PMF (one of the two main WHAM routines)
960 static void calc_profile(double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, gmx_bool bExact)
962 double ztot_half, ztot, min = opt->min, dz = opt->dz;
964 ztot = opt->max - opt->min;
965 ztot_half = ztot / 2;
967 #pragma omp parallel
971 int nthreads = gmx_omp_get_max_threads();
972 int thread_id = gmx_omp_get_thread_num();
973 int i;
974 int i0 = thread_id * opt->bins / nthreads;
975 int i1 = std::min(opt->bins, ((thread_id + 1) * opt->bins) / nthreads);
977 for (i = i0; i < i1; ++i)
979 int j, k;
980 double num, denom, invg, temp = 0, distance, U = 0;
981 num = denom = 0.;
982 for (j = 0; j < nWindows; ++j)
984 for (k = 0; k < window[j].nPull; ++k)
986 invg = 1.0 / window[j].g[k] * window[j].bsWeight[k];
987 temp = (1.0 * i + 0.5) * dz + min;
988 num += invg * window[j].Histo[k][i];
990 if (!(bExact || window[j].bContrib[k][i]))
992 continue;
994 distance = temp - window[j].pos[k]; /* distance to umbrella center */
995 if (opt->bCycl)
996 { /* in cyclic wham: */
997 if (distance > ztot_half) /* |distance| < ztot_half */
999 distance -= ztot;
1001 else if (distance < -ztot_half)
1003 distance += ztot;
1007 if (!opt->bTab)
1009 U = 0.5 * window[j].k[k] * gmx::square(distance); /* harmonic potential assumed. */
1011 else
1013 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1015 denom += invg * window[j].N[k]
1016 * std::exp(-U / (BOLTZ * opt->Temperature) + window[j].z[k]);
1019 profile[i] = num / denom;
1022 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1026 //! Compute the free energy offsets z (one of the two main WHAM routines)
1027 static double calc_z(const double* profile, t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, gmx_bool bExact)
1029 double min = opt->min, dz = opt->dz, ztot_half, ztot;
1030 double maxglob = -1e20;
1032 ztot = opt->max - opt->min;
1033 ztot_half = ztot / 2;
1035 #pragma omp parallel
1039 int nthreads = gmx_omp_get_max_threads();
1040 int thread_id = gmx_omp_get_thread_num();
1041 int i;
1042 int i0 = thread_id * nWindows / nthreads;
1043 int i1 = std::min(nWindows, ((thread_id + 1) * nWindows) / nthreads);
1044 double maxloc = -1e20;
1046 for (i = i0; i < i1; ++i)
1048 double total = 0, temp, distance, U = 0;
1049 int j, k;
1051 for (j = 0; j < window[i].nPull; ++j)
1053 total = 0;
1054 for (k = 0; k < window[i].nBin; ++k)
1056 if (!(bExact || window[i].bContrib[j][k]))
1058 continue;
1060 temp = (1.0 * k + 0.5) * dz + min;
1061 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1062 if (opt->bCycl)
1063 { /* in cyclic wham: */
1064 if (distance > ztot_half) /* |distance| < ztot_half */
1066 distance -= ztot;
1068 else if (distance < -ztot_half)
1070 distance += ztot;
1074 if (!opt->bTab)
1076 U = 0.5 * window[i].k[j] * gmx::square(distance); /* harmonic potential assumed. */
1078 else
1080 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1082 total += profile[k] * std::exp(-U / (BOLTZ * opt->Temperature));
1084 /* Avoid floating point exception if window is far outside min and max */
1085 if (total != 0.0)
1087 total = -std::log(total);
1089 else
1091 total = 1000.0;
1093 temp = std::abs(total - window[i].z[j]);
1094 if (temp > maxloc)
1096 maxloc = temp;
1098 window[i].z[j] = total;
1101 /* Now get maximum maxloc from the threads and put in maxglob */
1102 if (maxloc > maxglob)
1104 #pragma omp critical
1106 if (maxloc > maxglob)
1108 maxglob = maxloc;
1113 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1116 return maxglob;
1119 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1120 static void symmetrizeProfile(double* profile, t_UmbrellaOptions* opt)
1122 int i, j, bins = opt->bins;
1123 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1124 double z, z1;
1126 if (min > 0. || max < 0.)
1128 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n", opt->min,
1129 opt->max);
1132 snew(prof2, bins);
1134 for (i = 0; i < bins; i++)
1136 z = min + (i + 0.5) * dz;
1137 zsym = -z;
1138 /* bin left of zsym */
1139 j = gmx::roundToInt((zsym - min) / dz) - 1;
1140 if (j >= 0 && (j + 1) < bins)
1142 /* interpolate profile linearly between bins j and j+1 */
1143 z1 = min + (j + 0.5) * dz;
1144 deltaz = zsym - z1;
1145 profsym = profile[j] + (profile[j + 1] - profile[j]) / dz * deltaz;
1146 /* average between left and right */
1147 prof2[i] = 0.5 * (profsym + profile[i]);
1149 else
1151 prof2[i] = profile[i];
1155 std::memcpy(profile, prof2, bins * sizeof(double));
1156 sfree(prof2);
1159 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1160 static void prof_normalization_and_unit(double* profile, t_UmbrellaOptions* opt)
1162 int i, bins, imin;
1163 double unit_factor = 1., diff;
1165 bins = opt->bins;
1167 /* Not log? Nothing to do! */
1168 if (!opt->bLog)
1170 return;
1173 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1174 if (opt->unit == en_kT)
1176 unit_factor = 1.0;
1178 else if (opt->unit == en_kJ)
1180 unit_factor = BOLTZ * opt->Temperature;
1182 else if (opt->unit == en_kCal)
1184 unit_factor = (BOLTZ / CAL2JOULE) * opt->Temperature;
1186 else
1188 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1191 for (i = 0; i < bins; i++)
1193 if (profile[i] > 0.0)
1195 profile[i] = -std::log(profile[i]) * unit_factor;
1199 /* shift to zero at z=opt->zProf0 */
1200 if (!opt->bProf0Set)
1202 diff = profile[0];
1204 else
1206 /* Get bin with shortest distance to opt->zProf0
1207 (-0.5 from bin position and +0.5 from rounding cancel) */
1208 imin = static_cast<int>((opt->zProf0 - opt->min) / opt->dz);
1209 if (imin < 0)
1211 imin = 0;
1213 else if (imin >= bins)
1215 imin = bins - 1;
1217 diff = profile[imin];
1220 /* Shift to zero */
1221 for (i = 0; i < bins; i++)
1223 profile[i] -= diff;
1227 //! Make an array of random integers (used for bootstrapping)
1228 static void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine* rng)
1230 gmx::UniformIntDistribution<int> dist(0, blockLength - 1);
1232 int ipull, blockBase, nr, ipullRandom;
1234 if (blockLength == 0)
1236 blockLength = nPull;
1239 for (ipull = 0; ipull < nPull; ipull++)
1241 blockBase = (ipull / blockLength) * blockLength;
1243 { /* make sure nothing bad happens in the last block */
1244 nr = dist(*rng); // [0,blockLength-1]
1245 ipullRandom = blockBase + nr;
1246 } while (ipullRandom >= nPull);
1247 if (ipullRandom < 0 || ipullRandom >= nPull)
1249 gmx_fatal(FARGS,
1250 "Ups, random iWin = %d, nPull = %d, nr = %d, "
1251 "blockLength = %d, blockBase = %d\n",
1252 ipullRandom, nPull, nr, blockLength, blockBase);
1254 randomArray[ipull] = ipullRandom;
1258 /*! \brief Set pull group information of a synthetic histogram
1260 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1261 * but it is not required if we bootstrap complete histograms.
1263 static void copy_pullgrp_to_synthwindow(t_UmbrellaWindow* synthWindow, t_UmbrellaWindow* thisWindow, int pullid)
1265 synthWindow->N[0] = thisWindow->N[pullid];
1266 synthWindow->Histo[0] = thisWindow->Histo[pullid];
1267 synthWindow->pos[0] = thisWindow->pos[pullid];
1268 synthWindow->z[0] = thisWindow->z[pullid];
1269 synthWindow->k[0] = thisWindow->k[pullid];
1270 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1271 synthWindow->g[0] = thisWindow->g[pullid];
1272 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1275 /*! \brief Calculate cumulative distribution function of of all histograms.
1277 * This allow to create random number sequences
1278 * which are distributed according to the histograms. Required to generate
1279 * the "synthetic" histograms for the Bootstrap method
1281 static void calc_cumulatives(t_UmbrellaWindow* window,
1282 int nWindows,
1283 t_UmbrellaOptions* opt,
1284 const char* fnhist,
1285 const char* xlabel)
1287 int i, j, k, nbin;
1288 double last;
1289 std::string fn;
1290 FILE* fp = nullptr;
1292 if (opt->bs_verbose)
1294 fn = gmx::Path::concatenateBeforeExtension(fnhist, "_cumul");
1295 fp = xvgropen(fn.c_str(), "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1298 nbin = opt->bins;
1299 for (i = 0; i < nWindows; i++)
1301 snew(window[i].cum, window[i].nPull);
1302 for (j = 0; j < window[i].nPull; j++)
1304 snew(window[i].cum[j], nbin + 1);
1305 window[i].cum[j][0] = 0.;
1306 for (k = 1; k <= nbin; k++)
1308 window[i].cum[j][k] = window[i].cum[j][k - 1] + window[i].Histo[j][k - 1];
1311 /* normalize CDFs. Ensure cum[nbin]==1 */
1312 last = window[i].cum[j][nbin];
1313 for (k = 0; k <= nbin; k++)
1315 window[i].cum[j][k] /= last;
1320 printf("Cumulative distribution functions of all histograms created.\n");
1321 if (opt->bs_verbose)
1323 for (k = 0; k <= nbin; k++)
1325 fprintf(fp, "%g\t", opt->min + k * opt->dz);
1326 for (i = 0; i < nWindows; i++)
1328 for (j = 0; j < window[i].nPull; j++)
1330 fprintf(fp, "%g\t", window[i].cum[j][k]);
1333 fprintf(fp, "\n");
1335 printf("Wrote cumulative distribution functions to %s\n", fn.c_str());
1336 xvgrclose(fp);
1341 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1343 * This is used to generate a random sequence distributed according to a histogram
1345 static void searchCumulative(const double xx[], int n, double x, int* j)
1347 int ju, jm, jl;
1349 jl = -1;
1350 ju = n;
1351 while (ju - jl > 1)
1353 jm = (ju + jl) >> 1;
1354 if (x >= xx[jm])
1356 jl = jm;
1358 else
1360 ju = jm;
1363 if (x == xx[0])
1365 *j = 0;
1367 else if (x == xx[n - 1])
1369 *j = n - 2;
1371 else
1373 *j = jl;
1377 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1378 static void create_synthetic_histo(t_UmbrellaWindow* synthWindow,
1379 t_UmbrellaWindow* thisWindow,
1380 int pullid,
1381 t_UmbrellaOptions* opt)
1383 int N, i, nbins, r_index, ibin;
1384 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1385 char errstr[1024];
1387 N = thisWindow->N[pullid];
1388 dt = thisWindow->dt;
1389 nbins = thisWindow->nBin;
1391 /* tau = autocorrelation time */
1392 if (opt->tauBootStrap > 0.0)
1394 tausteps = opt->tauBootStrap / dt;
1396 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1398 /* calc tausteps from g=1+2tausteps */
1399 g = thisWindow->g[pullid];
1400 tausteps = (g - 1) / 2;
1402 else
1404 sprintf(errstr,
1405 "When generating hypothetical trajectories from given umbrella histograms,\n"
1406 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1407 "cannot be predicted. You have 3 options:\n"
1408 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1409 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1410 std::strcat(errstr,
1411 " with option -iiact for all umbrella windows.\n"
1412 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1413 " Use option (3) only if you are sure what you're doing, you may severely\n"
1414 " underestimate the error if a too small ACT is given.\n");
1415 gmx_fatal(FARGS, "%s", errstr);
1418 synthWindow->N[0] = N;
1419 synthWindow->pos[0] = thisWindow->pos[pullid];
1420 synthWindow->z[0] = thisWindow->z[pullid];
1421 synthWindow->k[0] = thisWindow->k[pullid];
1422 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1423 synthWindow->g[0] = thisWindow->g[pullid];
1424 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1426 for (i = 0; i < nbins; i++)
1428 synthWindow->Histo[0][i] = 0.;
1431 if (opt->bsMethod == bsMethod_trajGauss)
1433 sig = thisWindow->sigma[pullid];
1434 mu = thisWindow->aver[pullid];
1437 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1438 Use the following:
1439 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1440 then
1441 z = a*x + sqrt(1-a^2)*y
1442 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1443 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1444 function
1445 C(t) = exp(-t/tau) with tau=-1/ln(a)
1447 Then, use error function to turn the Gaussian random variable into a uniformly
1448 distributed one in [0,1]. Eventually, use cumulative distribution function of
1449 histogram to get random variables distributed according to histogram.
1450 Note: The ACT of the flat distribution and of the generated histogram is not
1451 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1453 a = std::exp(-1.0 / tausteps);
1454 ap = std::sqrt(1.0 - a * a);
1455 invsqrt2 = 1.0 / std::sqrt(2.0);
1457 /* init random sequence */
1458 x = opt->normalDistribution(opt->rng);
1460 if (opt->bsMethod == bsMethod_traj)
1462 /* bootstrap points from the umbrella histograms */
1463 for (i = 0; i < N; i++)
1465 y = opt->normalDistribution(opt->rng);
1466 x = a * x + ap * y;
1467 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1468 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1470 r = 0.5 * (1 + std::erf(x * invsqrt2));
1471 searchCumulative(thisWindow->cum[pullid], nbins + 1, r, &r_index);
1472 synthWindow->Histo[0][r_index] += 1.;
1475 else if (opt->bsMethod == bsMethod_trajGauss)
1477 /* bootstrap points from a Gaussian with the same average and sigma
1478 as the respective umbrella histogram. The idea was, that -given
1479 limited sampling- the bootstrapped histograms are otherwise biased
1480 from the limited sampling of the US histos. However, bootstrapping from
1481 the Gaussian seems to yield a similar estimate. */
1482 i = 0;
1483 while (i < N)
1485 y = opt->normalDistribution(opt->rng);
1486 x = a * x + ap * y;
1487 z = x * sig + mu;
1488 ibin = static_cast<int>(std::floor((z - opt->min) / opt->dz));
1489 if (opt->bCycl)
1491 if (ibin < 0)
1493 while ((ibin += nbins) < 0) {}
1495 else if (ibin >= nbins)
1497 while ((ibin -= nbins) >= nbins) {}
1501 if (ibin >= 0 && ibin < nbins)
1503 synthWindow->Histo[0][ibin] += 1.;
1504 i++;
1508 else
1510 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1514 /*! \brief Write all histograms to a file
1516 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1517 * sets of bootstrapped histograms.
1519 static void print_histograms(const char* fnhist,
1520 t_UmbrellaWindow* window,
1521 int nWindows,
1522 int bs_index,
1523 t_UmbrellaOptions* opt,
1524 const char* xlabel)
1526 std::string fn, title;
1527 FILE* fp;
1528 int bins, l, i, j;
1530 if (bs_index >= 0)
1532 fn = gmx::Path::concatenateBeforeExtension(fnhist, gmx::formatString("_bs%d", bs_index));
1533 title = gmx::formatString("Umbrella histograms. Bootstrap #%d", bs_index);
1535 else
1537 fn = gmx_strdup(fnhist);
1538 title = gmx::formatString("Umbrella histograms");
1541 fp = xvgropen(fn.c_str(), title.c_str(), xlabel, "count", opt->oenv);
1542 bins = opt->bins;
1544 /* Write histograms */
1545 for (l = 0; l < bins; ++l)
1547 fprintf(fp, "%e\t", (l + 0.5) * opt->dz + opt->min);
1548 for (i = 0; i < nWindows; ++i)
1550 for (j = 0; j < window[i].nPull; ++j)
1552 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1555 fprintf(fp, "\n");
1558 xvgrclose(fp);
1559 printf("Wrote %s\n", fn.c_str());
1562 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1563 static void setRandomBsWeights(t_UmbrellaWindow* synthwin, int nAllPull, t_UmbrellaOptions* opt)
1565 int i;
1566 double* r;
1567 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1569 snew(r, nAllPull);
1571 /* generate ordered random numbers between 0 and nAllPull */
1572 for (i = 0; i < nAllPull - 1; i++)
1574 r[i] = dist(opt->rng);
1576 std::sort(r, r + nAllPull - 1);
1577 r[nAllPull - 1] = 1.0 * nAllPull;
1579 synthwin[0].bsWeight[0] = r[0];
1580 for (i = 1; i < nAllPull; i++)
1582 synthwin[i].bsWeight[0] = r[i] - r[i - 1];
1585 /* avoid to have zero weight by adding a tiny value */
1586 for (i = 0; i < nAllPull; i++)
1588 if (synthwin[i].bsWeight[0] < 1e-5)
1590 synthwin[i].bsWeight[0] = 1e-5;
1594 sfree(r);
1597 //! The main bootstrapping routine
1598 static void do_bootstrapping(const char* fnres,
1599 const char* fnprof,
1600 const char* fnhist,
1601 const char* xlabel,
1602 char* ylabel,
1603 double* profile,
1604 t_UmbrellaWindow* window,
1605 int nWindows,
1606 t_UmbrellaOptions* opt)
1608 t_UmbrellaWindow* synthWindow;
1609 double * bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1610 int i, j, *randomArray = nullptr, winid, pullid, ib;
1611 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1612 FILE* fp;
1613 gmx_bool bExact = FALSE;
1615 /* init random generator */
1616 if (opt->bsSeed == 0)
1618 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1620 opt->rng.seed(opt->bsSeed);
1622 snew(bsProfile, opt->bins);
1623 snew(bsProfiles_av, opt->bins);
1624 snew(bsProfiles_av2, opt->bins);
1626 /* Create array of all pull groups. Note that different windows
1627 may have different nr of pull groups
1628 First: Get total nr of pull groups */
1629 nAllPull = 0;
1630 for (i = 0; i < nWindows; i++)
1632 nAllPull += window[i].nPull;
1634 snew(allPull_winId, nAllPull);
1635 snew(allPull_pullId, nAllPull);
1636 iAllPull = 0;
1637 /* Setup one array of all pull groups */
1638 for (i = 0; i < nWindows; i++)
1640 for (j = 0; j < window[i].nPull; j++)
1642 allPull_winId[iAllPull] = i;
1643 allPull_pullId[iAllPull] = j;
1644 iAllPull++;
1648 /* setup stuff for synthetic windows */
1649 snew(synthWindow, nAllPull);
1650 for (i = 0; i < nAllPull; i++)
1652 synthWindow[i].nPull = 1;
1653 synthWindow[i].nBin = opt->bins;
1654 snew(synthWindow[i].Histo, 1);
1655 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1657 snew(synthWindow[i].Histo[0], opt->bins);
1659 snew(synthWindow[i].N, 1);
1660 snew(synthWindow[i].pos, 1);
1661 snew(synthWindow[i].z, 1);
1662 snew(synthWindow[i].k, 1);
1663 snew(synthWindow[i].bContrib, 1);
1664 snew(synthWindow[i].g, 1);
1665 snew(synthWindow[i].bsWeight, 1);
1668 switch (opt->bsMethod)
1670 case bsMethod_hist:
1671 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1672 please_cite(stdout, "Hub2006");
1673 break;
1674 case bsMethod_BayesianHist:
1675 /* just copy all histogams into synthWindow array */
1676 for (i = 0; i < nAllPull; i++)
1678 winid = allPull_winId[i];
1679 pullid = allPull_pullId[i];
1680 copy_pullgrp_to_synthwindow(synthWindow + i, window + winid, pullid);
1682 break;
1683 case bsMethod_traj:
1684 case bsMethod_trajGauss: calc_cumulatives(window, nWindows, opt, fnhist, xlabel); break;
1685 default: gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1688 /* do bootstrapping */
1689 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1690 for (ib = 0; ib < opt->nBootStrap; ib++)
1692 printf(" *******************************************\n"
1693 " ******** Start bootstrap nr %d ************\n"
1694 " *******************************************\n",
1695 ib + 1);
1697 switch (opt->bsMethod)
1699 case bsMethod_hist:
1700 /* bootstrap complete histograms from given histograms */
1701 srenew(randomArray, nAllPull);
1702 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1703 for (i = 0; i < nAllPull; i++)
1705 winid = allPull_winId[randomArray[i]];
1706 pullid = allPull_pullId[randomArray[i]];
1707 copy_pullgrp_to_synthwindow(synthWindow + i, window + winid, pullid);
1709 break;
1710 case bsMethod_BayesianHist:
1711 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1712 setRandomBsWeights(synthWindow, nAllPull, opt);
1713 break;
1714 case bsMethod_traj:
1715 case bsMethod_trajGauss:
1716 /* create new histos from given histos, that is generate new hypothetical
1717 trajectories */
1718 for (i = 0; i < nAllPull; i++)
1720 winid = allPull_winId[i];
1721 pullid = allPull_pullId[i];
1722 create_synthetic_histo(synthWindow + i, window + winid, pullid, opt);
1724 break;
1727 /* write histos in case of verbose output */
1728 if (opt->bs_verbose)
1730 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1733 /* do wham */
1734 i = 0;
1735 bExact = FALSE;
1736 maxchange = 1e20;
1737 std::memcpy(bsProfile, profile, opt->bins * sizeof(double)); /* use profile as guess */
1740 if ((i % opt->stepUpdateContrib) == 0)
1742 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1744 if (maxchange < opt->Tolerance)
1746 bExact = TRUE;
1748 if (((i % opt->stepchange) == 0 || i == 1) && i != 0)
1750 printf("\t%4d) Maximum change %e\n", i, maxchange);
1752 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1753 i++;
1754 } while ((maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance
1755 || !bExact);
1756 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1758 if (opt->bLog)
1760 prof_normalization_and_unit(bsProfile, opt);
1763 /* symmetrize profile around z=0 */
1764 if (opt->bSym)
1766 symmetrizeProfile(bsProfile, opt);
1769 /* save stuff to get average and stddev */
1770 for (i = 0; i < opt->bins; i++)
1772 tmp = bsProfile[i];
1773 bsProfiles_av[i] += tmp;
1774 bsProfiles_av2[i] += tmp * tmp;
1775 fprintf(fp, "%e\t%e\n", (i + 0.5) * opt->dz + opt->min, tmp);
1777 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1779 xvgrclose(fp);
1781 /* write average and stddev */
1782 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1783 if (output_env_get_print_xvgr_codes(opt->oenv))
1785 fprintf(fp, "@TYPE xydy\n");
1787 for (i = 0; i < opt->bins; i++)
1789 bsProfiles_av[i] /= opt->nBootStrap;
1790 bsProfiles_av2[i] /= opt->nBootStrap;
1791 tmp = bsProfiles_av2[i] - gmx::square(bsProfiles_av[i]);
1792 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1793 fprintf(fp, "%e\t%e\t%e\n", (i + 0.5) * opt->dz + opt->min, bsProfiles_av[i], stddev);
1795 xvgrclose(fp);
1796 printf("Wrote boot strap result to %s\n", fnres);
1799 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1800 static int whaminFileType(char* fn)
1802 int len;
1803 len = std::strlen(fn);
1804 if (std::strcmp(fn + len - 3, "tpr") == 0)
1806 return whamin_tpr;
1808 else if (std::strcmp(fn + len - 3, "xvg") == 0 || std::strcmp(fn + len - 6, "xvg.gz") == 0)
1810 return whamin_pullxf;
1812 else if (std::strcmp(fn + len - 3, "pdo") == 0 || std::strcmp(fn + len - 6, "pdo.gz") == 0)
1814 return whamin_pdo;
1816 else
1818 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1822 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1823 static void read_wham_in(const char* fn, char*** filenamesRet, int* nfilesRet, t_UmbrellaOptions* opt)
1825 char **filename = nullptr, tmp[WHAM_MAXFILELEN + 2];
1826 int nread, sizenow, i, block = 1;
1827 FILE* fp;
1829 fp = gmx_ffopen(fn, "r");
1830 nread = 0;
1831 sizenow = 0;
1832 while (fgets(tmp, sizeof(tmp), fp) != nullptr)
1834 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1836 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1838 if (nread >= sizenow)
1840 sizenow += block;
1841 srenew(filename, sizenow);
1842 for (i = sizenow - block; i < sizenow; i++)
1844 snew(filename[i], WHAM_MAXFILELEN);
1847 /* remove newline if there is one */
1848 if (tmp[std::strlen(tmp) - 1] == '\n')
1850 tmp[std::strlen(tmp) - 1] = '\0';
1852 std::strcpy(filename[nread], tmp);
1853 if (opt->verbose)
1855 printf("Found file %s in %s\n", filename[nread], fn);
1857 nread++;
1859 *filenamesRet = filename;
1860 *nfilesRet = nread;
1863 //! Open a file or a pipe to a gzipped file
1864 static FILE* open_pdo_pipe(const char* fn, t_UmbrellaOptions* opt, gmx_bool* bPipeOpen)
1866 char Buffer[2048], gunzip[1024], *Path = nullptr;
1867 FILE* pipe = nullptr;
1868 static gmx_bool bFirst = true;
1870 /* gzipped pdo file? */
1871 if ((std::strcmp(fn + std::strlen(fn) - 3, ".gz") == 0))
1873 /* search gunzip executable */
1874 if (!(Path = getenv("GMX_PATH_GZIP")))
1876 if (gmx_fexist("/bin/gunzip"))
1878 sprintf(gunzip, "%s", "/bin/gunzip");
1880 else if (gmx_fexist("/usr/bin/gunzip"))
1882 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1884 else
1886 gmx_fatal(FARGS,
1887 "Cannot find executable gunzip in /bin or /usr/bin.\n"
1888 "You may want to define the path to gunzip "
1889 "with the environment variable GMX_PATH_GZIP.");
1892 else
1894 sprintf(gunzip, "%s/gunzip", Path);
1895 if (!gmx_fexist(gunzip))
1897 gmx_fatal(FARGS,
1898 "Cannot find executable %s. Please define the path to gunzip"
1899 " in the environmental varialbe GMX_PATH_GZIP.",
1900 gunzip);
1903 if (bFirst)
1905 printf("Using gunzip executable %s\n", gunzip);
1906 bFirst = false;
1908 if (!gmx_fexist(fn))
1910 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1912 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1913 if (opt->verbose)
1915 printf("Executing command '%s'\n", Buffer);
1917 #if HAVE_PIPES
1918 if ((pipe = popen(Buffer, "r")) == nullptr)
1920 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1922 #else
1923 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1924 #endif
1925 *bPipeOpen = TRUE;
1927 else
1929 pipe = gmx_ffopen(fn, "r");
1930 *bPipeOpen = FALSE;
1933 return pipe;
1936 //! Close file or pipe
1937 static void pdo_close_file(FILE* fp)
1939 #if HAVE_PIPES
1940 pclose(fp);
1941 #else
1942 gmx_ffclose(fp);
1943 #endif
1946 //! Reading all pdo files
1947 static void read_pdo_files(char** fn, int nfiles, t_UmbrellaHeader* header, t_UmbrellaWindow* window, t_UmbrellaOptions* opt)
1949 FILE* file;
1950 real mintmp, maxtmp, done = 0.;
1951 int i;
1952 gmx_bool bPipeOpen;
1953 /* char Buffer0[1000]; */
1955 if (nfiles < 1)
1957 gmx_fatal(FARGS, "No files found. Hick.");
1960 /* if min and max are not given, get min and max from the input files */
1961 if (opt->bAuto)
1963 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1964 opt->min = 1e20;
1965 opt->max = -1e20;
1966 for (i = 0; i < nfiles; ++i)
1968 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1969 /*fgets(Buffer0,999,file);
1970 fprintf(stderr,"First line '%s'\n",Buffer0); */
1971 done = 100.0 * (i + 1) / nfiles;
1972 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done);
1973 fflush(stdout);
1974 if (opt->verbose)
1976 printf("\n");
1978 read_pdo_header(file, header, opt);
1979 /* here only determine min and max of this window */
1980 read_pdo_data(file, header, i, nullptr, opt, TRUE, &mintmp, &maxtmp);
1981 if (maxtmp > opt->max)
1983 opt->max = maxtmp;
1985 if (mintmp < opt->min)
1987 opt->min = mintmp;
1989 if (bPipeOpen)
1991 pdo_close_file(file);
1993 else
1995 gmx_ffclose(file);
1998 printf("\n");
1999 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2000 if (opt->bBoundsOnly)
2002 printf("Found option -boundsonly, now exiting.\n");
2003 exit(0);
2006 /* store stepsize in profile */
2007 opt->dz = (opt->max - opt->min) / opt->bins;
2009 /* Having min and max, we read in all files */
2010 /* Loop over all files */
2011 for (i = 0; i < nfiles; ++i)
2013 done = 100.0 * (i + 1) / nfiles;
2014 fprintf(stdout, "\rOpening %s ... [%2.0f%%]", fn[i], done);
2015 fflush(stdout);
2016 if (opt->verbose)
2018 printf("\n");
2020 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2021 read_pdo_header(file, header, opt);
2022 /* load data into window */
2023 read_pdo_data(file, header, i, window, opt, FALSE, nullptr, nullptr);
2024 if ((window + i)->Ntot[0] == 0)
2026 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2028 if (bPipeOpen)
2030 pdo_close_file(file);
2032 else
2034 gmx_ffclose(file);
2037 printf("\n");
2038 for (i = 0; i < nfiles; ++i)
2040 sfree(fn[i]);
2042 sfree(fn);
2045 //! translate 0/1 to N/Y to write pull dimensions
2046 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2048 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2049 static void read_tpr_header(const char* fn, t_UmbrellaHeader* header, t_UmbrellaOptions* opt, t_coordselection* coordsel)
2051 t_inputrec irInstance;
2052 t_inputrec* ir = &irInstance;
2053 t_state state;
2054 static int first = 1;
2056 /* printf("Reading %s \n",fn); */
2057 read_tpx_state(fn, ir, &state, nullptr);
2059 if (!ir->bPull)
2061 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2063 if (ir->pull->ncoord == 0)
2065 gmx_fatal(FARGS, "No pull coordinates found in %s", fn);
2068 /* Read overall pull info */
2069 header->npullcrds = ir->pull->ncoord;
2070 header->bPrintCOM = ir->pull->bPrintCOM;
2071 header->bPrintRefValue = ir->pull->bPrintRefValue;
2072 header->bPrintComp = ir->pull->bPrintComp;
2074 /* Read pull coordinates */
2075 snew(header->pcrd, header->npullcrds);
2076 for (int i = 0; i < ir->pull->ncoord; i++)
2078 header->pcrd[i].pull_type = ir->pull->coord[i].eType;
2079 header->pcrd[i].geometry = ir->pull->coord[i].eGeom;
2080 header->pcrd[i].ngroup = ir->pull->coord[i].ngroup;
2081 /* Angle type coordinates are handled fully in degrees in gmx wham.
2082 * The pull "distance" appears with a power of -2 in the force constant,
2083 * so we need to multiply with the internal units (radians for angle)
2084 * to user units (degrees for an angle) with the same power.
2086 header->pcrd[i].k =
2087 ir->pull->coord[i].k
2088 / gmx::square(pull_conversion_factor_internal2userinput(&ir->pull->coord[i]));
2089 header->pcrd[i].init_dist = ir->pull->coord[i].init;
2091 copy_ivec(ir->pull->coord[i].dim, header->pcrd[i].dim);
2092 header->pcrd[i].ndim =
2093 header->pcrd[i].dim[XX] + header->pcrd[i].dim[YY] + header->pcrd[i].dim[ZZ];
2095 std::strcpy(header->pcrd[i].coord_unit, pull_coordinate_units(&ir->pull->coord[i]));
2097 if (ir->efep != efepNO && ir->pull->coord[i].k != ir->pull->coord[i].kB)
2099 gmx_fatal(FARGS,
2100 "Seems like you did free-energy perturbation, and you perturbed the force "
2101 "constant."
2102 " This is not supported.\n");
2104 if (coordsel && (coordsel->n != ir->pull->ncoord))
2106 gmx_fatal(FARGS,
2107 "Found %d pull coordinates in %s, but %d columns in the respective line\n"
2108 "coordinate selection file (option -is)\n",
2109 ir->pull->ncoord, fn, coordsel->n);
2113 /* Check pull coords for consistency */
2114 int geom = -1;
2115 ivec thedim = { 0, 0, 0 };
2116 bool geometryIsSet = false;
2117 for (int i = 0; i < ir->pull->ncoord; i++)
2119 if (coordsel == nullptr || coordsel->bUse[i])
2121 if (header->pcrd[i].pull_type != epullUMBRELLA)
2123 gmx_fatal(FARGS,
2124 "%s: Pull coordinate %d is of type \"%s\", expected \"umbrella\". Only "
2125 "umbrella coodinates can enter WHAM.\n"
2126 "If you have umrella and non-umbrella coordinates, you can select the "
2127 "umbrella coordinates with gmx wham -is\n",
2128 fn, i + 1, epull_names[header->pcrd[i].pull_type]);
2130 if (!geometryIsSet)
2132 geom = header->pcrd[i].geometry;
2133 copy_ivec(header->pcrd[i].dim, thedim);
2134 geometryIsSet = true;
2136 if (geom != header->pcrd[i].geometry)
2138 gmx_fatal(FARGS,
2139 "%s: Your pull coordinates have different pull geometry (coordinate 1: "
2140 "%s, coordinate %d: %s)\n"
2141 "If you want to use only some pull coordinates in WHAM, please select "
2142 "them with option gmx wham -is\n",
2143 fn, epullg_names[geom], i + 1, epullg_names[header->pcrd[i].geometry]);
2145 if (thedim[XX] != header->pcrd[i].dim[XX] || thedim[YY] != header->pcrd[i].dim[YY]
2146 || thedim[ZZ] != header->pcrd[i].dim[ZZ])
2148 gmx_fatal(FARGS,
2149 "%s: Your pull coordinates have different pull dimensions (coordinate 1: "
2150 "%s %s %s, coordinate %d: %s %s %s)\n"
2151 "If you want to use only some pull coordinates in WHAM, please select "
2152 "them with option gmx wham -is\n",
2153 fn, int2YN(thedim[XX]), int2YN(thedim[YY]), int2YN(thedim[ZZ]), i + 1,
2154 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2155 int2YN(header->pcrd[i].dim[ZZ]));
2157 if (header->pcrd[i].geometry == epullgCYL)
2159 if (header->pcrd[i].dim[XX] || header->pcrd[i].dim[YY] || (!header->pcrd[i].dim[ZZ]))
2161 gmx_fatal(
2162 FARGS,
2163 "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2164 "However, found dimensions [%s %s %s]\n",
2165 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2166 int2YN(header->pcrd[i].dim[ZZ]));
2169 if (header->pcrd[i].k <= 0.0)
2171 gmx_fatal(FARGS,
2172 "%s: Pull coordinate %d has force constant of of %g.\n"
2173 "That doesn't seem to be an Umbrella tpr.\n",
2174 fn, i + 1, header->pcrd[i].k);
2179 if (opt->verbose || first)
2181 printf("\nFile %s, %d coordinates, with these options:\n", fn, header->npullcrds);
2182 int maxlen = 0;
2183 for (int i = 0; i < ir->pull->ncoord; i++)
2185 int lentmp = strlen(epullg_names[header->pcrd[i].geometry]);
2186 maxlen = (lentmp > maxlen) ? lentmp : maxlen;
2188 char fmt[STRLEN];
2189 sprintf(fmt,
2190 "\tGeometry %%-%ds k = %%-8g position = %%-8g dimensions [%%s %%s %%s] (%%d "
2191 "dimensions). Used: %%s\n",
2192 maxlen + 1);
2193 for (int i = 0; i < ir->pull->ncoord; i++)
2195 bool use = (coordsel == nullptr || coordsel->bUse[i]);
2196 printf(fmt, epullg_names[header->pcrd[i].geometry], header->pcrd[i].k, header->pcrd[i].init_dist,
2197 int2YN(header->pcrd[i].dim[XX]), int2YN(header->pcrd[i].dim[YY]),
2198 int2YN(header->pcrd[i].dim[ZZ]), header->pcrd[i].ndim, use ? "Yes" : "No");
2199 printf("\tPull group coordinates of %d groups expected in pullx files.\n",
2200 ir->pull->bPrintCOM ? header->pcrd[i].ngroup : 0);
2202 printf("\tReference value of the coordinate%s expected in pullx files.\n",
2203 header->bPrintRefValue ? "" : " not");
2205 if (!opt->verbose && first)
2207 printf("\tUse option -v to see this output for all input tpr files\n\n");
2210 first = 0;
2213 //! Read pullx.xvg or pullf.xvg
2214 static void read_pull_xf(const char* fn,
2215 t_UmbrellaHeader* header,
2216 t_UmbrellaWindow* window,
2217 t_UmbrellaOptions* opt,
2218 gmx_bool bGetMinMax,
2219 real* mintmp,
2220 real* maxtmp,
2221 t_coordselection* coordsel)
2223 double ** y = nullptr, pos = 0., t, force, time0 = 0., dt;
2224 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1;
2225 int nColExpect, ntot, column;
2226 real min, max, minfound = 1e20, maxfound = -1e20;
2227 gmx_bool dt_ok, timeok;
2228 const char* quantity;
2229 const int blocklen = 4096;
2230 int* lennow = nullptr;
2231 static gmx_bool bFirst = TRUE;
2234 * Data columns in pull output:
2235 * - in force output pullf.xvg:
2236 * No reference columns, one column per pull coordinate
2238 * - in position output pullx.xvg:
2239 * * optionally, ndim columns for COMs of all groups (depending on on mdp options pull-print-com);
2240 * * The displacement, always one column. Note: with pull-print-components = yes, the dx/dy/dz would
2241 * be written separately into pullx file, but this is not supported and throws an error below;
2242 * * optionally, the position of the reference coordinate (depending on pull-print-ref-value)
2245 if (header->bPrintComp && opt->bPullx)
2247 gmx_fatal(
2248 FARGS,
2249 "gmx wham cannot read pullx files if the components of the coordinate was written\n"
2250 "(mdp option pull-print-components). Provide the pull force files instead (with "
2251 "option -if).\n");
2254 int *nColThisCrd, *nColCOMCrd, *nColRefCrd;
2255 snew(nColThisCrd, header->npullcrds);
2256 snew(nColCOMCrd, header->npullcrds);
2257 snew(nColRefCrd, header->npullcrds);
2259 if (!opt->bPullx)
2261 /* pullf reading: simply one column per coordinate */
2262 for (g = 0; g < header->npullcrds; g++)
2264 nColThisCrd[g] = 1;
2265 nColCOMCrd[g] = 0;
2266 nColRefCrd[g] = 0;
2269 else
2271 /* pullx reading. Note explanation above. */
2272 for (g = 0; g < header->npullcrds; g++)
2274 nColRefCrd[g] = (header->bPrintRefValue ? 1 : 0);
2275 nColCOMCrd[g] = (header->bPrintCOM ? header->pcrd[g].ndim * header->pcrd[g].ngroup : 0);
2276 nColThisCrd[g] = 1 + nColCOMCrd[g] + nColRefCrd[g];
2280 nColExpect = 1; /* time column */
2281 for (g = 0; g < header->npullcrds; g++)
2283 nColExpect += nColThisCrd[g];
2286 /* read pullf or pullx. Could be optimized if min and max are given. */
2287 nt = read_xvg(fn, &y, &ny);
2289 /* Check consistency */
2290 quantity = opt->bPullx ? "position" : "force";
2291 if (nt < 1)
2293 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2295 if (bFirst || opt->verbose)
2297 printf("\nReading pull %s file %s, expecting %d columns:\n", quantity, fn, nColExpect);
2298 for (i = 0; i < header->npullcrds; i++)
2300 printf("\tColumns for pull coordinate %d\n", i + 1);
2301 printf("\t\treaction coordinate: %d\n"
2302 "\t\tcenter-of-mass of groups: %d\n"
2303 "\t\treference position column: %s\n",
2304 1, nColCOMCrd[i], (header->bPrintRefValue ? "Yes" : "No"));
2306 printf("\tFound %d times in %s\n", nt, fn);
2307 bFirst = FALSE;
2309 if (nColExpect != ny)
2311 gmx_fatal(FARGS,
2312 "Expected %d columns (including time column) in %s, but found %d."
2313 " Maybe you confused options -if and -ix ?",
2314 nColExpect, fn, ny);
2317 if (!bGetMinMax)
2319 assert(window);
2320 bins = opt->bins;
2321 min = opt->min;
2322 max = opt->max;
2323 if (nt > 1)
2325 window->dt = y[0][1] - y[0][0];
2327 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2329 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2332 /* Need to alocate memory and set up structure for windows */
2333 if (coordsel)
2335 /* Use only groups selected with option -is file */
2336 if (header->npullcrds != coordsel->n)
2338 gmx_fatal(FARGS,
2339 "tpr file contains %d pull groups, but expected %d from group selection "
2340 "file\n",
2341 header->npullcrds, coordsel->n);
2343 window->nPull = coordsel->nUse;
2345 else
2347 window->nPull = header->npullcrds;
2350 window->nBin = bins;
2351 snew(window->Histo, window->nPull);
2352 snew(window->z, window->nPull);
2353 snew(window->k, window->nPull);
2354 snew(window->pos, window->nPull);
2355 snew(window->N, window->nPull);
2356 snew(window->Ntot, window->nPull);
2357 snew(window->g, window->nPull);
2358 snew(window->bsWeight, window->nPull);
2359 window->bContrib = nullptr;
2361 if (opt->bCalcTauInt)
2363 snew(window->ztime, window->nPull);
2365 else
2367 window->ztime = nullptr;
2369 snew(lennow, window->nPull);
2371 for (g = 0; g < window->nPull; ++g)
2373 window->z[g] = 1;
2374 window->bsWeight[g] = 1.;
2375 window->N[g] = 0;
2376 window->Ntot[g] = 0;
2377 window->g[g] = 1.;
2378 snew(window->Histo[g], bins);
2380 if (opt->bCalcTauInt)
2382 window->ztime[g] = nullptr;
2386 /* Copying umbrella center and force const is more involved since not
2387 all pull groups from header (tpr file) may be used in window variable */
2388 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2390 if (coordsel && !coordsel->bUse[g])
2392 continue;
2394 window->k[gUsed] = header->pcrd[g].k;
2395 window->pos[gUsed] = header->pcrd[g].init_dist;
2396 gUsed++;
2399 else
2400 { /* only determine min and max */
2401 minfound = 1e20;
2402 maxfound = -1e20;
2403 min = max = bins = 0; /* Get rid of warnings */
2407 for (i = 0; i < nt; i++)
2409 /* Do you want that time frame? */
2410 t = 1.0 / 1000 * (gmx::roundToInt64((y[0][i] * 1000))); /* round time to fs */
2412 /* get time step of pdo file and get dstep from opt->dt */
2413 if (i == 0)
2415 time0 = t;
2417 else if (i == 1)
2419 dt = t - time0;
2420 if (opt->dt > 0.0)
2422 dstep = gmx::roundToInt(opt->dt / dt);
2423 if (dstep == 0)
2425 dstep = 1;
2428 if (!bGetMinMax)
2430 window->dt = dt * dstep;
2434 dt_ok = (i % dstep == 0);
2435 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2436 /*if (opt->verbose)
2437 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2438 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2439 if (timeok)
2441 /* Note: if coordsel == NULL:
2442 * all groups in pullf/x file are stored in this window, and gUsed == g
2443 * if coordsel != NULL:
2444 * only groups with coordsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2446 gUsed = -1;
2447 for (g = 0; g < header->npullcrds; ++g)
2449 /* was this group selected for application in WHAM? */
2450 if (coordsel && !coordsel->bUse[g])
2452 continue;
2454 gUsed++;
2456 if (opt->bPullf)
2458 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2459 force = y[g + 1][i];
2460 pos = -force / header->pcrd[g].k + header->pcrd[g].init_dist;
2462 else
2464 /* Pick the correct column index.
2465 Note that there is always exactly one displacement column.
2467 column = 1;
2468 for (int j = 0; j < g; j++)
2470 column += nColThisCrd[j];
2472 pos = y[column][i];
2475 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2476 if (bGetMinMax)
2478 if (pos < minfound)
2480 minfound = pos;
2482 if (pos > maxfound)
2484 maxfound = pos;
2487 else
2489 if (gUsed >= window->nPull)
2491 gmx_fatal(FARGS,
2492 "gUsed too large (%d, nPull=%d). This error should have been "
2493 "caught before.\n",
2494 gUsed, window->nPull);
2497 if (opt->bCalcTauInt && !bGetMinMax)
2499 /* save time series for autocorrelation analysis */
2500 ntot = window->Ntot[gUsed];
2501 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2502 if (ntot >= lennow[gUsed])
2504 lennow[gUsed] += blocklen;
2505 srenew(window->ztime[gUsed], lennow[gUsed]);
2507 window->ztime[gUsed][ntot] = pos;
2510 ibin = static_cast<int>(std::floor((pos - min) / (max - min) * bins));
2511 if (opt->bCycl)
2513 if (ibin < 0)
2515 while ((ibin += bins) < 0) {}
2517 else if (ibin >= bins)
2519 while ((ibin -= bins) >= bins) {}
2522 if (ibin >= 0 && ibin < bins)
2524 window->Histo[gUsed][ibin] += 1.;
2525 window->N[gUsed]++;
2527 window->Ntot[gUsed]++;
2531 else if (t > opt->tmax)
2533 if (opt->verbose)
2535 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2537 break;
2541 if (bGetMinMax)
2543 *mintmp = minfound;
2544 *maxtmp = maxfound;
2546 sfree(lennow);
2547 for (i = 0; i < ny; i++)
2549 sfree(y[i]);
2553 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2554 static void read_tpr_pullxf_files(char** fnTprs,
2555 char** fnPull,
2556 int nfiles,
2557 t_UmbrellaHeader* header,
2558 t_UmbrellaWindow* window,
2559 t_UmbrellaOptions* opt)
2561 int i;
2562 real mintmp, maxtmp;
2564 printf("Reading %d tpr and pullf files\n", nfiles);
2566 /* min and max not given? */
2567 if (opt->bAuto)
2569 printf("Automatic determination of boundaries...\n");
2570 opt->min = 1e20;
2571 opt->max = -1e20;
2572 for (i = 0; i < nfiles; i++)
2574 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2576 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2578 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2579 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2581 gmx_fatal(FARGS,
2582 "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2584 read_pull_xf(fnPull[i], header, nullptr, opt, TRUE, &mintmp, &maxtmp,
2585 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2586 if (maxtmp > opt->max)
2588 opt->max = maxtmp;
2590 if (mintmp < opt->min)
2592 opt->min = mintmp;
2595 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2596 if (opt->bBoundsOnly)
2598 printf("Found option -boundsonly, now exiting.\n");
2599 exit(0);
2602 /* store stepsize in profile */
2603 opt->dz = (opt->max - opt->min) / opt->bins;
2605 bool foundData = false;
2606 for (i = 0; i < nfiles; i++)
2608 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2610 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2612 read_tpr_header(fnTprs[i], header, opt, (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2613 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2615 gmx_fatal(FARGS,
2616 "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2618 read_pull_xf(fnPull[i], header, window + i, opt, FALSE, nullptr, nullptr,
2619 (opt->nCoordsel > 0) ? &opt->coordsel[i] : nullptr);
2620 if (window[i].Ntot[0] == 0)
2622 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2624 else
2626 foundData = true;
2629 if (!foundData)
2631 gmx_fatal(FARGS,
2632 "No data points were found in pullf/pullx files. Maybe you need to specify the "
2633 "-b option?\n");
2636 for (i = 0; i < nfiles; i++)
2638 sfree(fnTprs[i]);
2639 sfree(fnPull[i]);
2641 sfree(fnTprs);
2642 sfree(fnPull);
2645 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2647 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2648 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2650 static void readIntegratedAutocorrelationTimes(t_UmbrellaWindow* window, int nwins, const char* fn)
2652 int nlines, ny, i, ig;
2653 double** iact;
2655 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2656 nlines = read_xvg(fn, &iact, &ny);
2657 if (nlines != nwins)
2659 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2660 nlines, fn, nwins);
2662 for (i = 0; i < nlines; i++)
2664 if (window[i].nPull != ny)
2666 gmx_fatal(FARGS,
2667 "You are providing autocorrelation times with option -iiact and the\n"
2668 "number of pull groups is different in different simulations. That is not\n"
2669 "supported yet. Sorry.\n");
2671 for (ig = 0; ig < window[i].nPull; ig++)
2673 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2674 window[i].g[ig] = 1 + 2 * iact[ig][i] / window[i].dt;
2676 if (iact[ig][i] <= 0.0)
2678 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2685 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2687 * This is useful
2688 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2689 * that -in case of limited sampling- the ACT may be severely underestimated.
2690 * Note: the g=1+2tau are overwritten.
2691 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2692 * by the smoothing
2694 static void smoothIact(t_UmbrellaWindow* window, int nwins, t_UmbrellaOptions* opt)
2696 int i, ig, j, jg;
2697 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2699 /* only evaluate within +- 3sigma of the Gausian */
2700 siglim = 3.0 * opt->sigSmoothIact;
2701 siglim2 = gmx::square(siglim);
2702 /* pre-factor of Gaussian */
2703 gaufact = 1.0 / (std::sqrt(2 * M_PI) * opt->sigSmoothIact);
2704 invtwosig2 = 0.5 / gmx::square(opt->sigSmoothIact);
2706 for (i = 0; i < nwins; i++)
2708 snew(window[i].tausmooth, window[i].nPull);
2709 for (ig = 0; ig < window[i].nPull; ig++)
2711 tausm = 0.;
2712 weight = 0;
2713 pos = window[i].pos[ig];
2714 for (j = 0; j < nwins; j++)
2716 for (jg = 0; jg < window[j].nPull; jg++)
2718 dpos2 = gmx::square(window[j].pos[jg] - pos);
2719 if (dpos2 < siglim2)
2721 w = gaufact * std::exp(-dpos2 * invtwosig2);
2722 weight += w;
2723 tausm += w * window[j].tau[jg];
2724 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2725 w,dpos2,pos,gaufact,invtwosig2); */
2729 tausm /= weight;
2730 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2732 window[i].tausmooth[ig] = tausm;
2734 else
2736 window[i].tausmooth[ig] = window[i].tau[ig];
2738 window[i].g[ig] = 1 + 2 * tausm / window[i].dt;
2743 //! Stop integrating autoccorelation function when ACF drops under this value
2744 #define WHAM_AC_ZERO_LIMIT 0.05
2746 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2748 static void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow* window,
2749 int nwins,
2750 t_UmbrellaOptions* opt,
2751 const char* fn,
2752 const char* xlabel)
2754 int i, ig, ncorr, ntot, j, k, *count, restart;
2755 real *corr, c0, dt, tmp;
2756 real *ztime, av, tausteps;
2757 FILE *fp, *fpcorr = nullptr;
2759 if (opt->verbose)
2761 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2762 "time [ps]", "autocorrelation function", opt->oenv);
2765 printf("\n");
2766 for (i = 0; i < nwins; i++)
2768 fprintf(stdout, "\rEstimating integrated autocorrelation times ... [%2.0f%%] ...",
2769 100. * (i + 1) / nwins);
2770 fflush(stdout);
2771 ntot = window[i].Ntot[0];
2773 /* using half the maximum time as length of autocorrelation function */
2774 ncorr = ntot / 2;
2775 if (ntot < 10)
2777 gmx_fatal(FARGS,
2778 "Tryig to estimtate autocorrelation time from only %d"
2779 " points. Provide more pull data!",
2780 ntot);
2782 snew(corr, ncorr);
2783 /* snew(corrSq,ncorr); */
2784 snew(count, ncorr);
2785 dt = window[i].dt;
2786 snew(window[i].tau, window[i].nPull);
2787 restart = gmx::roundToInt(opt->acTrestart / dt);
2788 if (restart == 0)
2790 restart = 1;
2793 for (ig = 0; ig < window[i].nPull; ig++)
2795 if (ntot != window[i].Ntot[ig])
2797 gmx_fatal(FARGS,
2798 "Encountered different nr of frames in different pull groups.\n"
2799 "That should not happen. (%d and %d)\n",
2800 ntot, window[i].Ntot[ig]);
2802 ztime = window[i].ztime[ig];
2804 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2805 for (j = 0, av = 0; (j < ntot); j++)
2807 av += ztime[j];
2809 av /= ntot;
2810 for (k = 0; (k < ncorr); k++)
2812 corr[k] = 0.;
2813 count[k] = 0;
2815 for (j = 0; (j < ntot); j += restart)
2817 for (k = 0; (k < ncorr) && (j + k < ntot); k++)
2819 tmp = (ztime[j] - av) * (ztime[j + k] - av);
2820 corr[k] += tmp;
2821 /* corrSq[k] += tmp*tmp; */
2822 count[k]++;
2825 /* divide by nr of frames for each time displacement */
2826 for (k = 0; (k < ncorr); k++)
2828 /* count probably = (ncorr-k+(restart-1))/restart; */
2829 corr[k] = corr[k] / count[k];
2830 /* variance of autocorrelation function */
2831 /* corrSq[k]=corrSq[k]/count[k]; */
2833 /* normalize such that corr[0] == 0 */
2834 c0 = 1. / corr[0];
2835 for (k = 0; (k < ncorr); k++)
2837 corr[k] *= c0;
2838 /* corrSq[k]*=c0*c0; */
2841 /* write ACFs in verbose mode */
2842 if (fpcorr)
2844 for (k = 0; (k < ncorr); k++)
2846 fprintf(fpcorr, "%g %g\n", k * dt, corr[k]);
2848 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2851 /* esimate integrated correlation time, fitting is too unstable */
2852 tausteps = 0.5 * corr[0];
2853 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2854 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2856 tausteps += corr[j];
2859 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2860 Kumar et al, eq. 28 ff. */
2861 window[i].tau[ig] = tausteps * dt;
2862 window[i].g[ig] = 1 + 2 * tausteps;
2863 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2864 } /* ig loop */
2865 sfree(corr);
2866 sfree(count);
2868 printf(" done\n");
2869 if (fpcorr)
2871 xvgrclose(fpcorr);
2874 /* plot IACT along reaction coordinate */
2875 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2876 if (output_env_get_print_xvgr_codes(opt->oenv))
2878 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2879 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2880 for (i = 0; i < nwins; i++)
2882 fprintf(fp, "# %3d ", i);
2883 for (ig = 0; ig < window[i].nPull; ig++)
2885 fprintf(fp, " %11g", window[i].tau[ig]);
2887 fprintf(fp, "\n");
2890 for (i = 0; i < nwins; i++)
2892 for (ig = 0; ig < window[i].nPull; ig++)
2894 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2897 if (opt->sigSmoothIact > 0.0)
2899 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = "
2900 "%g\n",
2901 opt->sigSmoothIact);
2902 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2903 smoothIact(window, nwins, opt);
2904 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2905 if (output_env_get_print_xvgr_codes(opt->oenv))
2907 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2908 fprintf(fp, "@ s1 symbol color 2\n");
2910 for (i = 0; i < nwins; i++)
2912 for (ig = 0; ig < window[i].nPull; ig++)
2914 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2918 xvgrclose(fp);
2919 printf("Wrote %s\n", fn);
2922 /*! \brief
2923 * compute average and sigma of each umbrella histogram
2925 static void averageSigma(t_UmbrellaWindow* window, int nwins)
2927 int i, ig, ntot, k;
2928 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2930 for (i = 0; i < nwins; i++)
2932 snew(window[i].aver, window[i].nPull);
2933 snew(window[i].sigma, window[i].nPull);
2935 ntot = window[i].Ntot[0];
2936 for (ig = 0; ig < window[i].nPull; ig++)
2938 ztime = window[i].ztime[ig];
2939 for (k = 0, av = 0.; k < ntot; k++)
2941 av += ztime[k];
2943 av /= ntot;
2944 for (k = 0, sum2 = 0.; k < ntot; k++)
2946 diff = ztime[k] - av;
2947 sum2 += diff * diff;
2949 sig = std::sqrt(sum2 / ntot);
2950 window[i].aver[ig] = av;
2952 /* Note: This estimate for sigma is biased from the limited sampling.
2953 Correct sigma by n/(n-1) where n = number of independent
2954 samples. Only possible if IACT is known.
2956 if (window[i].tau)
2958 nSamplesIndep = window[i].N[ig] / (window[i].tau[ig] / window[i].dt);
2959 window[i].sigma[ig] = sig * nSamplesIndep / (nSamplesIndep - 1);
2961 else
2963 window[i].sigma[ig] = sig;
2965 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2971 /*! \brief
2972 * Use histograms to compute average force on pull group.
2974 static void computeAverageForce(t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt)
2976 int i, j, bins = opt->bins, k;
2977 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2978 double posmirrored;
2980 dz = (max - min) / bins;
2981 ztot = opt->max - min;
2982 ztot_half = ztot / 2;
2984 /* Compute average displacement from histograms */
2985 for (j = 0; j < nWindows; ++j)
2987 snew(window[j].forceAv, window[j].nPull);
2988 for (k = 0; k < window[j].nPull; ++k)
2990 displAv = 0.0;
2991 weight = 0.0;
2992 for (i = 0; i < opt->bins; ++i)
2994 temp = (1.0 * i + 0.5) * dz + min;
2995 distance = temp - window[j].pos[k];
2996 if (opt->bCycl)
2997 { /* in cyclic wham: */
2998 if (distance > ztot_half) /* |distance| < ztot_half */
3000 distance -= ztot;
3002 else if (distance < -ztot_half)
3004 distance += ztot;
3007 w = window[j].Histo[k][i] / window[j].g[k];
3008 displAv += w * distance;
3009 weight += w;
3010 /* Are we near min or max? We are getting wrong forces from the histgrams since
3011 the histograms are zero outside [min,max). Therefore, assume that the position
3012 on the other side of the histomgram center is equally likely. */
3013 if (!opt->bCycl)
3015 posmirrored = window[j].pos[k] - distance;
3016 if (posmirrored >= max || posmirrored < min)
3018 displAv += -w * distance;
3019 weight += w;
3023 displAv /= weight;
3025 /* average force from average displacement */
3026 window[j].forceAv[k] = displAv * window[j].k[k];
3027 /* sigma from average square displacement */
3028 /* window[j].sigma [k] = sqrt(displAv2); */
3029 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
3034 /*! \brief
3035 * Check if the complete reaction coordinate is covered by the histograms
3037 static void checkReactionCoordinateCovered(t_UmbrellaWindow* window, int nwins, t_UmbrellaOptions* opt)
3039 int i, ig, j, bins = opt->bins;
3040 bool bBoundary;
3041 real avcount = 0, z, relcount, *count;
3042 snew(count, opt->bins);
3044 for (j = 0; j < opt->bins; ++j)
3046 for (i = 0; i < nwins; i++)
3048 for (ig = 0; ig < window[i].nPull; ig++)
3050 count[j] += window[i].Histo[ig][j];
3053 avcount += 1.0 * count[j];
3055 avcount /= bins;
3056 for (j = 0; j < bins; ++j)
3058 relcount = count[j] / avcount;
3059 z = (j + 0.5) * opt->dz + opt->min;
3060 bBoundary = j < bins / 20 || (bins - j) > bins / 20;
3061 /* check for bins with no data */
3062 if (count[j] == 0)
3064 fprintf(stderr,
3065 "\nWARNING, no data point in bin %d (z=%g) !\n"
3066 "You may not get a reasonable profile. Check your histograms!\n",
3067 j, z);
3069 /* and check for poor sampling */
3070 else if (relcount < 0.005 && !bBoundary)
3072 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3075 sfree(count);
3078 /*! \brief Compute initial potential by integrating the average force
3080 * This speeds up the convergence by roughly a factor of 2
3082 static void guessPotByIntegration(t_UmbrellaWindow* window, int nWindows, t_UmbrellaOptions* opt, const char* xlabel)
3084 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3085 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3086 FILE* fp;
3088 dz = (opt->max - min) / bins;
3090 printf("Getting initial potential by integration.\n");
3092 /* Compute average displacement from histograms */
3093 computeAverageForce(window, nWindows, opt);
3095 /* Get force for each bin from all histograms in this bin, or, alternatively,
3096 if no histograms are inside this bin, from the closest histogram */
3097 snew(pot, bins);
3098 snew(f, bins);
3099 for (j = 0; j < opt->bins; ++j)
3101 pos = (1.0 * j + 0.5) * dz + min;
3102 nHist = 0;
3103 fAv = 0.;
3104 distmin = 1e20;
3105 groupmin = winmin = 0;
3106 for (i = 0; i < nWindows; i++)
3108 for (ig = 0; ig < window[i].nPull; ig++)
3110 hispos = window[i].pos[ig];
3111 dist = std::abs(hispos - pos);
3112 /* average force within bin */
3113 if (dist < dz / 2)
3115 nHist++;
3116 fAv += window[i].forceAv[ig];
3118 /* at the same time, remember closest histogram */
3119 if (dist < distmin)
3121 winmin = i;
3122 groupmin = ig;
3123 distmin = dist;
3127 /* if no histogram found in this bin, use closest histogram */
3128 if (nHist > 0)
3130 fAv = fAv / nHist;
3132 else
3134 fAv = window[winmin].forceAv[groupmin];
3136 f[j] = fAv;
3138 for (j = 1; j < opt->bins; ++j)
3140 pot[j] = pot[j - 1] - 0.5 * dz * (f[j - 1] + f[j]);
3143 /* cyclic wham: linearly correct possible offset */
3144 if (opt->bCycl)
3146 diff = (pot[bins - 1] - pot[0]) / (bins - 1);
3147 for (j = 1; j < opt->bins; ++j)
3149 pot[j] -= j * diff;
3152 if (opt->verbose)
3154 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3155 for (j = 0; j < opt->bins; ++j)
3157 fprintf(fp, "%g %g\n", (j + 0.5) * dz + opt->min, pot[j]);
3159 xvgrclose(fp);
3160 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3163 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3164 energy offsets which are usually determined by wham
3165 First: turn pot into probabilities:
3167 for (j = 0; j < opt->bins; ++j)
3169 pot[j] = std::exp(-pot[j] / (BOLTZ * opt->Temperature));
3171 calc_z(pot, window, nWindows, opt, TRUE);
3173 sfree(pot);
3174 sfree(f);
3177 //! Count number of words in an line
3178 static int wordcount(char* ptr)
3180 int i, n, is[2];
3181 int cur = 0;
3183 if (std::strlen(ptr) == 0)
3185 return 0;
3187 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3188 n = 1;
3189 for (i = 0; (ptr[i] != '\0'); i++)
3191 is[cur] = isspace(ptr[i]);
3192 if ((i > 0) && (is[cur] && !is[1 - cur]))
3194 n++;
3196 cur = 1 - cur;
3198 return n;
3201 /*! \brief Read input file for pull group selection (option -is)
3203 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3205 static void readPullCoordSelection(t_UmbrellaOptions* opt, char** fnTpr, int nTpr)
3207 FILE* fp;
3208 int i, iline, n, len = STRLEN, temp;
3209 char *ptr = nullptr, *tmpbuf = nullptr;
3210 char fmt[1024], fmtign[1024];
3211 int block = 1, sizenow;
3213 fp = gmx_ffopen(opt->fnCoordSel, "r");
3214 opt->coordsel = nullptr;
3216 snew(tmpbuf, len);
3217 sizenow = 0;
3218 iline = 0;
3219 while ((ptr = fgets3(fp, tmpbuf, &len)) != nullptr)
3221 trim(ptr);
3222 n = wordcount(ptr);
3224 if (iline >= sizenow)
3226 sizenow += block;
3227 srenew(opt->coordsel, sizenow);
3229 opt->coordsel[iline].n = n;
3230 opt->coordsel[iline].nUse = 0;
3231 snew(opt->coordsel[iline].bUse, n);
3233 fmtign[0] = '\0';
3234 for (i = 0; i < n; i++)
3236 std::strcpy(fmt, fmtign);
3237 std::strcat(fmt, "%d");
3238 if (sscanf(ptr, fmt, &temp))
3240 opt->coordsel[iline].bUse[i] = (temp > 0);
3241 if (opt->coordsel[iline].bUse[i])
3243 opt->coordsel[iline].nUse++;
3246 std::strcat(fmtign, "%*s");
3248 iline++;
3250 opt->nCoordsel = iline;
3251 if (nTpr != opt->nCoordsel)
3253 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nCoordsel, opt->fnCoordSel);
3256 printf("\nUse only these pull coordinates:\n");
3257 for (iline = 0; iline < nTpr; iline++)
3259 printf("%s (%d of %d coordinates):", fnTpr[iline], opt->coordsel[iline].nUse,
3260 opt->coordsel[iline].n);
3261 for (i = 0; i < opt->coordsel[iline].n; i++)
3263 if (opt->coordsel[iline].bUse[i])
3265 printf(" %d", i + 1);
3268 printf("\n");
3270 printf("\n");
3272 sfree(tmpbuf);
3275 //! Boolean XOR
3276 #define WHAMBOOLXOR(a, b) (((!(a)) && (b)) || ((a) && (!(b))))
3278 //! Number of elements in fnm (used for command line parsing)
3279 #define NFILE asize(fnm)
3281 //! The main gmx wham routine
3282 int gmx_wham(int argc, char* argv[])
3284 const char* desc[] = {
3285 "[THISMODULE] is an analysis program that implements the Weighted",
3286 "Histogram Analysis Method (WHAM). It is intended to analyze",
3287 "output files generated by umbrella sampling simulations to ",
3288 "compute a potential of mean force (PMF).[PAR]",
3290 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3291 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3292 "and, if multiple coordinates need to be analyzed, all used the same",
3293 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3294 "At present, three input modes are supported.",
3296 "* With option [TT]-it[tt], the user provides a file which contains the",
3297 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3298 " AND, with option [TT]-ix[tt], a file which contains file names of",
3299 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3300 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3301 " first pullx, etc.",
3302 "* Same as the previous input mode, except that the user",
3303 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3304 " From the pull force the position in the umbrella potential is",
3305 " computed. This does not work with tabulated umbrella potentials.",
3306 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] ",
3307 " files, i.e.",
3308 " the GROMACS 3.3 umbrella output files. If you have some unusual",
3309 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3310 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file ",
3311 " header must be similar to the following::",
3313 " # UMBRELLA 3.0",
3314 " # Component selection: 0 0 1",
3315 " # nSkip 1",
3316 " # Ref. Group 'TestAtom'",
3317 " # Nr. of pull groups 2",
3318 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3319 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3320 " #####",
3322 " The number of pull groups, umbrella positions, force constants, and names ",
3323 " may (of course) differ. Following the header, a time column and ",
3324 " a data column for each pull group follows (i.e. the displacement",
3325 " with respect to the umbrella center). Up to four pull groups are possible ",
3326 " per [REF].pdo[ref] file at present.[PAR]",
3327 "By default, all pull coordinates found in all pullx/pullf files are used in WHAM. If ",
3328 "only ",
3329 "some of the pull coordinates should be used, a pull coordinate selection file (option ",
3330 "[TT]-is[tt]) can ",
3331 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3332 "Each of these lines must contain one digit (0 or 1) for each pull coordinate in the tpr ",
3333 "file. ",
3334 "Here, 1 indicates that the pull coordinate is used in WHAM, and 0 means it is omitted. ",
3335 "Example:",
3336 "If you have three tpr files, each containing 4 pull coordinates, but only pull ",
3337 "coordinates 1 and 2 should be ",
3338 "used, coordsel.dat looks like this::",
3340 " 1 1 0 0",
3341 " 1 1 0 0",
3342 " 1 1 0 0",
3344 "By default, the output files are::",
3346 " [TT]-o[tt] PMF output file",
3347 " [TT]-hist[tt] Histograms output file",
3349 "Always check whether the histograms sufficiently overlap.[PAR]",
3350 "The umbrella potential is assumed to be harmonic and the force constants are ",
3351 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force ",
3352 "was applied ",
3353 "a tabulated potential can be provided with [TT]-tab[tt].",
3355 "WHAM options",
3356 "^^^^^^^^^^^^",
3358 "* [TT]-bins[tt] Number of bins used in analysis",
3359 "* [TT]-temp[tt] Temperature in the simulations",
3360 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3361 "* [TT]-auto[tt] Automatic determination of boundaries",
3362 "* [TT]-min,-max[tt] Boundaries of the profile",
3364 "The data points that are used to compute the profile",
3365 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3366 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3367 "umbrella window.[PAR]",
3368 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3369 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3370 "With energy output, the energy in the first bin is defined to be zero. ",
3371 "If you want the free energy at a different ",
3372 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3373 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3374 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3375 "[THISMODULE] will make use of the",
3376 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3377 "reaction coordinate will assumed be be neighbors.[PAR]",
3378 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3379 "which may be useful for, e.g. membranes.",
3381 "Parallelization",
3382 "^^^^^^^^^^^^^^^",
3384 "If available, the number of OpenMP threads used by gmx wham can be controlled by setting",
3385 "the [TT]OMP_NUM_THREADS[tt] environment variable.",
3387 "Autocorrelations",
3388 "^^^^^^^^^^^^^^^^",
3390 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3391 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3392 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3393 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3394 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3395 "Because the IACTs can be severely underestimated in case of limited ",
3396 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3397 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3398 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3399 "integration of the ACFs while the ACFs are larger 0.05.",
3400 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3401 "less robust) method such as fitting to a double exponential, you can ",
3402 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3403 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3404 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull coordinate in the ",
3405 "respective file.",
3407 "Error analysis",
3408 "^^^^^^^^^^^^^^",
3410 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3411 "otherwise the statistical error may be substantially underestimated. ",
3412 "More background and examples for the bootstrap technique can be found in ",
3413 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3414 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3415 "Four bootstrapping methods are supported and ",
3416 "selected with [TT]-bs-method[tt].",
3418 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3419 " data points, and the bootstrap is carried out by assigning random weights to the ",
3420 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3421 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3422 " statistical error is underestimated.",
3423 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3424 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3425 " (allowing duplication, i.e. sampling with replacement).",
3426 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3427 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3428 " divided into blocks and only histograms within each block are mixed. Note that ",
3429 " the histograms within each block must be representative for all possible histograms, ",
3430 " otherwise the statistical error is underestimated.",
3431 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3432 " such that the generated data points are distributed according the given histograms ",
3433 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3434 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3435 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3436 " Note that this method may severely underestimate the error in case of limited ",
3437 " sampling, ",
3438 " that is if individual histograms do not represent the complete phase space at ",
3439 " the respective positions.",
3440 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3441 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3442 " and width of the umbrella histograms. That method yields similar error estimates ",
3443 " like method [TT]traj[tt].",
3445 "Bootstrapping output:",
3447 "* [TT]-bsres[tt] Average profile and standard deviations",
3448 "* [TT]-bsprof[tt] All bootstrapping profiles",
3450 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3451 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3452 "the histograms."
3455 const char* en_unit[] = { nullptr, "kJ", "kCal", "kT", nullptr };
3456 const char* en_unit_label[] = { "", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", nullptr };
3457 const char* en_bsMethod[] = { nullptr, "b-hist", "hist", "traj", "traj-gauss", nullptr };
3458 static t_UmbrellaOptions opt;
3460 t_pargs pa[] = {
3461 { "-min", FALSE, etREAL, { &opt.min }, "Minimum coordinate in profile" },
3462 { "-max", FALSE, etREAL, { &opt.max }, "Maximum coordinate in profile" },
3463 { "-auto", FALSE, etBOOL, { &opt.bAuto }, "Determine min and max automatically" },
3464 { "-bins", FALSE, etINT, { &opt.bins }, "Number of bins in profile" },
3465 { "-temp", FALSE, etREAL, { &opt.Temperature }, "Temperature" },
3466 { "-tol", FALSE, etREAL, { &opt.Tolerance }, "Tolerance" },
3467 { "-v", FALSE, etBOOL, { &opt.verbose }, "Verbose mode" },
3468 { "-b", FALSE, etREAL, { &opt.tmin }, "First time to analyse (ps)" },
3469 { "-e", FALSE, etREAL, { &opt.tmax }, "Last time to analyse (ps)" },
3470 { "-dt", FALSE, etREAL, { &opt.dt }, "Analyse only every dt ps" },
3471 { "-histonly", FALSE, etBOOL, { &opt.bHistOnly }, "Write histograms and exit" },
3472 { "-boundsonly",
3473 FALSE,
3474 etBOOL,
3475 { &opt.bBoundsOnly },
3476 "Determine min and max and exit (with [TT]-auto[tt])" },
3477 { "-log", FALSE, etBOOL, { &opt.bLog }, "Calculate the log of the profile before printing" },
3478 { "-unit", FALSE, etENUM, { en_unit }, "Energy unit in case of log output" },
3479 { "-zprof0",
3480 FALSE,
3481 etREAL,
3482 { &opt.zProf0 },
3483 "Define profile to 0.0 at this position (with [TT]-log[tt])" },
3484 { "-cycl",
3485 FALSE,
3486 etBOOL,
3487 { &opt.bCycl },
3488 "Create cyclic/periodic profile. Assumes min and max are the same point." },
3489 { "-sym", FALSE, etBOOL, { &opt.bSym }, "Symmetrize profile around z=0" },
3490 { "-hist-eq",
3491 FALSE,
3492 etBOOL,
3493 { &opt.bHistEq },
3494 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)" },
3495 { "-ac",
3496 FALSE,
3497 etBOOL,
3498 { &opt.bCalcTauInt },
3499 "Calculate integrated autocorrelation times and use in wham" },
3500 { "-acsig",
3501 FALSE,
3502 etREAL,
3503 { &opt.sigSmoothIact },
3504 "Smooth autocorrelation times along reaction coordinate with Gaussian of this "
3505 "[GRK]sigma[grk]" },
3506 { "-ac-trestart",
3507 FALSE,
3508 etREAL,
3509 { &opt.acTrestart },
3510 "When computing autocorrelation functions, restart computing every .. (ps)" },
3511 { "-acred",
3512 FALSE,
3513 etBOOL,
3514 { &opt.bAllowReduceIact },
3515 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3516 "during smoothing" },
3517 { "-nBootstrap",
3518 FALSE,
3519 etINT,
3520 { &opt.nBootStrap },
3521 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3522 { "-bs-method", FALSE, etENUM, { en_bsMethod }, "Bootstrap method" },
3523 { "-bs-tau",
3524 FALSE,
3525 etREAL,
3526 { &opt.tauBootStrap },
3527 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is "
3528 "unknown." },
3529 { "-bs-seed", FALSE, etINT, { &opt.bsSeed }, "Seed for bootstrapping. (-1 = use time)" },
3530 { "-histbs-block",
3531 FALSE,
3532 etINT,
3533 { &opt.histBootStrapBlockLength },
3534 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]." },
3535 { "-vbs",
3536 FALSE,
3537 etBOOL,
3538 { &opt.bs_verbose },
3539 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap." },
3540 { "-stepout",
3541 FALSE,
3542 etINT,
3543 { &opt.stepchange },
3544 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])" },
3545 { "-updateContr",
3546 FALSE,
3547 etINT,
3548 { &opt.stepUpdateContrib },
3549 "HIDDENUpdate table with significan contributions to WHAM every ... iterations" },
3552 t_filenm fnm[] = {
3553 { efDAT, "-ix", "pullx-files", ffOPTRD }, /* wham input: pullf.xvg's and tprs */
3554 { efDAT, "-if", "pullf-files", ffOPTRD }, /* wham input: pullf.xvg's and tprs */
3555 { efDAT, "-it", "tpr-files", ffOPTRD }, /* wham input: tprs */
3556 { efDAT, "-ip", "pdo-files", ffOPTRD }, /* wham input: pdo files (gmx3 style) */
3557 { efDAT, "-is", "coordsel", ffOPTRD }, /* input: select pull coords to use */
3558 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3559 { efXVG, "-hist", "histo", ffWRITE }, /* output file for histograms */
3560 { efXVG, "-oiact", "iact", ffOPTWR }, /* writing integrated autocorrelation times */
3561 { efDAT, "-iiact", "iact-in", ffOPTRD }, /* reading integrated autocorrelation times */
3562 { efXVG, "-bsres", "bsResult", ffOPTWR }, /* average and errors of bootstrap analysis */
3563 { efXVG, "-bsprof", "bsProfs", ffOPTWR }, /* output file for bootstrap profiles */
3564 { efDAT, "-tab", "umb-pot", ffOPTRD }, /* Tabulated umbrella potential (if not harmonic) */
3567 int i, j, l, nfiles, nwins, nfiles2;
3568 t_UmbrellaHeader header;
3569 t_UmbrellaWindow* window = nullptr;
3570 double * profile, maxchange = 1e20;
3571 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3572 char ** fninTpr, **fninPull, **fninPdo;
3573 const char* fnPull;
3574 FILE * histout, *profout;
3575 char xlabel[STRLEN], ylabel[256], title[256];
3577 opt.bins = 200;
3578 opt.verbose = FALSE;
3579 opt.bHistOnly = FALSE;
3580 opt.bCycl = FALSE;
3581 opt.tmin = 50;
3582 opt.tmax = 1e20;
3583 opt.dt = 0.0;
3584 opt.min = 0;
3585 opt.max = 0;
3586 opt.bAuto = TRUE;
3587 opt.nCoordsel = 0;
3588 opt.coordsel = nullptr;
3590 /* bootstrapping stuff */
3591 opt.nBootStrap = 0;
3592 opt.bsMethod = bsMethod_hist;
3593 opt.tauBootStrap = 0.0;
3594 opt.bsSeed = -1;
3595 opt.histBootStrapBlockLength = 8;
3596 opt.bs_verbose = FALSE;
3598 opt.bLog = TRUE;
3599 opt.unit = en_kJ;
3600 opt.zProf0 = 0.;
3601 opt.Temperature = 298;
3602 opt.Tolerance = 1e-6;
3603 opt.bBoundsOnly = FALSE;
3604 opt.bSym = FALSE;
3605 opt.bCalcTauInt = FALSE;
3606 opt.sigSmoothIact = 0.0;
3607 opt.bAllowReduceIact = TRUE;
3608 opt.bInitPotByIntegration = TRUE;
3609 opt.acTrestart = 1.0;
3610 opt.stepchange = 100;
3611 opt.stepUpdateContrib = 100;
3613 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr,
3614 &opt.oenv))
3616 return 0;
3619 opt.unit = nenum(en_unit);
3620 opt.bsMethod = nenum(en_bsMethod);
3622 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3624 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3625 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3626 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3627 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3628 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3629 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3630 if (opt.bTab && opt.bPullf)
3632 gmx_fatal(FARGS,
3633 "Force input does not work with tabulated potentials. "
3634 "Provide pullx.xvg or pdo files!");
3637 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3639 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3641 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3643 gmx_fatal(FARGS,
3644 "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3645 "\n\n Check gmx wham -h !");
3648 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3649 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3650 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3651 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3652 opt.fnCoordSel = opt2fn_null("-is", NFILE, fnm);
3654 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3655 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3656 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3657 if ((bMinSet || bMaxSet) && bAutoSet)
3659 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3662 if ((bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3664 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3667 if (bMinSet && opt.bAuto)
3669 printf("Note: min and max given, switching off -auto.\n");
3670 opt.bAuto = FALSE;
3673 if (opt.bTauIntGiven && opt.bCalcTauInt)
3675 gmx_fatal(FARGS,
3676 "Either read (option -iiact) or calculate (option -ac) the\n"
3677 "the autocorrelation times. Not both.");
3680 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3682 gmx_fatal(FARGS,
3683 "Either compute autocorrelation times (ACTs) (option -ac) or "
3684 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3686 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3688 gmx_fatal(FARGS,
3689 "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3690 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3693 /* Reading gmx4/gmx5 pull output and tpr files */
3694 if (opt.bTpr || opt.bPullf || opt.bPullx)
3696 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3698 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3699 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3700 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n", nfiles, nfiles2,
3701 opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3702 if (nfiles != nfiles2)
3704 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles, opt.fnTpr, nfiles2, fnPull);
3707 /* Read file that selects the pull group to be used */
3708 if (opt.fnCoordSel != nullptr)
3710 readPullCoordSelection(&opt, fninTpr, nfiles);
3713 window = initUmbrellaWindows(nfiles);
3714 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3716 else
3717 { /* reading pdo files */
3718 if (opt.fnCoordSel != nullptr)
3720 gmx_fatal(FARGS,
3721 "Reading a -is file is not supported with PDO input files.\n"
3722 "Use awk or a similar tool to pick the required pull groups from your PDO "
3723 "files\n");
3725 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3726 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3727 window = initUmbrellaWindows(nfiles);
3728 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3731 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
3732 We can therefore get the units for the xlabel from the first coordinate. */
3733 sprintf(xlabel, "\\xx\\f{} (%s)", header.pcrd[0].coord_unit);
3735 nwins = nfiles;
3737 /* enforce equal weight for all histograms? */
3738 if (opt.bHistEq)
3740 enforceEqualWeights(window, nwins);
3743 /* write histograms */
3744 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms", xlabel, "count", opt.oenv);
3745 for (l = 0; l < opt.bins; ++l)
3747 fprintf(histout, "%e\t", (l + 0.5) / opt.bins * (opt.max - opt.min) + opt.min);
3748 for (i = 0; i < nwins; ++i)
3750 for (j = 0; j < window[i].nPull; ++j)
3752 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3755 fprintf(histout, "\n");
3757 xvgrclose(histout);
3758 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3759 if (opt.bHistOnly)
3761 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3762 return 0;
3765 /* Using tabulated umbrella potential */
3766 if (opt.bTab)
3768 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3771 /* Integrated autocorrelation times provided ? */
3772 if (opt.bTauIntGiven)
3774 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3777 /* Compute integrated autocorrelation times */
3778 if (opt.bCalcTauInt)
3780 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3783 /* calc average and sigma for each histogram
3784 (maybe required for bootstrapping. If not, this is fast anyhow) */
3785 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3787 averageSigma(window, nwins);
3790 /* Get initial potential by simple integration */
3791 if (opt.bInitPotByIntegration)
3793 guessPotByIntegration(window, nwins, &opt, xlabel);
3796 /* Check if complete reaction coordinate is covered */
3797 checkReactionCoordinateCovered(window, nwins, &opt);
3799 /* Calculate profile */
3800 snew(profile, opt.bins);
3801 if (opt.verbose)
3803 opt.stepchange = 1;
3805 i = 0;
3808 if ((i % opt.stepUpdateContrib) == 0)
3810 setup_acc_wham(profile, window, nwins, &opt);
3812 if (maxchange < opt.Tolerance)
3814 bExact = TRUE;
3815 /* if (opt.verbose) */
3816 printf("Switched to exact iteration in iteration %d\n", i);
3818 calc_profile(profile, window, nwins, &opt, bExact);
3819 if (((i % opt.stepchange) == 0 || i == 1) && i != 0)
3821 printf("\t%4d) Maximum change %e\n", i, maxchange);
3823 i++;
3824 } while ((maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3825 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3827 /* calc error from Kumar's formula */
3828 /* Unclear how the error propagates along reaction coordinate, therefore
3829 commented out */
3830 /* calc_error_kumar(profile,window, nwins,&opt); */
3832 /* Write profile in energy units? */
3833 if (opt.bLog)
3835 prof_normalization_and_unit(profile, &opt);
3836 std::strcpy(ylabel, en_unit_label[opt.unit]);
3837 std::strcpy(title, "Umbrella potential");
3839 else
3841 std::strcpy(ylabel, "Density of states");
3842 std::strcpy(title, "Density of states");
3845 /* symmetrize profile around z=0? */
3846 if (opt.bSym)
3848 symmetrizeProfile(profile, &opt);
3851 /* write profile or density of states */
3852 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3853 for (i = 0; i < opt.bins; ++i)
3855 fprintf(profout, "%e\t%e\n", (i + 0.5) / opt.bins * (opt.max - opt.min) + opt.min, profile[i]);
3857 xvgrclose(profout);
3858 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3860 /* Bootstrap Method */
3861 if (opt.nBootStrap)
3863 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3864 opt2fn("-hist", NFILE, fnm), xlabel, ylabel, profile, window, nwins, &opt);
3867 sfree(profile);
3868 freeUmbrellaWindows(window, nfiles);
3870 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3871 please_cite(stdout, "Hub2010");
3873 return 0;