Remove unused function generate_excls and make clean_excls static
[gromacs.git] / src / gromacs / gmxana / gmx_bar.cpp
blob16d70d70c5176c85244f731fbe5d1ccfca1990e8
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 #include <cctype>
38 #include <cmath>
39 #include <cstdlib>
40 #include <cstring>
42 #include <algorithm>
43 #include <limits>
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/commandline/viewit.h"
47 #include "gromacs/fileio/enxio.h"
48 #include "gromacs/fileio/xvgr.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/math/units.h"
51 #include "gromacs/math/utilities.h"
52 #include "gromacs/mdlib/energyoutput.h"
53 #include "gromacs/mdtypes/md_enums.h"
54 #include "gromacs/trajectory/energyframe.h"
55 #include "gromacs/utility/arraysize.h"
56 #include "gromacs/utility/cstringutil.h"
57 #include "gromacs/utility/dir_separator.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/gmxassert.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/snprintf.h"
64 /* Structure for the names of lambda vector components */
65 typedef struct lambda_components_t
67 char **names; /* Array of strings with names for the lambda vector
68 components */
69 int N; /* The number of components */
70 int Nalloc; /* The number of allocated components */
71 } lambda_components_t;
73 /* Structure for a lambda vector or a dhdl derivative direction */
74 typedef struct lambda_vec_t
76 double *val; /* The lambda vector component values. Only valid if
77 dhdl == -1 */
78 int dhdl; /* The coordinate index for the derivative described by this
79 structure, or -1 */
80 const lambda_components_t *lc; /* the associated lambda_components
81 structure */
82 int index; /* The state number (init-lambda-state) of this lambda
83 vector, if known. If not, it is set to -1 */
84 } lambda_vec_t;
86 /* the dhdl.xvg data from a simulation */
87 typedef struct xvg_t
89 const char *filename;
90 int ftp; /* file type */
91 int nset; /* number of lambdas, including dhdl */
92 int *np; /* number of data points (du or hists) per lambda */
93 int np_alloc; /* number of points (du or hists) allocated */
94 double temp; /* temperature */
95 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
96 double *t; /* the times (of second index for y) */
97 double **y; /* the dU values. y[0] holds the derivative, while
98 further ones contain the energy differences between
99 the native lambda and the 'foreign' lambdas. */
100 lambda_vec_t native_lambda; /* the native lambda */
102 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
103 } xvg_t;
106 typedef struct hist_t
108 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
109 double dx[2]; /* the histogram spacing. The reverse
110 dx is the negative of the forward dx.*/
111 int64_t x0[2]; /* the (forward + reverse) histogram start
112 point(s) as int */
114 int nbin[2]; /* the (forward+reverse) number of bins */
115 int64_t sum; /* the total number of counts. Must be
116 the same for forward + reverse. */
117 int nhist; /* number of hist datas (forward or reverse) */
119 double start_time, delta_time; /* start time, end time of histogram */
120 } hist_t;
123 /* an aggregate of samples for partial free energy calculation */
124 typedef struct samples_t
126 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
127 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
128 double temp; /* the temperature */
129 gmx_bool derivative; /* whether this sample is a derivative */
131 /* The samples come either as either delta U lists: */
132 int ndu; /* the number of delta U samples */
133 double *du; /* the delta u's */
134 double *t; /* the times associated with those samples, or: */
135 double start_time, delta_time; /*start time and delta time for linear time*/
137 /* or as histograms: */
138 hist_t *hist; /* a histogram */
140 /* allocation data: (not NULL for data 'owned' by this struct) */
141 double *du_alloc, *t_alloc; /* allocated delta u arrays */
142 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
143 hist_t *hist_alloc; /* allocated hist */
145 int64_t ntot; /* total number of samples */
146 const char *filename; /* the file name this sample comes from */
147 } samples_t;
149 /* a sample range (start to end for du-style data, or boolean
150 for both du-style data and histograms */
151 typedef struct sample_range_t
153 int start, end; /* start and end index for du style data */
154 gmx_bool use; /* whether to use this sample */
156 samples_t *s; /* the samples this range belongs to */
157 } sample_range_t;
160 /* a collection of samples for a partial free energy calculation
161 (i.e. the collection of samples from one native lambda to one
162 foreign lambda) */
163 typedef struct sample_coll_t
165 lambda_vec_t *native_lambda; /* these should be the same for all samples
166 in the histogram */
167 lambda_vec_t *foreign_lambda; /* collection */
168 double temp; /* the temperature */
170 int nsamples; /* the number of samples */
171 samples_t **s; /* the samples themselves */
172 sample_range_t *r; /* the sample ranges */
173 int nsamples_alloc; /* number of allocated samples */
175 int64_t ntot; /* total number of samples in the ranges of
176 this collection */
178 struct sample_coll_t *next, *prev; /* next and previous in the list */
179 } sample_coll_t;
181 /* all the samples associated with a lambda point */
182 typedef struct lambda_data_t
184 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
185 double temp; /* temperature */
187 sample_coll_t *sc; /* the samples */
189 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
191 struct lambda_data_t *next, *prev; /* the next and prev in the list */
192 } lambda_data_t;
194 /* Top-level data structure of simulation data */
195 typedef struct sim_data_t
197 lambda_data_t *lb; /* a lambda data linked list */
198 lambda_data_t lb_head; /* The head element of the linked list */
200 lambda_components_t lc; /* the allowed components of the lambda
201 vectors */
202 } sim_data_t;
204 /* Top-level data structure with calculated values. */
205 typedef struct {
206 sample_coll_t *a, *b; /* the simulation data */
208 double dg; /* the free energy difference */
209 double dg_err; /* the free energy difference */
211 double dg_disc_err; /* discretization error */
212 double dg_histrange_err; /* histogram range error */
214 double sa; /* relative entropy of b in state a */
215 double sa_err; /* error in sa */
216 double sb; /* relative entropy of a in state b */
217 double sb_err; /* error in sb */
219 double dg_stddev; /* expected dg stddev per sample */
220 double dg_stddev_err; /* error in dg_stddev */
221 } barres_t;
224 /* Initialize a lambda_components structure */
225 static void lambda_components_init(lambda_components_t *lc)
227 lc->N = 0;
228 lc->Nalloc = 2;
229 snew(lc->names, lc->Nalloc);
232 /* Add a component to a lambda_components structure */
233 static void lambda_components_add(lambda_components_t *lc,
234 const char *name, size_t name_length)
236 while (lc->N + 1 > lc->Nalloc)
238 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
239 srenew(lc->names, lc->Nalloc);
241 snew(lc->names[lc->N], name_length+1);
242 std::strncpy(lc->names[lc->N], name, name_length);
243 lc->N++;
246 /* check whether a component with index 'index' matches the given name, or
247 is also NULL. Returns TRUE if this is the case.
248 the string name does not need to end */
249 static gmx_bool lambda_components_check(const lambda_components_t *lc,
250 int index,
251 const char *name,
252 size_t name_length)
254 size_t len;
255 if (!lc || index >= lc->N)
257 return FALSE;
259 if ((name == nullptr) && (lc->names[index] == nullptr))
261 return TRUE;
263 if (((name != nullptr) && (lc->names[index] == nullptr)) ||
264 ((name == nullptr) && (lc->names[index] != nullptr)))
266 return FALSE;
268 GMX_RELEASE_ASSERT((name != nullptr) || (name_length == 0),
269 "If name is empty, the length of the substring to examine within it must be zero");
270 len = std::strlen(lc->names[index]);
271 if (len != name_length)
273 return FALSE;
275 if (name_length == 0)
277 // Everything matches a zero-length substring. This branch is
278 // needed because name could in principle be nullptr.
279 return TRUE;
281 return std::strncmp(lc->names[index], name, name_length) == 0;
284 /* Find the index of a given lambda component name, or -1 if not found */
285 static int lambda_components_find(const lambda_components_t *lc,
286 const char *name,
287 size_t name_length)
289 int i;
291 for (i = 0; i < lc->N; i++)
293 if (std::strncmp(lc->names[i], name, name_length) == 0)
295 return i;
298 return -1;
303 /* initialize a lambda vector */
304 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
306 snew(lv->val, lc->N);
307 lv->index = -1;
308 lv->dhdl = -1;
309 lv->lc = lc;
312 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
314 int i;
316 lambda_vec_init(lv, orig->lc);
317 lv->dhdl = orig->dhdl;
318 lv->index = orig->index;
319 for (i = 0; i < lv->lc->N; i++)
321 lv->val[i] = orig->val[i];
325 /* write a lambda vec to a preallocated string */
326 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
328 int i;
330 str[0] = 0; /* reset the string */
331 if (lv->dhdl < 0)
333 if (named)
335 str += sprintf(str, "delta H to ");
337 if (lv->lc->N > 1)
339 str += sprintf(str, "(");
341 for (i = 0; i < lv->lc->N; i++)
343 str += sprintf(str, "%g", lv->val[i]);
344 if (i < lv->lc->N-1)
346 str += sprintf(str, ", ");
349 if (lv->lc->N > 1)
351 sprintf(str, ")");
354 else
356 /* this lambda vector describes a derivative */
357 str += sprintf(str, "dH/dl");
358 if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
360 sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
365 /* write a shortened version of the lambda vec to a preallocated string */
366 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
368 if (lv->index >= 0)
370 sprintf(str, "%6d", lv->index);
372 else
374 if (lv->dhdl < 0)
376 sprintf(str, "%6.3f", lv->val[0]);
378 else
380 sprintf(str, "dH/dl[%d]", lv->dhdl);
385 /* write an intermediate version of two lambda vecs to a preallocated string */
386 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
387 const lambda_vec_t *b, char *str)
389 str[0] = 0;
390 if ( (a->index >= 0) && (b->index >= 0) )
392 sprintf(str, "%6.3f", (a->index+b->index)/2.0);
394 else
396 if ( (a->dhdl < 0) && (b->dhdl < 0) )
398 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.0);
404 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
405 a and b must describe non-derivative lambda points */
406 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
408 int i;
409 double ret = 0.;
411 if ( (a->dhdl > 0) || (b->dhdl > 0) )
413 gmx_fatal(FARGS,
414 "Trying to calculate the difference between derivatives instead of lambda points");
416 if (a->lc != b->lc)
418 gmx_fatal(FARGS,
419 "Trying to calculate the difference lambdas with differing basis set");
421 for (i = 0; i < a->lc->N; i++)
423 double df = a->val[i] - b->val[i];
424 ret += df*df;
426 return std::sqrt(ret);
430 /* check whether two lambda vectors are the same */
431 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
433 int i;
435 if (a->lc != b->lc)
437 return FALSE;
439 if (a->dhdl < 0)
441 for (i = 0; i < a->lc->N; i++)
443 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
445 return FALSE;
448 return TRUE;
450 else
452 /* they're derivatives, so we check whether the indices match */
453 return (a->dhdl == b->dhdl);
457 /* Compare the sort order of two foreign lambda vectors
459 returns 1 if a is 'bigger' than b,
460 returns 0 if they're the same,
461 returns -1 if a is 'smaller' than b.*/
462 static int lambda_vec_cmp_foreign(const lambda_vec_t *a,
463 const lambda_vec_t *b)
465 int i;
466 double norm_a = 0, norm_b = 0;
467 gmx_bool different = FALSE;
469 if (a->lc != b->lc)
471 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
473 /* if either one has an index we sort based on that */
474 if ((a->index >= 0) || (b->index >= 0))
476 if (a->index == b->index)
478 return 0;
480 return (a->index > b->index) ? 1 : -1;
482 if (a->dhdl >= 0 || b->dhdl >= 0)
484 /* lambda vectors that are derivatives always sort higher than those
485 without derivatives */
486 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
488 return (a->dhdl >= 0) ? 1 : -1;
490 return 0;
493 /* neither has an index, so we can only sort on the lambda components,
494 which is only valid if there is one component */
495 for (i = 0; i < a->lc->N; i++)
497 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
499 different = TRUE;
501 norm_a += a->val[i]*a->val[i];
502 norm_b += b->val[i]*b->val[i];
504 if (!different)
506 return 0;
508 return (norm_a > norm_b) ? 1 : -1;
511 /* Compare the sort order of two native lambda vectors
513 returns 1 if a is 'bigger' than b,
514 returns 0 if they're the same,
515 returns -1 if a is 'smaller' than b.*/
516 static int lambda_vec_cmp_native(const lambda_vec_t *a,
517 const lambda_vec_t *b)
519 if (a->lc != b->lc)
521 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
523 /* if either one has an index we sort based on that */
524 if ((a->index >= 0) || (b->index >= 0))
526 if (a->index == b->index)
528 return 0;
530 return (a->index > b->index) ? 1 : -1;
532 /* neither has an index, so we can only sort on the lambda components,
533 which is only valid if there is one component */
534 if (a->lc->N > 1)
536 gmx_fatal(FARGS,
537 "Can't compare lambdas with no index and > 1 component");
539 if (a->dhdl >= 0 || b->dhdl >= 0)
541 gmx_fatal(FARGS,
542 "Can't compare native lambdas that are derivatives");
544 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
546 return 0;
548 return a->val[0] > b->val[0] ? 1 : -1;
554 static void hist_init(hist_t *h, int nhist, int *nbin)
556 int i;
557 if (nhist > 2)
559 gmx_fatal(FARGS, "histogram with more than two sets of data!");
561 for (i = 0; i < nhist; i++)
563 snew(h->bin[i], nbin[i]);
564 h->x0[i] = 0;
565 h->nbin[i] = nbin[i];
566 h->start_time = h->delta_time = 0;
567 h->dx[i] = 0;
569 h->sum = 0;
570 h->nhist = nhist;
573 static void xvg_init(xvg_t *ba)
575 ba->filename = nullptr;
576 ba->nset = 0;
577 ba->np_alloc = 0;
578 ba->np = nullptr;
579 ba->y = nullptr;
582 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
583 lambda_vec_t *foreign_lambda, double temp,
584 gmx_bool derivative, const char *filename)
586 s->native_lambda = native_lambda;
587 s->foreign_lambda = foreign_lambda;
588 s->temp = temp;
589 s->derivative = derivative;
591 s->ndu = 0;
592 s->du = nullptr;
593 s->t = nullptr;
594 s->start_time = s->delta_time = 0;
595 s->hist = nullptr;
596 s->du_alloc = nullptr;
597 s->t_alloc = nullptr;
598 s->hist_alloc = nullptr;
599 s->ndu_alloc = 0;
600 s->nt_alloc = 0;
602 s->ntot = 0;
603 s->filename = filename;
606 static void sample_range_init(sample_range_t *r, samples_t *s)
608 r->start = 0;
609 r->end = s->ndu;
610 r->use = TRUE;
611 r->s = nullptr;
614 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
615 lambda_vec_t *foreign_lambda, double temp)
617 sc->native_lambda = native_lambda;
618 sc->foreign_lambda = foreign_lambda;
619 sc->temp = temp;
621 sc->nsamples = 0;
622 sc->s = nullptr;
623 sc->r = nullptr;
624 sc->nsamples_alloc = 0;
626 sc->ntot = 0;
627 sc->next = sc->prev = nullptr;
630 static void sample_coll_destroy(sample_coll_t *sc)
632 /* don't free the samples themselves */
633 sfree(sc->r);
634 sfree(sc->s);
638 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
639 double temp)
641 l->lambda = native_lambda;
642 l->temp = temp;
644 l->next = nullptr;
645 l->prev = nullptr;
647 l->sc = &(l->sc_head);
649 sample_coll_init(l->sc, native_lambda, nullptr, 0.);
650 l->sc->next = l->sc;
651 l->sc->prev = l->sc;
654 static void barres_init(barres_t *br)
656 br->dg = 0;
657 br->dg_err = 0;
658 br->sa = 0;
659 br->sa_err = 0;
660 br->sb = 0;
661 br->sb_err = 0;
662 br->dg_stddev = 0;
663 br->dg_stddev_err = 0;
665 br->a = nullptr;
666 br->b = nullptr;
670 /* calculate the total number of samples in a sample collection */
671 static void sample_coll_calc_ntot(sample_coll_t *sc)
673 int i;
675 sc->ntot = 0;
676 for (i = 0; i < sc->nsamples; i++)
678 if (sc->r[i].use)
680 if (sc->s[i]->hist)
682 sc->ntot += sc->s[i]->ntot;
684 else
686 sc->ntot += sc->r[i].end - sc->r[i].start;
693 /* find the barsamples_t associated with a lambda that corresponds to
694 a specific foreign lambda */
695 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
696 lambda_vec_t *foreign_lambda)
698 sample_coll_t *sc = l->sc->next;
700 while (sc != l->sc)
702 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
704 return sc;
706 sc = sc->next;
709 return nullptr;
712 /* insert li into an ordered list of lambda_colls */
713 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
715 sample_coll_t *scn = l->sc->next;
716 while ( (scn != l->sc) )
718 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
720 break;
722 scn = scn->next;
724 /* now insert it before the found scn */
725 sc->next = scn;
726 sc->prev = scn->prev;
727 scn->prev->next = sc;
728 scn->prev = sc;
731 /* insert li into an ordered list of lambdas */
732 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
734 lambda_data_t *lc = head->next;
735 while (lc != head)
737 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
739 break;
741 lc = lc->next;
743 /* now insert ourselves before the found lc */
744 li->next = lc;
745 li->prev = lc->prev;
746 lc->prev->next = li;
747 lc->prev = li;
750 /* insert a sample and a sample_range into a sample_coll. The
751 samples are stored as a pointer, the range is copied. */
752 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
753 sample_range_t *r)
755 /* first check if it belongs here */
756 GMX_ASSERT(sc->next->s, "Next not properly initialized!");
757 if (sc->temp != s->temp)
759 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
760 s->filename, sc->next->s[0]->filename);
762 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
764 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
765 s->filename, sc->next->s[0]->filename);
767 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
769 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
770 s->filename, sc->next->s[0]->filename);
773 /* check if there's room */
774 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
776 sc->nsamples_alloc = std::max(2*sc->nsamples_alloc, 2);
777 srenew(sc->s, sc->nsamples_alloc);
778 srenew(sc->r, sc->nsamples_alloc);
780 sc->s[sc->nsamples] = s;
781 sc->r[sc->nsamples] = *r;
782 sc->nsamples++;
784 sample_coll_calc_ntot(sc);
787 /* insert a sample into a lambda_list, creating the right sample_coll if
788 neccesary */
789 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
791 gmx_bool found = FALSE;
792 sample_coll_t *sc;
793 sample_range_t r;
795 lambda_data_t *l = head->next;
797 /* first search for the right lambda_data_t */
798 while (l != head)
800 if (lambda_vec_same(l->lambda, s->native_lambda) )
802 found = TRUE;
803 break;
805 l = l->next;
808 if (!found)
810 snew(l, 1); /* allocate a new one */
811 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
812 lambda_data_insert_lambda(head, l); /* add it to the list */
815 /* now look for a sample collection */
816 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
817 if (!sc)
819 snew(sc, 1); /* allocate a new one */
820 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
821 lambda_data_insert_sample_coll(l, sc);
824 /* now insert the samples into the sample coll */
825 sample_range_init(&r, s);
826 sample_coll_insert_sample(sc, s, &r);
830 /* make a histogram out of a sample collection */
831 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
832 int *nbin_alloc, int *nbin,
833 double *dx, double *xmin, int nbin_default)
835 int i, j, k;
836 gmx_bool dx_set = FALSE;
837 gmx_bool xmin_set = FALSE;
839 gmx_bool xmax_set = FALSE;
840 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
841 limits of a histogram */
842 double xmax = -1;
844 /* first determine dx and xmin; try the histograms */
845 for (i = 0; i < sc->nsamples; i++)
847 if (sc->s[i]->hist)
849 hist_t *hist = sc->s[i]->hist;
850 for (k = 0; k < hist->nhist; k++)
852 double hdx = hist->dx[k];
853 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
855 /* we use the biggest dx*/
856 if ( (!dx_set) || hist->dx[0] > *dx)
858 dx_set = TRUE;
859 *dx = hist->dx[0];
861 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
863 xmin_set = TRUE;
864 *xmin = (hist->x0[k]*hdx);
867 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
869 xmax_set = TRUE;
870 xmax = xmax_now;
871 if (hist->bin[k][hist->nbin[k]-1] != 0)
873 xmax_set_hard = TRUE;
876 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
878 xmax_set_hard = TRUE;
879 xmax = xmax_now;
884 /* and the delta us */
885 for (i = 0; i < sc->nsamples; i++)
887 if (sc->s[i]->ndu > 0)
889 /* determine min and max */
890 int starti = sc->r[i].start;
891 int endi = sc->r[i].end;
892 double du_xmin = sc->s[i]->du[starti];
893 double du_xmax = sc->s[i]->du[starti];
894 for (j = starti+1; j < endi; j++)
896 if (sc->s[i]->du[j] < du_xmin)
898 du_xmin = sc->s[i]->du[j];
900 if (sc->s[i]->du[j] > du_xmax)
902 du_xmax = sc->s[i]->du[j];
906 /* and now change the limits */
907 if ( (!xmin_set) || (du_xmin < *xmin) )
909 xmin_set = TRUE;
910 *xmin = du_xmin;
912 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
914 xmax_set = TRUE;
915 xmax = du_xmax;
920 if (!xmax_set || !xmin_set)
922 *nbin = 0;
923 return;
927 if (!dx_set)
929 *nbin = nbin_default;
930 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
931 be 0, and we count from 0 */
933 else
935 *nbin = static_cast<int>((xmax-(*xmin))/(*dx));
938 if (*nbin > *nbin_alloc)
940 *nbin_alloc = *nbin;
941 srenew(*bin, *nbin_alloc);
944 /* reset the histogram */
945 for (i = 0; i < (*nbin); i++)
947 (*bin)[i] = 0;
950 /* now add the actual data */
951 for (i = 0; i < sc->nsamples; i++)
953 if (sc->s[i]->hist)
955 hist_t *hist = sc->s[i]->hist;
956 for (k = 0; k < hist->nhist; k++)
958 double hdx = hist->dx[k];
959 double xmin_hist = hist->x0[k]*hdx;
960 for (j = 0; j < hist->nbin[k]; j++)
962 /* calculate the bin corresponding to the middle of the
963 original bin */
964 double x = hdx*(j+0.5) + xmin_hist;
965 int binnr = static_cast<int>((x-(*xmin))/(*dx));
967 if (binnr >= *nbin || binnr < 0)
969 binnr = (*nbin)-1;
972 (*bin)[binnr] += hist->bin[k][j];
976 else
978 int starti = sc->r[i].start;
979 int endi = sc->r[i].end;
980 for (j = starti; j < endi; j++)
982 int binnr = static_cast<int>((sc->s[i]->du[j]-(*xmin))/(*dx));
983 if (binnr >= *nbin || binnr < 0)
985 binnr = (*nbin)-1;
988 (*bin)[binnr]++;
994 /* write a collection of histograms to a file */
995 static void sim_data_histogram(sim_data_t *sd, const char *filename,
996 int nbin_default, const gmx_output_env_t *oenv)
998 char label_x[STRLEN];
999 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1000 const char *title = "N(\\DeltaH)";
1001 const char *label_y = "Samples";
1002 FILE *fp;
1003 lambda_data_t *bl;
1004 int nsets = 0;
1005 char **setnames = nullptr;
1006 gmx_bool first_set = FALSE;
1007 /* histogram data: */
1008 int *hist = nullptr;
1009 int nbin = 0;
1010 int nbin_alloc = 0;
1011 double dx = 0;
1012 double minval = 0;
1013 int i;
1014 lambda_data_t *bl_head = sd->lb;
1016 printf("\nWriting histogram to %s\n", filename);
1017 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1019 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1021 /* first get all the set names */
1022 bl = bl_head->next;
1023 /* iterate over all lambdas */
1024 while (bl != bl_head)
1026 sample_coll_t *sc = bl->sc->next;
1028 /* iterate over all samples */
1029 while (sc != bl->sc)
1031 char buf[STRLEN], buf2[STRLEN];
1033 nsets++;
1034 srenew(setnames, nsets);
1035 snew(setnames[nsets-1], STRLEN);
1036 if (sc->foreign_lambda->dhdl < 0)
1038 lambda_vec_print(sc->native_lambda, buf, FALSE);
1039 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1040 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1041 deltag, lambda, buf2, lambda, buf);
1043 else
1045 lambda_vec_print(sc->native_lambda, buf, FALSE);
1046 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1047 dhdl, lambda, buf);
1049 sc = sc->next;
1052 bl = bl->next;
1054 xvgr_legend(fp, nsets, setnames, oenv);
1057 /* now make the histograms */
1058 bl = bl_head->next;
1059 /* iterate over all lambdas */
1060 while (bl != bl_head)
1062 sample_coll_t *sc = bl->sc->next;
1064 /* iterate over all samples */
1065 while (sc != bl->sc)
1067 if (!first_set)
1069 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1072 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &minval,
1073 nbin_default);
1075 for (i = 0; i < nbin; i++)
1077 double xmin = i*dx + minval;
1078 double xmax = (i+1)*dx + minval;
1080 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1083 first_set = FALSE;
1084 sc = sc->next;
1087 bl = bl->next;
1090 if (hist)
1092 sfree(hist);
1095 xvgrclose(fp);
1098 static int
1099 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1101 int n = 0;
1103 n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1104 if (lambda->index >= 0)
1106 n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1108 if (lambda->dhdl >= 0)
1110 n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1112 else
1114 int i;
1115 for (i = 0; i < lambda->lc->N; i++)
1117 n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1120 return n;
1123 /* create a collection (array) of barres_t object given a ordered linked list
1124 of barlamda_t sample collections */
1125 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1126 gmx_bool use_dhdl)
1128 lambda_data_t *bl;
1129 int nlambda = 0;
1130 barres_t *res;
1131 gmx_bool dhdl = FALSE;
1132 gmx_bool first = TRUE;
1133 lambda_data_t *bl_head = sd->lb;
1135 /* first count the lambdas */
1136 bl = bl_head->next;
1137 while (bl != bl_head)
1139 nlambda++;
1140 bl = bl->next;
1142 snew(res, nlambda-1);
1144 /* next put the right samples in the res */
1145 *nres = 0;
1146 bl = bl_head->next->next; /* we start with the second one. */
1147 while (bl != bl_head)
1149 sample_coll_t *sc, *scprev;
1150 barres_t *br = &(res[*nres]);
1151 /* there is always a previous one. we search for that as a foreign
1152 lambda: */
1153 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1154 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1156 barres_init(br);
1158 if (use_dhdl)
1160 /* we use dhdl */
1162 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1163 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1165 if (first)
1167 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1168 dhdl = TRUE;
1170 if (!dhdl)
1172 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1175 else if (!scprev && !sc)
1177 char descX[STRLEN], descY[STRLEN];
1178 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1179 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1181 gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1184 /* normal delta H */
1185 if (!scprev)
1187 char descX[STRLEN], descY[STRLEN];
1188 snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1189 snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1190 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1192 if (!sc)
1194 char descX[STRLEN], descY[STRLEN];
1195 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1196 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1197 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1199 br->a = scprev;
1200 br->b = sc;
1202 first = FALSE;
1203 (*nres)++;
1204 bl = bl->next;
1206 return res;
1209 /* estimate the maximum discretization error */
1210 static double barres_list_max_disc_err(barres_t *res, int nres)
1212 int i, j;
1213 double disc_err = 0.;
1214 double delta_lambda;
1216 for (i = 0; i < nres; i++)
1218 barres_t *br = &(res[i]);
1220 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1221 br->a->native_lambda);
1223 for (j = 0; j < br->a->nsamples; j++)
1225 if (br->a->s[j]->hist)
1227 double Wfac = 1.;
1228 if (br->a->s[j]->derivative)
1230 Wfac = delta_lambda;
1233 disc_err = std::max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1236 for (j = 0; j < br->b->nsamples; j++)
1238 if (br->b->s[j]->hist)
1240 double Wfac = 1.;
1241 if (br->b->s[j]->derivative)
1243 Wfac = delta_lambda;
1245 disc_err = std::max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1249 return disc_err;
1253 /* impose start and end times on a sample collection, updating sample_ranges */
1254 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1255 double end_t)
1257 int i;
1258 for (i = 0; i < sc->nsamples; i++)
1260 samples_t *s = sc->s[i];
1261 sample_range_t *r = &(sc->r[i]);
1262 if (s->hist)
1264 double end_time = s->hist->delta_time*s->hist->sum +
1265 s->hist->start_time;
1266 if (s->hist->start_time < begin_t || end_time > end_t)
1268 r->use = FALSE;
1271 else
1273 if (!s->t)
1275 double end_time;
1276 if (s->start_time < begin_t)
1278 r->start = static_cast<int>((begin_t - s->start_time)/s->delta_time);
1280 end_time = s->delta_time*s->ndu + s->start_time;
1281 if (end_time > end_t)
1283 r->end = static_cast<int>((end_t - s->start_time)/s->delta_time);
1286 else
1288 int j;
1289 for (j = 0; j < s->ndu; j++)
1291 if (s->t[j] < begin_t)
1293 r->start = j;
1296 if (s->t[j] >= end_t)
1298 r->end = j;
1299 break;
1303 if (r->start > r->end)
1305 r->use = FALSE;
1309 sample_coll_calc_ntot(sc);
1312 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1314 double first_t, last_t;
1315 double begin_t, end_t;
1316 lambda_data_t *lc;
1317 lambda_data_t *head = sd->lb;
1318 int j;
1320 if (begin <= 0 && end < 0)
1322 return;
1325 /* first determine the global start and end times */
1326 first_t = -1;
1327 last_t = -1;
1328 lc = head->next;
1329 while (lc != head)
1331 sample_coll_t *sc = lc->sc->next;
1332 while (sc != lc->sc)
1334 for (j = 0; j < sc->nsamples; j++)
1336 double start_t, end_t;
1338 start_t = sc->s[j]->start_time;
1339 end_t = sc->s[j]->start_time;
1340 if (sc->s[j]->hist)
1342 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1344 else
1346 if (sc->s[j]->t)
1348 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1350 else
1352 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1356 if (start_t < first_t || first_t < 0)
1358 first_t = start_t;
1360 if (end_t > last_t)
1362 last_t = end_t;
1365 sc = sc->next;
1367 lc = lc->next;
1370 /* calculate the actual times */
1371 if (begin > 0)
1373 begin_t = begin;
1375 else
1377 begin_t = first_t;
1380 if (end > 0)
1382 end_t = end;
1384 else
1386 end_t = last_t;
1388 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1390 if (begin_t > end_t)
1392 return;
1394 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1396 /* then impose them */
1397 lc = head->next;
1398 while (lc != head)
1400 sample_coll_t *sc = lc->sc->next;
1401 while (sc != lc->sc)
1403 sample_coll_impose_times(sc, begin_t, end_t);
1404 sc = sc->next;
1406 lc = lc->next;
1411 /* create subsample i out of ni from an existing sample_coll */
1412 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1413 sample_coll_t *sc_orig,
1414 int i, int ni)
1416 int j;
1418 int64_t ntot_start;
1419 int64_t ntot_end;
1420 int64_t ntot_so_far;
1422 *sc = *sc_orig; /* just copy all fields */
1424 /* allocate proprietary memory */
1425 snew(sc->s, sc_orig->nsamples);
1426 snew(sc->r, sc_orig->nsamples);
1428 /* copy the samples */
1429 for (j = 0; j < sc_orig->nsamples; j++)
1431 sc->s[j] = sc_orig->s[j];
1432 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1435 /* now fix start and end fields */
1436 /* the casts avoid possible overflows */
1437 ntot_start = static_cast<int64_t>(sc_orig->ntot*static_cast<double>(i)/static_cast<double>(ni));
1438 ntot_end = static_cast<int64_t>(sc_orig->ntot*static_cast<double>(i+1)/static_cast<double>(ni));
1439 ntot_so_far = 0;
1440 for (j = 0; j < sc->nsamples; j++)
1442 int64_t ntot_add;
1443 int64_t new_start, new_end;
1445 if (sc->r[j].use)
1447 if (sc->s[j]->hist)
1449 ntot_add = sc->s[j]->hist->sum;
1451 else
1453 ntot_add = sc->r[j].end - sc->r[j].start;
1456 else
1458 ntot_add = 0;
1461 if (!sc->s[j]->hist)
1463 if (ntot_so_far < ntot_start)
1465 /* adjust starting point */
1466 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1468 else
1470 new_start = sc->r[j].start;
1472 /* adjust end point */
1473 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1474 if (new_end > sc->r[j].end)
1476 new_end = sc->r[j].end;
1479 /* check if we're in range at all */
1480 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1482 new_start = 0;
1483 new_end = 0;
1485 /* and write the new range */
1486 GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(), "Value of 'new_start' too large for int converstion");
1487 GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(), "Value of 'new_end' too large for int converstion");
1488 sc->r[j].start = static_cast<int>(new_start);
1489 sc->r[j].end = static_cast<int>(new_end);
1491 else
1493 if (sc->r[j].use)
1495 double overlap;
1496 double ntot_start_norm, ntot_end_norm;
1497 /* calculate the amount of overlap of the
1498 desired range (ntot_start -- ntot_end) onto
1499 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1501 /* first calculate normalized bounds
1502 (where 0 is the start of the hist range, and 1 the end) */
1503 ntot_start_norm = (ntot_start-ntot_so_far)/static_cast<double>(ntot_add);
1504 ntot_end_norm = (ntot_end-ntot_so_far)/static_cast<double>(ntot_add);
1506 /* now fix the boundaries */
1507 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1508 ntot_end_norm = std::max(0.0, std::min(1.0, ntot_end_norm));
1510 /* and calculate the overlap */
1511 overlap = ntot_end_norm - ntot_start_norm;
1513 if (overlap > 0.95) /* we allow for 5% slack */
1515 sc->r[j].use = TRUE;
1517 else if (overlap < 0.05)
1519 sc->r[j].use = FALSE;
1521 else
1523 return FALSE;
1527 ntot_so_far += ntot_add;
1529 sample_coll_calc_ntot(sc);
1531 return TRUE;
1534 /* calculate minimum and maximum work values in sample collection */
1535 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1536 double *Wmin, double *Wmax)
1538 int i, j;
1540 *Wmin = std::numeric_limits<float>::max();
1541 *Wmax = -std::numeric_limits<float>::max();
1543 for (i = 0; i < sc->nsamples; i++)
1545 samples_t *s = sc->s[i];
1546 sample_range_t *r = &(sc->r[i]);
1547 if (r->use)
1549 if (!s->hist)
1551 for (j = r->start; j < r->end; j++)
1553 *Wmin = std::min(*Wmin, s->du[j]*Wfac);
1554 *Wmax = std::max(*Wmax, s->du[j]*Wfac);
1557 else
1559 int hd = 0; /* determine the histogram direction: */
1560 double dx;
1561 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1563 hd = 1;
1565 dx = s->hist->dx[hd];
1567 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1569 *Wmin = std::min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1570 *Wmax = std::max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1571 /* look for the highest value bin with values */
1572 if (s->hist->bin[hd][j] > 0)
1574 *Wmin = std::min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1575 *Wmax = std::max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1576 break;
1584 /* Initialize a sim_data structure */
1585 static void sim_data_init(sim_data_t *sd)
1587 /* make linked list */
1588 sd->lb = &(sd->lb_head);
1589 sd->lb->next = sd->lb;
1590 sd->lb->prev = sd->lb;
1592 lambda_components_init(&(sd->lc));
1596 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1598 int i;
1599 double sum;
1601 sum = 0;
1603 for (i = 0; i < n; i++)
1605 sum += 1./(1. + std::exp(Wfac*W[i] + sbMmDG));
1608 return sum;
1611 /* calculate the BAR average given a histogram
1613 if type== 0, calculate the best estimate for the average,
1614 if type==-1, calculate the minimum possible value given the histogram
1615 if type== 1, calculate the maximum possible value given the histogram */
1616 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1617 int type)
1619 double sum = 0.;
1620 int i;
1621 int maxbin;
1622 /* normalization factor multiplied with bin width and
1623 number of samples (we normalize through M): */
1624 double normdx = 1.;
1625 int hd = 0; /* determine the histogram direction: */
1626 double dx;
1628 if ( (hist->nhist > 1) && (Wfac < 0) )
1630 hd = 1;
1632 dx = hist->dx[hd];
1633 maxbin = hist->nbin[hd]-1;
1634 if (type == 1)
1636 maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1639 for (i = 0; i < maxbin; i++)
1641 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1642 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1644 sum += pxdx/(1. + std::exp(x + sbMmDG));
1647 return sum;
1650 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1651 double temp, double tol, int type)
1653 double kT, beta, M;
1654 int i;
1655 double Wfac1, Wfac2, Wmin, Wmax;
1656 double DG0, DG1, DG2, dDG1;
1657 double n1, n2; /* numbers of samples as doubles */
1659 kT = BOLTZ*temp;
1660 beta = 1/kT;
1662 /* count the numbers of samples */
1663 n1 = ca->ntot;
1664 n2 = cb->ntot;
1666 M = std::log(n1/n2);
1668 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1669 if (ca->foreign_lambda->dhdl < 0)
1671 /* this is the case when the delta U were calculated directly
1672 (i.e. we're not scaling dhdl) */
1673 Wfac1 = beta;
1674 Wfac2 = beta;
1676 else
1678 /* we're using dhdl, so delta_lambda needs to be a
1679 multiplication factor. */
1680 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1681 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1682 ca->native_lambda);
1683 if (cb->native_lambda->lc->N > 1)
1685 gmx_fatal(FARGS,
1686 "Can't (yet) do multi-component dhdl interpolation");
1689 Wfac1 = beta*delta_lambda;
1690 Wfac2 = -beta*delta_lambda;
1693 if (beta < 1)
1695 /* We print the output both in kT and kJ/mol.
1696 * Here we determine DG in kT, so when beta < 1
1697 * the precision has to be increased.
1699 tol *= beta;
1702 /* Calculate minimum and maximum work to give an initial estimate of
1703 * delta G as their average.
1706 double Wmin1, Wmin2, Wmax1, Wmax2;
1707 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1708 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1710 Wmin = std::min(Wmin1, Wmin2);
1711 Wmax = std::max(Wmax1, Wmax2);
1714 DG0 = Wmin;
1715 DG2 = Wmax;
1717 if (debug)
1719 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1721 /* We approximate by bisection: given our initial estimates
1722 we keep checking whether the halfway point is greater or
1723 smaller than what we get out of the BAR averages.
1725 For the comparison we can use twice the tolerance. */
1726 while (DG2 - DG0 > 2*tol)
1728 DG1 = 0.5*(DG0 + DG2);
1730 /* calculate the BAR averages */
1731 dDG1 = 0.;
1733 for (i = 0; i < ca->nsamples; i++)
1735 samples_t *s = ca->s[i];
1736 sample_range_t *r = &(ca->r[i]);
1737 if (r->use)
1739 if (s->hist)
1741 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1743 else
1745 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1746 Wfac1, (M-DG1));
1750 for (i = 0; i < cb->nsamples; i++)
1752 samples_t *s = cb->s[i];
1753 sample_range_t *r = &(cb->r[i]);
1754 if (r->use)
1756 if (s->hist)
1758 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1760 else
1762 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1763 Wfac2, -(M-DG1));
1768 if (dDG1 < 0)
1770 DG0 = DG1;
1772 else
1774 DG2 = DG1;
1776 if (debug)
1778 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1782 return 0.5*(DG0 + DG2);
1785 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1786 double temp, double dg, double *sa, double *sb)
1788 int i, j;
1789 double W_ab = 0.;
1790 double W_ba = 0.;
1791 double kT, beta;
1792 double Wfac1, Wfac2;
1793 double n1, n2;
1795 kT = BOLTZ*temp;
1796 beta = 1/kT;
1798 /* count the numbers of samples */
1799 n1 = ca->ntot;
1800 n2 = cb->ntot;
1802 /* to ensure the work values are the same as during the delta_G */
1803 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1804 if (ca->foreign_lambda->dhdl < 0)
1806 /* this is the case when the delta U were calculated directly
1807 (i.e. we're not scaling dhdl) */
1808 Wfac1 = beta;
1809 Wfac2 = beta;
1811 else
1813 /* we're using dhdl, so delta_lambda needs to be a
1814 multiplication factor. */
1815 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1816 ca->native_lambda);
1817 Wfac1 = beta*delta_lambda;
1818 Wfac2 = -beta*delta_lambda;
1821 /* first calculate the average work in both directions */
1822 for (i = 0; i < ca->nsamples; i++)
1824 samples_t *s = ca->s[i];
1825 sample_range_t *r = &(ca->r[i]);
1826 if (r->use)
1828 if (!s->hist)
1830 for (j = r->start; j < r->end; j++)
1832 W_ab += Wfac1*s->du[j];
1835 else
1837 /* normalization factor multiplied with bin width and
1838 number of samples (we normalize through M): */
1839 double normdx = 1.;
1840 double dx;
1841 int hd = 0; /* histogram direction */
1842 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1844 hd = 1;
1846 dx = s->hist->dx[hd];
1848 for (j = 0; j < s->hist->nbin[0]; j++)
1850 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1851 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1852 W_ab += pxdx*x;
1857 W_ab /= n1;
1859 for (i = 0; i < cb->nsamples; i++)
1861 samples_t *s = cb->s[i];
1862 sample_range_t *r = &(cb->r[i]);
1863 if (r->use)
1865 if (!s->hist)
1867 for (j = r->start; j < r->end; j++)
1869 W_ba += Wfac1*s->du[j];
1872 else
1874 /* normalization factor multiplied with bin width and
1875 number of samples (we normalize through M): */
1876 double normdx = 1.;
1877 double dx;
1878 int hd = 0; /* histogram direction */
1879 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1881 hd = 1;
1883 dx = s->hist->dx[hd];
1885 for (j = 0; j < s->hist->nbin[0]; j++)
1887 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1888 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1889 W_ba += pxdx*x;
1894 W_ba /= n2;
1896 /* then calculate the relative entropies */
1897 *sa = (W_ab - dg);
1898 *sb = (W_ba + dg);
1901 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1902 double temp, double dg, double *stddev)
1904 int i, j;
1905 double M;
1906 double sigmafact = 0.;
1907 double kT, beta;
1908 double Wfac1, Wfac2;
1909 double n1, n2;
1911 kT = BOLTZ*temp;
1912 beta = 1/kT;
1914 /* count the numbers of samples */
1915 n1 = ca->ntot;
1916 n2 = cb->ntot;
1918 /* to ensure the work values are the same as during the delta_G */
1919 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1920 if (ca->foreign_lambda->dhdl < 0)
1922 /* this is the case when the delta U were calculated directly
1923 (i.e. we're not scaling dhdl) */
1924 Wfac1 = beta;
1925 Wfac2 = beta;
1927 else
1929 /* we're using dhdl, so delta_lambda needs to be a
1930 multiplication factor. */
1931 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1932 ca->native_lambda);
1933 Wfac1 = beta*delta_lambda;
1934 Wfac2 = -beta*delta_lambda;
1937 M = std::log(n1/n2);
1940 /* calculate average in both directions */
1941 for (i = 0; i < ca->nsamples; i++)
1943 samples_t *s = ca->s[i];
1944 sample_range_t *r = &(ca->r[i]);
1945 if (r->use)
1947 if (!s->hist)
1949 for (j = r->start; j < r->end; j++)
1951 sigmafact += 1./(2. + 2.*std::cosh((M + Wfac1*s->du[j] - dg)));
1954 else
1956 /* normalization factor multiplied with bin width and
1957 number of samples (we normalize through M): */
1958 double normdx = 1.;
1959 double dx;
1960 int hd = 0; /* histogram direction */
1961 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1963 hd = 1;
1965 dx = s->hist->dx[hd];
1967 for (j = 0; j < s->hist->nbin[0]; j++)
1969 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1970 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1972 sigmafact += pxdx/(2. + 2.*std::cosh((M + x - dg)));
1977 for (i = 0; i < cb->nsamples; i++)
1979 samples_t *s = cb->s[i];
1980 sample_range_t *r = &(cb->r[i]);
1981 if (r->use)
1983 if (!s->hist)
1985 for (j = r->start; j < r->end; j++)
1987 sigmafact += 1./(2. + 2.*std::cosh((M - Wfac2*s->du[j] - dg)));
1990 else
1992 /* normalization factor multiplied with bin width and
1993 number of samples (we normalize through M): */
1994 double normdx = 1.;
1995 double dx;
1996 int hd = 0; /* histogram direction */
1997 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1999 hd = 1;
2001 dx = s->hist->dx[hd];
2003 for (j = 0; j < s->hist->nbin[0]; j++)
2005 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2006 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2008 sigmafact += pxdx/(2. + 2.*std::cosh((M - x - dg)));
2014 sigmafact /= (n1 + n2);
2017 /* Eq. 10 from
2018 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2019 *stddev = std::sqrt(((1.0/sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2024 static void calc_bar(barres_t *br, double tol,
2025 int npee_min, int npee_max, gmx_bool *bEE,
2026 double *partsum)
2028 int npee, p;
2029 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2030 for calculated quantities */
2031 double temp = br->a->temp;
2032 int i;
2033 double dg_min, dg_max;
2034 gmx_bool have_hist = FALSE;
2036 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2038 br->dg_disc_err = 0.;
2039 br->dg_histrange_err = 0.;
2041 /* check if there are histograms */
2042 for (i = 0; i < br->a->nsamples; i++)
2044 if (br->a->r[i].use && br->a->s[i]->hist)
2046 have_hist = TRUE;
2047 break;
2050 if (!have_hist)
2052 for (i = 0; i < br->b->nsamples; i++)
2054 if (br->b->r[i].use && br->b->s[i]->hist)
2056 have_hist = TRUE;
2057 break;
2062 /* calculate histogram-specific errors */
2063 if (have_hist)
2065 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2066 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2068 if (std::abs(dg_max - dg_min) > GMX_REAL_EPS*10)
2070 /* the histogram range error is the biggest of the differences
2071 between the best estimate and the extremes */
2072 br->dg_histrange_err = std::abs(dg_max - dg_min);
2074 br->dg_disc_err = 0.;
2075 for (i = 0; i < br->a->nsamples; i++)
2077 if (br->a->s[i]->hist)
2079 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2082 for (i = 0; i < br->b->nsamples; i++)
2084 if (br->b->s[i]->hist)
2086 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2090 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2092 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2094 dg_sig2 = 0;
2095 sa_sig2 = 0;
2096 sb_sig2 = 0;
2097 stddev_sig2 = 0;
2099 *bEE = TRUE;
2101 sample_coll_t ca, cb;
2103 /* initialize the samples */
2104 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2105 br->a->temp);
2106 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2107 br->b->temp);
2109 for (npee = npee_min; npee <= npee_max; npee++)
2111 double dgs = 0;
2112 double dgs2 = 0;
2113 double dsa = 0;
2114 double dsb = 0;
2115 double dsa2 = 0;
2116 double dsb2 = 0;
2117 double dstddev = 0;
2118 double dstddev2 = 0;
2121 for (p = 0; p < npee; p++)
2123 double dgp;
2124 double stddevc;
2125 double sac, sbc;
2126 gmx_bool cac, cbc;
2128 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2129 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2131 if (!cac || !cbc)
2133 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2134 *bEE = FALSE;
2135 if (cac)
2137 sample_coll_destroy(&ca);
2139 if (cbc)
2141 sample_coll_destroy(&cb);
2143 return;
2146 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2147 dgs += dgp;
2148 dgs2 += dgp*dgp;
2150 partsum[npee*(npee_max+1)+p] += dgp;
2152 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2153 dsa += sac;
2154 dsa2 += sac*sac;
2155 dsb += sbc;
2156 dsb2 += sbc*sbc;
2157 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2159 dstddev += stddevc;
2160 dstddev2 += stddevc*stddevc;
2162 sample_coll_destroy(&ca);
2163 sample_coll_destroy(&cb);
2165 dgs /= npee;
2166 dgs2 /= npee;
2167 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2169 dsa /= npee;
2170 dsa2 /= npee;
2171 dsb /= npee;
2172 dsb2 /= npee;
2173 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2174 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2176 dstddev /= npee;
2177 dstddev2 /= npee;
2178 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2180 br->dg_err = std::sqrt(dg_sig2/(npee_max - npee_min + 1));
2181 br->sa_err = std::sqrt(sa_sig2/(npee_max - npee_min + 1));
2182 br->sb_err = std::sqrt(sb_sig2/(npee_max - npee_min + 1));
2183 br->dg_stddev_err = std::sqrt(stddev_sig2/(npee_max - npee_min + 1));
2188 static double bar_err(int nbmin, int nbmax, const double *partsum)
2190 int nb, b;
2191 double svar, s, s2, dg;
2193 svar = 0;
2194 for (nb = nbmin; nb <= nbmax; nb++)
2196 s = 0;
2197 s2 = 0;
2198 for (b = 0; b < nb; b++)
2200 dg = partsum[nb*(nbmax+1)+b];
2201 s += dg;
2202 s2 += dg*dg;
2204 s /= nb;
2205 s2 /= nb;
2206 svar += (s2 - s*s)/(nb - 1);
2209 return std::sqrt(svar/(nbmax + 1 - nbmin));
2213 /* Seek the end of an identifier (consecutive non-spaces), followed by
2214 an optional number of spaces or '='-signs. Returns a pointer to the
2215 first non-space value found after that. Returns NULL if the string
2216 ends before that.
2218 static const char *find_value(const char *str)
2220 gmx_bool name_end_found = FALSE;
2222 /* if the string is a NULL pointer, return a NULL pointer. */
2223 if (str == nullptr)
2225 return nullptr;
2227 while (*str != '\0')
2229 /* first find the end of the name */
2230 if (!name_end_found)
2232 if (std::isspace(*str) || (*str == '=') )
2234 name_end_found = TRUE;
2237 else
2239 if (!( std::isspace(*str) || (*str == '=') ))
2241 return str;
2244 str++;
2246 return nullptr;
2250 /* read a vector-notation description of a lambda vector */
2251 static gmx_bool read_lambda_compvec(const char *str,
2252 lambda_vec_t *lv,
2253 const lambda_components_t *lc_in,
2254 lambda_components_t *lc_out,
2255 const char **end,
2256 const char *fn)
2258 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2259 components, or to check them */
2260 gmx_bool start_reached = FALSE; /* whether the start of component names
2261 has been reached */
2262 gmx_bool vector = FALSE; /* whether there are multiple components */
2263 int n = 0; /* current component number */
2264 const char *val_start = nullptr; /* start of the component name, or NULL
2265 if not in a value */
2266 char *strtod_end;
2267 gmx_bool OK = TRUE;
2269 if (end)
2271 *end = str;
2275 if (lc_out && lc_out->N == 0)
2277 initialize_lc = TRUE;
2280 if (lc_in == nullptr)
2282 lc_in = lc_out;
2285 while (true)
2287 if (!start_reached)
2289 if (std::isalnum(*str))
2291 vector = FALSE;
2292 start_reached = TRUE;
2293 val_start = str;
2295 else if (*str == '(')
2297 vector = TRUE;
2298 start_reached = TRUE;
2300 else if (!std::isspace(*str))
2302 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2305 else
2307 if (val_start)
2309 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2311 /* end of value */
2312 if (lv == nullptr)
2314 if (initialize_lc)
2316 lambda_components_add(lc_out, val_start,
2317 (str-val_start));
2319 else
2321 if (!lambda_components_check(lc_out, n, val_start,
2322 (str-val_start)))
2324 return FALSE;
2328 else
2330 /* add a vector component to lv */
2331 lv->val[n] = strtod(val_start, &strtod_end);
2332 if (val_start == strtod_end)
2334 gmx_fatal(FARGS,
2335 "Error reading lambda vector in %s", fn);
2338 /* reset for the next identifier */
2339 val_start = nullptr;
2340 n++;
2341 if (!vector)
2343 return OK;
2347 else if (std::isalnum(*str))
2349 val_start = str;
2351 if (*str == ')')
2353 str++;
2354 if (end)
2356 *end = str;
2358 if (!vector)
2360 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2362 else
2364 GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2365 if (n == lc_in->N)
2367 return OK;
2369 else if (lv == nullptr)
2371 return FALSE;
2373 else
2375 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2376 fn);
2377 return FALSE;
2383 if (*str == '\0')
2385 break;
2387 str++;
2388 if (end)
2390 *end = str;
2393 if (vector)
2395 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2396 return FALSE;
2398 return OK;
2401 /* read and check the component names from a string */
2402 static gmx_bool read_lambda_components(const char *str,
2403 lambda_components_t *lc,
2404 const char **end,
2405 const char *fn)
2407 return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2410 /* read an initialized lambda vector from a string */
2411 static gmx_bool read_lambda_vector(const char *str,
2412 lambda_vec_t *lv,
2413 const char **end,
2414 const char *fn)
2416 return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2421 /* deduce lambda value from legend.
2422 fn = the file name
2423 legend = the legend string
2424 ba = the xvg data
2425 lam = the initialized lambda vector
2426 returns whether to use the data in this set.
2428 static gmx_bool legend2lambda(const char *fn,
2429 const char *legend,
2430 lambda_vec_t *lam)
2432 const char *ptr = nullptr, *ptr2 = nullptr;
2433 gmx_bool ok = FALSE;
2434 gmx_bool bdhdl = FALSE;
2435 const char *tostr = " to ";
2437 if (legend == nullptr)
2439 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2442 /* look for the last 'to': */
2443 ptr2 = legend;
2446 ptr2 = std::strstr(ptr2, tostr);
2447 if (ptr2 != nullptr)
2449 ptr = ptr2;
2450 ptr2++;
2453 while (ptr2 != nullptr && *ptr2 != '\0');
2455 if (ptr)
2457 ptr += std::strlen(tostr)-1; /* and advance past that 'to' */
2459 else
2461 /* look for the = sign */
2462 ptr = std::strrchr(legend, '=');
2463 if (!ptr)
2465 /* otherwise look for the last space */
2466 ptr = std::strrchr(legend, ' ');
2470 if (std::strstr(legend, "dH"))
2472 ok = TRUE;
2473 bdhdl = TRUE;
2475 else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2477 ok = TRUE;
2478 bdhdl = FALSE;
2480 else /*if (std::strstr(legend, "pV"))*/
2482 return FALSE;
2484 if (!ptr)
2486 ok = FALSE;
2489 if (!ok)
2491 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2493 if (!bdhdl)
2495 ptr = find_value(ptr);
2496 if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2498 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2501 else
2503 int dhdl_index;
2504 const char *end;
2506 ptr = std::strrchr(legend, '=');
2507 end = ptr;
2508 if (ptr)
2510 /* there must be a component name */
2511 ptr--;
2512 if (ptr < legend)
2514 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2516 /* now backtrack to the start of the identifier */
2517 while (isspace(*ptr))
2519 end = ptr;
2520 ptr--;
2521 if (ptr < legend)
2523 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2526 while (!std::isspace(*ptr))
2528 ptr--;
2529 if (ptr < legend)
2531 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2534 ptr++;
2535 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2536 if (dhdl_index < 0)
2538 char buf[STRLEN];
2539 std::strncpy(buf, ptr, (end-ptr));
2540 buf[(end-ptr)] = '\0';
2541 gmx_fatal(FARGS,
2542 "Did not find lambda component for '%s' in %s",
2543 buf, fn);
2546 else
2548 if (lam->lc->N > 1)
2550 gmx_fatal(FARGS,
2551 "dhdl without component name with >1 lambda component in %s",
2552 fn);
2554 dhdl_index = 0;
2556 lam->dhdl = dhdl_index;
2558 return TRUE;
2561 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2562 lambda_components_t *lc)
2564 gmx_bool bFound;
2565 const char *ptr;
2566 char *end;
2567 double native_lambda;
2569 bFound = FALSE;
2571 /* first check for a state string */
2572 ptr = std::strstr(subtitle, "state");
2573 if (ptr)
2575 int index = -1;
2576 const char *val_end;
2578 /* the new 4.6 style lambda vectors */
2579 ptr = find_value(ptr);
2580 if (ptr)
2582 index = std::strtol(ptr, &end, 10);
2583 if (ptr == end)
2585 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2586 return FALSE;
2588 ptr = end;
2590 else
2592 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2593 return FALSE;
2595 /* now find the lambda vector component names */
2596 while (*ptr != '(' && !std::isalnum(*ptr))
2598 ptr++;
2599 if (*ptr == '\0')
2601 gmx_fatal(FARGS,
2602 "Incomplete lambda vector component data in %s", fn);
2603 return FALSE;
2606 val_end = ptr;
2607 if (!read_lambda_components(ptr, lc, &val_end, fn))
2609 gmx_fatal(FARGS,
2610 "lambda vector components in %s don't match those previously read",
2611 fn);
2613 ptr = find_value(val_end);
2614 if (!ptr)
2616 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2617 return FALSE;
2619 lambda_vec_init(&(ba->native_lambda), lc);
2620 if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2622 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2624 ba->native_lambda.index = index;
2625 bFound = TRUE;
2627 else
2629 /* compatibility mode: check for lambda in other ways. */
2630 /* plain text lambda string */
2631 ptr = std::strstr(subtitle, "lambda");
2632 if (ptr == nullptr)
2634 /* xmgrace formatted lambda string */
2635 ptr = std::strstr(subtitle, "\\xl\\f{}");
2637 if (ptr == nullptr)
2639 /* xmgr formatted lambda string */
2640 ptr = std::strstr(subtitle, "\\8l\\4");
2642 if (ptr != nullptr)
2644 ptr = std::strstr(ptr, "=");
2646 if (ptr != nullptr)
2648 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2649 /* add the lambda component name as an empty string */
2650 if (lc->N > 0)
2652 if (!lambda_components_check(lc, 0, "", 0))
2654 gmx_fatal(FARGS,
2655 "lambda vector components in %s don't match those previously read",
2656 fn);
2659 else
2661 lambda_components_add(lc, "", 0);
2663 lambda_vec_init(&(ba->native_lambda), lc);
2664 ba->native_lambda.val[0] = native_lambda;
2668 return bFound;
2671 static void read_bar_xvg_lowlevel(const char *fn, const real *temp, xvg_t *ba,
2672 lambda_components_t *lc)
2674 int i;
2675 char *subtitle, **legend, *ptr;
2676 int np;
2677 gmx_bool native_lambda_read = FALSE;
2678 char buf[STRLEN];
2680 xvg_init(ba);
2682 ba->filename = fn;
2684 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2685 if (!ba->y)
2687 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2689 /* Reorder the data */
2690 ba->t = ba->y[0];
2691 for (i = 1; i < ba->nset; i++)
2693 ba->y[i-1] = ba->y[i];
2695 ba->nset--;
2697 snew(ba->np, ba->nset);
2698 for (i = 0; i < ba->nset; i++)
2700 ba->np[i] = np;
2703 ba->temp = -1;
2704 if (subtitle != nullptr)
2706 /* try to extract temperature */
2707 ptr = std::strstr(subtitle, "T =");
2708 if (ptr != nullptr)
2710 ptr += 3;
2711 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2713 if (ba->temp <= 0)
2715 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2716 ba->temp, fn);
2721 if (ba->temp < 0)
2723 if (*temp <= 0)
2725 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2727 ba->temp = *temp;
2730 /* Try to deduce lambda from the subtitle */
2731 if (subtitle)
2733 if (subtitle2lambda(subtitle, ba, fn, lc))
2735 native_lambda_read = TRUE;
2738 snew(ba->lambda, ba->nset);
2739 if (legend == nullptr)
2741 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2742 if (ba->nset == 1)
2744 ba->lambda[0] = ba->native_lambda;
2746 else
2748 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2751 else
2753 for (i = 0; i < ba->nset; )
2755 /* Read lambda from the legend */
2756 lambda_vec_init( &(ba->lambda[i]), lc );
2757 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2758 gmx_bool use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2759 if (use)
2761 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2762 i++;
2764 else
2766 int j;
2767 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2768 for (j = i+1; j < ba->nset; j++)
2770 ba->y[j-1] = ba->y[j];
2771 legend[j-1] = legend[j];
2773 ba->nset--;
2778 if (!native_lambda_read)
2780 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2783 if (legend != nullptr)
2785 for (i = 0; i < ba->nset-1; i++)
2787 sfree(legend[i]);
2789 sfree(legend);
2793 static void read_bar_xvg(const char *fn, real *temp, sim_data_t *sd)
2795 xvg_t *barsim;
2796 samples_t *s;
2797 int i;
2799 snew(barsim, 1);
2801 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2803 if (barsim->nset < 1)
2805 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2808 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2810 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2812 *temp = barsim->temp;
2814 /* now create a series of samples_t */
2815 snew(s, barsim->nset);
2816 for (i = 0; i < barsim->nset; i++)
2818 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2819 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2820 &(barsim->lambda[i])),
2821 fn);
2822 s[i].du = barsim->y[i];
2823 s[i].ndu = barsim->np[i];
2824 s[i].t = barsim->t;
2826 lambda_data_list_insert_sample(sd->lb, s+i);
2829 char buf[STRLEN];
2831 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2832 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2833 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2834 for (i = 0; i < barsim->nset; i++)
2836 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2837 printf(" %s (%d pts)\n", buf, s[i].ndu);
2840 printf("\n\n");
2843 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2844 double start_time, double delta_time,
2845 lambda_vec_t *native_lambda, double temp,
2846 double *last_t, const char *filename)
2848 int i, j;
2849 lambda_vec_t *foreign_lambda;
2850 int type;
2851 samples_t *s; /* convenience pointer */
2852 int startj;
2854 /* check the block types etc. */
2855 if ( (blk->nsub < 3) ||
2856 (blk->sub[0].type != xdr_datatype_int) ||
2857 (blk->sub[1].type != xdr_datatype_double) ||
2859 (blk->sub[2].type != xdr_datatype_float) &&
2860 (blk->sub[2].type != xdr_datatype_double)
2861 ) ||
2862 (blk->sub[0].nr < 1) ||
2863 (blk->sub[1].nr < 1) )
2865 gmx_fatal(FARGS,
2866 "Unexpected/corrupted block data in file %s around time %f.",
2867 filename, start_time);
2870 snew(foreign_lambda, 1);
2871 lambda_vec_init(foreign_lambda, native_lambda->lc);
2872 lambda_vec_copy(foreign_lambda, native_lambda);
2873 type = blk->sub[0].ival[0];
2874 if (type == dhbtDH)
2876 for (i = 0; i < native_lambda->lc->N; i++)
2878 foreign_lambda->val[i] = blk->sub[1].dval[i];
2881 else
2883 if (blk->sub[0].nr > 1)
2885 foreign_lambda->dhdl = blk->sub[0].ival[1];
2887 else
2889 foreign_lambda->dhdl = 0;
2893 if (!*smp)
2895 /* initialize the samples structure if it's empty. */
2896 snew(*smp, 1);
2897 samples_init(*smp, native_lambda, foreign_lambda, temp,
2898 type == dhbtDHDL, filename);
2899 (*smp)->start_time = start_time;
2900 (*smp)->delta_time = delta_time;
2903 /* set convenience pointer */
2904 s = *smp;
2906 /* now double check */
2907 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2909 char buf[STRLEN], buf2[STRLEN];
2910 lambda_vec_print(foreign_lambda, buf, FALSE);
2911 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2912 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2913 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2914 filename, start_time);
2917 /* make room for the data */
2918 if (gmx::index(s->ndu_alloc) < s->ndu + blk->sub[2].nr)
2920 s->ndu_alloc += (s->ndu_alloc < static_cast<size_t>(blk->sub[2].nr)) ?
2921 blk->sub[2].nr*2 : s->ndu_alloc;
2922 srenew(s->du_alloc, s->ndu_alloc);
2923 s->du = s->du_alloc;
2925 startj = s->ndu;
2926 s->ndu += blk->sub[2].nr;
2927 s->ntot += blk->sub[2].nr;
2928 *ndu = blk->sub[2].nr;
2930 /* and copy the data*/
2931 for (j = 0; j < blk->sub[2].nr; j++)
2933 if (blk->sub[2].type == xdr_datatype_float)
2935 s->du[startj+j] = blk->sub[2].fval[j];
2937 else
2939 s->du[startj+j] = blk->sub[2].dval[j];
2942 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2944 *last_t = start_time + blk->sub[2].nr*delta_time;
2948 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2949 double start_time, double delta_time,
2950 lambda_vec_t *native_lambda, double temp,
2951 double *last_t, const char *filename)
2953 int i, j;
2954 samples_t *s;
2955 int nhist;
2956 lambda_vec_t *foreign_lambda;
2957 int type;
2958 int nbins[2];
2960 /* check the block types etc. */
2961 if ( (blk->nsub < 2) ||
2962 (blk->sub[0].type != xdr_datatype_double) ||
2963 (blk->sub[1].type != xdr_datatype_int64) ||
2964 (blk->sub[0].nr < 2) ||
2965 (blk->sub[1].nr < 2) )
2967 gmx_fatal(FARGS,
2968 "Unexpected/corrupted block data in file %s around time %f",
2969 filename, start_time);
2972 nhist = blk->nsub-2;
2973 if (nhist == 0)
2975 return nullptr;
2977 if (nhist > 2)
2979 gmx_fatal(FARGS,
2980 "Unexpected/corrupted block data in file %s around time %f",
2981 filename, start_time);
2984 snew(s, 1);
2985 *nsamples = 1;
2987 snew(foreign_lambda, 1);
2988 lambda_vec_init(foreign_lambda, native_lambda->lc);
2989 lambda_vec_copy(foreign_lambda, native_lambda);
2990 type = static_cast<int>(blk->sub[1].lval[1]);
2991 if (type == dhbtDH)
2993 double old_foreign_lambda;
2995 old_foreign_lambda = blk->sub[0].dval[0];
2996 if (old_foreign_lambda >= 0)
2998 foreign_lambda->val[0] = old_foreign_lambda;
2999 if (foreign_lambda->lc->N > 1)
3001 gmx_fatal(FARGS,
3002 "Single-component lambda in multi-component file %s",
3003 filename);
3006 else
3008 for (i = 0; i < native_lambda->lc->N; i++)
3010 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3014 else
3016 if (foreign_lambda->lc->N > 1)
3018 if (blk->sub[1].nr < 3 + nhist)
3020 gmx_fatal(FARGS,
3021 "Missing derivative coord in multi-component file %s",
3022 filename);
3024 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3026 else
3028 foreign_lambda->dhdl = 0;
3032 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3033 filename);
3034 snew(s->hist, 1);
3036 for (i = 0; i < nhist; i++)
3038 nbins[i] = blk->sub[i+2].nr;
3041 hist_init(s->hist, nhist, nbins);
3043 for (i = 0; i < nhist; i++)
3045 s->hist->x0[i] = blk->sub[1].lval[2+i];
3046 s->hist->dx[i] = blk->sub[0].dval[1];
3047 if (i == 1)
3049 s->hist->dx[i] = -s->hist->dx[i];
3053 s->hist->start_time = start_time;
3054 s->hist->delta_time = delta_time;
3055 s->start_time = start_time;
3056 s->delta_time = delta_time;
3058 for (i = 0; i < nhist; i++)
3060 int64_t sum = 0;
3062 for (j = 0; j < s->hist->nbin[i]; j++)
3064 int binv = static_cast<int>(blk->sub[i+2].ival[j]);
3066 s->hist->bin[i][j] = binv;
3067 sum += binv;
3070 if (i == 0)
3072 s->ntot = sum;
3073 s->hist->sum = sum;
3075 else
3077 if (s->ntot != sum)
3079 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3080 filename);
3085 if (start_time + s->hist->sum*delta_time > *last_t)
3087 *last_t = start_time + s->hist->sum*delta_time;
3089 return s;
3093 static void read_barsim_edr(const char *fn, real *temp, sim_data_t *sd)
3095 int i, j;
3096 ener_file_t fp;
3097 t_enxframe *fr;
3098 int nre;
3099 gmx_enxnm_t *enm = nullptr;
3100 double first_t = -1;
3101 double last_t = -1;
3102 samples_t **samples_rawdh = nullptr; /* contains samples for raw delta_h */
3103 int *nhists = nullptr; /* array to keep count & print at end */
3104 int *npts = nullptr; /* array to keep count & print at end */
3105 lambda_vec_t **lambdas = nullptr; /* array to keep count & print at end */
3106 lambda_vec_t *native_lambda;
3107 int nsamples = 0;
3108 lambda_vec_t start_lambda;
3110 fp = open_enx(fn, "r");
3111 do_enxnms(fp, &nre, &enm);
3112 snew(fr, 1);
3114 snew(native_lambda, 1);
3115 start_lambda.lc = nullptr;
3116 start_lambda.val = nullptr;
3118 while (do_enx(fp, fr))
3120 /* count the data blocks */
3121 int nblocks_raw = 0;
3122 int nblocks_hist = 0;
3123 int nlam = 0;
3124 int k;
3125 /* DHCOLL block information: */
3126 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3127 double rtemp = 0;
3129 /* count the blocks and handle collection information: */
3130 for (i = 0; i < fr->nblock; i++)
3132 if (fr->block[i].id == enxDHHIST)
3134 nblocks_hist++;
3136 if (fr->block[i].id == enxDH)
3138 nblocks_raw++;
3140 if (fr->block[i].id == enxDHCOLL)
3142 nlam++;
3143 if ( (fr->block[i].nsub < 1) ||
3144 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3145 (fr->block[i].sub[0].nr < 5))
3147 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3150 /* read the data from the DHCOLL block */
3151 rtemp = fr->block[i].sub[0].dval[0];
3152 start_time = fr->block[i].sub[0].dval[1];
3153 delta_time = fr->block[i].sub[0].dval[2];
3154 old_start_lambda = fr->block[i].sub[0].dval[3];
3155 delta_lambda = fr->block[i].sub[0].dval[4];
3157 if (delta_lambda > 0)
3159 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3161 if ( ( *temp != rtemp) && (*temp > 0) )
3163 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3165 *temp = rtemp;
3167 if (old_start_lambda >= 0)
3169 if (sd->lc.N > 0)
3171 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3173 gmx_fatal(FARGS,
3174 "lambda vector components in %s don't match those previously read",
3175 fn);
3178 else
3180 lambda_components_add(&(sd->lc), "", 0);
3182 if (!start_lambda.lc)
3184 lambda_vec_init(&start_lambda, &(sd->lc));
3186 start_lambda.val[0] = old_start_lambda;
3188 else
3190 /* read lambda vector */
3191 int n_lambda_vec;
3192 gmx_bool check = (sd->lc.N > 0);
3193 if (fr->block[i].nsub < 2)
3195 gmx_fatal(FARGS,
3196 "No lambda vector, but start_lambda=%f\n",
3197 old_start_lambda);
3199 n_lambda_vec = fr->block[i].sub[1].ival[1];
3200 for (j = 0; j < n_lambda_vec; j++)
3202 const char *name =
3203 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3204 if (check)
3206 /* check the components */
3207 lambda_components_check(&(sd->lc), j, name,
3208 std::strlen(name));
3210 else
3212 lambda_components_add(&(sd->lc), name,
3213 std::strlen(name));
3216 lambda_vec_init(&start_lambda, &(sd->lc));
3217 start_lambda.index = fr->block[i].sub[1].ival[0];
3218 for (j = 0; j < n_lambda_vec; j++)
3220 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3223 if (first_t < 0)
3225 first_t = start_time;
3230 if (nlam != 1)
3232 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3234 if (nblocks_raw > 0 && nblocks_hist > 0)
3236 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3239 if (nsamples == 0)
3241 /* this is the first round; allocate the associated data
3242 structures */
3243 /*native_lambda=start_lambda;*/
3244 lambda_vec_init(native_lambda, &(sd->lc));
3245 lambda_vec_copy(native_lambda, &start_lambda);
3246 nsamples = nblocks_raw+nblocks_hist;
3247 snew(nhists, nsamples);
3248 snew(npts, nsamples);
3249 snew(lambdas, nsamples);
3250 snew(samples_rawdh, nsamples);
3251 for (i = 0; i < nsamples; i++)
3253 nhists[i] = 0;
3254 npts[i] = 0;
3255 lambdas[i] = nullptr;
3256 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3257 ones contain values */
3260 else
3262 // nsamples > 0 means this is NOT the first iteration
3264 /* check the native lambda */
3265 if (!lambda_vec_same(&start_lambda, native_lambda) )
3267 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3268 fn, native_lambda->val[0], start_lambda.val[0], start_time);
3270 /* check the number of samples against the previous number */
3271 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3273 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3274 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3276 /* check whether last iterations's end time matches with
3277 the currrent start time */
3278 if ( (std::abs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3280 /* it didn't. We need to store our samples and reallocate */
3281 for (i = 0; i < nsamples; i++)
3283 if (samples_rawdh[i])
3285 /* insert it into the existing list */
3286 lambda_data_list_insert_sample(sd->lb,
3287 samples_rawdh[i]);
3288 /* and make sure we'll allocate a new one this time
3289 around */
3290 samples_rawdh[i] = nullptr;
3296 /* and read them */
3297 k = 0; /* counter for the lambdas, etc. arrays */
3298 for (i = 0; i < fr->nblock; i++)
3300 if (fr->block[i].id == enxDH)
3302 int type = (fr->block[i].sub[0].ival[0]);
3303 if (type == dhbtDH || type == dhbtDHDL)
3305 int ndu;
3306 read_edr_rawdh_block(&(samples_rawdh[k]),
3307 &ndu,
3308 &(fr->block[i]),
3309 start_time, delta_time,
3310 native_lambda, rtemp,
3311 &last_t, fn);
3312 npts[k] += ndu;
3313 if (samples_rawdh[k])
3315 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3317 k++;
3320 else if (fr->block[i].id == enxDHHIST)
3322 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3323 if (type == dhbtDH || type == dhbtDHDL)
3325 int j;
3326 int nb = 0;
3327 samples_t *s; /* this is where the data will go */
3328 s = read_edr_hist_block(&nb, &(fr->block[i]),
3329 start_time, delta_time,
3330 native_lambda, rtemp,
3331 &last_t, fn);
3332 nhists[k] += nb;
3333 if (nb > 0)
3335 lambdas[k] = s->foreign_lambda;
3337 k++;
3338 /* and insert the new sample immediately */
3339 for (j = 0; j < nb; j++)
3341 lambda_data_list_insert_sample(sd->lb, s+j);
3347 /* Now store all our extant sample collections */
3348 for (i = 0; i < nsamples; i++)
3350 if (samples_rawdh[i])
3352 /* insert it into the existing list */
3353 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3359 char buf[STRLEN];
3360 printf("\n");
3361 lambda_vec_print(native_lambda, buf, FALSE);
3362 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3363 fn, first_t, last_t, buf);
3364 for (i = 0; i < nsamples; i++)
3366 if (lambdas[i])
3368 lambda_vec_print(lambdas[i], buf, TRUE);
3369 if (nhists[i] > 0)
3371 printf(" %s (%d hists)\n", buf, nhists[i]);
3373 else
3375 printf(" %s (%d pts)\n", buf, npts[i]);
3380 printf("\n\n");
3381 sfree(npts);
3382 sfree(nhists);
3383 sfree(lambdas);
3387 int gmx_bar(int argc, char *argv[])
3389 static const char *desc[] = {
3390 "[THISMODULE] calculates free energy difference estimates through ",
3391 "Bennett's acceptance ratio method (BAR). It also automatically",
3392 "adds series of individual free energies obtained with BAR into",
3393 "a combined free energy estimate.[PAR]",
3395 "Every individual BAR free energy difference relies on two ",
3396 "simulations at different states: say state A and state B, as",
3397 "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3398 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3399 "average of the Hamiltonian difference of state B given state A and",
3400 "vice versa.",
3401 "The energy differences to the other state must be calculated",
3402 "explicitly during the simulation. This can be done with",
3403 "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3405 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3406 "Two types of input files are supported:",
3408 " * Files with more than one [IT]y[it]-value. ",
3409 " The files should have columns ",
3410 " with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3411 " The [GRK]lambda[grk] values are inferred ",
3412 " from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3413 " dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3414 " legends of Delta H",
3415 " * Files with only one [IT]y[it]-value. Using the",
3416 " [TT]-extp[tt] option for these files, it is assumed",
3417 " that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3418 " Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3419 " The [GRK]lambda[grk] value of the simulation is inferred from the ",
3420 " subtitle (if present), otherwise from a number in the subdirectory ",
3421 " in the file name.",
3424 "The [GRK]lambda[grk] of the simulation is parsed from ",
3425 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3426 "foreign [GRK]lambda[grk] values from the legend containing the ",
3427 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3428 "the legend line containing 'T ='.[PAR]",
3430 "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3431 "These can contain either lists of energy differences (see the ",
3432 "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3433 "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3434 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3435 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3437 "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3438 "the energy difference can also be extrapolated from the ",
3439 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3440 "option, which assumes that the system's Hamiltonian depends linearly",
3441 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3443 "The free energy estimates are determined using BAR with bisection, ",
3444 "with the precision of the output set with [TT]-prec[tt]. ",
3445 "An error estimate taking into account time correlations ",
3446 "is made by splitting the data into blocks and determining ",
3447 "the free energy differences over those blocks and assuming ",
3448 "the blocks are independent. ",
3449 "The final error estimate is determined from the average variance ",
3450 "over 5 blocks. A range of block numbers for error estimation can ",
3451 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3453 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3454 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3455 "samples. [BB]Note[bb] that when aggregating energy ",
3456 "differences/derivatives with different sampling intervals, this is ",
3457 "almost certainly not correct. Usually subsequent energies are ",
3458 "correlated and different time intervals mean different degrees ",
3459 "of correlation between samples.[PAR]",
3461 "The results are split in two parts: the last part contains the final ",
3462 "results in kJ/mol, together with the error estimate for each part ",
3463 "and the total. The first part contains detailed free energy ",
3464 "difference estimates and phase space overlap measures in units of ",
3465 "kT (together with their computed error estimate). The printed ",
3466 "values are:",
3468 " * lam_A: the [GRK]lambda[grk] values for point A.",
3469 " * lam_B: the [GRK]lambda[grk] values for point B.",
3470 " * DG: the free energy estimate.",
3471 " * s_A: an estimate of the relative entropy of B in A.",
3472 " * s_B: an estimate of the relative entropy of A in B.",
3473 " * stdev: an estimate expected per-sample standard deviation.",
3476 "The relative entropy of both states in each other's ensemble can be ",
3477 "interpreted as a measure of phase space overlap: ",
3478 "the relative entropy s_A of the work samples of lambda_B in the ",
3479 "ensemble of lambda_A (and vice versa for s_B), is a ",
3480 "measure of the 'distance' between Boltzmann distributions of ",
3481 "the two states, that goes to zero for identical distributions. See ",
3482 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3483 "[PAR]",
3484 "The estimate of the expected per-sample standard deviation, as given ",
3485 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3486 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3487 "of the actual statistical error, because it assumes independent samples).[PAR]",
3489 "To get a visual estimate of the phase space overlap, use the ",
3490 "[TT]-oh[tt] option to write series of histograms, together with the ",
3491 "[TT]-nbin[tt] option.[PAR]"
3493 static real begin = 0, end = -1, temp = -1;
3494 int nd = 2, nbmin = 5, nbmax = 5;
3495 int nbin = 100;
3496 gmx_bool use_dhdl = FALSE;
3497 t_pargs pa[] = {
3498 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3499 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3500 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3501 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3502 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3503 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3504 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3505 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3508 t_filenm fnm[] = {
3509 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3510 { efEDR, "-g", "ener", ffOPTRDMULT },
3511 { efXVG, "-o", "bar", ffOPTWR },
3512 { efXVG, "-oi", "barint", ffOPTWR },
3513 { efXVG, "-oh", "histogram", ffOPTWR }
3515 #define NFILE asize(fnm)
3517 int f;
3518 int nf = 0; /* file counter */
3519 int nfile_tot; /* total number of input files */
3520 sim_data_t sim_data; /* the simulation data */
3521 barres_t *results; /* the results */
3522 int nresults; /* number of results in results array */
3524 double *partsum;
3525 double prec, dg_tot;
3526 FILE *fpb, *fpi;
3527 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3528 char buf[STRLEN], buf2[STRLEN];
3529 char ktformat[STRLEN], sktformat[STRLEN];
3530 char kteformat[STRLEN], skteformat[STRLEN];
3531 gmx_output_env_t *oenv;
3532 double kT;
3533 gmx_bool result_OK = TRUE, bEE = TRUE;
3535 gmx_bool disc_err = FALSE;
3536 double sum_disc_err = 0.; /* discretization error */
3537 gmx_bool histrange_err = FALSE;
3538 double sum_histrange_err = 0.; /* histogram range error */
3539 double stat_err = 0.; /* statistical error */
3541 if (!parse_common_args(&argc, argv,
3542 PCA_CAN_VIEW,
3543 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3545 return 0;
3548 gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3549 gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3551 sim_data_init(&sim_data);
3552 #if 0
3553 /* make linked list */
3554 lb = &lambda_head;
3555 lambda_data_init(lb, 0, 0);
3556 lb->next = lb;
3557 lb->prev = lb;
3558 #endif
3561 nfile_tot = xvgFiles.size() + edrFiles.size();
3563 if (nfile_tot == 0)
3565 gmx_fatal(FARGS, "No input files!");
3568 if (nd < 0)
3570 gmx_fatal(FARGS, "Can not have negative number of digits");
3572 prec = std::pow(10.0, static_cast<double>(-nd));
3574 snew(partsum, (nbmax+1)*(nbmax+1));
3575 nf = 0;
3577 /* read in all files. First xvg files */
3578 for (const std::string &filenm : xvgFiles)
3580 read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3581 nf++;
3583 /* then .edr files */
3584 for (const std::string &filenm : edrFiles)
3586 read_barsim_edr(filenm.c_str(), &temp, &sim_data);;
3587 nf++;
3590 /* fix the times to allow for equilibration */
3591 sim_data_impose_times(&sim_data, begin, end);
3593 if (opt2bSet("-oh", NFILE, fnm))
3595 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3598 /* assemble the output structures from the lambdas */
3599 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3601 sum_disc_err = barres_list_max_disc_err(results, nresults);
3603 if (nresults == 0)
3605 printf("\nNo results to calculate.\n");
3606 return 0;
3609 if (sum_disc_err > prec)
3611 prec = sum_disc_err;
3612 nd = static_cast<int>(std::ceil(-std::log10(prec)));
3613 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3617 /*sprintf(lamformat,"%%6.3f");*/
3618 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3619 /* the format strings of the results in kT */
3620 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3621 sprintf( sktformat, "%%%ds", 6+nd);
3622 /* the format strings of the errors in kT */
3623 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3624 sprintf( skteformat, "%%%ds", 4+nd);
3625 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3626 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3630 fpb = nullptr;
3631 if (opt2bSet("-o", NFILE, fnm))
3633 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3634 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3635 "\\lambda", buf, exvggtXYDY, oenv);
3638 fpi = nullptr;
3639 if (opt2bSet("-oi", NFILE, fnm))
3641 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3642 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3643 "\\lambda", buf, oenv);
3648 if (nbmin > nbmax)
3650 nbmin = nbmax;
3653 /* first calculate results */
3654 bEE = TRUE;
3655 disc_err = FALSE;
3656 for (f = 0; f < nresults; f++)
3658 /* Determine the free energy difference with a factor of 10
3659 * more accuracy than requested for printing.
3661 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3662 &bEE, partsum);
3664 if (results[f].dg_disc_err > prec/10.)
3666 disc_err = TRUE;
3668 if (results[f].dg_histrange_err > prec/10.)
3670 histrange_err = TRUE;
3674 /* print results in kT */
3675 kT = BOLTZ*temp;
3677 printf("\nTemperature: %g K\n", temp);
3679 printf("\nDetailed results in kT (see help for explanation):\n\n");
3680 printf("%6s ", " lam_A");
3681 printf("%6s ", " lam_B");
3682 printf(sktformat, "DG ");
3683 if (bEE)
3685 printf(skteformat, "+/- ");
3687 if (disc_err)
3689 printf(skteformat, "disc ");
3691 if (histrange_err)
3693 printf(skteformat, "range ");
3695 printf(sktformat, "s_A ");
3696 if (bEE)
3698 printf(skteformat, "+/- " );
3700 printf(sktformat, "s_B ");
3701 if (bEE)
3703 printf(skteformat, "+/- " );
3705 printf(sktformat, "stdev ");
3706 if (bEE)
3708 printf(skteformat, "+/- ");
3710 printf("\n");
3711 for (f = 0; f < nresults; f++)
3713 lambda_vec_print_short(results[f].a->native_lambda, buf);
3714 printf("%s ", buf);
3715 lambda_vec_print_short(results[f].b->native_lambda, buf);
3716 printf("%s ", buf);
3717 printf(ktformat, results[f].dg);
3718 printf(" ");
3719 if (bEE)
3721 printf(kteformat, results[f].dg_err);
3722 printf(" ");
3724 if (disc_err)
3726 printf(kteformat, results[f].dg_disc_err);
3727 printf(" ");
3729 if (histrange_err)
3731 printf(kteformat, results[f].dg_histrange_err);
3732 printf(" ");
3734 printf(ktformat, results[f].sa);
3735 printf(" ");
3736 if (bEE)
3738 printf(kteformat, results[f].sa_err);
3739 printf(" ");
3741 printf(ktformat, results[f].sb);
3742 printf(" ");
3743 if (bEE)
3745 printf(kteformat, results[f].sb_err);
3746 printf(" ");
3748 printf(ktformat, results[f].dg_stddev);
3749 printf(" ");
3750 if (bEE)
3752 printf(kteformat, results[f].dg_stddev_err);
3754 printf("\n");
3756 /* Check for negative relative entropy with a 95% certainty. */
3757 if (results[f].sa < -2*results[f].sa_err ||
3758 results[f].sb < -2*results[f].sb_err)
3760 result_OK = FALSE;
3764 if (!result_OK)
3766 printf("\nWARNING: Some of these results violate the Second Law of "
3767 "Thermodynamics: \n"
3768 " This is can be the result of severe undersampling, or "
3769 "(more likely)\n"
3770 " there is something wrong with the simulations.\n");
3774 /* final results in kJ/mol */
3775 printf("\n\nFinal results in kJ/mol:\n\n");
3776 dg_tot = 0;
3777 for (f = 0; f < nresults; f++)
3780 if (fpi != nullptr)
3782 lambda_vec_print_short(results[f].a->native_lambda, buf);
3783 fprintf(fpi, xvg2format, buf, dg_tot);
3787 if (fpb != nullptr)
3789 lambda_vec_print_intermediate(results[f].a->native_lambda,
3790 results[f].b->native_lambda,
3791 buf);
3793 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3796 printf("point ");
3797 lambda_vec_print_short(results[f].a->native_lambda, buf);
3798 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3799 printf("%s - %s", buf, buf2);
3800 printf(", DG ");
3802 printf(dgformat, results[f].dg*kT);
3803 if (bEE)
3805 printf(" +/- ");
3806 printf(dgformat, results[f].dg_err*kT);
3808 if (histrange_err)
3810 printf(" (max. range err. = ");
3811 printf(dgformat, results[f].dg_histrange_err*kT);
3812 printf(")");
3813 sum_histrange_err += results[f].dg_histrange_err*kT;
3816 printf("\n");
3817 dg_tot += results[f].dg;
3819 printf("\n");
3820 printf("total ");
3821 lambda_vec_print_short(results[0].a->native_lambda, buf);
3822 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3823 printf("%s - %s", buf, buf2);
3824 printf(", DG ");
3826 printf(dgformat, dg_tot*kT);
3827 if (bEE)
3829 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3830 printf(" +/- ");
3831 printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3833 printf("\n");
3834 if (disc_err)
3836 printf("\nmaximum discretization error = ");
3837 printf(dgformat, sum_disc_err);
3838 if (bEE && stat_err < sum_disc_err)
3840 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3843 if (histrange_err)
3845 printf("\nmaximum histogram range error = ");
3846 printf(dgformat, sum_histrange_err);
3847 if (bEE && stat_err < sum_histrange_err)
3849 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3853 printf("\n");
3856 if (fpi != nullptr)
3858 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3859 fprintf(fpi, xvg2format, buf, dg_tot);
3860 xvgrclose(fpi);
3862 if (fpb != nullptr)
3864 xvgrclose(fpb);
3867 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3868 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3870 return 0;