src/gromacs/gmxana/gmx_bar.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include <cctype>
  38 #include <cmath>
  39 #include <cstdlib>
  40 #include <cstring>
  41
  42 #include <algorithm>
  43 #include <limits>
  44
  45 #include "gromacs/commandline/pargs.h"
  46 #include "gromacs/commandline/viewit.h"
  47 #include "gromacs/fileio/enxio.h"
  48 #include "gromacs/fileio/xvgr.h"
  49 #include "gromacs/gmxana/gmx_ana.h"
  50 #include "gromacs/math/units.h"
  51 #include "gromacs/math/utilities.h"
  52 #include "gromacs/mdlib/energyoutput.h"
  53 #include "gromacs/mdtypes/md_enums.h"
  54 #include "gromacs/trajectory/energyframe.h"
  55 #include "gromacs/utility/arraysize.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/dir_separator.h"
  58 #include "gromacs/utility/fatalerror.h"
  59 #include "gromacs/utility/gmxassert.h"
  60 #include "gromacs/utility/smalloc.h"
  61 #include "gromacs/utility/snprintf.h"
  62
  63
  64 /* Structure for the names of lambda vector components */
  65 typedef struct lambda_components_t
  66 {
  67     char **names;  /* Array of strings with names for the lambda vector
  68                       components */
  69     int    N;      /* The number of components */
  70     int    Nalloc; /* The number of allocated components */
  71 } lambda_components_t;
  72
  73 /* Structure for a lambda vector or a dhdl derivative direction */
  74 typedef struct lambda_vec_t
  75 {
  76     double                    *val;   /* The lambda vector component values. Only valid if
  77                                          dhdl == -1 */
  78     int                        dhdl;  /* The coordinate index for the derivative described by this
  79                                          structure, or -1 */
  80     const lambda_components_t *lc;    /* the associated lambda_components
  81                                          structure */
  82     int                        index; /* The state number (init-lambda-state) of this lambda
  83                                          vector, if known. If not, it is set to -1 */
  84 } lambda_vec_t;
  85
  86 /* the dhdl.xvg data from a simulation */
  87 typedef struct xvg_t
  88 {
  89     const char   *filename;
  90     int           ftp;           /* file type */
  91     int           nset;          /* number of lambdas, including dhdl */
  92     int          *np;            /* number of data points (du or hists) per lambda */
  93     int           np_alloc;      /* number of points (du or hists) allocated */
  94     double        temp;          /* temperature */
  95     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  96     double       *t;             /* the times (of second index for y) */
  97     double      **y;             /* the dU values. y[0] holds the derivative, while
  98                                     further ones contain the energy differences between
  99                                     the native lambda and the 'foreign' lambdas. */
 100     lambda_vec_t  native_lambda; /* the native lambda */
 101
 102     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 103 } xvg_t;
 104
 105
 106 typedef struct hist_t
 107 {
 108     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 109     double          dx[2];                  /* the histogram spacing. The reverse
 110                                                dx is the negative of the forward dx.*/
 111     int64_t         x0[2];                  /* the (forward + reverse) histogram start
 112                                                        point(s) as int */
 113
 114     int             nbin[2];                /* the (forward+reverse) number of bins */
 115     int64_t         sum;                    /* the total number of counts. Must be
 116                                                        the same for forward + reverse.  */
 117     int             nhist;                  /* number of hist datas (forward or reverse) */
 118
 119     double          start_time, delta_time; /* start time, end time of histogram */
 120 } hist_t;
 121
 122
 123 /* an aggregate of samples for partial free energy calculation */
 124 typedef struct samples_t
 125 {
 126     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 127     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 128     double        temp;           /* the temperature */
 129     gmx_bool      derivative;     /* whether this sample is a derivative */
 130
 131     /* The samples come either as either delta U lists: */
 132     int     ndu;                    /* the number of delta U samples */
 133     double *du;                     /* the delta u's */
 134     double *t;                      /* the times associated with those samples, or: */
 135     double  start_time, delta_time; /*start time and delta time for linear time*/
 136
 137     /* or as histograms: */
 138     hist_t *hist; /* a histogram */
 139
 140     /* allocation data: (not NULL for data 'owned' by this struct) */
 141     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 142     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 143     hist_t         *hist_alloc;          /* allocated hist */
 144
 145     int64_t         ntot;                /* total number of samples */
 146     const char     *filename;            /* the file name this sample comes from */
 147 } samples_t;
 148
 149 /* a sample range (start to end for du-style data, or boolean
 150     for both du-style data and histograms */
 151 typedef struct sample_range_t
 152 {
 153     int        start, end; /* start and end index for du style data */
 154     gmx_bool   use;        /* whether to use this sample */
 155
 156     samples_t *s;          /* the samples this range belongs to */
 157 } sample_range_t;
 158
 159
 160 /* a collection of samples for a partial free energy calculation
 161     (i.e. the collection of samples from one native lambda to one
 162     foreign lambda) */
 163 typedef struct sample_coll_t
 164 {
 165     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 166                                           in the histogram */
 167     lambda_vec_t   *foreign_lambda;    /* collection */
 168     double          temp;              /* the temperature */
 169
 170     int             nsamples;          /* the number of samples */
 171     samples_t     **s;                 /* the samples themselves */
 172     sample_range_t *r;                 /* the sample ranges */
 173     int             nsamples_alloc;    /* number of allocated samples */
 174
 175     int64_t         ntot;              /* total number of samples in the ranges of
 176                                                   this collection */
 177
 178     struct sample_coll_t *next, *prev; /* next and previous in the list */
 179 } sample_coll_t;
 180
 181 /* all the samples associated with a lambda point */
 182 typedef struct lambda_data_t
 183 {
 184     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 185     double                temp;        /* temperature */
 186
 187     sample_coll_t        *sc;          /* the samples */
 188
 189     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 190
 191     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 192 } lambda_data_t;
 193
 194 /* Top-level data structure of simulation data */
 195 typedef struct sim_data_t
 196 {
 197     lambda_data_t      *lb;      /* a lambda data linked list */
 198     lambda_data_t       lb_head; /* The head element of the linked list */
 199
 200     lambda_components_t lc;      /* the allowed components of the lambda
 201                                     vectors */
 202 } sim_data_t;
 203
 204 /* Top-level data structure with calculated values. */
 205 typedef struct {
 206     sample_coll_t *a, *b;            /* the simulation data */
 207
 208     double         dg;               /* the free energy difference */
 209     double         dg_err;           /* the free energy difference */
 210
 211     double         dg_disc_err;      /* discretization error */
 212     double         dg_histrange_err; /* histogram range error */
 213
 214     double         sa;               /* relative entropy of b in state a */
 215     double         sa_err;           /* error in sa */
 216     double         sb;               /* relative entropy of a in state b */
 217     double         sb_err;           /* error in sb */
 218
 219     double         dg_stddev;        /* expected dg stddev per sample */
 220     double         dg_stddev_err;    /* error in dg_stddev */
 221 } barres_t;
 222
 223
 224 /* Initialize a lambda_components structure */
 225 static void lambda_components_init(lambda_components_t *lc)
 226 {
 227     lc->N      = 0;
 228     lc->Nalloc = 2;
 229     snew(lc->names, lc->Nalloc);
 230 }
 231
 232 /* Add a component to a lambda_components structure */
 233 static void lambda_components_add(lambda_components_t *lc,
 234                                   const char *name, size_t name_length)
 235 {
 236     while (lc->N + 1 > lc->Nalloc)
 237     {
 238         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 239         srenew(lc->names, lc->Nalloc);
 240     }
 241     snew(lc->names[lc->N], name_length+1);
 242     std::strncpy(lc->names[lc->N], name, name_length);
 243     lc->N++;
 244 }
 245
 246 /* check whether a component with index 'index' matches the given name, or
 247    is also NULL. Returns TRUE if this is the case.
 248    the string name does not need to end */
 249 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 250                                         int                        index,
 251                                         const char                *name,
 252                                         size_t                     name_length)
 253 {
 254     size_t len;
 255     if (!lc || index >= lc->N)
 256     {
 257         return FALSE;
 258     }
 259     if (name == nullptr && lc->names[index] == nullptr)
 260     {
 261         return TRUE;
 262     }
 263     if ((name == nullptr) != (lc->names[index] == nullptr))
 264     {
 265         return FALSE;
 266     }
 267     len = std::strlen(lc->names[index]);
 268     if (len != name_length)
 269     {
 270         return FALSE;
 271     }
 272     return std::strncmp(lc->names[index], name, name_length) == 0;
 273 }
 274
 275 /* Find the index of a given lambda component name, or -1 if not found */
 276 static int lambda_components_find(const lambda_components_t *lc,
 277                                   const char                *name,
 278                                   size_t                     name_length)
 279 {
 280     int i;
 281
 282     for (i = 0; i < lc->N; i++)
 283     {
 284         if (std::strncmp(lc->names[i], name, name_length) == 0)
 285         {
 286             return i;
 287         }
 288     }
 289     return -1;
 290 }
 291
 292
 293
 294 /* initialize a lambda vector */
 295 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 296 {
 297     snew(lv->val, lc->N);
 298     lv->index = -1;
 299     lv->dhdl  = -1;
 300     lv->lc    = lc;
 301 }
 302
 303 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 304 {
 305     int i;
 306
 307     lambda_vec_init(lv, orig->lc);
 308     lv->dhdl  = orig->dhdl;
 309     lv->index = orig->index;
 310     for (i = 0; i < lv->lc->N; i++)
 311     {
 312         lv->val[i] = orig->val[i];
 313     }
 314 }
 315
 316 /* write a lambda vec to a preallocated string */
 317 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 318 {
 319     int    i;
 320
 321     str[0] = 0; /* reset the string */
 322     if (lv->dhdl < 0)
 323     {
 324         if (named)
 325         {
 326             str += sprintf(str, "delta H to ");
 327         }
 328         if (lv->lc->N > 1)
 329         {
 330             str += sprintf(str, "(");
 331         }
 332         for (i = 0; i < lv->lc->N; i++)
 333         {
 334             str += sprintf(str, "%g", lv->val[i]);
 335             if (i < lv->lc->N-1)
 336             {
 337                 str += sprintf(str, ", ");
 338             }
 339         }
 340         if (lv->lc->N > 1)
 341         {
 342             sprintf(str, ")");
 343         }
 344     }
 345     else
 346     {
 347         /* this lambda vector describes a derivative */
 348         str += sprintf(str, "dH/dl");
 349         if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
 350         {
 351             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 352         }
 353     }
 354 }
 355
 356 /* write a shortened version of the lambda vec to a preallocated string */
 357 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 358 {
 359     if (lv->index >= 0)
 360     {
 361         sprintf(str, "%6d", lv->index);
 362     }
 363     else
 364     {
 365         if (lv->dhdl < 0)
 366         {
 367             sprintf(str, "%6.3f", lv->val[0]);
 368         }
 369         else
 370         {
 371             sprintf(str, "dH/dl[%d]", lv->dhdl);
 372         }
 373     }
 374 }
 375
 376 /* write an intermediate version of two lambda vecs to a preallocated string */
 377 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 378                                           const lambda_vec_t *b, char *str)
 379 {
 380     str[0] = 0;
 381     if ( (a->index >= 0) && (b->index >= 0) )
 382     {
 383         sprintf(str, "%6.3f", (a->index+b->index)/2.0);
 384     }
 385     else
 386     {
 387         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 388         {
 389             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.0);
 390         }
 391     }
 392 }
 393
 394
 395 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 396    a and b must describe non-derivative lambda points */
 397 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 398 {
 399     int    i;
 400     double ret = 0.;
 401
 402     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 403     {
 404         gmx_fatal(FARGS,
 405                   "Trying to calculate the difference between derivatives instead of lambda points");
 406     }
 407     if (a->lc != b->lc)
 408     {
 409         gmx_fatal(FARGS,
 410                   "Trying to calculate the difference lambdas with differing basis set");
 411     }
 412     for (i = 0; i < a->lc->N; i++)
 413     {
 414         double df = a->val[i] - b->val[i];
 415         ret += df*df;
 416     }
 417     return std::sqrt(ret);
 418 }
 419
 420
 421 /* check whether two lambda vectors are the same */
 422 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 423 {
 424     int i;
 425
 426     if (a->lc != b->lc)
 427     {
 428         return FALSE;
 429     }
 430     if (a->dhdl < 0)
 431     {
 432         for (i = 0; i < a->lc->N; i++)
 433         {
 434             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 435             {
 436                 return FALSE;
 437             }
 438         }
 439         return TRUE;
 440     }
 441     else
 442     {
 443         /* they're derivatives, so we check whether the indices match */
 444         return (a->dhdl == b->dhdl);
 445     }
 446 }
 447
 448 /* Compare the sort order of two foreign lambda vectors
 449
 450     returns 1 if a is 'bigger' than b,
 451     returns 0 if they're the same,
 452     returns -1 if a is 'smaller' than b.*/
 453 static int lambda_vec_cmp_foreign(const lambda_vec_t *a,
 454                                   const lambda_vec_t *b)
 455 {
 456     int      i;
 457     double   norm_a    = 0, norm_b = 0;
 458     gmx_bool different = FALSE;
 459
 460     if (a->lc != b->lc)
 461     {
 462         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 463     }
 464     /* if either one has an index we sort based on that */
 465     if ((a->index >= 0) || (b->index >= 0))
 466     {
 467         if (a->index == b->index)
 468         {
 469             return 0;
 470         }
 471         return (a->index > b->index) ? 1 : -1;
 472     }
 473     if (a->dhdl >= 0 || b->dhdl >= 0)
 474     {
 475         /* lambda vectors that are derivatives always sort higher than those
 476            without derivatives */
 477         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 478         {
 479             return (a->dhdl >= 0) ? 1 : -1;
 480         }
 481         return 0;
 482     }
 483
 484     /* neither has an index, so we can only sort on the lambda components,
 485        which is only valid if there is one component */
 486     for (i = 0; i < a->lc->N; i++)
 487     {
 488         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 489         {
 490             different = TRUE;
 491         }
 492         norm_a += a->val[i]*a->val[i];
 493         norm_b += b->val[i]*b->val[i];
 494     }
 495     if (!different)
 496     {
 497         return 0;
 498     }
 499     return (norm_a > norm_b) ? 1 : -1;
 500 }
 501
 502 /* Compare the sort order of two native lambda vectors
 503
 504     returns 1 if a is 'bigger' than b,
 505     returns 0 if they're the same,
 506     returns -1 if a is 'smaller' than b.*/
 507 static int lambda_vec_cmp_native(const lambda_vec_t *a,
 508                                  const lambda_vec_t *b)
 509 {
 510     if (a->lc != b->lc)
 511     {
 512         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 513     }
 514     /* if either one has an index we sort based on that */
 515     if ((a->index >= 0) || (b->index >= 0))
 516     {
 517         if (a->index == b->index)
 518         {
 519             return 0;
 520         }
 521         return (a->index > b->index) ? 1 : -1;
 522     }
 523     /* neither has an index, so we can only sort on the lambda components,
 524        which is only valid if there is one component */
 525     if (a->lc->N > 1)
 526     {
 527         gmx_fatal(FARGS,
 528                   "Can't compare lambdas with no index and > 1 component");
 529     }
 530     if (a->dhdl >= 0 || b->dhdl >= 0)
 531     {
 532         gmx_fatal(FARGS,
 533                   "Can't compare native lambdas that are derivatives");
 534     }
 535     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 536     {
 537         return 0;
 538     }
 539     return a->val[0] > b->val[0] ? 1 : -1;
 540 }
 541
 542
 543
 544
 545 static void hist_init(hist_t *h, int nhist, int *nbin)
 546 {
 547     int i;
 548     if (nhist > 2)
 549     {
 550         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 551     }
 552     for (i = 0; i < nhist; i++)
 553     {
 554         snew(h->bin[i], nbin[i]);
 555         h->x0[i]      = 0;
 556         h->nbin[i]    = nbin[i];
 557         h->start_time = h->delta_time = 0;
 558         h->dx[i]      = 0;
 559     }
 560     h->sum   = 0;
 561     h->nhist = nhist;
 562 }
 563
 564 static void xvg_init(xvg_t *ba)
 565 {
 566     ba->filename = nullptr;
 567     ba->nset     = 0;
 568     ba->np_alloc = 0;
 569     ba->np       = nullptr;
 570     ba->y        = nullptr;
 571 }
 572
 573 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 574                          lambda_vec_t *foreign_lambda, double temp,
 575                          gmx_bool derivative, const char *filename)
 576 {
 577     s->native_lambda  = native_lambda;
 578     s->foreign_lambda = foreign_lambda;
 579     s->temp           = temp;
 580     s->derivative     = derivative;
 581
 582     s->ndu        = 0;
 583     s->du         = nullptr;
 584     s->t          = nullptr;
 585     s->start_time = s->delta_time = 0;
 586     s->hist       = nullptr;
 587     s->du_alloc   = nullptr;
 588     s->t_alloc    = nullptr;
 589     s->hist_alloc = nullptr;
 590     s->ndu_alloc  = 0;
 591     s->nt_alloc   = 0;
 592
 593     s->ntot     = 0;
 594     s->filename = filename;
 595 }
 596
 597 static void sample_range_init(sample_range_t *r, samples_t *s)
 598 {
 599     r->start = 0;
 600     r->end   = s->ndu;
 601     r->use   = TRUE;
 602     r->s     = nullptr;
 603 }
 604
 605 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 606                              lambda_vec_t *foreign_lambda, double temp)
 607 {
 608     sc->native_lambda  = native_lambda;
 609     sc->foreign_lambda = foreign_lambda;
 610     sc->temp           = temp;
 611
 612     sc->nsamples       = 0;
 613     sc->s              = nullptr;
 614     sc->r              = nullptr;
 615     sc->nsamples_alloc = 0;
 616
 617     sc->ntot = 0;
 618     sc->next = sc->prev = nullptr;
 619 }
 620
 621 static void sample_coll_destroy(sample_coll_t *sc)
 622 {
 623     /* don't free the samples themselves */
 624     sfree(sc->r);
 625     sfree(sc->s);
 626 }
 627
 628
 629 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 630                              double temp)
 631 {
 632     l->lambda = native_lambda;
 633     l->temp   = temp;
 634
 635     l->next = nullptr;
 636     l->prev = nullptr;
 637
 638     l->sc = &(l->sc_head);
 639
 640     sample_coll_init(l->sc, native_lambda, nullptr, 0.);
 641     l->sc->next = l->sc;
 642     l->sc->prev = l->sc;
 643 }
 644
 645 static void barres_init(barres_t *br)
 646 {
 647     br->dg            = 0;
 648     br->dg_err        = 0;
 649     br->sa            = 0;
 650     br->sa_err        = 0;
 651     br->sb            = 0;
 652     br->sb_err        = 0;
 653     br->dg_stddev     = 0;
 654     br->dg_stddev_err = 0;
 655
 656     br->a = nullptr;
 657     br->b = nullptr;
 658 }
 659
 660
 661 /* calculate the total number of samples in a sample collection */
 662 static void sample_coll_calc_ntot(sample_coll_t *sc)
 663 {
 664     int i;
 665
 666     sc->ntot = 0;
 667     for (i = 0; i < sc->nsamples; i++)
 668     {
 669         if (sc->r[i].use)
 670         {
 671             if (sc->s[i]->hist)
 672             {
 673                 sc->ntot += sc->s[i]->ntot;
 674             }
 675             else
 676             {
 677                 sc->ntot += sc->r[i].end - sc->r[i].start;
 678             }
 679         }
 680     }
 681 }
 682
 683
 684 /* find the barsamples_t associated with a lambda that corresponds to
 685    a specific foreign lambda */
 686 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 687                                                    lambda_vec_t  *foreign_lambda)
 688 {
 689     sample_coll_t *sc = l->sc->next;
 690
 691     while (sc != l->sc)
 692     {
 693         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 694         {
 695             return sc;
 696         }
 697         sc = sc->next;
 698     }
 699
 700     return nullptr;
 701 }
 702
 703 /* insert li into an ordered list of lambda_colls */
 704 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 705 {
 706     sample_coll_t *scn = l->sc->next;
 707     while ( (scn != l->sc) )
 708     {
 709         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 710         {
 711             break;
 712         }
 713         scn = scn->next;
 714     }
 715     /* now insert it before the found scn */
 716     sc->next        = scn;
 717     sc->prev        = scn->prev;
 718     scn->prev->next = sc;
 719     scn->prev       = sc;
 720 }
 721
 722 /* insert li into an ordered list of lambdas */
 723 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 724 {
 725     lambda_data_t *lc = head->next;
 726     while (lc != head)
 727     {
 728         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 729         {
 730             break;
 731         }
 732         lc = lc->next;
 733     }
 734     /* now insert ourselves before the found lc */
 735     li->next       = lc;
 736     li->prev       = lc->prev;
 737     lc->prev->next = li;
 738     lc->prev       = li;
 739 }
 740
 741 /* insert a sample and a sample_range into a sample_coll. The
 742     samples are stored as a pointer, the range is copied. */
 743 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 744                                       sample_range_t *r)
 745 {
 746     /* first check if it belongs here */
 747     GMX_ASSERT(sc->next->s, "Next not properly initialized!");
 748     if (sc->temp != s->temp)
 749     {
 750         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 751                   s->filename, sc->next->s[0]->filename);
 752     }
 753     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 754     {
 755         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 756                   s->filename, sc->next->s[0]->filename);
 757     }
 758     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 759     {
 760         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 761                   s->filename, sc->next->s[0]->filename);
 762     }
 763
 764     /* check if there's room */
 765     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 766     {
 767         sc->nsamples_alloc = std::max(2*sc->nsamples_alloc, 2);
 768         srenew(sc->s, sc->nsamples_alloc);
 769         srenew(sc->r, sc->nsamples_alloc);
 770     }
 771     sc->s[sc->nsamples] = s;
 772     sc->r[sc->nsamples] = *r;
 773     sc->nsamples++;
 774
 775     sample_coll_calc_ntot(sc);
 776 }
 777
 778 /* insert a sample into a lambda_list, creating the right sample_coll if
 779    neccesary */
 780 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 781 {
 782     gmx_bool       found = FALSE;
 783     sample_coll_t *sc;
 784     sample_range_t r;
 785
 786     lambda_data_t *l = head->next;
 787
 788     /* first search for the right lambda_data_t */
 789     while (l != head)
 790     {
 791         if (lambda_vec_same(l->lambda, s->native_lambda) )
 792         {
 793             found = TRUE;
 794             break;
 795         }
 796         l = l->next;
 797     }
 798
 799     if (!found)
 800     {
 801         snew(l, 1);                                     /* allocate a new one */
 802         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 803         lambda_data_insert_lambda(head, l);             /* add it to the list */
 804     }
 805
 806     /* now look for a sample collection */
 807     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 808     if (!sc)
 809     {
 810         snew(sc, 1); /* allocate a new one */
 811         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 812         lambda_data_insert_sample_coll(l, sc);
 813     }
 814
 815     /* now insert the samples into the sample coll */
 816     sample_range_init(&r, s);
 817     sample_coll_insert_sample(sc, s, &r);
 818 }
 819
 820
 821 /* make a histogram out of a sample collection */
 822 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 823                                   int *nbin_alloc, int *nbin,
 824                                   double *dx, double *xmin, int nbin_default)
 825 {
 826     int      i, j, k;
 827     gmx_bool dx_set   = FALSE;
 828     gmx_bool xmin_set = FALSE;
 829
 830     gmx_bool xmax_set      = FALSE;
 831     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 832                                        limits of a histogram */
 833     double   xmax          = -1;
 834
 835     /* first determine dx and xmin; try the histograms */
 836     for (i = 0; i < sc->nsamples; i++)
 837     {
 838         if (sc->s[i]->hist)
 839         {
 840             hist_t *hist = sc->s[i]->hist;
 841             for (k = 0; k < hist->nhist; k++)
 842             {
 843                 double hdx      = hist->dx[k];
 844                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 845
 846                 /* we use the biggest dx*/
 847                 if ( (!dx_set) || hist->dx[0] > *dx)
 848                 {
 849                     dx_set = TRUE;
 850                     *dx    = hist->dx[0];
 851                 }
 852                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 853                 {
 854                     xmin_set = TRUE;
 855                     *xmin    = (hist->x0[k]*hdx);
 856                 }
 857
 858                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 859                 {
 860                     xmax_set = TRUE;
 861                     xmax     = xmax_now;
 862                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 863                     {
 864                         xmax_set_hard = TRUE;
 865                     }
 866                 }
 867                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 868                 {
 869                     xmax_set_hard = TRUE;
 870                     xmax          = xmax_now;
 871                 }
 872             }
 873         }
 874     }
 875     /* and the delta us */
 876     for (i = 0; i < sc->nsamples; i++)
 877     {
 878         if (sc->s[i]->ndu > 0)
 879         {
 880             /* determine min and max */
 881             int    starti  = sc->r[i].start;
 882             int    endi    = sc->r[i].end;
 883             double du_xmin = sc->s[i]->du[starti];
 884             double du_xmax = sc->s[i]->du[starti];
 885             for (j = starti+1; j < endi; j++)
 886             {
 887                 if (sc->s[i]->du[j] < du_xmin)
 888                 {
 889                     du_xmin = sc->s[i]->du[j];
 890                 }
 891                 if (sc->s[i]->du[j] > du_xmax)
 892                 {
 893                     du_xmax = sc->s[i]->du[j];
 894                 }
 895             }
 896
 897             /* and now change the limits */
 898             if ( (!xmin_set) || (du_xmin < *xmin) )
 899             {
 900                 xmin_set = TRUE;
 901                 *xmin    = du_xmin;
 902             }
 903             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 904             {
 905                 xmax_set = TRUE;
 906                 xmax     = du_xmax;
 907             }
 908         }
 909     }
 910
 911     if (!xmax_set || !xmin_set)
 912     {
 913         *nbin = 0;
 914         return;
 915     }
 916
 917
 918     if (!dx_set)
 919     {
 920         *nbin = nbin_default;
 921         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 922                                                be 0, and we count from 0 */
 923     }
 924     else
 925     {
 926         *nbin = static_cast<int>((xmax-(*xmin))/(*dx));
 927     }
 928
 929     if (*nbin > *nbin_alloc)
 930     {
 931         *nbin_alloc = *nbin;
 932         srenew(*bin, *nbin_alloc);
 933     }
 934
 935     /* reset the histogram */
 936     for (i = 0; i < (*nbin); i++)
 937     {
 938         (*bin)[i] = 0;
 939     }
 940
 941     /* now add the actual data */
 942     for (i = 0; i < sc->nsamples; i++)
 943     {
 944         if (sc->s[i]->hist)
 945         {
 946             hist_t *hist = sc->s[i]->hist;
 947             for (k = 0; k < hist->nhist; k++)
 948             {
 949                 double hdx       = hist->dx[k];
 950                 double xmin_hist = hist->x0[k]*hdx;
 951                 for (j = 0; j < hist->nbin[k]; j++)
 952                 {
 953                     /* calculate the bin corresponding to the middle of the
 954                        original bin */
 955                     double x     = hdx*(j+0.5) + xmin_hist;
 956                     int    binnr = static_cast<int>((x-(*xmin))/(*dx));
 957
 958                     if (binnr >= *nbin || binnr < 0)
 959                     {
 960                         binnr = (*nbin)-1;
 961                     }
 962
 963                     (*bin)[binnr] += hist->bin[k][j];
 964                 }
 965             }
 966         }
 967         else
 968         {
 969             int starti = sc->r[i].start;
 970             int endi   = sc->r[i].end;
 971             for (j = starti; j < endi; j++)
 972             {
 973                 int binnr = static_cast<int>((sc->s[i]->du[j]-(*xmin))/(*dx));
 974                 if (binnr >= *nbin || binnr < 0)
 975                 {
 976                     binnr = (*nbin)-1;
 977                 }
 978
 979                 (*bin)[binnr]++;
 980             }
 981         }
 982     }
 983 }
 984
 985 /* write a collection of histograms to a file */
 986 static void sim_data_histogram(sim_data_t *sd, const char *filename,
 987                                int nbin_default, const gmx_output_env_t *oenv)
 988 {
 989     char           label_x[STRLEN];
 990     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
 991     const char    *title   = "N(\\DeltaH)";
 992     const char    *label_y = "Samples";
 993     FILE          *fp;
 994     lambda_data_t *bl;
 995     int            nsets     = 0;
 996     char         **setnames  = nullptr;
 997     gmx_bool       first_set = FALSE;
 998     /* histogram data: */
 999     int           *hist       = nullptr;
1000     int            nbin       = 0;
1001     int            nbin_alloc = 0;
1002     double         dx         = 0;
1003     double         minval     = 0;
1004     int            i;
1005     lambda_data_t *bl_head = sd->lb;
1006
1007     printf("\nWriting histogram to %s\n", filename);
1008     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1009
1010     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1011
1012     /* first get all the set names */
1013     bl = bl_head->next;
1014     /* iterate over all lambdas */
1015     while (bl != bl_head)
1016     {
1017         sample_coll_t *sc = bl->sc->next;
1018
1019         /* iterate over all samples */
1020         while (sc != bl->sc)
1021         {
1022             char buf[STRLEN], buf2[STRLEN];
1023
1024             nsets++;
1025             srenew(setnames, nsets);
1026             snew(setnames[nsets-1], STRLEN);
1027             if (sc->foreign_lambda->dhdl < 0)
1028             {
1029                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1030                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1031                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1032                         deltag, lambda, buf2, lambda, buf);
1033             }
1034             else
1035             {
1036                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1037                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1038                         dhdl, lambda, buf);
1039             }
1040             sc = sc->next;
1041         }
1042
1043         bl = bl->next;
1044     }
1045     xvgr_legend(fp, nsets, setnames, oenv);
1046
1047
1048     /* now make the histograms */
1049     bl = bl_head->next;
1050     /* iterate over all lambdas */
1051     while (bl != bl_head)
1052     {
1053         sample_coll_t *sc = bl->sc->next;
1054
1055         /* iterate over all samples */
1056         while (sc != bl->sc)
1057         {
1058             if (!first_set)
1059             {
1060                 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1061             }
1062
1063             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &minval,
1064                                   nbin_default);
1065
1066             for (i = 0; i < nbin; i++)
1067             {
1068                 double xmin = i*dx + minval;
1069                 double xmax = (i+1)*dx + minval;
1070
1071                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1072             }
1073
1074             first_set = FALSE;
1075             sc        = sc->next;
1076         }
1077
1078         bl = bl->next;
1079     }
1080
1081     if (hist)
1082     {
1083         sfree(hist);
1084     }
1085
1086     xvgrclose(fp);
1087 }
1088
1089 static int
1090 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1091 {
1092     int n = 0;
1093
1094     n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1095     if (lambda->index >= 0)
1096     {
1097         n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1098     }
1099     if (lambda->dhdl >= 0)
1100     {
1101         n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1102     }
1103     else
1104     {
1105         int i;
1106         for (i = 0; i < lambda->lc->N; i++)
1107         {
1108             n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1109         }
1110     }
1111     return n;
1112 }
1113
1114 /* create a collection (array) of barres_t object given a ordered linked list
1115    of barlamda_t sample collections */
1116 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1117                                     gmx_bool use_dhdl)
1118 {
1119     lambda_data_t *bl;
1120     int            nlambda = 0;
1121     barres_t      *res;
1122     gmx_bool       dhdl    = FALSE;
1123     gmx_bool       first   = TRUE;
1124     lambda_data_t *bl_head = sd->lb;
1125
1126     /* first count the lambdas */
1127     bl = bl_head->next;
1128     while (bl != bl_head)
1129     {
1130         nlambda++;
1131         bl = bl->next;
1132     }
1133     snew(res, nlambda-1);
1134
1135     /* next put the right samples in the res */
1136     *nres = 0;
1137     bl    = bl_head->next->next; /* we start with the second one. */
1138     while (bl != bl_head)
1139     {
1140         sample_coll_t *sc, *scprev;
1141         barres_t      *br = &(res[*nres]);
1142         /* there is always a previous one. we search for that as a foreign
1143            lambda: */
1144         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1145         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1146
1147         barres_init(br);
1148
1149         if (use_dhdl)
1150         {
1151             /* we use dhdl */
1152
1153             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1154             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1155
1156             if (first)
1157             {
1158                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1159                 dhdl = TRUE;
1160             }
1161             if (!dhdl)
1162             {
1163                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1164             }
1165         }
1166         else if (!scprev && !sc)
1167         {
1168             char descX[STRLEN], descY[STRLEN];
1169             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1170             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1171
1172             gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1173         }
1174
1175         /* normal delta H */
1176         if (!scprev)
1177         {
1178             char descX[STRLEN], descY[STRLEN];
1179             snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1180             snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1181             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1182         }
1183         if (!sc)
1184         {
1185             char descX[STRLEN], descY[STRLEN];
1186             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1187             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1188             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1189         }
1190         br->a = scprev;
1191         br->b = sc;
1192
1193         first = FALSE;
1194         (*nres)++;
1195         bl = bl->next;
1196     }
1197     return res;
1198 }
1199
1200 /* estimate the maximum discretization error */
1201 static double barres_list_max_disc_err(barres_t *res, int nres)
1202 {
1203     int    i, j;
1204     double disc_err = 0.;
1205     double delta_lambda;
1206
1207     for (i = 0; i < nres; i++)
1208     {
1209         barres_t *br = &(res[i]);
1210
1211         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1212                                            br->a->native_lambda);
1213
1214         for (j = 0; j < br->a->nsamples; j++)
1215         {
1216             if (br->a->s[j]->hist)
1217             {
1218                 double Wfac = 1.;
1219                 if (br->a->s[j]->derivative)
1220                 {
1221                     Wfac =  delta_lambda;
1222                 }
1223
1224                 disc_err = std::max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1225             }
1226         }
1227         for (j = 0; j < br->b->nsamples; j++)
1228         {
1229             if (br->b->s[j]->hist)
1230             {
1231                 double Wfac = 1.;
1232                 if (br->b->s[j]->derivative)
1233                 {
1234                     Wfac =  delta_lambda;
1235                 }
1236                 disc_err = std::max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1237             }
1238         }
1239     }
1240     return disc_err;
1241 }
1242
1243
1244 /* impose start and end times on a sample collection, updating sample_ranges */
1245 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1246                                      double end_t)
1247 {
1248     int i;
1249     for (i = 0; i < sc->nsamples; i++)
1250     {
1251         samples_t      *s = sc->s[i];
1252         sample_range_t *r = &(sc->r[i]);
1253         if (s->hist)
1254         {
1255             double end_time = s->hist->delta_time*s->hist->sum +
1256                 s->hist->start_time;
1257             if (s->hist->start_time < begin_t || end_time > end_t)
1258             {
1259                 r->use = FALSE;
1260             }
1261         }
1262         else
1263         {
1264             if (!s->t)
1265             {
1266                 double end_time;
1267                 if (s->start_time < begin_t)
1268                 {
1269                     r->start = static_cast<int>((begin_t - s->start_time)/s->delta_time);
1270                 }
1271                 end_time = s->delta_time*s->ndu + s->start_time;
1272                 if (end_time > end_t)
1273                 {
1274                     r->end = static_cast<int>((end_t - s->start_time)/s->delta_time);
1275                 }
1276             }
1277             else
1278             {
1279                 int j;
1280                 for (j = 0; j < s->ndu; j++)
1281                 {
1282                     if (s->t[j] < begin_t)
1283                     {
1284                         r->start = j;
1285                     }
1286
1287                     if (s->t[j] >= end_t)
1288                     {
1289                         r->end = j;
1290                         break;
1291                     }
1292                 }
1293             }
1294             if (r->start > r->end)
1295             {
1296                 r->use = FALSE;
1297             }
1298         }
1299     }
1300     sample_coll_calc_ntot(sc);
1301 }
1302
1303 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1304 {
1305     double         first_t, last_t;
1306     double         begin_t, end_t;
1307     lambda_data_t *lc;
1308     lambda_data_t *head = sd->lb;
1309     int            j;
1310
1311     if (begin <= 0 && end < 0)
1312     {
1313         return;
1314     }
1315
1316     /* first determine the global start and end times */
1317     first_t = -1;
1318     last_t  = -1;
1319     lc      = head->next;
1320     while (lc != head)
1321     {
1322         sample_coll_t *sc = lc->sc->next;
1323         while (sc != lc->sc)
1324         {
1325             for (j = 0; j < sc->nsamples; j++)
1326             {
1327                 double start_t, end_t;
1328
1329                 start_t = sc->s[j]->start_time;
1330                 end_t   =   sc->s[j]->start_time;
1331                 if (sc->s[j]->hist)
1332                 {
1333                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1334                 }
1335                 else
1336                 {
1337                     if (sc->s[j]->t)
1338                     {
1339                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1340                     }
1341                     else
1342                     {
1343                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1344                     }
1345                 }
1346
1347                 if (start_t < first_t || first_t < 0)
1348                 {
1349                     first_t = start_t;
1350                 }
1351                 if (end_t > last_t)
1352                 {
1353                     last_t = end_t;
1354                 }
1355             }
1356             sc = sc->next;
1357         }
1358         lc = lc->next;
1359     }
1360
1361     /* calculate the actual times */
1362     if (begin > 0)
1363     {
1364         begin_t = begin;
1365     }
1366     else
1367     {
1368         begin_t = first_t;
1369     }
1370
1371     if (end > 0)
1372     {
1373         end_t = end;
1374     }
1375     else
1376     {
1377         end_t = last_t;
1378     }
1379     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1380
1381     if (begin_t > end_t)
1382     {
1383         return;
1384     }
1385     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1386
1387     /* then impose them */
1388     lc = head->next;
1389     while (lc != head)
1390     {
1391         sample_coll_t *sc = lc->sc->next;
1392         while (sc != lc->sc)
1393         {
1394             sample_coll_impose_times(sc, begin_t, end_t);
1395             sc = sc->next;
1396         }
1397         lc = lc->next;
1398     }
1399 }
1400
1401
1402 /* create subsample i out of ni from an existing sample_coll */
1403 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1404                                              sample_coll_t *sc_orig,
1405                                              int i, int ni)
1406 {
1407     int             j;
1408
1409     int64_t         ntot_start;
1410     int64_t         ntot_end;
1411     int64_t         ntot_so_far;
1412
1413     *sc = *sc_orig; /* just copy all fields */
1414
1415     /* allocate proprietary memory */
1416     snew(sc->s, sc_orig->nsamples);
1417     snew(sc->r, sc_orig->nsamples);
1418
1419     /* copy the samples */
1420     for (j = 0; j < sc_orig->nsamples; j++)
1421     {
1422         sc->s[j] = sc_orig->s[j];
1423         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1424     }
1425
1426     /* now fix start and end fields */
1427     /* the casts avoid possible overflows */
1428     ntot_start  = static_cast<int64_t>(sc_orig->ntot*static_cast<double>(i)/static_cast<double>(ni));
1429     ntot_end    = static_cast<int64_t>(sc_orig->ntot*static_cast<double>(i+1)/static_cast<double>(ni));
1430     ntot_so_far = 0;
1431     for (j = 0; j < sc->nsamples; j++)
1432     {
1433         int64_t ntot_add;
1434         int64_t new_start, new_end;
1435
1436         if (sc->r[j].use)
1437         {
1438             if (sc->s[j]->hist)
1439             {
1440                 ntot_add = sc->s[j]->hist->sum;
1441             }
1442             else
1443             {
1444                 ntot_add = sc->r[j].end - sc->r[j].start;
1445             }
1446         }
1447         else
1448         {
1449             ntot_add = 0;
1450         }
1451
1452         if (!sc->s[j]->hist)
1453         {
1454             if (ntot_so_far < ntot_start)
1455             {
1456                 /* adjust starting point */
1457                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1458             }
1459             else
1460             {
1461                 new_start = sc->r[j].start;
1462             }
1463             /* adjust end point */
1464             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1465             if (new_end > sc->r[j].end)
1466             {
1467                 new_end = sc->r[j].end;
1468             }
1469
1470             /* check if we're in range at all */
1471             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1472             {
1473                 new_start = 0;
1474                 new_end   = 0;
1475             }
1476             /* and write the new range */
1477             GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(), "Value of 'new_start' too large for int converstion");
1478             GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(), "Value of 'new_end' too large for int converstion");
1479             sc->r[j].start = static_cast<int>(new_start);
1480             sc->r[j].end   = static_cast<int>(new_end);
1481         }
1482         else
1483         {
1484             if (sc->r[j].use)
1485             {
1486                 double overlap;
1487                 double ntot_start_norm, ntot_end_norm;
1488                 /* calculate the amount of overlap of the
1489                    desired range (ntot_start -- ntot_end) onto
1490                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1491
1492                 /* first calculate normalized bounds
1493                    (where 0 is the start of the hist range, and 1 the end) */
1494                 ntot_start_norm = (ntot_start-ntot_so_far)/static_cast<double>(ntot_add);
1495                 ntot_end_norm   = (ntot_end-ntot_so_far)/static_cast<double>(ntot_add);
1496
1497                 /* now fix the boundaries */
1498                 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1499                 ntot_end_norm   = std::max(0.0, std::min(1.0, ntot_end_norm));
1500
1501                 /* and calculate the overlap */
1502                 overlap = ntot_end_norm - ntot_start_norm;
1503
1504                 if (overlap > 0.95) /* we allow for 5% slack */
1505                 {
1506                     sc->r[j].use = TRUE;
1507                 }
1508                 else if (overlap < 0.05)
1509                 {
1510                     sc->r[j].use = FALSE;
1511                 }
1512                 else
1513                 {
1514                     return FALSE;
1515                 }
1516             }
1517         }
1518         ntot_so_far += ntot_add;
1519     }
1520     sample_coll_calc_ntot(sc);
1521
1522     return TRUE;
1523 }
1524
1525 /* calculate minimum and maximum work values in sample collection */
1526 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1527                                 double *Wmin, double *Wmax)
1528 {
1529     int i, j;
1530
1531     *Wmin =  std::numeric_limits<float>::max();
1532     *Wmax = -std::numeric_limits<float>::max();
1533
1534     for (i = 0; i < sc->nsamples; i++)
1535     {
1536         samples_t      *s = sc->s[i];
1537         sample_range_t *r = &(sc->r[i]);
1538         if (r->use)
1539         {
1540             if (!s->hist)
1541             {
1542                 for (j = r->start; j < r->end; j++)
1543                 {
1544                     *Wmin = std::min(*Wmin, s->du[j]*Wfac);
1545                     *Wmax = std::max(*Wmax, s->du[j]*Wfac);
1546                 }
1547             }
1548             else
1549             {
1550                 int    hd = 0; /* determine the histogram direction: */
1551                 double dx;
1552                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1553                 {
1554                     hd = 1;
1555                 }
1556                 dx = s->hist->dx[hd];
1557
1558                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1559                 {
1560                     *Wmin = std::min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1561                     *Wmax = std::max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1562                     /* look for the highest value bin with values */
1563                     if (s->hist->bin[hd][j] > 0)
1564                     {
1565                         *Wmin = std::min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1566                         *Wmax = std::max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1567                         break;
1568                     }
1569                 }
1570             }
1571         }
1572     }
1573 }
1574
1575 /* Initialize a sim_data structure */
1576 static void sim_data_init(sim_data_t *sd)
1577 {
1578     /* make linked list */
1579     sd->lb       = &(sd->lb_head);
1580     sd->lb->next = sd->lb;
1581     sd->lb->prev = sd->lb;
1582
1583     lambda_components_init(&(sd->lc));
1584 }
1585
1586
1587 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1588 {
1589     int    i;
1590     double sum;
1591
1592     sum = 0;
1593
1594     for (i = 0; i < n; i++)
1595     {
1596         sum += 1./(1. + std::exp(Wfac*W[i] + sbMmDG));
1597     }
1598
1599     return sum;
1600 }
1601
1602 /* calculate the BAR average given a histogram
1603
1604     if type== 0, calculate the best estimate for the average,
1605     if type==-1, calculate the minimum possible value given the histogram
1606     if type== 1, calculate the maximum possible value given the histogram */
1607 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1608                                 int type)
1609 {
1610     double sum = 0.;
1611     int    i;
1612     int    maxbin;
1613     /* normalization factor multiplied with bin width and
1614        number of samples (we normalize through M): */
1615     double normdx = 1.;
1616     int    hd     = 0; /* determine the histogram direction: */
1617     double dx;
1618
1619     if ( (hist->nhist > 1) && (Wfac < 0) )
1620     {
1621         hd = 1;
1622     }
1623     dx     = hist->dx[hd];
1624     maxbin = hist->nbin[hd]-1;
1625     if (type == 1)
1626     {
1627         maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1628     }
1629
1630     for (i = 0; i < maxbin; i++)
1631     {
1632         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1633         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1634
1635         sum += pxdx/(1. + std::exp(x + sbMmDG));
1636     }
1637
1638     return sum;
1639 }
1640
1641 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1642                                 double temp, double tol, int type)
1643 {
1644     double kT, beta, M;
1645     int    i;
1646     double Wfac1, Wfac2, Wmin, Wmax;
1647     double DG0, DG1, DG2, dDG1;
1648     double n1, n2; /* numbers of samples as doubles */
1649
1650     kT   = BOLTZ*temp;
1651     beta = 1/kT;
1652
1653     /* count the numbers of samples */
1654     n1 = ca->ntot;
1655     n2 = cb->ntot;
1656
1657     M = std::log(n1/n2);
1658
1659     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1660     if (ca->foreign_lambda->dhdl < 0)
1661     {
1662         /* this is the case when the delta U were calculated directly
1663            (i.e. we're not scaling dhdl) */
1664         Wfac1 = beta;
1665         Wfac2 = beta;
1666     }
1667     else
1668     {
1669         /* we're using dhdl, so delta_lambda needs to be a
1670            multiplication factor.  */
1671         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1672         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1673                                                   ca->native_lambda);
1674         if (cb->native_lambda->lc->N > 1)
1675         {
1676             gmx_fatal(FARGS,
1677                       "Can't (yet) do multi-component dhdl interpolation");
1678         }
1679
1680         Wfac1 =  beta*delta_lambda;
1681         Wfac2 = -beta*delta_lambda;
1682     }
1683
1684     if (beta < 1)
1685     {
1686         /* We print the output both in kT and kJ/mol.
1687          * Here we determine DG in kT, so when beta < 1
1688          * the precision has to be increased.
1689          */
1690         tol *= beta;
1691     }
1692
1693     /* Calculate minimum and maximum work to give an initial estimate of
1694      * delta G  as their average.
1695      */
1696     {
1697         double Wmin1, Wmin2, Wmax1, Wmax2;
1698         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1699         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1700
1701         Wmin = std::min(Wmin1, Wmin2);
1702         Wmax = std::max(Wmax1, Wmax2);
1703     }
1704
1705     DG0 = Wmin;
1706     DG2 = Wmax;
1707
1708     if (debug)
1709     {
1710         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1711     }
1712     /* We approximate by bisection: given our initial estimates
1713        we keep checking whether the halfway point is greater or
1714        smaller than what we get out of the BAR averages.
1715
1716        For the comparison we can use twice the tolerance. */
1717     while (DG2 - DG0 > 2*tol)
1718     {
1719         DG1 = 0.5*(DG0 + DG2);
1720
1721         /* calculate the BAR averages */
1722         dDG1 = 0.;
1723
1724         for (i = 0; i < ca->nsamples; i++)
1725         {
1726             samples_t      *s = ca->s[i];
1727             sample_range_t *r = &(ca->r[i]);
1728             if (r->use)
1729             {
1730                 if (s->hist)
1731                 {
1732                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1733                 }
1734                 else
1735                 {
1736                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1737                                          Wfac1, (M-DG1));
1738                 }
1739             }
1740         }
1741         for (i = 0; i < cb->nsamples; i++)
1742         {
1743             samples_t      *s = cb->s[i];
1744             sample_range_t *r = &(cb->r[i]);
1745             if (r->use)
1746             {
1747                 if (s->hist)
1748                 {
1749                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1750                 }
1751                 else
1752                 {
1753                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1754                                          Wfac2, -(M-DG1));
1755                 }
1756             }
1757         }
1758
1759         if (dDG1 < 0)
1760         {
1761             DG0 = DG1;
1762         }
1763         else
1764         {
1765             DG2 = DG1;
1766         }
1767         if (debug)
1768         {
1769             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1770         }
1771     }
1772
1773     return 0.5*(DG0 + DG2);
1774 }
1775
1776 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1777                              double temp, double dg, double *sa, double *sb)
1778 {
1779     int    i, j;
1780     double W_ab = 0.;
1781     double W_ba = 0.;
1782     double kT, beta;
1783     double Wfac1, Wfac2;
1784     double n1, n2;
1785
1786     kT   = BOLTZ*temp;
1787     beta = 1/kT;
1788
1789     /* count the numbers of samples */
1790     n1 = ca->ntot;
1791     n2 = cb->ntot;
1792
1793     /* to ensure the work values are the same as during the delta_G */
1794     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1795     if (ca->foreign_lambda->dhdl < 0)
1796     {
1797         /* this is the case when the delta U were calculated directly
1798            (i.e. we're not scaling dhdl) */
1799         Wfac1 = beta;
1800         Wfac2 = beta;
1801     }
1802     else
1803     {
1804         /* we're using dhdl, so delta_lambda needs to be a
1805            multiplication factor.  */
1806         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1807                                                   ca->native_lambda);
1808         Wfac1 =  beta*delta_lambda;
1809         Wfac2 = -beta*delta_lambda;
1810     }
1811
1812     /* first calculate the average work in both directions */
1813     for (i = 0; i < ca->nsamples; i++)
1814     {
1815         samples_t      *s = ca->s[i];
1816         sample_range_t *r = &(ca->r[i]);
1817         if (r->use)
1818         {
1819             if (!s->hist)
1820             {
1821                 for (j = r->start; j < r->end; j++)
1822                 {
1823                     W_ab += Wfac1*s->du[j];
1824                 }
1825             }
1826             else
1827             {
1828                 /* normalization factor multiplied with bin width and
1829                    number of samples (we normalize through M): */
1830                 double normdx = 1.;
1831                 double dx;
1832                 int    hd = 0; /* histogram direction */
1833                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1834                 {
1835                     hd = 1;
1836                 }
1837                 dx = s->hist->dx[hd];
1838
1839                 for (j = 0; j < s->hist->nbin[0]; j++)
1840                 {
1841                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1842                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1843                     W_ab += pxdx*x;
1844                 }
1845             }
1846         }
1847     }
1848     W_ab /= n1;
1849
1850     for (i = 0; i < cb->nsamples; i++)
1851     {
1852         samples_t      *s = cb->s[i];
1853         sample_range_t *r = &(cb->r[i]);
1854         if (r->use)
1855         {
1856             if (!s->hist)
1857             {
1858                 for (j = r->start; j < r->end; j++)
1859                 {
1860                     W_ba += Wfac1*s->du[j];
1861                 }
1862             }
1863             else
1864             {
1865                 /* normalization factor multiplied with bin width and
1866                    number of samples (we normalize through M): */
1867                 double normdx = 1.;
1868                 double dx;
1869                 int    hd = 0; /* histogram direction */
1870                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1871                 {
1872                     hd = 1;
1873                 }
1874                 dx = s->hist->dx[hd];
1875
1876                 for (j = 0; j < s->hist->nbin[0]; j++)
1877                 {
1878                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1879                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1880                     W_ba += pxdx*x;
1881                 }
1882             }
1883         }
1884     }
1885     W_ba /= n2;
1886
1887     /* then calculate the relative entropies */
1888     *sa = (W_ab - dg);
1889     *sb = (W_ba + dg);
1890 }
1891
1892 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1893                            double temp, double dg, double *stddev)
1894 {
1895     int    i, j;
1896     double M;
1897     double sigmafact = 0.;
1898     double kT, beta;
1899     double Wfac1, Wfac2;
1900     double n1, n2;
1901
1902     kT   = BOLTZ*temp;
1903     beta = 1/kT;
1904
1905     /* count the numbers of samples */
1906     n1 = ca->ntot;
1907     n2 = cb->ntot;
1908
1909     /* to ensure the work values are the same as during the delta_G */
1910     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1911     if (ca->foreign_lambda->dhdl < 0)
1912     {
1913         /* this is the case when the delta U were calculated directly
1914            (i.e. we're not scaling dhdl) */
1915         Wfac1 = beta;
1916         Wfac2 = beta;
1917     }
1918     else
1919     {
1920         /* we're using dhdl, so delta_lambda needs to be a
1921            multiplication factor.  */
1922         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1923                                                   ca->native_lambda);
1924         Wfac1 =  beta*delta_lambda;
1925         Wfac2 = -beta*delta_lambda;
1926     }
1927
1928     M = std::log(n1/n2);
1929
1930
1931     /* calculate average in both directions */
1932     for (i = 0; i < ca->nsamples; i++)
1933     {
1934         samples_t      *s = ca->s[i];
1935         sample_range_t *r = &(ca->r[i]);
1936         if (r->use)
1937         {
1938             if (!s->hist)
1939             {
1940                 for (j = r->start; j < r->end; j++)
1941                 {
1942                     sigmafact += 1./(2. + 2.*std::cosh((M + Wfac1*s->du[j] - dg)));
1943                 }
1944             }
1945             else
1946             {
1947                 /* normalization factor multiplied with bin width and
1948                    number of samples (we normalize through M): */
1949                 double normdx = 1.;
1950                 double dx;
1951                 int    hd = 0; /* histogram direction */
1952                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1953                 {
1954                     hd = 1;
1955                 }
1956                 dx = s->hist->dx[hd];
1957
1958                 for (j = 0; j < s->hist->nbin[0]; j++)
1959                 {
1960                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1961                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1962
1963                     sigmafact += pxdx/(2. + 2.*std::cosh((M + x - dg)));
1964                 }
1965             }
1966         }
1967     }
1968     for (i = 0; i < cb->nsamples; i++)
1969     {
1970         samples_t      *s = cb->s[i];
1971         sample_range_t *r = &(cb->r[i]);
1972         if (r->use)
1973         {
1974             if (!s->hist)
1975             {
1976                 for (j = r->start; j < r->end; j++)
1977                 {
1978                     sigmafact += 1./(2. + 2.*std::cosh((M - Wfac2*s->du[j] - dg)));
1979                 }
1980             }
1981             else
1982             {
1983                 /* normalization factor multiplied with bin width and
1984                    number of samples (we normalize through M): */
1985                 double normdx = 1.;
1986                 double dx;
1987                 int    hd = 0; /* histogram direction */
1988                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1989                 {
1990                     hd = 1;
1991                 }
1992                 dx = s->hist->dx[hd];
1993
1994                 for (j = 0; j < s->hist->nbin[0]; j++)
1995                 {
1996                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1997                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1998
1999                     sigmafact += pxdx/(2. + 2.*std::cosh((M - x - dg)));
2000                 }
2001             }
2002         }
2003     }
2004
2005     sigmafact /= (n1 + n2);
2006
2007
2008     /* Eq. 10 from
2009        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2010     *stddev = std::sqrt(((1.0/sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2011 }
2012
2013
2014
2015 static void calc_bar(barres_t *br, double tol,
2016                      int npee_min, int npee_max, gmx_bool *bEE,
2017                      double *partsum)
2018 {
2019     int      npee, p;
2020     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2021                                                         for calculated quantities */
2022     double   temp = br->a->temp;
2023     int      i;
2024     double   dg_min, dg_max;
2025     gmx_bool have_hist = FALSE;
2026
2027     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2028
2029     br->dg_disc_err      = 0.;
2030     br->dg_histrange_err = 0.;
2031
2032     /* check if there are histograms */
2033     for (i = 0; i < br->a->nsamples; i++)
2034     {
2035         if (br->a->r[i].use && br->a->s[i]->hist)
2036         {
2037             have_hist = TRUE;
2038             break;
2039         }
2040     }
2041     if (!have_hist)
2042     {
2043         for (i = 0; i < br->b->nsamples; i++)
2044         {
2045             if (br->b->r[i].use && br->b->s[i]->hist)
2046             {
2047                 have_hist = TRUE;
2048                 break;
2049             }
2050         }
2051     }
2052
2053     /* calculate histogram-specific errors */
2054     if (have_hist)
2055     {
2056         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2057         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2058
2059         if (std::abs(dg_max - dg_min) > GMX_REAL_EPS*10)
2060         {
2061             /* the histogram range  error is the biggest of the differences
2062                between the best estimate and the extremes */
2063             br->dg_histrange_err = std::abs(dg_max - dg_min);
2064         }
2065         br->dg_disc_err = 0.;
2066         for (i = 0; i < br->a->nsamples; i++)
2067         {
2068             if (br->a->s[i]->hist)
2069             {
2070                 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2071             }
2072         }
2073         for (i = 0; i < br->b->nsamples; i++)
2074         {
2075             if (br->b->s[i]->hist)
2076             {
2077                 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2078             }
2079         }
2080     }
2081     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2082
2083     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2084
2085     dg_sig2     = 0;
2086     sa_sig2     = 0;
2087     sb_sig2     = 0;
2088     stddev_sig2 = 0;
2089
2090     *bEE = TRUE;
2091     {
2092         sample_coll_t ca, cb;
2093
2094         /* initialize the samples */
2095         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2096                          br->a->temp);
2097         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2098                          br->b->temp);
2099
2100         for (npee = npee_min; npee <= npee_max; npee++)
2101         {
2102             double dgs      = 0;
2103             double dgs2     = 0;
2104             double dsa      = 0;
2105             double dsb      = 0;
2106             double dsa2     = 0;
2107             double dsb2     = 0;
2108             double dstddev  = 0;
2109             double dstddev2 = 0;
2110
2111
2112             for (p = 0; p < npee; p++)
2113             {
2114                 double   dgp;
2115                 double   stddevc;
2116                 double   sac, sbc;
2117                 gmx_bool cac, cbc;
2118
2119                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2120                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2121
2122                 if (!cac || !cbc)
2123                 {
2124                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2125                     *bEE = FALSE;
2126                     if (cac)
2127                     {
2128                         sample_coll_destroy(&ca);
2129                     }
2130                     if (cbc)
2131                     {
2132                         sample_coll_destroy(&cb);
2133                     }
2134                     return;
2135                 }
2136
2137                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2138                 dgs  += dgp;
2139                 dgs2 += dgp*dgp;
2140
2141                 partsum[npee*(npee_max+1)+p] += dgp;
2142
2143                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2144                 dsa  += sac;
2145                 dsa2 += sac*sac;
2146                 dsb  += sbc;
2147                 dsb2 += sbc*sbc;
2148                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2149
2150                 dstddev  += stddevc;
2151                 dstddev2 += stddevc*stddevc;
2152
2153                 sample_coll_destroy(&ca);
2154                 sample_coll_destroy(&cb);
2155             }
2156             dgs     /= npee;
2157             dgs2    /= npee;
2158             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2159
2160             dsa     /= npee;
2161             dsa2    /= npee;
2162             dsb     /= npee;
2163             dsb2    /= npee;
2164             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2165             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2166
2167             dstddev     /= npee;
2168             dstddev2    /= npee;
2169             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2170         }
2171         br->dg_err        = std::sqrt(dg_sig2/(npee_max - npee_min + 1));
2172         br->sa_err        = std::sqrt(sa_sig2/(npee_max - npee_min + 1));
2173         br->sb_err        = std::sqrt(sb_sig2/(npee_max - npee_min + 1));
2174         br->dg_stddev_err = std::sqrt(stddev_sig2/(npee_max - npee_min + 1));
2175     }
2176 }
2177
2178
2179 static double bar_err(int nbmin, int nbmax, const double *partsum)
2180 {
2181     int    nb, b;
2182     double svar, s, s2, dg;
2183
2184     svar = 0;
2185     for (nb = nbmin; nb <= nbmax; nb++)
2186     {
2187         s  = 0;
2188         s2 = 0;
2189         for (b = 0; b < nb; b++)
2190         {
2191             dg  = partsum[nb*(nbmax+1)+b];
2192             s  += dg;
2193             s2 += dg*dg;
2194         }
2195         s    /= nb;
2196         s2   /= nb;
2197         svar += (s2 - s*s)/(nb - 1);
2198     }
2199
2200     return std::sqrt(svar/(nbmax + 1 - nbmin));
2201 }
2202
2203
2204 /* Seek the end of an identifier (consecutive non-spaces), followed by
2205    an optional number of spaces or '='-signs. Returns a pointer to the
2206    first non-space value found after that. Returns NULL if the string
2207    ends before that.
2208  */
2209 static const char *find_value(const char *str)
2210 {
2211     gmx_bool name_end_found = FALSE;
2212
2213     /* if the string is a NULL pointer, return a NULL pointer. */
2214     if (str == nullptr)
2215     {
2216         return nullptr;
2217     }
2218     while (*str != '\0')
2219     {
2220         /* first find the end of the name */
2221         if (!name_end_found)
2222         {
2223             if (std::isspace(*str) || (*str == '=') )
2224             {
2225                 name_end_found = TRUE;
2226             }
2227         }
2228         else
2229         {
2230             if (!( std::isspace(*str) || (*str == '=') ))
2231             {
2232                 return str;
2233             }
2234         }
2235         str++;
2236     }
2237     return nullptr;
2238 }
2239
2240
2241 /* read a vector-notation description of a lambda vector */
2242 static gmx_bool read_lambda_compvec(const char                *str,
2243                                     lambda_vec_t              *lv,
2244                                     const lambda_components_t *lc_in,
2245                                     lambda_components_t       *lc_out,
2246                                     const char               **end,
2247                                     const char                *fn)
2248 {
2249     gmx_bool    initialize_lc = FALSE;   /* whether to initialize the lambda
2250                                             components, or to check them */
2251     gmx_bool    start_reached = FALSE;   /* whether the start of component names
2252                                             has been reached */
2253     gmx_bool    vector        = FALSE;   /* whether there are multiple components */
2254     int         n             = 0;       /* current component number */
2255     const char *val_start     = nullptr; /* start of the component name, or NULL
2256                                             if not in a value */
2257     char       *strtod_end;
2258     gmx_bool    OK = TRUE;
2259
2260     if (end)
2261     {
2262         *end = str;
2263     }
2264
2265
2266     if (lc_out && lc_out->N == 0)
2267     {
2268         initialize_lc = TRUE;
2269     }
2270
2271     if (lc_in == nullptr)
2272     {
2273         lc_in = lc_out;
2274     }
2275
2276     while (true)
2277     {
2278         if (!start_reached)
2279         {
2280             if (std::isalnum(*str))
2281             {
2282                 vector        = FALSE;
2283                 start_reached = TRUE;
2284                 val_start     = str;
2285             }
2286             else if (*str == '(')
2287             {
2288                 vector        = TRUE;
2289                 start_reached = TRUE;
2290             }
2291             else if (!std::isspace(*str))
2292             {
2293                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2294             }
2295         }
2296         else
2297         {
2298             if (val_start)
2299             {
2300                 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2301                 {
2302                     /* end of value */
2303                     if (lv == nullptr)
2304                     {
2305                         if (initialize_lc)
2306                         {
2307                             lambda_components_add(lc_out, val_start,
2308                                                   (str-val_start));
2309                         }
2310                         else
2311                         {
2312                             if (!lambda_components_check(lc_out, n, val_start,
2313                                                          (str-val_start)))
2314                             {
2315                                 return FALSE;
2316                             }
2317                         }
2318                     }
2319                     else
2320                     {
2321                         /* add a vector component to lv */
2322                         lv->val[n] = strtod(val_start, &strtod_end);
2323                         if (val_start == strtod_end)
2324                         {
2325                             gmx_fatal(FARGS,
2326                                       "Error reading lambda vector in %s", fn);
2327                         }
2328                     }
2329                     /* reset for the next identifier */
2330                     val_start = nullptr;
2331                     n++;
2332                     if (!vector)
2333                     {
2334                         return OK;
2335                     }
2336                 }
2337             }
2338             else if (std::isalnum(*str))
2339             {
2340                 val_start = str;
2341             }
2342             if (*str == ')')
2343             {
2344                 str++;
2345                 if (end)
2346                 {
2347                     *end = str;
2348                 }
2349                 if (!vector)
2350                 {
2351                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2352                 }
2353                 else
2354                 {
2355                     GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2356                     if (n == lc_in->N)
2357                     {
2358                         return OK;
2359                     }
2360                     else if (lv == nullptr)
2361                     {
2362                         return FALSE;
2363                     }
2364                     else
2365                     {
2366                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2367                                   fn);
2368                         return FALSE;
2369                     }
2370
2371                 }
2372             }
2373         }
2374         if (*str == '\0')
2375         {
2376             break;
2377         }
2378         str++;
2379         if (end)
2380         {
2381             *end = str;
2382         }
2383     }
2384     if (vector)
2385     {
2386         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2387         return FALSE;
2388     }
2389     return OK;
2390 }
2391
2392 /* read and check the component names from a string */
2393 static gmx_bool read_lambda_components(const char          *str,
2394                                        lambda_components_t *lc,
2395                                        const char         **end,
2396                                        const char          *fn)
2397 {
2398     return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2399 }
2400
2401 /* read an initialized lambda vector from a string */
2402 static gmx_bool read_lambda_vector(const char   *str,
2403                                    lambda_vec_t *lv,
2404                                    const char  **end,
2405                                    const char   *fn)
2406 {
2407     return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2408 }
2409
2410
2411
2412 /* deduce lambda value from legend.
2413     fn = the file name
2414     legend = the legend string
2415     ba = the xvg data
2416     lam = the initialized lambda vector
2417    returns whether to use the data in this set.
2418  */
2419 static gmx_bool legend2lambda(const char   *fn,
2420                               const char   *legend,
2421                               lambda_vec_t *lam)
2422 {
2423     const char   *ptr    = nullptr, *ptr2 = nullptr;
2424     gmx_bool      ok     = FALSE;
2425     gmx_bool      bdhdl  = FALSE;
2426     const char   *tostr  = " to ";
2427
2428     if (legend == nullptr)
2429     {
2430         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2431     }
2432
2433     /* look for the last 'to': */
2434     ptr2 = legend;
2435     do
2436     {
2437         ptr2 = std::strstr(ptr2, tostr);
2438         if (ptr2 != nullptr)
2439         {
2440             ptr = ptr2;
2441             ptr2++;
2442         }
2443     }
2444     while (ptr2 != nullptr && *ptr2 != '\0');
2445
2446     if (ptr)
2447     {
2448         ptr += std::strlen(tostr)-1; /* and advance past that 'to' */
2449     }
2450     else
2451     {
2452         /* look for the = sign */
2453         ptr = std::strrchr(legend, '=');
2454         if (!ptr)
2455         {
2456             /* otherwise look for the last space */
2457             ptr = std::strrchr(legend, ' ');
2458         }
2459     }
2460
2461     if (std::strstr(legend, "dH"))
2462     {
2463         ok    = TRUE;
2464         bdhdl = TRUE;
2465     }
2466     else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2467     {
2468         ok    = TRUE;
2469         bdhdl = FALSE;
2470     }
2471     else /*if (std::strstr(legend, "pV"))*/
2472     {
2473         return FALSE;
2474     }
2475     if (!ptr)
2476     {
2477         ok = FALSE;
2478     }
2479
2480     if (!ok)
2481     {
2482         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2483     }
2484     if (!bdhdl)
2485     {
2486         ptr = find_value(ptr);
2487         if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2488         {
2489             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2490         }
2491     }
2492     else
2493     {
2494         int         dhdl_index;
2495         const char *end;
2496
2497         ptr = std::strrchr(legend, '=');
2498         end = ptr;
2499         if (ptr)
2500         {
2501             /* there must be a component name */
2502             ptr--;
2503             if (ptr < legend)
2504             {
2505                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2506             }
2507             /* now backtrack to the start of the identifier */
2508             while (isspace(*ptr))
2509             {
2510                 end = ptr;
2511                 ptr--;
2512                 if (ptr < legend)
2513                 {
2514                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2515                 }
2516             }
2517             while (!std::isspace(*ptr))
2518             {
2519                 ptr--;
2520                 if (ptr < legend)
2521                 {
2522                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2523                 }
2524             }
2525             ptr++;
2526             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2527             if (dhdl_index < 0)
2528             {
2529                 char buf[STRLEN];
2530                 std::strncpy(buf, ptr, (end-ptr));
2531                 buf[(end-ptr)] = '\0';
2532                 gmx_fatal(FARGS,
2533                           "Did not find lambda component for '%s' in %s",
2534                           buf, fn);
2535             }
2536         }
2537         else
2538         {
2539             if (lam->lc->N > 1)
2540             {
2541                 gmx_fatal(FARGS,
2542                           "dhdl without component name with >1 lambda component in %s",
2543                           fn);
2544             }
2545             dhdl_index = 0;
2546         }
2547         lam->dhdl = dhdl_index;
2548     }
2549     return TRUE;
2550 }
2551
2552 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2553                                 lambda_components_t *lc)
2554 {
2555     gmx_bool    bFound;
2556     const char *ptr;
2557     char       *end;
2558     double      native_lambda;
2559
2560     bFound = FALSE;
2561
2562     /* first check for a state string */
2563     ptr = std::strstr(subtitle, "state");
2564     if (ptr)
2565     {
2566         int         index = -1;
2567         const char *val_end;
2568
2569         /* the new 4.6 style lambda vectors */
2570         ptr = find_value(ptr);
2571         if (ptr)
2572         {
2573             index = std::strtol(ptr, &end, 10);
2574             if (ptr == end)
2575             {
2576                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2577                 return FALSE;
2578             }
2579             ptr = end;
2580         }
2581         else
2582         {
2583             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2584             return FALSE;
2585         }
2586         /* now find the lambda vector component names */
2587         while (*ptr != '(' && !std::isalnum(*ptr))
2588         {
2589             ptr++;
2590             if (*ptr == '\0')
2591             {
2592                 gmx_fatal(FARGS,
2593                           "Incomplete lambda vector component data in %s", fn);
2594                 return FALSE;
2595             }
2596         }
2597         val_end = ptr;
2598         if (!read_lambda_components(ptr, lc, &val_end, fn))
2599         {
2600             gmx_fatal(FARGS,
2601                       "lambda vector components in %s don't match those previously read",
2602                       fn);
2603         }
2604         ptr = find_value(val_end);
2605         if (!ptr)
2606         {
2607             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2608             return FALSE;
2609         }
2610         lambda_vec_init(&(ba->native_lambda), lc);
2611         if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2612         {
2613             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2614         }
2615         ba->native_lambda.index = index;
2616         bFound                  = TRUE;
2617     }
2618     else
2619     {
2620         /* compatibility mode: check for lambda in other ways. */
2621         /* plain text lambda string */
2622         ptr = std::strstr(subtitle, "lambda");
2623         if (ptr == nullptr)
2624         {
2625             /* xmgrace formatted lambda string */
2626             ptr = std::strstr(subtitle, "\\xl\\f{}");
2627         }
2628         if (ptr == nullptr)
2629         {
2630             /* xmgr formatted lambda string */
2631             ptr = std::strstr(subtitle, "\\8l\\4");
2632         }
2633         if (ptr != nullptr)
2634         {
2635             ptr = std::strstr(ptr, "=");
2636         }
2637         if (ptr != nullptr)
2638         {
2639             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2640             /* add the lambda component name as an empty string */
2641             if (lc->N > 0)
2642             {
2643                 if (!lambda_components_check(lc, 0, "", 0))
2644                 {
2645                     gmx_fatal(FARGS,
2646                               "lambda vector components in %s don't match those previously read",
2647                               fn);
2648                 }
2649             }
2650             else
2651             {
2652                 lambda_components_add(lc, "", 0);
2653             }
2654             lambda_vec_init(&(ba->native_lambda), lc);
2655             ba->native_lambda.val[0] = native_lambda;
2656         }
2657     }
2658
2659     return bFound;
2660 }
2661
2662 static void read_bar_xvg_lowlevel(const char *fn, const real *temp, xvg_t *ba,
2663                                   lambda_components_t *lc)
2664 {
2665     int          i;
2666     char        *subtitle, **legend, *ptr;
2667     int          np;
2668     gmx_bool     native_lambda_read = FALSE;
2669     char         buf[STRLEN];
2670
2671     xvg_init(ba);
2672
2673     ba->filename = fn;
2674
2675     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2676     if (!ba->y)
2677     {
2678         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2679     }
2680     /* Reorder the data */
2681     ba->t  = ba->y[0];
2682     for (i = 1; i < ba->nset; i++)
2683     {
2684         ba->y[i-1] = ba->y[i];
2685     }
2686     ba->nset--;
2687
2688     snew(ba->np, ba->nset);
2689     for (i = 0; i < ba->nset; i++)
2690     {
2691         ba->np[i] = np;
2692     }
2693
2694     ba->temp = -1;
2695     if (subtitle != nullptr)
2696     {
2697         /* try to extract temperature */
2698         ptr = std::strstr(subtitle, "T =");
2699         if (ptr != nullptr)
2700         {
2701             ptr += 3;
2702             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2703             {
2704                 if (ba->temp <= 0)
2705                 {
2706                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2707                               ba->temp, fn);
2708                 }
2709             }
2710         }
2711     }
2712     if (ba->temp < 0)
2713     {
2714         if (*temp <= 0)
2715         {
2716             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2717         }
2718         ba->temp = *temp;
2719     }
2720
2721     /* Try to deduce lambda from the subtitle */
2722     if (subtitle)
2723     {
2724         if (subtitle2lambda(subtitle, ba, fn, lc))
2725         {
2726             native_lambda_read = TRUE;
2727         }
2728     }
2729     snew(ba->lambda, ba->nset);
2730     if (legend == nullptr)
2731     {
2732         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2733         if (ba->nset == 1)
2734         {
2735             ba->lambda[0] = ba->native_lambda;
2736         }
2737         else
2738         {
2739             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2740         }
2741     }
2742     else
2743     {
2744         for (i = 0; i < ba->nset; )
2745         {
2746             /* Read lambda from the legend */
2747             lambda_vec_init( &(ba->lambda[i]), lc );
2748             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2749             gmx_bool use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2750             if (use)
2751             {
2752                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2753                 i++;
2754             }
2755             else
2756             {
2757                 int j;
2758                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2759                 for (j = i+1; j < ba->nset; j++)
2760                 {
2761                     ba->y[j-1]  = ba->y[j];
2762                     legend[j-1] = legend[j];
2763                 }
2764                 ba->nset--;
2765             }
2766         }
2767     }
2768
2769     if (!native_lambda_read)
2770     {
2771         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2772     }
2773
2774     if (legend != nullptr)
2775     {
2776         for (i = 0; i < ba->nset-1; i++)
2777         {
2778             sfree(legend[i]);
2779         }
2780         sfree(legend);
2781     }
2782 }
2783
2784 static void read_bar_xvg(const char *fn, real *temp, sim_data_t *sd)
2785 {
2786     xvg_t     *barsim;
2787     samples_t *s;
2788     int        i;
2789
2790     snew(barsim, 1);
2791
2792     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2793
2794     if (barsim->nset < 1)
2795     {
2796         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2797     }
2798
2799     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2800     {
2801         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2802     }
2803     *temp = barsim->temp;
2804
2805     /* now create a series of samples_t */
2806     snew(s, barsim->nset);
2807     for (i = 0; i < barsim->nset; i++)
2808     {
2809         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2810                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2811                                                    &(barsim->lambda[i])),
2812                      fn);
2813         s[i].du  = barsim->y[i];
2814         s[i].ndu = barsim->np[i];
2815         s[i].t   = barsim->t;
2816
2817         lambda_data_list_insert_sample(sd->lb, s+i);
2818     }
2819     {
2820         char buf[STRLEN];
2821
2822         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2823         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2824                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2825         for (i = 0; i < barsim->nset; i++)
2826         {
2827             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2828             printf("        %s (%d pts)\n", buf, s[i].ndu);
2829         }
2830     }
2831     printf("\n\n");
2832 }
2833
2834 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2835                                  double start_time, double delta_time,
2836                                  lambda_vec_t *native_lambda, double temp,
2837                                  double *last_t, const char *filename)
2838 {
2839     int           i, j;
2840     lambda_vec_t *foreign_lambda;
2841     int           type;
2842     samples_t    *s; /* convenience pointer */
2843     int           startj;
2844
2845     /* check the block types etc. */
2846     if ( (blk->nsub < 3) ||
2847          (blk->sub[0].type != xdr_datatype_int) ||
2848          (blk->sub[1].type != xdr_datatype_double) ||
2849          (
2850              (blk->sub[2].type != xdr_datatype_float) &&
2851              (blk->sub[2].type != xdr_datatype_double)
2852          ) ||
2853          (blk->sub[0].nr < 1) ||
2854          (blk->sub[1].nr < 1) )
2855     {
2856         gmx_fatal(FARGS,
2857                   "Unexpected/corrupted block data in file %s around time %f.",
2858                   filename, start_time);
2859     }
2860
2861     snew(foreign_lambda, 1);
2862     lambda_vec_init(foreign_lambda, native_lambda->lc);
2863     lambda_vec_copy(foreign_lambda, native_lambda);
2864     type = blk->sub[0].ival[0];
2865     if (type == dhbtDH)
2866     {
2867         for (i = 0; i < native_lambda->lc->N; i++)
2868         {
2869             foreign_lambda->val[i] = blk->sub[1].dval[i];
2870         }
2871     }
2872     else
2873     {
2874         if (blk->sub[0].nr > 1)
2875         {
2876             foreign_lambda->dhdl = blk->sub[0].ival[1];
2877         }
2878         else
2879         {
2880             foreign_lambda->dhdl = 0;
2881         }
2882     }
2883
2884     if (!*smp)
2885     {
2886         /* initialize the samples structure if it's empty. */
2887         snew(*smp, 1);
2888         samples_init(*smp, native_lambda, foreign_lambda, temp,
2889                      type == dhbtDHDL, filename);
2890         (*smp)->start_time = start_time;
2891         (*smp)->delta_time = delta_time;
2892     }
2893
2894     /* set convenience pointer */
2895     s = *smp;
2896
2897     /* now double check */
2898     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2899     {
2900         char buf[STRLEN], buf2[STRLEN];
2901         lambda_vec_print(foreign_lambda, buf, FALSE);
2902         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2903         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2904         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2905                   filename, start_time);
2906     }
2907
2908     /* make room for the data */
2909     if (gmx::index(s->ndu_alloc) < s->ndu + blk->sub[2].nr)
2910     {
2911         s->ndu_alloc += (s->ndu_alloc < static_cast<size_t>(blk->sub[2].nr)) ?
2912             blk->sub[2].nr*2 : s->ndu_alloc;
2913         srenew(s->du_alloc, s->ndu_alloc);
2914         s->du = s->du_alloc;
2915     }
2916     startj   = s->ndu;
2917     s->ndu  += blk->sub[2].nr;
2918     s->ntot += blk->sub[2].nr;
2919     *ndu     = blk->sub[2].nr;
2920
2921     /* and copy the data*/
2922     for (j = 0; j < blk->sub[2].nr; j++)
2923     {
2924         if (blk->sub[2].type == xdr_datatype_float)
2925         {
2926             s->du[startj+j] = blk->sub[2].fval[j];
2927         }
2928         else
2929         {
2930             s->du[startj+j] = blk->sub[2].dval[j];
2931         }
2932     }
2933     if (start_time + blk->sub[2].nr*delta_time > *last_t)
2934     {
2935         *last_t = start_time + blk->sub[2].nr*delta_time;
2936     }
2937 }
2938
2939 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2940                                       double start_time, double delta_time,
2941                                       lambda_vec_t *native_lambda, double temp,
2942                                       double *last_t, const char *filename)
2943 {
2944     int           i, j;
2945     samples_t    *s;
2946     int           nhist;
2947     lambda_vec_t *foreign_lambda;
2948     int           type;
2949     int           nbins[2];
2950
2951     /* check the block types etc. */
2952     if ( (blk->nsub < 2) ||
2953          (blk->sub[0].type != xdr_datatype_double) ||
2954          (blk->sub[1].type != xdr_datatype_int64) ||
2955          (blk->sub[0].nr < 2)  ||
2956          (blk->sub[1].nr < 2) )
2957     {
2958         gmx_fatal(FARGS,
2959                   "Unexpected/corrupted block data in file %s around time %f",
2960                   filename, start_time);
2961     }
2962
2963     nhist = blk->nsub-2;
2964     if (nhist == 0)
2965     {
2966         return nullptr;
2967     }
2968     if (nhist > 2)
2969     {
2970         gmx_fatal(FARGS,
2971                   "Unexpected/corrupted block data in file %s around time %f",
2972                   filename, start_time);
2973     }
2974
2975     snew(s, 1);
2976     *nsamples = 1;
2977
2978     snew(foreign_lambda, 1);
2979     lambda_vec_init(foreign_lambda, native_lambda->lc);
2980     lambda_vec_copy(foreign_lambda, native_lambda);
2981     type = static_cast<int>(blk->sub[1].lval[1]);
2982     if (type == dhbtDH)
2983     {
2984         double old_foreign_lambda;
2985
2986         old_foreign_lambda = blk->sub[0].dval[0];
2987         if (old_foreign_lambda >= 0)
2988         {
2989             foreign_lambda->val[0] = old_foreign_lambda;
2990             if (foreign_lambda->lc->N > 1)
2991             {
2992                 gmx_fatal(FARGS,
2993                           "Single-component lambda in multi-component file %s",
2994                           filename);
2995             }
2996         }
2997         else
2998         {
2999             for (i = 0; i < native_lambda->lc->N; i++)
3000             {
3001                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3002             }
3003         }
3004     }
3005     else
3006     {
3007         if (foreign_lambda->lc->N > 1)
3008         {
3009             if (blk->sub[1].nr < 3 + nhist)
3010             {
3011                 gmx_fatal(FARGS,
3012                           "Missing derivative coord in multi-component file %s",
3013                           filename);
3014             }
3015             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3016         }
3017         else
3018         {
3019             foreign_lambda->dhdl = 0;
3020         }
3021     }
3022
3023     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3024                  filename);
3025     snew(s->hist, 1);
3026
3027     for (i = 0; i < nhist; i++)
3028     {
3029         nbins[i] = blk->sub[i+2].nr;
3030     }
3031
3032     hist_init(s->hist, nhist, nbins);
3033
3034     for (i = 0; i < nhist; i++)
3035     {
3036         s->hist->x0[i] = blk->sub[1].lval[2+i];
3037         s->hist->dx[i] = blk->sub[0].dval[1];
3038         if (i == 1)
3039         {
3040             s->hist->dx[i] = -s->hist->dx[i];
3041         }
3042     }
3043
3044     s->hist->start_time = start_time;
3045     s->hist->delta_time = delta_time;
3046     s->start_time       = start_time;
3047     s->delta_time       = delta_time;
3048
3049     for (i = 0; i < nhist; i++)
3050     {
3051         int64_t     sum = 0;
3052
3053         for (j = 0; j < s->hist->nbin[i]; j++)
3054         {
3055             int binv = static_cast<int>(blk->sub[i+2].ival[j]);
3056
3057             s->hist->bin[i][j] = binv;
3058             sum               += binv;
3059
3060         }
3061         if (i == 0)
3062         {
3063             s->ntot      = sum;
3064             s->hist->sum = sum;
3065         }
3066         else
3067         {
3068             if (s->ntot != sum)
3069             {
3070                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3071                           filename);
3072             }
3073         }
3074     }
3075
3076     if (start_time + s->hist->sum*delta_time > *last_t)
3077     {
3078         *last_t = start_time + s->hist->sum*delta_time;
3079     }
3080     return s;
3081 }
3082
3083
3084 static void read_barsim_edr(const char *fn, real *temp, sim_data_t *sd)
3085 {
3086     int            i, j;
3087     ener_file_t    fp;
3088     t_enxframe    *fr;
3089     int            nre;
3090     gmx_enxnm_t   *enm           = nullptr;
3091     double         first_t       = -1;
3092     double         last_t        = -1;
3093     samples_t    **samples_rawdh = nullptr; /* contains samples for raw delta_h  */
3094     int           *nhists        = nullptr; /* array to keep count & print at end */
3095     int           *npts          = nullptr; /* array to keep count & print at end */
3096     lambda_vec_t **lambdas       = nullptr; /* array to keep count & print at end */
3097     lambda_vec_t  *native_lambda;
3098     int            nsamples = 0;
3099     lambda_vec_t   start_lambda;
3100
3101     fp = open_enx(fn, "r");
3102     do_enxnms(fp, &nre, &enm);
3103     snew(fr, 1);
3104
3105     snew(native_lambda, 1);
3106     start_lambda.lc  = nullptr;
3107     start_lambda.val = nullptr;
3108
3109     while (do_enx(fp, fr))
3110     {
3111         /* count the data blocks */
3112         int    nblocks_raw  = 0;
3113         int    nblocks_hist = 0;
3114         int    nlam         = 0;
3115         int    k;
3116         /* DHCOLL block information: */
3117         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3118         double rtemp      = 0;
3119
3120         /* count the blocks and handle collection information: */
3121         for (i = 0; i < fr->nblock; i++)
3122         {
3123             if (fr->block[i].id == enxDHHIST)
3124             {
3125                 nblocks_hist++;
3126             }
3127             if (fr->block[i].id == enxDH)
3128             {
3129                 nblocks_raw++;
3130             }
3131             if (fr->block[i].id == enxDHCOLL)
3132             {
3133                 nlam++;
3134                 if ( (fr->block[i].nsub < 1) ||
3135                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3136                      (fr->block[i].sub[0].nr < 5))
3137                 {
3138                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3139                 }
3140
3141                 /* read the data from the DHCOLL block */
3142                 rtemp            = fr->block[i].sub[0].dval[0];
3143                 start_time       = fr->block[i].sub[0].dval[1];
3144                 delta_time       = fr->block[i].sub[0].dval[2];
3145                 old_start_lambda = fr->block[i].sub[0].dval[3];
3146                 delta_lambda     = fr->block[i].sub[0].dval[4];
3147
3148                 if (delta_lambda > 0)
3149                 {
3150                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3151                 }
3152                 if ( ( *temp != rtemp) && (*temp > 0) )
3153                 {
3154                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3155                 }
3156                 *temp = rtemp;
3157
3158                 if (old_start_lambda >= 0)
3159                 {
3160                     if (sd->lc.N > 0)
3161                     {
3162                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3163                         {
3164                             gmx_fatal(FARGS,
3165                                       "lambda vector components in %s don't match those previously read",
3166                                       fn);
3167                         }
3168                     }
3169                     else
3170                     {
3171                         lambda_components_add(&(sd->lc), "", 0);
3172                     }
3173                     if (!start_lambda.lc)
3174                     {
3175                         lambda_vec_init(&start_lambda, &(sd->lc));
3176                     }
3177                     start_lambda.val[0] = old_start_lambda;
3178                 }
3179                 else
3180                 {
3181                     /* read lambda vector */
3182                     int      n_lambda_vec;
3183                     gmx_bool check = (sd->lc.N > 0);
3184                     if (fr->block[i].nsub < 2)
3185                     {
3186                         gmx_fatal(FARGS,
3187                                   "No lambda vector, but start_lambda=%f\n",
3188                                   old_start_lambda);
3189                     }
3190                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3191                     for (j = 0; j < n_lambda_vec; j++)
3192                     {
3193                         const char *name =
3194                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3195                         if (check)
3196                         {
3197                             /* check the components */
3198                             lambda_components_check(&(sd->lc), j, name,
3199                                                     std::strlen(name));
3200                         }
3201                         else
3202                         {
3203                             lambda_components_add(&(sd->lc), name,
3204                                                   std::strlen(name));
3205                         }
3206                     }
3207                     lambda_vec_init(&start_lambda, &(sd->lc));
3208                     start_lambda.index = fr->block[i].sub[1].ival[0];
3209                     for (j = 0; j < n_lambda_vec; j++)
3210                     {
3211                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3212                     }
3213                 }
3214                 if (first_t < 0)
3215                 {
3216                     first_t = start_time;
3217                 }
3218             }
3219         }
3220
3221         if (nlam != 1)
3222         {
3223             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3224         }
3225         if (nblocks_raw > 0 && nblocks_hist > 0)
3226         {
3227             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3228         }
3229
3230         if (nsamples == 0)
3231         {
3232             /* this is the first round; allocate the associated data
3233                structures */
3234             /*native_lambda=start_lambda;*/
3235             lambda_vec_init(native_lambda, &(sd->lc));
3236             lambda_vec_copy(native_lambda, &start_lambda);
3237             nsamples = nblocks_raw+nblocks_hist;
3238             snew(nhists, nsamples);
3239             snew(npts, nsamples);
3240             snew(lambdas, nsamples);
3241             snew(samples_rawdh, nsamples);
3242             for (i = 0; i < nsamples; i++)
3243             {
3244                 nhists[i]        = 0;
3245                 npts[i]          = 0;
3246                 lambdas[i]       = nullptr;
3247                 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3248                                                ones contain values */
3249             }
3250         }
3251         else
3252         {
3253             // nsamples > 0 means this is NOT the first iteration
3254
3255             /* check the native lambda */
3256             if (!lambda_vec_same(&start_lambda, native_lambda) )
3257             {
3258                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3259                           fn, native_lambda->val[0], start_lambda.val[0], start_time);
3260             }
3261             /* check the number of samples against the previous number */
3262             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3263             {
3264                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3265                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3266             }
3267             /* check whether last iterations's end time matches with
3268                the currrent start time */
3269             if ( (std::abs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3270             {
3271                 /* it didn't. We need to store our samples and reallocate */
3272                 for (i = 0; i < nsamples; i++)
3273                 {
3274                     if (samples_rawdh[i])
3275                     {
3276                         /* insert it into the existing list */
3277                         lambda_data_list_insert_sample(sd->lb,
3278                                                        samples_rawdh[i]);
3279                         /* and make sure we'll allocate a new one this time
3280                            around */
3281                         samples_rawdh[i] = nullptr;
3282                     }
3283                 }
3284             }
3285         }
3286
3287         /* and read them */
3288         k = 0; /* counter for the lambdas, etc. arrays */
3289         for (i = 0; i < fr->nblock; i++)
3290         {
3291             if (fr->block[i].id == enxDH)
3292             {
3293                 int type = (fr->block[i].sub[0].ival[0]);
3294                 if (type == dhbtDH || type == dhbtDHDL)
3295                 {
3296                     int ndu;
3297                     read_edr_rawdh_block(&(samples_rawdh[k]),
3298                                          &ndu,
3299                                          &(fr->block[i]),
3300                                          start_time, delta_time,
3301                                          native_lambda, rtemp,
3302                                          &last_t, fn);
3303                     npts[k] += ndu;
3304                     if (samples_rawdh[k])
3305                     {
3306                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3307                     }
3308                     k++;
3309                 }
3310             }
3311             else if (fr->block[i].id == enxDHHIST)
3312             {
3313                 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3314                 if (type == dhbtDH || type == dhbtDHDL)
3315                 {
3316                     int        j;
3317                     int        nb = 0;
3318                     samples_t *s; /* this is where the data will go */
3319                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3320                                             start_time, delta_time,
3321                                             native_lambda, rtemp,
3322                                             &last_t, fn);
3323                     nhists[k] += nb;
3324                     if (nb > 0)
3325                     {
3326                         lambdas[k] = s->foreign_lambda;
3327                     }
3328                     k++;
3329                     /* and insert the new sample immediately */
3330                     for (j = 0; j < nb; j++)
3331                     {
3332                         lambda_data_list_insert_sample(sd->lb, s+j);
3333                     }
3334                 }
3335             }
3336         }
3337     }
3338     /* Now store all our extant sample collections */
3339     for (i = 0; i < nsamples; i++)
3340     {
3341         if (samples_rawdh[i])
3342         {
3343             /* insert it into the existing list */
3344             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3345         }
3346     }
3347
3348
3349     {
3350         char buf[STRLEN];
3351         printf("\n");
3352         lambda_vec_print(native_lambda, buf, FALSE);
3353         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3354                fn, first_t, last_t, buf);
3355         for (i = 0; i < nsamples; i++)
3356         {
3357             if (lambdas[i])
3358             {
3359                 lambda_vec_print(lambdas[i], buf, TRUE);
3360                 if (nhists[i] > 0)
3361                 {
3362                     printf("        %s (%d hists)\n", buf, nhists[i]);
3363                 }
3364                 else
3365                 {
3366                     printf("        %s (%d pts)\n", buf, npts[i]);
3367                 }
3368             }
3369         }
3370     }
3371     printf("\n\n");
3372     sfree(npts);
3373     sfree(nhists);
3374     sfree(lambdas);
3375 }
3376
3377
3378 int gmx_bar(int argc, char *argv[])
3379 {
3380     static const char *desc[] = {
3381         "[THISMODULE] calculates free energy difference estimates through ",
3382         "Bennett's acceptance ratio method (BAR). It also automatically",
3383         "adds series of individual free energies obtained with BAR into",
3384         "a combined free energy estimate.[PAR]",
3385
3386         "Every individual BAR free energy difference relies on two ",
3387         "simulations at different states: say state A and state B, as",
3388         "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3389         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3390         "average of the Hamiltonian difference of state B given state A and",
3391         "vice versa.",
3392         "The energy differences to the other state must be calculated",
3393         "explicitly during the simulation. This can be done with",
3394         "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3395
3396         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3397         "Two types of input files are supported:",
3398         "",
3399         " * Files with more than one [IT]y[it]-value. ",
3400         "   The files should have columns ",
3401         "   with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3402         "   The [GRK]lambda[grk] values are inferred ",
3403         "   from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3404         "   dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3405         "   legends of Delta H",
3406         " * Files with only one [IT]y[it]-value. Using the",
3407         "   [TT]-extp[tt] option for these files, it is assumed",
3408         "   that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3409         "   Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3410         "   The [GRK]lambda[grk] value of the simulation is inferred from the ",
3411         "   subtitle (if present), otherwise from a number in the subdirectory ",
3412         "   in the file name.",
3413         "",
3414
3415         "The [GRK]lambda[grk] of the simulation is parsed from ",
3416         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3417         "foreign [GRK]lambda[grk] values from the legend containing the ",
3418         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3419         "the legend line containing 'T ='.[PAR]",
3420
3421         "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3422         "These can contain either lists of energy differences (see the ",
3423         "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3424         "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3425         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3426         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3427
3428         "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3429         "the energy difference can also be extrapolated from the ",
3430         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3431         "option, which assumes that the system's Hamiltonian depends linearly",
3432         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3433
3434         "The free energy estimates are determined using BAR with bisection, ",
3435         "with the precision of the output set with [TT]-prec[tt]. ",
3436         "An error estimate taking into account time correlations ",
3437         "is made by splitting the data into blocks and determining ",
3438         "the free energy differences over those blocks and assuming ",
3439         "the blocks are independent. ",
3440         "The final error estimate is determined from the average variance ",
3441         "over 5 blocks. A range of block numbers for error estimation can ",
3442         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3443
3444         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3445         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3446         "samples. [BB]Note[bb] that when aggregating energy ",
3447         "differences/derivatives with different sampling intervals, this is ",
3448         "almost certainly not correct. Usually subsequent energies are ",
3449         "correlated and different time intervals mean different degrees ",
3450         "of correlation between samples.[PAR]",
3451
3452         "The results are split in two parts: the last part contains the final ",
3453         "results in kJ/mol, together with the error estimate for each part ",
3454         "and the total. The first part contains detailed free energy ",
3455         "difference estimates and phase space overlap measures in units of ",
3456         "kT (together with their computed error estimate). The printed ",
3457         "values are:",
3458         "",
3459         " * lam_A: the [GRK]lambda[grk] values for point A.",
3460         " * lam_B: the [GRK]lambda[grk] values for point B.",
3461         " *    DG: the free energy estimate.",
3462         " *   s_A: an estimate of the relative entropy of B in A.",
3463         " *   s_B: an estimate of the relative entropy of A in B.",
3464         " * stdev: an estimate expected per-sample standard deviation.",
3465         "",
3466
3467         "The relative entropy of both states in each other's ensemble can be ",
3468         "interpreted as a measure of phase space overlap: ",
3469         "the relative entropy s_A of the work samples of lambda_B in the ",
3470         "ensemble of lambda_A (and vice versa for s_B), is a ",
3471         "measure of the 'distance' between Boltzmann distributions of ",
3472         "the two states, that goes to zero for identical distributions. See ",
3473         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3474         "[PAR]",
3475         "The estimate of the expected per-sample standard deviation, as given ",
3476         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3477         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3478         "of the actual statistical error, because it assumes independent samples).[PAR]",
3479
3480         "To get a visual estimate of the phase space overlap, use the ",
3481         "[TT]-oh[tt] option to write series of histograms, together with the ",
3482         "[TT]-nbin[tt] option.[PAR]"
3483     };
3484     static real        begin    = 0, end = -1, temp = -1;
3485     int                nd       = 2, nbmin = 5, nbmax = 5;
3486     int                nbin     = 100;
3487     gmx_bool           use_dhdl = FALSE;
3488     t_pargs            pa[]     = {
3489         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3490         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3491         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3492         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3493         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3494         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3495         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3496         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3497     };
3498
3499     t_filenm           fnm[] = {
3500         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3501         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3502         { efXVG, "-o",  "bar",    ffOPTWR },
3503         { efXVG, "-oi", "barint", ffOPTWR },
3504         { efXVG, "-oh", "histogram", ffOPTWR }
3505     };
3506 #define NFILE asize(fnm)
3507
3508     int               f;
3509     int               nf = 0;    /* file counter */
3510     int               nfile_tot; /* total number of input files */
3511     sim_data_t        sim_data;  /* the simulation data */
3512     barres_t         *results;   /* the results */
3513     int               nresults;  /* number of results in results array */
3514
3515     double           *partsum;
3516     double            prec, dg_tot;
3517     FILE             *fpb, *fpi;
3518     char              dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3519     char              buf[STRLEN], buf2[STRLEN];
3520     char              ktformat[STRLEN], sktformat[STRLEN];
3521     char              kteformat[STRLEN], skteformat[STRLEN];
3522     gmx_output_env_t *oenv;
3523     double            kT;
3524     gmx_bool          result_OK = TRUE, bEE = TRUE;
3525
3526     gmx_bool          disc_err          = FALSE;
3527     double            sum_disc_err      = 0.; /* discretization error */
3528     gmx_bool          histrange_err     = FALSE;
3529     double            sum_histrange_err = 0.; /* histogram range error */
3530     double            stat_err          = 0.; /* statistical error */
3531
3532     if (!parse_common_args(&argc, argv,
3533                            PCA_CAN_VIEW,
3534                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3535     {
3536         return 0;
3537     }
3538
3539     gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3540     gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3541
3542     sim_data_init(&sim_data);
3543 #if 0
3544     /* make linked list */
3545     lb = &lambda_head;
3546     lambda_data_init(lb, 0, 0);
3547     lb->next = lb;
3548     lb->prev = lb;
3549 #endif
3550
3551
3552     nfile_tot = xvgFiles.size() + edrFiles.size();
3553
3554     if (nfile_tot == 0)
3555     {
3556         gmx_fatal(FARGS, "No input files!");
3557     }
3558
3559     if (nd < 0)
3560     {
3561         gmx_fatal(FARGS, "Can not have negative number of digits");
3562     }
3563     prec = std::pow(10.0, static_cast<double>(-nd));
3564
3565     snew(partsum, (nbmax+1)*(nbmax+1));
3566     nf = 0;
3567
3568     /* read in all files. First xvg files */
3569     for (const std::string &filenm : xvgFiles)
3570     {
3571         read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3572         nf++;
3573     }
3574     /* then .edr files */
3575     for (const std::string &filenm : edrFiles)
3576     {
3577         read_barsim_edr(filenm.c_str(), &temp, &sim_data);;
3578         nf++;
3579     }
3580
3581     /* fix the times to allow for equilibration */
3582     sim_data_impose_times(&sim_data, begin, end);
3583
3584     if (opt2bSet("-oh", NFILE, fnm))
3585     {
3586         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3587     }
3588
3589     /* assemble the output structures from the lambdas */
3590     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3591
3592     sum_disc_err = barres_list_max_disc_err(results, nresults);
3593
3594     if (nresults == 0)
3595     {
3596         printf("\nNo results to calculate.\n");
3597         return 0;
3598     }
3599
3600     if (sum_disc_err > prec)
3601     {
3602         prec = sum_disc_err;
3603         nd   = static_cast<int>(std::ceil(-std::log10(prec)));
3604         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3605     }
3606
3607
3608     /*sprintf(lamformat,"%%6.3f");*/
3609     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3610     /* the format strings of the results in kT */
3611     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3612     sprintf( sktformat, "%%%ds", 6+nd);
3613     /* the format strings of the errors in kT */
3614     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3615     sprintf( skteformat, "%%%ds", 4+nd);
3616     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3617     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3618
3619
3620
3621     fpb = nullptr;
3622     if (opt2bSet("-o", NFILE, fnm))
3623     {
3624         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3625         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3626                             "\\lambda", buf, exvggtXYDY, oenv);
3627     }
3628
3629     fpi = nullptr;
3630     if (opt2bSet("-oi", NFILE, fnm))
3631     {
3632         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3633         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3634                        "\\lambda", buf, oenv);
3635     }
3636
3637
3638
3639     if (nbmin > nbmax)
3640     {
3641         nbmin = nbmax;
3642     }
3643
3644     /* first calculate results */
3645     bEE      = TRUE;
3646     disc_err = FALSE;
3647     for (f = 0; f < nresults; f++)
3648     {
3649         /* Determine the free energy difference with a factor of 10
3650          * more accuracy than requested for printing.
3651          */
3652         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3653                  &bEE, partsum);
3654
3655         if (results[f].dg_disc_err > prec/10.)
3656         {
3657             disc_err = TRUE;
3658         }
3659         if (results[f].dg_histrange_err > prec/10.)
3660         {
3661             histrange_err = TRUE;
3662         }
3663     }
3664
3665     /* print results in kT */
3666     kT   = BOLTZ*temp;
3667
3668     printf("\nTemperature: %g K\n", temp);
3669
3670     printf("\nDetailed results in kT (see help for explanation):\n\n");
3671     printf("%6s ", " lam_A");
3672     printf("%6s ", " lam_B");
3673     printf(sktformat,  "DG ");
3674     if (bEE)
3675     {
3676         printf(skteformat, "+/- ");
3677     }
3678     if (disc_err)
3679     {
3680         printf(skteformat, "disc ");
3681     }
3682     if (histrange_err)
3683     {
3684         printf(skteformat, "range ");
3685     }
3686     printf(sktformat,  "s_A ");
3687     if (bEE)
3688     {
3689         printf(skteformat, "+/- " );
3690     }
3691     printf(sktformat,  "s_B ");
3692     if (bEE)
3693     {
3694         printf(skteformat, "+/- " );
3695     }
3696     printf(sktformat,  "stdev ");
3697     if (bEE)
3698     {
3699         printf(skteformat, "+/- ");
3700     }
3701     printf("\n");
3702     for (f = 0; f < nresults; f++)
3703     {
3704         lambda_vec_print_short(results[f].a->native_lambda, buf);
3705         printf("%s ", buf);
3706         lambda_vec_print_short(results[f].b->native_lambda, buf);
3707         printf("%s ", buf);
3708         printf(ktformat,  results[f].dg);
3709         printf(" ");
3710         if (bEE)
3711         {
3712             printf(kteformat, results[f].dg_err);
3713             printf(" ");
3714         }
3715         if (disc_err)
3716         {
3717             printf(kteformat, results[f].dg_disc_err);
3718             printf(" ");
3719         }
3720         if (histrange_err)
3721         {
3722             printf(kteformat, results[f].dg_histrange_err);
3723             printf(" ");
3724         }
3725         printf(ktformat,  results[f].sa);
3726         printf(" ");
3727         if (bEE)
3728         {
3729             printf(kteformat, results[f].sa_err);
3730             printf(" ");
3731         }
3732         printf(ktformat,  results[f].sb);
3733         printf(" ");
3734         if (bEE)
3735         {
3736             printf(kteformat, results[f].sb_err);
3737             printf(" ");
3738         }
3739         printf(ktformat,  results[f].dg_stddev);
3740         printf(" ");
3741         if (bEE)
3742         {
3743             printf(kteformat, results[f].dg_stddev_err);
3744         }
3745         printf("\n");
3746
3747         /* Check for negative relative entropy with a 95% certainty. */
3748         if (results[f].sa < -2*results[f].sa_err ||
3749             results[f].sb < -2*results[f].sb_err)
3750         {
3751             result_OK = FALSE;
3752         }
3753     }
3754
3755     if (!result_OK)
3756     {
3757         printf("\nWARNING: Some of these results violate the Second Law of "
3758                "Thermodynamics: \n"
3759                "         This is can be the result of severe undersampling, or "
3760                "(more likely)\n"
3761                "         there is something wrong with the simulations.\n");
3762     }
3763
3764
3765     /* final results in kJ/mol */
3766     printf("\n\nFinal results in kJ/mol:\n\n");
3767     dg_tot  = 0;
3768     for (f = 0; f < nresults; f++)
3769     {
3770
3771         if (fpi != nullptr)
3772         {
3773             lambda_vec_print_short(results[f].a->native_lambda, buf);
3774             fprintf(fpi, xvg2format, buf, dg_tot);
3775         }
3776
3777
3778         if (fpb != nullptr)
3779         {
3780             lambda_vec_print_intermediate(results[f].a->native_lambda,
3781                                           results[f].b->native_lambda,
3782                                           buf);
3783
3784             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3785         }
3786
3787         printf("point ");
3788         lambda_vec_print_short(results[f].a->native_lambda, buf);
3789         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3790         printf("%s - %s", buf, buf2);
3791         printf(",   DG ");
3792
3793         printf(dgformat, results[f].dg*kT);
3794         if (bEE)
3795         {
3796             printf(" +/- ");
3797             printf(dgformat, results[f].dg_err*kT);
3798         }
3799         if (histrange_err)
3800         {
3801             printf(" (max. range err. = ");
3802             printf(dgformat, results[f].dg_histrange_err*kT);
3803             printf(")");
3804             sum_histrange_err += results[f].dg_histrange_err*kT;
3805         }
3806
3807         printf("\n");
3808         dg_tot += results[f].dg;
3809     }
3810     printf("\n");
3811     printf("total ");
3812     lambda_vec_print_short(results[0].a->native_lambda, buf);
3813     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3814     printf("%s - %s", buf, buf2);
3815     printf(",   DG ");
3816
3817     printf(dgformat, dg_tot*kT);
3818     if (bEE)
3819     {
3820         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3821         printf(" +/- ");
3822         printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3823     }
3824     printf("\n");
3825     if (disc_err)
3826     {
3827         printf("\nmaximum discretization error = ");
3828         printf(dgformat, sum_disc_err);
3829         if (bEE && stat_err < sum_disc_err)
3830         {
3831             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3832         }
3833     }
3834     if (histrange_err)
3835     {
3836         printf("\nmaximum histogram range error = ");
3837         printf(dgformat, sum_histrange_err);
3838         if (bEE && stat_err < sum_histrange_err)
3839         {
3840             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3841         }
3842
3843     }
3844     printf("\n");
3845
3846
3847     if (fpi != nullptr)
3848     {
3849         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3850         fprintf(fpi, xvg2format, buf, dg_tot);
3851         xvgrclose(fpi);
3852     }
3853     if (fpb != nullptr)
3854     {
3855         xvgrclose(fpb);
3856     }
3857
3858     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3859     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3860
3861     return 0;
3862 }