Introduce SimulatorBuilder
[gromacs.git] / src / gromacs / awh / biasstate.cpp
blob7247a260eb316fa9301dc26d11a91ec1d351769e
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \internal \file
37 * \brief
38 * Implements the BiasState class.
40 * \author Viveca Lindahl
41 * \author Berk Hess <hess@kth.se>
42 * \ingroup module_awh
45 #include "gmxpre.h"
47 #include "biasstate.h"
49 #include <cassert>
50 #include <cmath>
51 #include <cstdio>
52 #include <cstdlib>
53 #include <cstring>
55 #include <algorithm>
57 #include "gromacs/fileio/gmxfio.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/gmxlib/network.h"
60 #include "gromacs/math/utilities.h"
61 #include "gromacs/mdrunutility/multisim.h"
62 #include "gromacs/mdtypes/awh_history.h"
63 #include "gromacs/mdtypes/awh_params.h"
64 #include "gromacs/mdtypes/commrec.h"
65 #include "gromacs/simd/simd.h"
66 #include "gromacs/simd/simd_math.h"
67 #include "gromacs/utility/arrayref.h"
68 #include "gromacs/utility/exceptions.h"
69 #include "gromacs/utility/gmxassert.h"
70 #include "gromacs/utility/smalloc.h"
71 #include "gromacs/utility/stringutil.h"
73 #include "grid.h"
74 #include "pointstate.h"
76 namespace gmx
79 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
81 GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
83 /* The PMF is just the negative of the log of the sampled PMF histogram.
84 * Points with zero target weight are ignored, they will mostly contain noise.
86 for (size_t i = 0; i < points_.size(); i++)
88 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
92 namespace
95 /*! \brief
96 * Sum an array over all simulations on the master rank of each simulation.
98 * \param[in,out] arrayRef The data to sum.
99 * \param[in] multiSimComm Struct for multi-simulation communication.
101 void sumOverSimulations(gmx::ArrayRef<int> arrayRef,
102 const gmx_multisim_t *multiSimComm)
104 gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
107 /*! \brief
108 * Sum an array over all simulations on the master rank of each simulation.
110 * \param[in,out] arrayRef The data to sum.
111 * \param[in] multiSimComm Struct for multi-simulation communication.
113 void sumOverSimulations(gmx::ArrayRef<double> arrayRef,
114 const gmx_multisim_t *multiSimComm)
116 gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
119 /*! \brief
120 * Sum an array over all simulations on all ranks of each simulation.
122 * This assumes the data is identical on all ranks within each simulation.
124 * \param[in,out] arrayRef The data to sum.
125 * \param[in] commRecord Struct for intra-simulation communication.
126 * \param[in] multiSimComm Struct for multi-simulation communication.
128 template<typename T>
129 void sumOverSimulations(gmx::ArrayRef<T> arrayRef,
130 const t_commrec *commRecord,
131 const gmx_multisim_t *multiSimComm)
133 if (MASTER(commRecord))
135 sumOverSimulations(arrayRef, multiSimComm);
137 if (commRecord->nnodes > 1)
139 gmx_bcast(arrayRef.size()*sizeof(T), arrayRef.data(), commRecord);
143 /*! \brief
144 * Sum PMF over multiple simulations, when requested.
146 * \param[in,out] pointState The state of the points in the bias.
147 * \param[in] numSharedUpdate The number of biases sharing the histogram.
148 * \param[in] commRecord Struct for intra-simulation communication.
149 * \param[in] multiSimComm Struct for multi-simulation communication.
151 void sumPmf(gmx::ArrayRef<PointState> pointState,
152 int numSharedUpdate,
153 const t_commrec *commRecord,
154 const gmx_multisim_t *multiSimComm)
156 if (numSharedUpdate == 1)
158 return;
160 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->nsim == 0, "numSharedUpdate should be a multiple of multiSimComm->nsim");
161 GMX_ASSERT(numSharedUpdate == multiSimComm->nsim, "Sharing within a simulation is not implemented (yet)");
163 std::vector<double> buffer(pointState.size());
165 /* Need to temporarily exponentiate the log weights to sum over simulations */
166 for (size_t i = 0; i < buffer.size(); i++)
168 buffer[i] = pointState[i].inTargetRegion() ? std::exp(-pointState[i].logPmfSum()) : 0;
171 sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
173 /* Take log again to get (non-normalized) PMF */
174 double normFac = 1.0/numSharedUpdate;
175 for (gmx::index i = 0; i < pointState.ssize(); i++)
177 if (pointState[i].inTargetRegion())
179 pointState[i].setLogPmfSum(-std::log(buffer[i]*normFac));
184 /*! \brief
185 * Find the minimum free energy value.
187 * \param[in] pointState The state of the points.
188 * \returns the minimum free energy value.
190 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
192 double fMin = GMX_FLOAT_MAX;
194 for (auto const &ps : pointState)
196 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
198 fMin = ps.freeEnergy();
202 return fMin;
205 /*! \brief
206 * Find and return the log of the probability weight of a point given a coordinate value.
208 * The unnormalized weight is given by
209 * w(point|value) = exp(bias(point) - U(value,point)),
210 * where U is a harmonic umbrella potential.
212 * \param[in] dimParams The bias dimensions parameters
213 * \param[in] points The point state.
214 * \param[in] grid The grid.
215 * \param[in] pointIndex Point to evaluate probability weight for.
216 * \param[in] pointBias Bias for the point (as a log weight).
217 * \param[in] value Coordinate value.
218 * \returns the log of the biased probability weight.
220 double biasedLogWeightFromPoint(const std::vector<DimParams> &dimParams,
221 const std::vector<PointState> &points,
222 const Grid &grid,
223 int pointIndex,
224 double pointBias,
225 const awh_dvec value)
227 double logWeight = detail::c_largeNegativeExponent;
229 /* Only points in the target reigon have non-zero weight */
230 if (points[pointIndex].inTargetRegion())
232 logWeight = pointBias;
234 /* Add potential for all parameter dimensions */
235 for (size_t d = 0; d < dimParams.size(); d++)
237 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
238 logWeight -= 0.5*dimParams[d].betak*dev*dev;
242 return logWeight;
245 } // namespace
247 void BiasState::calcConvolvedPmf(const std::vector<DimParams> &dimParams,
248 const Grid &grid,
249 std::vector<float> *convolvedPmf) const
251 size_t numPoints = grid.numPoints();
253 convolvedPmf->resize(numPoints);
255 /* Get the PMF to convolve. */
256 std::vector<float> pmf(numPoints);
257 getPmf(pmf);
259 for (size_t m = 0; m < numPoints; m++)
261 double freeEnergyWeights = 0;
262 const GridPoint &point = grid.point(m);
263 for (auto &neighbor : point.neighbor)
265 /* The negative PMF is a positive bias. */
266 double biasNeighbor = -pmf[neighbor];
268 /* Add the convolved PMF weights for the neighbors of this point.
269 Note that this function only adds point within the target > 0 region.
270 Sum weights, take the logarithm last to get the free energy. */
271 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid,
272 neighbor, biasNeighbor,
273 point.coordValue);
274 freeEnergyWeights += std::exp(logWeight);
277 GMX_RELEASE_ASSERT(freeEnergyWeights > 0, "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
278 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
282 namespace
285 /*! \brief
286 * Updates the target distribution for all points.
288 * The target distribution is always updated for all points
289 * at the same time.
291 * \param[in,out] pointState The state of all points.
292 * \param[in] params The bias parameters.
294 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState,
295 const BiasParams &params)
297 double freeEnergyCutoff = 0;
298 if (params.eTarget == eawhtargetCUTOFF)
300 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
303 double sumTarget = 0;
304 for (PointState &ps : pointState)
306 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
308 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
310 /* Normalize to 1 */
311 double invSum = 1.0/sumTarget;
312 for (PointState &ps : pointState)
314 ps.scaleTarget(invSum);
318 /*! \brief
319 * Puts together a string describing a grid point.
321 * \param[in] grid The grid.
322 * \param[in] point Grid point index.
323 * \returns a string for the point.
325 std::string gridPointValueString(const Grid &grid, int point)
327 std::string pointString;
329 pointString += "(";
331 for (int d = 0; d < grid.numDimensions(); d++)
333 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
334 if (d < grid.numDimensions() - 1)
336 pointString += ",";
338 else
340 pointString += ")";
344 return pointString;
347 } // namespace
349 int BiasState::warnForHistogramAnomalies(const Grid &grid,
350 int biasIndex,
351 double t,
352 FILE *fplog,
353 int maxNumWarnings) const
355 GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
356 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
358 /* Sum up the histograms and get their normalization */
359 double sumVisits = 0;
360 double sumWeights = 0;
361 for (auto &pointState : points_)
363 if (pointState.inTargetRegion())
365 sumVisits += pointState.numVisitsTot();
366 sumWeights += pointState.weightSumTot();
369 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
370 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
371 double invNormVisits = 1.0/sumVisits;
372 double invNormWeight = 1.0/sumWeights;
374 /* Check all points for warnings */
375 int numWarnings = 0;
376 size_t numPoints = grid.numPoints();
377 for (size_t m = 0; m < numPoints; m++)
379 /* Skip points close to boundary or non-target region */
380 const GridPoint &gridPoint = grid.point(m);
381 bool skipPoint = false;
382 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
384 int neighbor = gridPoint.neighbor[n];
385 skipPoint = !points_[neighbor].inTargetRegion();
386 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
388 const GridPoint &neighborPoint = grid.point(neighbor);
389 skipPoint =
390 neighborPoint.index[d] == 0 ||
391 neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
395 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
396 const double relativeWeight = points_[m].weightSumTot()*invNormWeight;
397 const double relativeVisits = points_[m].numVisitsTot()*invNormVisits;
398 if (!skipPoint &&
399 relativeVisits < relativeWeight*maxHistogramRatio)
401 std::string pointValueString = gridPointValueString(grid, m);
402 std::string warningMessage =
403 gmx::formatString("\nawh%d warning: "
404 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
405 "is less than a fraction %g of the reference distribution at that point. "
406 "If you are not certain about your settings you might want to increase your pull force constant or "
407 "modify your sampling region.\n",
408 biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
409 gmx::TextLineWrapper wrapper;
410 wrapper.settings().setLineLength(c_linewidth);
411 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
413 numWarnings++;
415 if (numWarnings >= maxNumWarnings)
417 break;
421 return numWarnings;
424 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams> &dimParams,
425 const Grid &grid,
426 int point,
427 gmx::ArrayRef<double> force) const
429 double potential = 0;
430 for (size_t d = 0; d < dimParams.size(); d++)
432 double deviation = getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
434 double k = dimParams[d].k;
436 /* Force from harmonic potential 0.5*k*dev^2 */
437 force[d] = -k*deviation;
438 potential += 0.5*k*deviation*deviation;
441 return potential;
444 void BiasState::calcConvolvedForce(const std::vector<DimParams> &dimParams,
445 const Grid &grid,
446 gmx::ArrayRef<const double> probWeightNeighbor,
447 gmx::ArrayRef<double> forceWorkBuffer,
448 gmx::ArrayRef<double> force) const
450 for (size_t d = 0; d < dimParams.size(); d++)
452 force[d] = 0;
455 /* Only neighboring points have non-negligible contribution. */
456 const std::vector<int> &neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
457 gmx::ArrayRef<double> forceFromNeighbor = forceWorkBuffer;
458 for (size_t n = 0; n < neighbor.size(); n++)
460 double weightNeighbor = probWeightNeighbor[n];
461 int indexNeighbor = neighbor[n];
463 /* Get the umbrella force from this point. The returned potential is ignored here. */
464 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor,
465 forceFromNeighbor);
467 /* Add the weighted umbrella force to the convolved force. */
468 for (size_t d = 0; d < dimParams.size(); d++)
470 force[d] += forceFromNeighbor[d]*weightNeighbor;
475 double BiasState::moveUmbrella(const std::vector<DimParams> &dimParams,
476 const Grid &grid,
477 gmx::ArrayRef<const double> probWeightNeighbor,
478 gmx::ArrayRef<double> biasForce,
479 int64_t step,
480 int64_t seed,
481 int indexSeed)
483 /* Generate and set a new coordinate reference value */
484 coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor, step, seed, indexSeed);
486 std::vector<double> newForce(dimParams.size());
487 double newPotential =
488 calcUmbrellaForceAndPotential(dimParams, grid, coordState_.umbrellaGridpoint(), newForce);
490 /* A modification of the reference value at time t will lead to a different
491 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
492 this means the force and velocity will change signs roughly as often.
493 To avoid any issues we take the average of the previous and new force
494 at steps when the reference value has been moved. E.g. if the ref. value
495 is set every step to (coord dvalue +/- delta) would give zero force.
497 for (gmx::index d = 0; d < biasForce.ssize(); d++)
499 /* Average of the current and new force */
500 biasForce[d] = 0.5*(biasForce[d] + newForce[d]);
503 return newPotential;
506 namespace
509 /*! \brief
510 * Sets the histogram rescaling factors needed to control the histogram size.
512 * For sake of robustness, the reference weight histogram can grow at a rate
513 * different from the actual sampling rate. Typically this happens for a limited
514 * initial time, alternatively growth is scaled down by a constant factor for all
515 * times. Since the size of the reference histogram sets the size of the free
516 * energy update this should be reflected also in the PMF. Thus the PMF histogram
517 * needs to be rescaled too.
519 * This function should only be called by the bias update function or wrapped by a function that
520 * knows what scale factors should be applied when, e.g,
521 * getSkippedUpdateHistogramScaleFactors().
523 * \param[in] params The bias parameters.
524 * \param[in] newHistogramSize New reference weight histogram size.
525 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
526 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
527 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
529 void setHistogramUpdateScaleFactors(const BiasParams &params,
530 double newHistogramSize,
531 double oldHistogramSize,
532 double *weightHistScaling,
533 double *logPmfSumScaling)
536 /* The two scaling factors below are slightly different (ignoring the log factor) because the
537 reference and the PMF histogram apply weight scaling differently. The weight histogram
538 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
539 It is done this way because that is what target type local Boltzmann (for which
540 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
541 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
542 1) empirically this is necessary for converging the PMF; 2) since the extraction of
543 the PMF is theoretically only valid for a constant bias, new samples should get more
544 weight than old ones for which the bias is fluctuating more. */
545 *weightHistScaling = newHistogramSize/(oldHistogramSize + params.updateWeight*params.localWeightScaling);
546 *logPmfSumScaling = std::log(newHistogramSize/(oldHistogramSize + params.updateWeight));
549 } // namespace
551 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams &params,
552 double *weightHistScaling,
553 double *logPmfSumScaling) const
555 GMX_ASSERT(params.skipUpdates(), "Calling function for skipped updates when skipping updates is not allowed");
557 if (inInitialStage())
559 /* In between global updates the reference histogram size is kept constant so we trivially know what the
560 histogram size was at the time of the skipped update. */
561 double histogramSize = histogramSize_.histogramSize();
562 setHistogramUpdateScaleFactors(params, histogramSize, histogramSize,
563 weightHistScaling, logPmfSumScaling);
565 else
567 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
568 *weightHistScaling = 1;
569 *logPmfSumScaling = 0;
573 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams &params)
575 double weightHistScaling;
576 double logPmfsumScaling;
578 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
580 for (auto &pointState : points_)
582 bool didUpdate = pointState.performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
584 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
585 if (didUpdate)
587 pointState.updateBias();
592 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams &params,
593 const Grid &grid)
595 double weightHistScaling;
596 double logPmfsumScaling;
598 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
600 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
601 const std::vector<int> &neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
602 for (auto &neighbor : neighbors)
604 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
606 if (didUpdate)
608 points_[neighbor].updateBias();
613 namespace
616 /*! \brief
617 * Merge update lists from multiple sharing simulations.
619 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
620 * \param[in] numPoints Total number of points.
621 * \param[in] commRecord Struct for intra-simulation communication.
622 * \param[in] multiSimComm Struct for multi-simulation communication.
624 void mergeSharedUpdateLists(std::vector<int> *updateList,
625 int numPoints,
626 const t_commrec *commRecord,
627 const gmx_multisim_t *multiSimComm)
629 std::vector<int> numUpdatesOfPoint;
631 /* Flag the update points of this sim.
632 TODO: we can probably avoid allocating this array and just use the input array. */
633 numUpdatesOfPoint.resize(numPoints, 0);
634 for (auto &pointIndex : *updateList)
636 numUpdatesOfPoint[pointIndex] = 1;
639 /* Sum over the sims to get all the flagged points */
640 sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
642 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
643 updateList->clear();
644 for (int m = 0; m < numPoints; m++)
646 if (numUpdatesOfPoint[m] > 0)
648 updateList->push_back(m);
653 /*! \brief
654 * Generate an update list of points sampled since the last update.
656 * \param[in] grid The AWH bias.
657 * \param[in] points The point state.
658 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since last update.
659 * \param[in] endUpdatelist The end of the rectangular that has been sampled since last update.
660 * \param[in,out] updateList Local update list to set (assumed >= npoints long).
662 void makeLocalUpdateList(const Grid &grid,
663 const std::vector<PointState> &points,
664 const awh_ivec originUpdatelist,
665 const awh_ivec endUpdatelist,
666 std::vector<int> *updateList)
668 awh_ivec origin;
669 awh_ivec numPoints;
671 /* Define the update search grid */
672 for (int d = 0; d < grid.numDimensions(); d++)
674 origin[d] = originUpdatelist[d];
675 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
677 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
678 This helps for calculating the distance/number of points but should be removed and fixed when the way of
679 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
680 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
683 /* Make the update list */
684 updateList->clear();
685 int pointIndex = -1;
686 bool pointExists = true;
687 while (pointExists)
689 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
691 if (pointExists && points[pointIndex].inTargetRegion())
693 updateList->push_back(pointIndex);
698 } // namespace
700 void BiasState::resetLocalUpdateRange(const Grid &grid)
702 const int gridpointIndex = coordState_.gridpointIndex();
703 for (int d = 0; d < grid.numDimensions(); d++)
705 /* This gives the minimum range consisting only of the current closest point. */
706 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
707 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
711 namespace
714 /*! \brief
715 * Add partial histograms (accumulating between updates) to accumulating histograms.
717 * \param[in,out] pointState The state of the points in the bias.
718 * \param[in,out] weightSumCovering The weights for checking covering.
719 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
720 * \param[in] commRecord Struct for intra-simulation communication.
721 * \param[in] multiSimComm Struct for multi-simulation communication.
722 * \param[in] localUpdateList List of points with data.
724 void sumHistograms(gmx::ArrayRef<PointState> pointState,
725 gmx::ArrayRef<double> weightSumCovering,
726 int numSharedUpdate,
727 const t_commrec *commRecord,
728 const gmx_multisim_t *multiSimComm,
729 const std::vector<int> &localUpdateList)
731 /* The covering checking histograms are added before summing over simulations, so that the weights from different
732 simulations are kept distinguishable. */
733 for (int globalIndex : localUpdateList)
735 weightSumCovering[globalIndex] +=
736 pointState[globalIndex].weightSumIteration();
739 /* Sum histograms over multiple simulations if needed. */
740 if (numSharedUpdate > 1)
742 GMX_ASSERT(numSharedUpdate == multiSimComm->nsim, "Sharing within a simulation is not implemented (yet)");
744 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
745 std::vector<double> weightSum;
746 std::vector<double> coordVisits;
748 weightSum.resize(localUpdateList.size());
749 coordVisits.resize(localUpdateList.size());
751 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
753 const PointState &ps = pointState[localUpdateList[localIndex]];
755 weightSum[localIndex] = ps.weightSumIteration();
756 coordVisits[localIndex] = ps.numVisitsIteration();
759 sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
760 sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
762 /* Transfer back the result */
763 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
765 PointState &ps = pointState[localUpdateList[localIndex]];
767 ps.setPartialWeightAndCount(weightSum[localIndex],
768 coordVisits[localIndex]);
772 /* Now add the partial counts and weights to the accumulating histograms.
773 Note: we still need to use the weights for the update so we wait
774 with resetting them until the end of the update. */
775 for (int globalIndex : localUpdateList)
777 pointState[globalIndex].addPartialWeightAndCount();
781 /*! \brief
782 * Label points along an axis as covered or not.
784 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
786 * \param[in] visited Visited? For each point.
787 * \param[in] checkCovering Check for covering? For each point.
788 * \param[in] numPoints The number of grid points along this dimension.
789 * \param[in] period Period in number of points.
790 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
791 * \param[in,out] covered In this array elements are 1 for covered points and 0 for non-covered points, this routine assumes that \p covered has at least size \p numPoints.
793 void labelCoveredPoints(const std::vector<bool> &visited,
794 const std::vector<bool> &checkCovering,
795 int numPoints,
796 int period,
797 int coverRadius,
798 gmx::ArrayRef<int> covered)
800 GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
802 bool haveFirstNotVisited = false;
803 int firstNotVisited = -1;
804 int notVisitedLow = -1;
805 int notVisitedHigh = -1;
807 for (int n = 0; n < numPoints; n++)
809 if (checkCovering[n] && !visited[n])
811 if (!haveFirstNotVisited)
813 notVisitedLow = n;
814 firstNotVisited = n;
815 haveFirstNotVisited = true;
817 else
819 notVisitedHigh = n;
821 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
822 by unvisited points. The unvisted end points affect the coveredness of the visited
823 with a reach equal to the cover radius. */
824 int notCoveredLow = notVisitedLow + coverRadius;
825 int notCoveredHigh = notVisitedHigh - coverRadius;
826 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
828 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
831 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval the
832 notVisitedLow of the next. */
833 notVisitedLow = notVisitedHigh;
838 /* Have labelled all the internal points. Now take care of the boundary regions. */
839 if (!haveFirstNotVisited)
841 /* No non-visited points <=> all points visited => all points covered. */
843 for (int n = 0; n < numPoints; n++)
845 covered[n] = 1;
848 else
850 int lastNotVisited = notVisitedLow;
852 /* For periodic boundaries, non-visited points can influence points
853 on the other side of the boundary so we need to wrap around. */
855 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
856 For non-periodic boundaries there is no lower end point so a dummy value is used. */
857 int notVisitedHigh = firstNotVisited;
858 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
860 int notCoveredLow = notVisitedLow + coverRadius;
861 int notCoveredHigh = notVisitedHigh - coverRadius;
863 for (int i = 0; i <= notVisitedHigh; i++)
865 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
866 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
869 /* Upper end. Same as for lower end but in the other direction. */
870 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
871 notVisitedLow = lastNotVisited;
873 notCoveredLow = notVisitedLow + coverRadius;
874 notCoveredHigh = notVisitedHigh - coverRadius;
876 for (int i = notVisitedLow; i <= numPoints - 1; i++)
878 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
879 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
884 } // namespace
886 bool BiasState::isSamplingRegionCovered(const BiasParams &params,
887 const std::vector<DimParams> &dimParams,
888 const Grid &grid,
889 const t_commrec *commRecord,
890 const gmx_multisim_t *multiSimComm) const
892 /* Allocate and initialize arrays: one for checking visits along each dimension,
893 one for keeping track of which points to check and one for the covered points.
894 Possibly these could be kept as AWH variables to avoid these allocations. */
895 struct CheckDim
897 std::vector<bool> visited;
898 std::vector<bool> checkCovering;
899 // We use int for the covering array since we might use gmx_sumi_sim.
900 std::vector<int> covered;
903 std::vector<CheckDim> checkDim;
904 checkDim.resize(grid.numDimensions());
906 for (int d = 0; d < grid.numDimensions(); d++)
908 const size_t numPoints = grid.axis(d).numPoints();
909 checkDim[d].visited.resize(numPoints, false);
910 checkDim[d].checkCovering.resize(numPoints, false);
911 checkDim[d].covered.resize(numPoints, 0);
914 /* Set visited points along each dimension and which points should be checked for covering.
915 Specifically, points above the free energy cutoff (if there is one) or points outside
916 of the target region are ignored. */
918 /* Set the free energy cutoff */
919 double maxFreeEnergy = GMX_FLOAT_MAX;
921 if (params.eTarget == eawhtargetCUTOFF)
923 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
926 /* Set the threshold weight for a point to be considered visited. */
927 double weightThreshold = 1;
928 for (int d = 0; d < grid.numDimensions(); d++)
930 weightThreshold *= grid.axis(d).spacing()*std::sqrt(dimParams[d].betak*0.5*M_1_PI);
933 /* Project the sampling weights onto each dimension */
934 for (size_t m = 0; m < grid.numPoints(); m++)
936 const PointState &pointState = points_[m];
938 for (int d = 0; d < grid.numDimensions(); d++)
940 int n = grid.point(m).index[d];
942 /* Is visited if it was already visited or if there is enough weight at the current point */
943 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
945 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
946 checkDim[d].checkCovering[n] = checkDim[d].checkCovering[n] || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
950 /* Label each point along each dimension as covered or not. */
951 for (int d = 0; d < grid.numDimensions(); d++)
953 labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(), grid.axis(d).numPointsInPeriod(),
954 params.coverRadius()[d], checkDim[d].covered);
957 /* Now check for global covering. Each dimension needs to be covered separately.
958 A dimension is covered if each point is covered. Multiple simulations collectively
959 cover the points, i.e. a point is covered if any of the simulations covered it.
960 However, visited points are not shared, i.e. if a point is covered or not is
961 determined by the visits of a single simulation. In general the covering criterion is
962 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
963 For 1 simulation, all points covered <=> all points visited. For multiple simulations
964 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
965 In the limit of large coverRadius, all points covered => all points visited by at least one
966 simulation (since no point will be covered until all points have been visited by a
967 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
968 needs with surrounding points before sharing covering information with other simulations. */
970 /* Communicate the covered points between sharing simulations if needed. */
971 if (params.numSharedUpdate > 1)
973 /* For multiple dimensions this may not be the best way to do it. */
974 for (int d = 0; d < grid.numDimensions(); d++)
976 sumOverSimulations(gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()), commRecord, multiSimComm);
980 /* Now check if for each dimension all points are covered. Break if not true. */
981 bool allPointsCovered = true;
982 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
984 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
986 allPointsCovered = (checkDim[d].covered[n] != 0);
990 return allPointsCovered;
993 /*! \brief
994 * Normalizes the free energy and PMF sum.
996 * \param[in] pointState The state of the points.
998 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState> *pointState)
1000 double minF = freeEnergyMinimumValue(*pointState);
1002 for (PointState &ps : *pointState)
1004 ps.normalizeFreeEnergyAndPmfSum(minF);
1008 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams> &dimParams,
1009 const Grid &grid,
1010 const BiasParams &params,
1011 const t_commrec *commRecord,
1012 const gmx_multisim_t *multiSimComm,
1013 double t,
1014 int64_t step,
1015 FILE *fplog,
1016 std::vector<int> *updateList)
1018 /* Note hat updateList is only used in this scope and is always
1019 * re-initialized. We do not use a local vector, because that would
1020 * cause reallocation every time this funtion is called and the vector
1021 * can be the size of the whole grid.
1024 /* Make a list of all local points, i.e. those that could have been touched since
1025 the last update. These are the points needed for summing histograms below
1026 (non-local points only add zeros). For local updates, this will also be the
1027 final update list. */
1028 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_,
1029 updateList);
1030 if (params.numSharedUpdate > 1)
1032 mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1035 /* Reset the range for the next update */
1036 resetLocalUpdateRange(grid);
1038 /* Add samples to histograms for all local points and sync simulations if needed */
1039 sumHistograms(points_, weightSumCovering_,
1040 params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1042 sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1044 /* Renormalize the free energy if values are too large. */
1045 bool needToNormalizeFreeEnergy = false;
1046 for (int &globalIndex : *updateList)
1048 /* We want to keep the absolute value of the free energies to be less c_largePositiveExponent
1049 to be able to safely pass these values to exp(). The check below ensures this as long as
1050 the free energy values grow less than 0.5*c_largePositiveExponent in a return time to this
1051 neighborhood. For reasonable update sizes it's unlikely that this requirement would be
1052 broken. */
1053 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5*detail::c_largePositiveExponent)
1055 needToNormalizeFreeEnergy = true;
1056 break;
1060 /* Update target distribution? */
1061 bool needToUpdateTargetDistribution =
1062 (params.eTarget != eawhtargetCONSTANT &&
1063 params.isUpdateTargetStep(step));
1065 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1066 bool detectedCovering = false;
1067 if (inInitialStage())
1069 detectedCovering = (params.isCheckCoveringStep(step) &&
1070 isSamplingRegionCovered(params, dimParams, grid,
1071 commRecord, multiSimComm));
1074 /* The weighthistogram size after this update. */
1075 double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_, weightSumCovering_, fplog);
1077 /* Make the update list. Usually we try to only update local points,
1078 * but if the update has non-trivial or non-deterministic effects
1079 * on non-local points a global update is needed. This is the case when:
1080 * 1) a covering occurred in the initial stage, leading to non-trivial
1081 * histogram rescaling factors; or
1082 * 2) the target distribution will be updated, since we don't make any
1083 * assumption on its form; or
1084 * 3) the AWH parameters are such that we never attempt to skip non-local
1085 * updates; or
1086 * 4) the free energy values have grown so large that a renormalization
1087 * is needed.
1089 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1091 /* Global update, just add all points. */
1092 updateList->clear();
1093 for (size_t m = 0; m < points_.size(); m++)
1095 if (points_[m].inTargetRegion())
1097 updateList->push_back(m);
1102 /* Set histogram scale factors. */
1103 double weightHistScalingSkipped = 0;
1104 double logPmfsumScalingSkipped = 0;
1105 if (params.skipUpdates())
1107 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1109 double weightHistScalingNew;
1110 double logPmfsumScalingNew;
1111 setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1112 &weightHistScalingNew, &logPmfsumScalingNew);
1114 /* Update free energy and reference weight histogram for points in the update list. */
1115 for (int pointIndex : *updateList)
1117 PointState *pointStateToUpdate = &points_[pointIndex];
1119 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1120 if (params.skipUpdates())
1122 pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScalingSkipped, logPmfsumScalingSkipped);
1125 /* Now do an update with new sampling data. */
1126 pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(), weightHistScalingNew, logPmfsumScalingNew);
1129 /* Only update the histogram size after we are done with the local point updates */
1130 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1132 if (needToNormalizeFreeEnergy)
1134 normalizeFreeEnergyAndPmfSum(&points_);
1137 if (needToUpdateTargetDistribution)
1139 /* The target distribution is always updated for all points at once. */
1140 updateTargetDistribution(points_, params);
1143 /* Update the bias. The bias is updated separately and last since it simply a function of
1144 the free energy and the target distribution and we want to avoid doing extra work. */
1145 for (int pointIndex : *updateList)
1147 points_[pointIndex].updateBias();
1150 /* Increase the update counter. */
1151 histogramSize_.incrementNumUpdates();
1154 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams> &dimParams,
1155 const Grid &grid,
1156 std::vector < double, AlignedAllocator < double>> *weight) const
1158 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1159 const std::vector<int> &neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1161 #if GMX_SIMD_HAVE_DOUBLE
1162 typedef SimdDouble PackType;
1163 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1164 #else
1165 typedef double PackType;
1166 constexpr int packSize = 1;
1167 #endif
1168 /* Round the size of the weight array up to packSize */
1169 const int weightSize = ((neighbors.size() + packSize - 1)/packSize)*packSize;
1170 weight->resize(weightSize);
1172 double * gmx_restrict weightData = weight->data();
1173 PackType weightSumPack(0.0);
1174 for (size_t i = 0; i < neighbors.size(); i += packSize)
1176 for (size_t n = i; n < i + packSize; n++)
1178 if (n < neighbors.size())
1180 const int neighbor = neighbors[n];
1181 (*weight)[n] = biasedLogWeightFromPoint(dimParams, points_, grid,
1182 neighbor, points_[neighbor].bias(),
1183 coordState_.coordValue());
1185 else
1187 /* Pad with values that don't affect the result */
1188 (*weight)[n] = detail::c_largeNegativeExponent;
1191 PackType weightPack = load<PackType>(weightData + i);
1192 weightPack = gmx::exp(weightPack);
1193 weightSumPack = weightSumPack + weightPack;
1194 store(weightData + i, weightPack);
1196 /* Sum of probability weights */
1197 double weightSum = reduce(weightSumPack);
1198 GMX_RELEASE_ASSERT(weightSum > 0, "zero probability weight when updating AWH probability weights.");
1200 /* Normalize probabilities to sum to 1 */
1201 double invWeightSum = 1/weightSum;
1202 for (double &w : *weight)
1204 w *= invWeightSum;
1207 /* Return the convolved bias */
1208 return std::log(weightSum);
1211 double BiasState::calcConvolvedBias(const std::vector<DimParams> &dimParams,
1212 const Grid &grid,
1213 const awh_dvec &coordValue) const
1215 int point = grid.nearestIndex(coordValue);
1216 const GridPoint &gridPoint = grid.point(point);
1218 /* Sum the probability weights from the neighborhood of the given point */
1219 double weightSum = 0;
1220 for (int neighbor : gridPoint.neighbor)
1222 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid,
1223 neighbor, points_[neighbor].bias(),
1224 coordValue);
1225 weightSum += std::exp(logWeight);
1228 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1229 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1232 void BiasState::sampleProbabilityWeights(const Grid &grid,
1233 gmx::ArrayRef<const double> probWeightNeighbor)
1235 const std::vector<int> &neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1237 /* Save weights for next update */
1238 for (size_t n = 0; n < neighbor.size(); n++)
1240 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1243 /* Update the local update range. Two corner points define this rectangular
1244 * domain. We need to choose two new corner points such that the new domain
1245 * contains both the old update range and the current neighborhood.
1246 * In the simplest case when an update is performed every sample,
1247 * the update range would simply equal the current neighborhood.
1249 int neighborStart = neighbor[0];
1250 int neighborLast = neighbor[neighbor.size() - 1];
1251 for (int d = 0; d < grid.numDimensions(); d++)
1253 int origin = grid.point(neighborStart).index[d];
1254 int last = grid.point(neighborLast).index[d];
1256 if (origin > last)
1258 /* Unwrap if wrapped around the boundary (only happens for periodic
1259 * boundaries). This has been already for the stored index interval.
1261 /* TODO: what we want to do is to find the smallest the update
1262 * interval that contains all points that need to be updated.
1263 * This amounts to combining two intervals, the current
1264 * [origin, end] update interval and the new touched neighborhood
1265 * into a new interval that contains all points from both the old
1266 * intervals.
1268 * For periodic boundaries it becomes slightly more complicated
1269 * than for closed boundaries because then it needs not be
1270 * true that origin < end (so one can't simply relate the origin/end
1271 * in the min()/max() below). The strategy here is to choose the
1272 * origin closest to a reference point (index 0) and then unwrap
1273 * the end index if needed and choose the largest end index.
1274 * This ensures that both intervals are in the new interval
1275 * but it's not necessarily the smallest.
1276 * Currently we solve this by going through each possibility
1277 * and checking them.
1279 last += grid.axis(d).numPointsInPeriod();
1282 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1283 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1287 void BiasState::sampleCoordAndPmf(const Grid &grid,
1288 gmx::ArrayRef<const double> probWeightNeighbor,
1289 double convolvedBias)
1291 /* Sampling-based deconvolution extracting the PMF.
1292 * Update the PMF histogram with the current coordinate value.
1294 * Because of the finite width of the harmonic potential, the free energy
1295 * defined for each coordinate point does not exactly equal that of the
1296 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1297 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1298 * reweighted histogram of the coordinate value. Strictly, this relies on
1299 * the unknown normalization constant Z being either constant or known. Here,
1300 * neither is true except in the long simulation time limit. Empirically however,
1301 * it works (mainly because how the PMF histogram is rescaled).
1304 /* Only save coordinate data that is in range (the given index is always
1305 * in range even if the coordinate value is not).
1307 if (grid.covers(coordState_.coordValue()))
1309 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1310 points_[coordState_.gridpointIndex()].samplePmf(convolvedBias);
1313 /* Save probability weights for the update */
1314 sampleProbabilityWeights(grid, probWeightNeighbor);
1317 void BiasState::initHistoryFromState(AwhBiasHistory *biasHistory) const
1319 biasHistory->pointState.resize(points_.size());
1322 void BiasState::updateHistory(AwhBiasHistory *biasHistory,
1323 const Grid &grid) const
1325 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(), "The AWH history setup does not match the AWH state.");
1327 AwhBiasStateHistory *stateHistory = &biasHistory->state;
1328 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1330 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1332 AwhPointStateHistory *psh = &biasHistory->pointState[m];
1334 points_[m].storeState(psh);
1336 psh->weightsum_covering = weightSumCovering_[m];
1339 histogramSize_.storeState(stateHistory);
1341 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid,
1342 originUpdatelist_);
1343 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid,
1344 endUpdatelist_);
1347 void BiasState::restoreFromHistory(const AwhBiasHistory &biasHistory,
1348 const Grid &grid)
1350 const AwhBiasStateHistory &stateHistory = biasHistory.state;
1352 coordState_.restoreFromHistory(stateHistory);
1354 if (biasHistory.pointState.size() != points_.size())
1356 GMX_THROW(InvalidInputError("Bias grid size in checkpoint and simulation do not match. Likely you provided a checkpoint from a different simulation."));
1358 for (size_t m = 0; m < points_.size(); m++)
1360 points_[m].setFromHistory(biasHistory.pointState[m]);
1363 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1365 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1368 histogramSize_.restoreFromHistory(stateHistory);
1370 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1371 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1374 void BiasState::broadcast(const t_commrec *commRecord)
1376 gmx_bcast(sizeof(coordState_), &coordState_, commRecord);
1378 gmx_bcast(points_.size()*sizeof(PointState), points_.data(), commRecord);
1380 gmx_bcast(weightSumCovering_.size()*sizeof(double), weightSumCovering_.data(), commRecord);
1382 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord);
1385 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams> &dimParams,
1386 const Grid &grid)
1388 std::vector<float> convolvedPmf;
1390 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1392 for (size_t m = 0; m < points_.size(); m++)
1394 points_[m].setFreeEnergy(convolvedPmf[m]);
1398 /*! \brief
1399 * Count trailing data rows containing only zeros.
1401 * \param[in] data 2D data array.
1402 * \param[in] numRows Number of rows in array.
1403 * \param[in] numColumns Number of cols in array.
1404 * \returns the number of trailing zero rows.
1406 static int countTrailingZeroRows(const double* const *data,
1407 int numRows,
1408 int numColumns)
1410 int numZeroRows = 0;
1411 for (int m = numRows - 1; m >= 0; m--)
1413 bool rowIsZero = true;
1414 for (int d = 0; d < numColumns; d++)
1416 if (data[d][m] != 0)
1418 rowIsZero = false;
1419 break;
1423 if (!rowIsZero)
1425 /* At a row with non-zero data */
1426 break;
1428 else
1430 /* Still at a zero data row, keep checking rows higher up. */
1431 numZeroRows++;
1435 return numZeroRows;
1438 /*! \brief
1439 * Initializes the PMF and target with data read from an input table.
1441 * \param[in] dimParams The dimension parameters.
1442 * \param[in] grid The grid.
1443 * \param[in] filename The filename to read PMF and target from.
1444 * \param[in] numBias Number of biases.
1445 * \param[in] biasIndex The index of the bias.
1446 * \param[in,out] pointState The state of the points in this bias.
1448 static void readUserPmfAndTargetDistribution(const std::vector<DimParams> &dimParams,
1449 const Grid &grid,
1450 const std::string &filename,
1451 int numBias,
1452 int biasIndex,
1453 std::vector<PointState> *pointState)
1455 /* Read the PMF and target distribution.
1456 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1457 base on the force constant. The free energy and target together determine the bias.
1459 std::string filenameModified(filename);
1460 if (numBias > 1)
1462 size_t n = filenameModified.rfind('.');
1463 GMX_RELEASE_ASSERT(n != std::string::npos, "The filename should contain an extension starting with .");
1464 filenameModified.insert(n, formatString("%d", biasIndex));
1467 std::string correctFormatMessage =
1468 formatString("%s is expected in the following format. "
1469 "The first ndim column(s) should contain the coordinate values for each point, "
1470 "each column containing values of one dimension (in ascending order). "
1471 "For a multidimensional coordinate, points should be listed "
1472 "in the order obtained by traversing lower dimensions first. "
1473 "E.g. for two-dimensional grid of size nxn: "
1474 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1475 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1476 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1477 "Make sure the input file ends with a new line but has no trailing new lines.",
1478 filename.c_str());
1479 gmx::TextLineWrapper wrapper;
1480 wrapper.settings().setLineLength(c_linewidth);
1481 correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1483 double **data;
1484 int numColumns;
1485 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1487 /* Check basic data properties here. Grid takes care of more complicated things. */
1489 if (numRows <= 0)
1491 std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(), correctFormatMessage.c_str());
1492 GMX_THROW(InvalidInputError(mesg));
1495 /* Less than 2 points is not useful for PMF or target. */
1496 if (numRows < 2)
1498 std::string mesg =
1499 gmx::formatString("%s contains too few data points (%d)."
1500 "The minimum number of points is 2.",
1501 filename.c_str(), numRows);
1502 GMX_THROW(InvalidInputError(mesg));
1505 /* Make sure there are enough columns of data.
1507 Two formats are allowed. Either with columns {coords, PMF, target} or
1508 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1509 is how AWH output is written (x, y, z being other AWH variables). For this format,
1510 trailing columns are ignored.
1512 int columnIndexTarget;
1513 int numColumnsMin = dimParams.size() + 2;
1514 int columnIndexPmf = dimParams.size();
1515 if (numColumns == numColumnsMin)
1517 columnIndexTarget = columnIndexPmf + 1;
1519 else
1521 columnIndexTarget = columnIndexPmf + 4;
1524 if (numColumns < numColumnsMin)
1526 std::string mesg =
1527 gmx::formatString("The number of columns in %s should be at least %d."
1528 "\n\n%s",
1529 filename.c_str(), numColumnsMin, correctFormatMessage.c_str());
1530 GMX_THROW(InvalidInputError(mesg));
1533 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1534 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1535 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1536 if (numZeroRows > 1)
1538 std::string mesg = gmx::formatString("Found %d trailing zero data rows in %s. Please remove trailing empty lines and try again.",
1539 numZeroRows, filename.c_str());
1540 GMX_THROW(InvalidInputError(mesg));
1543 /* Convert from user units to internal units before sending the data of to grid. */
1544 for (size_t d = 0; d < dimParams.size(); d++)
1546 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1547 if (scalingFactor == 1)
1549 continue;
1551 for (size_t m = 0; m < pointState->size(); m++)
1553 data[d][m] *= scalingFactor;
1557 /* Get a data point for each AWH grid point so that they all get data. */
1558 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1559 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1561 /* Extract the data for each grid point.
1562 * We check if the target distribution is zero for all points.
1564 bool targetDistributionIsZero = true;
1565 for (size_t m = 0; m < pointState->size(); m++)
1567 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1568 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1570 /* Check if the values are allowed. */
1571 if (target < 0)
1573 std::string mesg = gmx::formatString("Target distribution weight at point %zu (%g) in %s is negative.",
1574 m, target, filename.c_str());
1575 GMX_THROW(InvalidInputError(mesg));
1577 if (target > 0)
1579 targetDistributionIsZero = false;
1581 (*pointState)[m].setTargetConstantWeight(target);
1584 if (targetDistributionIsZero)
1586 std::string mesg = gmx::formatString("The target weights given in column %d in %s are all 0",
1587 columnIndexTarget, filename.c_str());
1588 GMX_THROW(InvalidInputError(mesg));
1591 /* Free the arrays. */
1592 for (int m = 0; m < numColumns; m++)
1594 sfree(data[m]);
1596 sfree(data);
1599 void BiasState::normalizePmf(int numSharingSims)
1601 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1602 Approximately (for large enough force constant) we should have:
1603 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1606 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1607 double expSumPmf = 0;
1608 double expSumF = 0;
1609 for (const PointState &pointState : points_)
1611 if (pointState.inTargetRegion())
1613 expSumPmf += std::exp( pointState.logPmfSum());
1614 expSumF += std::exp(-pointState.freeEnergy());
1617 double numSamples = histogramSize_.histogramSize()/numSharingSims;
1619 /* Renormalize */
1620 double logRenorm = std::log(numSamples*expSumF/expSumPmf);
1621 for (PointState &pointState : points_)
1623 if (pointState.inTargetRegion())
1625 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1630 void BiasState::initGridPointState(const AwhBiasParams &awhBiasParams,
1631 const std::vector<DimParams> &dimParams,
1632 const Grid &grid,
1633 const BiasParams &params,
1634 const std::string &filename,
1635 int numBias)
1637 /* Modify PMF, free energy and the constant target distribution factor
1638 * to user input values if there is data given.
1640 if (awhBiasParams.bUserData)
1642 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1643 setFreeEnergyToConvolvedPmf(dimParams, grid);
1646 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1647 GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN ||
1648 points_[0].weightSumRef() != 0,
1649 "AWH reference weight histogram not initialized properly with local Boltzmann target distribution.");
1651 updateTargetDistribution(points_, params);
1653 for (PointState &pointState : points_)
1655 if (pointState.inTargetRegion())
1657 pointState.updateBias();
1659 else
1661 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1662 pointState.setTargetToZero();
1666 /* Set the initial reference weighthistogram. */
1667 const double histogramSize = histogramSize_.histogramSize();
1668 for (auto &pointState : points_)
1670 pointState.setInitialReferenceWeightHistogram(histogramSize);
1673 /* Make sure the pmf is normalized consistently with the histogram size.
1674 Note: the target distribution and free energy need to be set here. */
1675 normalizePmf(params.numSharedUpdate);
1678 BiasState::BiasState(const AwhBiasParams &awhBiasParams,
1679 double histogramSizeInitial,
1680 const std::vector<DimParams> &dimParams,
1681 const Grid &grid) :
1682 coordState_(awhBiasParams, dimParams, grid),
1683 points_(grid.numPoints()),
1684 weightSumCovering_(grid.numPoints()),
1685 histogramSize_(awhBiasParams, histogramSizeInitial)
1687 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1688 for (size_t d = 0; d < dimParams.size(); d++)
1690 int index = grid.point(coordState_.gridpointIndex()).index[d];
1691 originUpdatelist_[d] = index;
1692 endUpdatelist_[d] = index;
1696 } // namespace gmx