Create separate module for trajectory data
[gromacs.git] / src / gromacs / trajectoryanalysis / modules / pairdist.cpp
blob3091cb4451f299c3891add4012e9a8315eecc36d
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \internal \file
36 * \brief
37 * Implements gmx::analysismodules::PairDistance.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
42 #include "gmxpre.h"
44 #include "pairdist.h"
46 #include <cmath>
48 #include <algorithm>
49 #include <limits>
50 #include <string>
51 #include <vector>
53 #include "gromacs/analysisdata/analysisdata.h"
54 #include "gromacs/analysisdata/modules/plot.h"
55 #include "gromacs/options/basicoptions.h"
56 #include "gromacs/options/filenameoption.h"
57 #include "gromacs/options/ioptionscontainer.h"
58 #include "gromacs/selection/nbsearch.h"
59 #include "gromacs/selection/selection.h"
60 #include "gromacs/selection/selectionoption.h"
61 #include "gromacs/trajectory/trajectoryframe.h"
62 #include "gromacs/trajectoryanalysis/analysissettings.h"
63 #include "gromacs/utility/arrayref.h"
64 #include "gromacs/utility/exceptions.h"
65 #include "gromacs/utility/stringutil.h"
67 namespace gmx
70 namespace analysismodules
73 namespace
76 //! \addtogroup module_trajectoryanalysis
77 //! \{
79 //! Enum value to store the selected value for `-type`.
80 enum DistanceType
82 eDistanceType_Min,
83 eDistanceType_Max
86 //! Enum value to store the selected value for `-refgrouping`/`-selgrouping`.
87 enum GroupType
89 eGroupType_All,
90 eGroupType_Residue,
91 eGroupType_Molecule,
92 eGroupType_None
95 //! Strings corresponding to DistanceType.
96 const char *const c_distanceTypes[] = { "min", "max" };
97 //! Strings corresponding to GroupType.
98 const char *const c_groupTypes[] = { "all", "res", "mol", "none" };
100 /*! \brief
101 * Implements `gmx pairdist` trajectory analysis module.
103 class PairDistance : public TrajectoryAnalysisModule
105 public:
106 PairDistance();
108 virtual void initOptions(IOptionsContainer *options,
109 TrajectoryAnalysisSettings *settings);
110 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
111 const TopologyInformation &top);
113 virtual TrajectoryAnalysisModuleDataPointer startFrames(
114 const AnalysisDataParallelOptions &opt,
115 const SelectionCollection &selections);
116 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
117 TrajectoryAnalysisModuleData *pdata);
119 virtual void finishAnalysis(int nframes);
120 virtual void writeOutput();
122 private:
123 /*! \brief
124 * Computed distances as a function of time.
126 * There is one data set for each selection in `sel_`.
127 * Within each data set, there is one column for each distance to be
128 * computed, as explained in the `-h` text.
130 AnalysisData distances_;
132 /*! \brief
133 * Reference selection to compute distances to.
135 * mappedId() identifies the group (of type `refGroupType_`) into which
136 * each position belogs.
138 Selection refSel_;
139 /*! \brief
140 * Selections to compute distances from.
142 * mappedId() identifies the group (of type `selGroupType_`) into which
143 * each position belogs.
145 SelectionList sel_;
147 std::string fnDist_;
149 double cutoff_;
150 DistanceType distanceType_;
151 GroupType refGroupType_;
152 GroupType selGroupType_;
154 //! Number of groups in `refSel_`.
155 int refGroupCount_;
156 //! Maximum number of pairs of groups for one selection.
157 int maxGroupCount_;
158 //! Initial squared distance for distance accumulation.
159 real initialDist2_;
160 //! Cutoff squared for use in the actual calculation.
161 real cutoff2_;
163 //! Neighborhood search object for the pair search.
164 AnalysisNeighborhood nb_;
166 // Copy and assign disallowed by base.
169 PairDistance::PairDistance()
170 : cutoff_(0.0), distanceType_(eDistanceType_Min),
171 refGroupType_(eGroupType_All), selGroupType_(eGroupType_All),
172 refGroupCount_(0), maxGroupCount_(0), initialDist2_(0.0), cutoff2_(0.0)
174 registerAnalysisDataset(&distances_, "dist");
178 void
179 PairDistance::initOptions(IOptionsContainer *options, TrajectoryAnalysisSettings *settings)
181 static const char *const desc[] = {
182 "[THISMODULE] calculates pairwise distances between one reference",
183 "selection (given with [TT]-ref[tt]) and one or more other selections",
184 "(given with [TT]-sel[tt]). It can calculate either the minimum",
185 "distance (the default), or the maximum distance (with",
186 "[TT]-type max[tt]). Distances to each selection provided with",
187 "[TT]-sel[tt] are computed independently.[PAR]",
188 "By default, the global minimum/maximum distance is computed.",
189 "To compute more distances (e.g., minimum distances to each residue",
190 "in [TT]-ref[tt]), use [TT]-refgrouping[tt] and/or [TT]-selgrouping[tt]",
191 "to specify how the positions within each selection should be",
192 "grouped.[PAR]",
193 "Computed distances are written to the file specified with [TT]-o[tt].",
194 "If there are N groups in [TT]-ref[tt] and M groups in the first",
195 "selection in [TT]-sel[tt], then the output contains N*M columns",
196 "for the first selection. The columns contain distances like this:",
197 "r1-s1, r2-s1, ..., r1-s2, r2-s2, ..., where rn is the n'th group",
198 "in [TT]-ref[tt] and sn is the n'th group in the other selection.",
199 "The distances for the second selection comes as separate columns",
200 "after the first selection, and so on. If some selections are",
201 "dynamic, only the selected positions are used in the computation",
202 "but the same number of columns is always written out. If there",
203 "are no positions contributing to some group pair, then the cutoff",
204 "value is written (see below).[PAR]",
205 "[TT]-cutoff[tt] sets a cutoff for the computed distances.",
206 "If the result would contain a distance over the cutoff, the cutoff",
207 "value is written to the output file instead. By default, no cutoff",
208 "is used, but if you are not interested in values beyond a cutoff,",
209 "or if you know that the minimum distance is smaller than a cutoff,",
210 "you should set this option to allow the tool to use grid-based",
211 "searching and be significantly faster.[PAR]",
212 "If you want to compute distances between fixed pairs,",
213 "[gmx-distance] may be a more suitable tool."
216 settings->setHelpText(desc);
218 options->addOption(FileNameOption("o").filetype(eftPlot).outputFile().required()
219 .store(&fnDist_).defaultBasename("dist")
220 .description("Distances as function of time"));
222 options->addOption(DoubleOption("cutoff").store(&cutoff_)
223 .description("Maximum distance to consider"));
224 options->addOption(EnumOption<DistanceType>("type").store(&distanceType_)
225 .enumValue(c_distanceTypes)
226 .description("Type of distances to calculate"));
227 options->addOption(EnumOption<GroupType>("refgrouping").store(&refGroupType_)
228 .enumValue(c_groupTypes)
229 .description("Grouping of -ref positions to compute the min/max over"));
230 options->addOption(EnumOption<GroupType>("selgrouping").store(&selGroupType_)
231 .enumValue(c_groupTypes)
232 .description("Grouping of -sel positions to compute the min/max over"));
234 options->addOption(SelectionOption("ref").store(&refSel_).required()
235 .description("Reference positions to calculate distances from"));
236 options->addOption(SelectionOption("sel").storeVector(&sel_).required().multiValue()
237 .description("Positions to calculate distances for"));
240 //! Helper function to initialize the grouping for a selection.
241 int initSelectionGroups(Selection *sel, t_topology *top, int type)
243 e_index_t indexType = INDEX_UNKNOWN;
244 switch (type)
246 case eGroupType_All: indexType = INDEX_ALL; break;
247 case eGroupType_Residue: indexType = INDEX_RES; break;
248 case eGroupType_Molecule: indexType = INDEX_MOL; break;
249 case eGroupType_None: indexType = INDEX_ATOM; break;
251 return sel->initOriginalIdsToGroup(top, indexType);
255 void
256 PairDistance::initAnalysis(const TrajectoryAnalysisSettings &settings,
257 const TopologyInformation &top)
259 refGroupCount_ = initSelectionGroups(&refSel_, top.topology(), refGroupType_);
261 maxGroupCount_ = 0;
262 distances_.setDataSetCount(sel_.size());
263 for (size_t i = 0; i < sel_.size(); ++i)
265 const int selGroupCount
266 = initSelectionGroups(&sel_[i], top.topology(), selGroupType_);
267 const int columnCount = refGroupCount_ * selGroupCount;
268 maxGroupCount_ = std::max(maxGroupCount_, columnCount);
269 distances_.setColumnCount(i, columnCount);
272 if (!fnDist_.empty())
274 AnalysisDataPlotModulePointer plotm(
275 new AnalysisDataPlotModule(settings.plotSettings()));
276 plotm->setFileName(fnDist_);
277 if (distanceType_ == eDistanceType_Max)
279 plotm->setTitle("Maximum distance");
281 else
283 plotm->setTitle("Minimum distance");
285 // TODO: Figure out and add a descriptive subtitle and/or a longer
286 // title and/or better legends based on the grouping and the reference
287 // selection.
288 plotm->setXAxisIsTime();
289 plotm->setYLabel("Distance (nm)");
290 for (size_t g = 0; g < sel_.size(); ++g)
292 plotm->appendLegend(sel_[g].name());
294 distances_.addModule(plotm);
297 nb_.setCutoff(cutoff_);
298 if (cutoff_ <= 0.0)
300 cutoff_ = 0.0;
301 initialDist2_ = std::numeric_limits<real>::max();
303 else
305 initialDist2_ = cutoff_ * cutoff_;
307 if (distanceType_ == eDistanceType_Max)
309 initialDist2_ = 0.0;
311 cutoff2_ = cutoff_ * cutoff_;
314 /*! \brief
315 * Temporary memory for use within a single-frame calculation.
317 class PairDistanceModuleData : public TrajectoryAnalysisModuleData
319 public:
320 /*! \brief
321 * Reserves memory for the frame-local data.
323 PairDistanceModuleData(TrajectoryAnalysisModule *module,
324 const AnalysisDataParallelOptions &opt,
325 const SelectionCollection &selections,
326 int refGroupCount,
327 const Selection &refSel,
328 int maxGroupCount)
329 : TrajectoryAnalysisModuleData(module, opt, selections)
331 distArray_.resize(maxGroupCount);
332 countArray_.resize(maxGroupCount);
333 refCountArray_.resize(refGroupCount);
334 if (!refSel.isDynamic())
336 initRefCountArray(refSel);
340 virtual void finish() { finishDataHandles(); }
342 /*! \brief
343 * Computes the number of positions in each group in \p refSel
344 * and stores them into `refCountArray_`.
346 void initRefCountArray(const Selection &refSel)
348 std::fill(refCountArray_.begin(), refCountArray_.end(), 0);
349 int refPos = 0;
350 while (refPos < refSel.posCount())
352 const int refIndex = refSel.position(refPos).mappedId();
353 const int startPos = refPos;
354 ++refPos;
355 while (refPos < refSel.posCount()
356 && refSel.position(refPos).mappedId() == refIndex)
358 ++refPos;
360 refCountArray_[refIndex] = refPos - startPos;
364 /*! \brief
365 * Squared distance between each group
367 * One entry for each group pair for the current selection.
368 * Enough memory is allocated to fit the largest calculation selection.
369 * This is needed to support neighborhood searching, which may not
370 * return the pairs in order: for each group pair, we need to search
371 * through all the position pairs and update this array to find the
372 * minimum/maximum distance between them.
374 std::vector<real> distArray_;
375 /*! \brief
376 * Number of pairs within the cutoff that have contributed to the value
377 * in `distArray_`.
379 * This is needed to identify whether there were any pairs inside the
380 * cutoff and whether there were additional pairs outside the cutoff
381 * that were not covered by the neihborhood search.
383 std::vector<int> countArray_;
384 /*! \brief
385 * Number of positions within each reference group.
387 * This is used to more efficiently compute the total number of pairs
388 * (for comparison with `countArray_`), as otherwise these numbers
389 * would need to be recomputed for each selection.
391 std::vector<int> refCountArray_;
394 TrajectoryAnalysisModuleDataPointer PairDistance::startFrames(
395 const AnalysisDataParallelOptions &opt,
396 const SelectionCollection &selections)
398 return TrajectoryAnalysisModuleDataPointer(
399 new PairDistanceModuleData(this, opt, selections, refGroupCount_,
400 refSel_, maxGroupCount_));
403 void
404 PairDistance::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
405 TrajectoryAnalysisModuleData *pdata)
407 AnalysisDataHandle dh = pdata->dataHandle(distances_);
408 const Selection &refSel = pdata->parallelSelection(refSel_);
409 const SelectionList &sel = pdata->parallelSelections(sel_);
410 PairDistanceModuleData &frameData = *static_cast<PairDistanceModuleData *>(pdata);
411 std::vector<real> &distArray = frameData.distArray_;
412 std::vector<int> &countArray = frameData.countArray_;
414 if (cutoff_ > 0.0 && refSel.isDynamic())
416 // Count the number of reference positions in each group, so that
417 // this does not need to be computed again for each selection.
418 // This is needed only if it is possible that the neighborhood search
419 // does not cover all the pairs, hence the cutoff > 0.0 check.
420 // If refSel is static, then the array contents are static as well,
421 // and it has been initialized in the constructor of the data object.
422 frameData.initRefCountArray(refSel);
424 const std::vector<int> &refCountArray = frameData.refCountArray_;
426 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
427 dh.startFrame(frnr, fr.time);
428 for (size_t g = 0; g < sel.size(); ++g)
430 const int columnCount = distances_.columnCount(g);
431 std::fill(distArray.begin(), distArray.begin() + columnCount, initialDist2_);
432 std::fill(countArray.begin(), countArray.begin() + columnCount, 0);
434 // Accumulate the number of position pairs within the cutoff and the
435 // min/max distance for each group pair.
436 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
437 AnalysisNeighborhoodPair pair;
438 while (pairSearch.findNextPair(&pair))
440 const SelectionPosition &refPos = refSel.position(pair.refIndex());
441 const SelectionPosition &selPos = sel[g].position(pair.testIndex());
442 const int refIndex = refPos.mappedId();
443 const int selIndex = selPos.mappedId();
444 const int index = selIndex * refGroupCount_ + refIndex;
445 const real r2 = pair.distance2();
446 if (distanceType_ == eDistanceType_Min)
448 if (distArray[index] > r2)
450 distArray[index] = r2;
453 else
455 if (distArray[index] < r2)
457 distArray[index] = r2;
460 ++countArray[index];
463 // If it is possible that positions outside the cutoff (or lack of
464 // them) affects the result, then we need to check whether there were
465 // any. This is necessary for two cases:
466 // - With max distances, if there are pairs outside the cutoff, then
467 // the computed distance should be equal to the cutoff instead of
468 // the largest distance that was found above.
469 // - With either distance type, if all pairs are outside the cutoff,
470 // then countArray must be updated so that the presence flag
471 // in the output data reflects the dynamic selection status, not
472 // whether something was inside the cutoff or not.
473 if (cutoff_ > 0.0)
475 int selPos = 0;
476 // Loop over groups in this selection (at start, selPos is always
477 // the first position in the next group).
478 while (selPos < sel[g].posCount())
480 // Count the number of positions in this group.
481 const int selIndex = sel[g].position(selPos).mappedId();
482 const int startPos = selPos;
483 ++selPos;
484 while (selPos < sel[g].posCount()
485 && sel[g].position(selPos).mappedId() == selIndex)
487 ++selPos;
489 const int count = selPos - startPos;
490 // Check all group pairs that contain this group.
491 for (int i = 0; i < refGroupCount_; ++i)
493 const int index = selIndex * refGroupCount_ + i;
494 const int totalCount = refCountArray[i] * count;
495 // If there were positions outside the cutoff,
496 // update the distance if necessary and the count.
497 if (countArray[index] < totalCount)
499 if (distanceType_ == eDistanceType_Max)
501 distArray[index] = cutoff2_;
503 countArray[index] = totalCount;
509 // Write the computed distances to the output data.
510 dh.selectDataSet(g);
511 for (int i = 0; i < columnCount; ++i)
513 if (countArray[i] > 0)
515 dh.setPoint(i, std::sqrt(distArray[i]));
517 else
519 // If there are no contributing positions, write out the cutoff
520 // value.
521 dh.setPoint(i, cutoff_, false);
525 dh.finishFrame();
528 void
529 PairDistance::finishAnalysis(int /*nframes*/)
533 void
534 PairDistance::writeOutput()
538 //! \}
540 } // namespace
542 const char PairDistanceInfo::name[] = "pairdist";
543 const char PairDistanceInfo::shortDescription[] =
544 "Calculate pairwise distances between groups of positions";
546 TrajectoryAnalysisModulePointer PairDistanceInfo::create()
548 return TrajectoryAnalysisModulePointer(new PairDistance);
551 } // namespace analysismodules
553 } // namespace gmx