/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief Declares implementation functions and types for the domain
 * decomposition module.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */
#ifndef GMX_DOMDEC_DOMDEC_INTERNAL_H
#define GMX_DOMDEC_DOMDEC_INTERNAL_H

#include "config.h"

#include <climits>

#include <array>
#include <memory>
#include <vector>

#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/mdlib/updategroupscog.h"
#include "gromacs/timing/cyclecounter.h"
#include "gromacs/topology/block.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/gmxassert.h"

struct t_commrec;

/*! \cond INTERNAL */

#define DD_NLOAD_MAX 9

struct BalanceRegion;
//! Indices to communicate in a dimension
struct gmx_domdec_ind_t
{
    //! @{
    /*! \brief The numbers of charge groups to send and receive for each
     * cell that requires communication; the last entry contains the total
     * number of atoms that need to be communicated.
     */
    int              nsend[DD_MAXIZONE+2] = {};
    int              nrecv[DD_MAXIZONE+2] = {};
    //! @}
    //! The charge groups to send
    std::vector<int> index;
    //! @{
    /* The atom range for non-in-place communication */
    int              cell2at0[DD_MAXIZONE] = {};
    int              cell2at1[DD_MAXIZONE] = {};
    //! @}
};
//! Things relating to index communication
struct gmx_domdec_comm_dim_t
{
    /* Returns the number of grid pulses (the number of domains in the halo along this dimension) */
    int numPulses() const
    {
        return ind.size();
    }

    /**< For dlb, for use with edlbAUTO */
    int                           np_dlb = 0;
    /**< The indices to communicate, size np */
    std::vector<gmx_domdec_ind_t> ind;
    /**< Can we receive data in place? */
    bool                          receiveInPlace = false;
};
/*! \brief Load balancing data along a dim used on the master rank of that dim */
struct RowMaster
{
    struct Bounds
    {
        /**< State var.: max lower bound., incl. neighbors */
        real cellFracLowerMax = 0;
        /**< State var.: min upper bound., incl. neighbors */
        real cellFracUpperMin = 0;
        /**< Temp. var.: lower limit for cell boundary */
        real boundMin = 0;
        /**< Temp. var.: upper limit for cell boundary */
        real boundMax = 0;
    };

    /**< Temp. var.: is this cell size at the limit */
    std::vector<bool>   isCellMin;
    /**< State var.: cell boundaries, box relative */
    std::vector<real>   cellFrac;
    /**< Temp. var.: old cell size */
    std::vector<real>   oldCellFrac;
    /**< Cell bounds */
    std::vector<Bounds> bounds;
    /**< State var.: is DLB limited in this row */
    bool                dlbIsLimited = false;
    /**< Temp. var. */
    std::vector<real>   buf_ncd;
};
/*! \brief Struct for managing cell sizes with DLB along a dimension */
struct DDCellsizesWithDlb
{
    /**< Cell row root struct, only available on the first rank in a row */
    std::unique_ptr<RowMaster> rowMaster;
    /**< The cell sizes, in fractions, along a row, not available on the first rank in a row */
    std::vector<real>          fracRow;
    /**< The lower corner, in fractions, in triclinic space */
    real                       fracLower = 0;
    /**< The upper corner, in fractions, in triclinic space */
    real                       fracUpper = 0;
    /**< The maximum lower corner among all our neighbors */
    real                       fracLowerMax = 0;
    /**< The minimum upper corner among all our neighbors */
    real                       fracUpperMin = 0;
};
/*! \brief Struct for compute load communication
 *
 * Here floats are accurate enough, since these variables
 * only influence the load balancing, not the actual MD results.
 */
typedef struct
{
    /**< The number of load recordings */
    int    nload = 0;
    /**< Scan of the sum of load over dimensions */
    float *load = nullptr;
    /**< The sum of the load over the ranks up to our current dimension */
    float  sum = 0;
    /**< The maximum over the ranks contributing to \p sum */
    float  max = 0;
    /**< Like \p sum, but takes the maximum when the load balancing is limited */
    float  sum_m = 0;
    /**< Minimum cell volume, relative to the box */
    float  cvol_min = 0;
    /**< The PP time during which PME can overlap */
    float  mdf = 0;
    /**< The PME-only rank load */
    float  pme = 0;
    /**< Bit flags that tell if DLB was limited, per dimension */
    int    flags = 0;
} domdec_load_t;
/*! \brief Data needed to sort an atom to the desired location in the local state */
typedef struct
{
    /**< Neighborsearch grid cell index */
    int nsc = 0;
    /**< Global atom/charge group index */
    int ind_gl = 0;
    /**< Local atom/charge group index */
    int ind = 0;
} gmx_cgsort_t;
/*! \brief Temporary buffers for sorting atoms */
typedef struct
{
    /**< Sorted array of indices */
    std::vector<gmx_cgsort_t> sorted;
    /**< Array of stationary atom/charge group indices */
    std::vector<gmx_cgsort_t> stationary;
    /**< Array of moved atom/charge group indices */
    std::vector<gmx_cgsort_t> moved;
    /**< Integer buffer for sorting */
    std::vector<int>          intBuffer;
} gmx_domdec_sort_t;
/*! \brief Manages atom ranges and order for the local state atom vectors */
class DDAtomRanges
{
    public:
        /*! \brief The local state atom order
         *
         * This enum determines the order of the atoms in the local state.
         * Type::Home and Type::Zones should be first and second,
         * the others can be ordered as wanted.
         */
        enum class Type : int
        {
            Home,        /**< The home atoms */
            Zones,       /**< All zones in the eighth shell */
            Vsites,      /**< Atoms communicated for virtual sites */
            Constraints, /**< Atoms communicated for constraints */
            Number       /**< Not a range, only present for convenience */
        };

        /*! \brief Returns the start atom index for range \p rangeType */
        int start(Type rangeType) const
        {
            if (rangeType == Type::Home)
            {
                return 0;
            }
            else
            {
                return end_[static_cast<int>(rangeType) - 1];
            }
        }

        /*! \brief Returns the end atom index for range \p rangeType */
        int end(Type rangeType) const
        {
            return end_[static_cast<int>(rangeType)];
        }

        /*! \brief Returns the number of home atoms */
        int numHomeAtoms() const
        {
            return end_[static_cast<int>(Type::Home)];
        }

        /*! \brief Returns the total number of atoms */
        int numAtomsTotal() const
        {
            return end_[static_cast<int>(Type::Number) - 1];
        }

        /*! \brief Sets the end index of range \p rangeType to \p end
         *
         * This should be called either with Type::Home or with a type
         * that is larger than that passed in the previous call to setEnd.
         * A release assertion for these conditions is present.
         */
        void setEnd(Type rangeType,
                    int  end)
        {
            GMX_RELEASE_ASSERT(rangeType == Type::Home || rangeType > lastTypeSet_, "Can only set either home or a larger type than the last one");

            for (int i = static_cast<int>(rangeType); i < static_cast<int>(Type::Number); i++)
            {
                end_[i] = end;
            }

            lastTypeSet_ = rangeType;
        }

    private:
        /*! \brief The list of end atom indices */
        std::array<int, static_cast<int>(Type::Number)> end_;
        Type                                            lastTypeSet_ = Type::Number;
};
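
/* A hedged usage sketch (illustrative only, not part of this header's API):
 * the ranges must be set in increasing Type order, after which start() and
 * end() give the atom index window of each range, e.g.:
 *
 *   DDAtomRanges ranges;
 *   ranges.setEnd(DDAtomRanges::Type::Home,        1000); // home atoms [0, 1000)
 *   ranges.setEnd(DDAtomRanges::Type::Zones,       1600); // halo atoms [1000, 1600)
 *   ranges.setEnd(DDAtomRanges::Type::Vsites,      1600); // no atoms communicated for vsites
 *   ranges.setEnd(DDAtomRanges::Type::Constraints, 1700); // constraint atoms [1600, 1700)
 *   // ranges.numHomeAtoms() == 1000, ranges.numAtomsTotal() == 1700
 */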
/*! \brief Enum of dynamic load balancing states
 *
 * Allowed DLB states and transitions
 * - initialization at startup:
 *                               -> offUser ("-dlb no")
 *                               -> onUser  ("-dlb yes")
 *                               -> offCanTurnOn ("-dlb auto")
 *
 * - in automatic mode (i.e. initial state offCanTurnOn):
 *   offCanTurnOn         -> onCanTurnOff
 *   offCanTurnOn         -> offForever
 *   offCanTurnOn         -> offTemporarilyLocked
 *   offTemporarilyLocked -> offCanTurnOn
 *   onCanTurnOff         -> offCanTurnOn
 */
enum class DlbState
{
    offUser,              /**< DLB is permanently off per user request */
    offForever,           /**< DLB is off due to a runtime condition (not supported or causes performance loss) and will never be turned on */
    offCanTurnOn,         /**< DLB is off and will turn on on imbalance */
    offTemporarilyLocked, /**< DLB is off and temporarily can't turn on */
    onCanTurnOff,         /**< DLB is on and can turn off when slow */
    onUser,               /**< DLB is permanently on per user request */
    nr                    /**< The number of DLB states */
};
/*! \brief The PME domain decomposition for one dimension */
typedef struct
{
    /**< The dimension */
    int      dim = 0;
    /**< Tells if DD and PME dims match */
    gmx_bool dim_match = false;
    /**< The number of PME ranks/domains in this dimension */
    int      nslab = 0;
    /**< Cell sizes for determining the PME comm. with SLB */
    real    *slb_dim_f = nullptr;
    /**< The minimum pp node location, size nslab */
    int     *pp_min = nullptr;
    /**< The maximum pp node location, size nslab */
    int     *pp_max = nullptr;
    /**< The maximum shift for coordinate redistribution in PME */
    int      maxshift = 0;
} gmx_ddpme_t;
struct gmx_ddzone_t
{
    /**< The minimum bottom of this zone */
    real min0 = 0;
    /**< The maximum top of this zone */
    real max1 = 0;
    /**< The minimum top of this zone */
    real min1 = 0;
    /**< The maximum bottom communication height for this zone */
    real mch0 = 0;
    /**< The maximum top communication height for this zone */
    real mch1 = 0;
    /**< The bottom value of the first cell in this zone */
    real p1_0 = 0;
    /**< The top value of the first cell in this zone */
    real p1_1 = 0;
    /**< Bool disguised as a real, 1 when the above data has been set, 0 otherwise */
    real dataSet = 0;
};

/*! \brief The number of reals in gmx_ddzone_t */
constexpr int c_ddzoneNumReals = 8;
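
/* A hedged consistency check (an addition here, not from the original header):
 * c_ddzoneNumReals is assumed to exist so the struct can be treated as a flat
 * array of reals during communication, which the check below would guard. */
static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real),
              "gmx_ddzone_t should pack into exactly c_ddzoneNumReals reals");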
template<typename T> class DDBufferAccess;

/*! \brief Temporary storage container that minimizes (re)allocation and clearing
 *
 * This is only the storage, actual access happens through DDBufferAccess.
 * All methods check if the buffer is (not) in use.
 */
template<typename T>
class DDBuffer
{
    private:
        /*! \brief Returns a buffer of size \p numElements, the elements are undefined */
        gmx::ArrayRef<T> resize(size_t numElements)
        {
            GMX_ASSERT(isInUse_, "Should only operate on acquired buffers");

            if (numElements > buffer_.size())
            {
                buffer_.resize(numElements);
            }

            return gmx::arrayRefFromArray(buffer_.data(), numElements);
        }

        /*! \brief Acquire the buffer for use with size set to \p numElements, the elements are undefined */
        gmx::ArrayRef<T> acquire(size_t numElements)
        {
            GMX_RELEASE_ASSERT(!isInUse_, "Should only request free buffers");
            isInUse_ = true;

            return resize(numElements);
        }

        /*! \brief Releases the buffer, buffer_ should not be used after this */
        void release()
        {
            GMX_RELEASE_ASSERT(isInUse_, "Should only release buffers in use");
            isInUse_ = false;
        }

        std::vector<T> buffer_;          /**< The actual memory buffer */
        bool           isInUse_ = false; /**< Flag that tells whether the buffer is in use */

        friend class DDBufferAccess<T>;
};
/*! \brief Class that manages access to a temporary memory buffer */
template<typename T>
class DDBufferAccess
{
    public:
        /*! \brief Constructor, returns a buffer of size \p numElements, element values are undefined
         *
         * \note The actual memory buffer \p ddBuffer can not be used to
         *       create other DDBufferAccess objects until the one created
         *       here is destroyed.
         */
        DDBufferAccess(DDBuffer<T> &ddBuffer,
                       size_t       numElements) :
            ddBuffer_(ddBuffer)
        {
            buffer = ddBuffer_.acquire(numElements);
        }

        ~DDBufferAccess()
        {
            ddBuffer_.release();
        }

        /*! \brief Resizes the buffer to \p numElements, new elements are undefined
         *
         * \note The buffer arrayref is updated after this call.
         */
        void resize(size_t numElements)
        {
            buffer = ddBuffer_.resize(numElements);
        }

    private:
        DDBuffer<T>     &ddBuffer_; /**< Reference to the storage class */
    public:
        gmx::ArrayRef<T> buffer;    /**< The access to the memory buffer */
};
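
/* A hedged usage sketch (illustrative only): DDBufferAccess gives scoped,
 * RAII-style access to a DDBuffer, so the storage is reused across calls
 * without reallocation and is released automatically, e.g. assuming a
 * gmx_domdec_comm_t* named comm (see the struct below):
 *
 *   {
 *       DDBufferAccess<int> indexAccess(comm->intBuffer, numIndices);
 *       gmx::ArrayRef<int>  indices = indexAccess.buffer;
 *       // ... fill and use indices; grow with indexAccess.resize(n) ...
 *   }   // destructor releases comm->intBuffer for the next user
 */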
/*! \brief Temporary buffer for setting up communication over one pulse and all zones in the halo */
struct dd_comm_setup_work_t
{
    /**< The local atom group indices to send */
    std::vector<int>       localAtomGroupBuffer;
    /**< Buffer for collecting the global atom group indices to send */
    std::vector<int>       atomGroupBuffer;
    /**< Buffer for collecting the atom group positions to send */
    std::vector<gmx::RVec> positionBuffer;
    /**< The number of atoms contained in the atom groups to send */
    int                    nat = 0;
    /**< The number of atom groups to send for the last zone */
    int                    nsend_zone = 0;
};
/*! \brief Information about the simulated system */
struct DDSystemInfo
{
    //! True when update groups are used
    bool useUpdateGroups = false;
    //! Update atom grouping for each molecule type
    std::vector<gmx::RangePartitioning> updateGroupingPerMoleculetype;
    //! The maximum radius over all update groups
    real maxUpdateGroupRadius;

    //! Are there inter-domain bonded interactions?
    bool haveInterDomainBondeds = false;
    //! Are there inter-domain multi-body interactions?
    bool haveInterDomainMultiBodyBondeds = false;

    //! Cut-off for multi-body interactions
    real minCutoffForMultiBody = 0;
    //! Cut-off for non-bonded/2-body interactions
    real cutoff = 0;
    //! The lower limit for the DD cell size
    real cellsizeLimit = 0;

    //! Can atoms connected by constraints be assigned to different domains?
    bool haveSplitConstraints = false;
    //! Can atoms connected by settles be assigned to different domains?
    bool haveSplitSettles = false;
    //! Estimated communication range needed for constraints
    real constraintCommunicationRange = 0;

    //! Whether to only communicate atoms beyond the non-bonded cut-off when they are involved in bonded interactions with non-local atoms
    bool filterBondedCommunication = false;
    //! Whether to increase the multi-body cut-off beyond the minimum required
    bool increaseMultiBodyCutoff = false;
};
/*! \brief Settings that affect the behavior of the domain decomposition
 *
 * These settings depend on options chosen by the user, set by environment
 * variables, as well as hardware support. The initial DLB state also
 * depends on the integrator.
 *
 * Note: Settings that depend on the simulated system are in DDSystemInfo.
 */
struct DDSettings
{
    //! Use MPI_Sendrecv communication instead of non-blocking calls
    bool useSendRecv2 = false;

    /* Information for managing the dynamic load balancing */
    //! Maximum DLB scaling per load balancing step in percent
    int dlb_scale_lim = 0;
    //! Flop counter (0=no, 1=yes, 2=with (eFlop-1)*5% noise)
    int eFlop = 0;

    //! Whether to order the DD dimensions from z to x
    bool useDDOrderZYX = false;

    //! Whether to use MPI Cartesian reordering of communicators, when supported (almost never)
    bool useCartesianReorder = true;

    //! Whether we should record the load
    bool recordLoad = false;

    /* Debugging */
    //! Step interval for dumping the local+non-local atoms to pdb
    int nstDDDump = 0;
    //! Step interval for dumping the DD grid to pdb
    int nstDDDumpGrid = 0;
    //! DD debug print level: 0, 1, 2
    int DD_debug = 0;

    //! The DLB state at the start of the run
    DlbState initialDlbState = DlbState::offCanTurnOn;
};
/*! \brief Information on how the DD ranks are set up */
struct DDRankSetup
{
    /**< The number of particle-particle (non PME-only) ranks */
    int  numPPRanks = 0;
    /**< The DD PP grid */
    ivec numPPCells = { 0, 0, 0 };

    /* PME and Cartesian communicator stuff */
    /**< Whether we use PME-only ranks */
    bool usePmeOnlyRanks = false;
    /**< The number of decomposition dimensions for PME, 0: no PME */
    int  npmedecompdim = 0;
    /**< The number of ranks doing PME (PP/PME or only PME) */
    int  numRanksDoingPme = 0;
    /**< The number of PME ranks/domains along x */
    int  npmenodes_x = 0;
    /**< The number of PME ranks/domains along y */
    int  npmenodes_y = 0;
    /**< The 1D or 2D PME domain decomposition setup */
    gmx_ddpme_t ddpme[2];
};
/*! \brief Information on the Cartesian MPI setup of the DD ranks */
struct CartesianRankSetup
{
    /**< Use Cartesian communication between PP and PME ranks */
    bool             bCartesianPP_PME = false;
    /**< Cartesian grid for combined PP+PME ranks */
    ivec             ntot = { };
    /**< The number of dimensions for the PME setup that are Cartesian */
    int              cartpmedim = 0;
    /**< The Cartesian index to sim rank conversion, used with bCartesianPP_PME */
    std::vector<int> ddindex2simnodeid;

    /* The DD particle-particle nodes only */
    /**< Use a Cartesian communicator for PP */
    bool             bCartesianPP = false;
    /**< The Cartesian index to DD rank conversion, used with bCartesianPP */
    std::vector<int> ddindex2ddnodeid;
};
/*! \brief Struct for domain decomposition communication
 *
 * This struct contains most information about domain decomposition
 * communication setup, some communication buffers, some statistics
 * and also the setup for the communication between particle-particle
 * and PME only ranks.
 *
 * All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
 * unless stated otherwise.
 */
struct gmx_domdec_comm_t // NOLINT (clang-analyzer-optin.performance.Padding)
{
    /**< Constant parameters that control DD behavior */
    DDSettings ddSettings;

    /**< Information on how the DD ranks are set up */
    DDRankSetup        ddRankSetup;
    /**< Information on the Cartesian part of the DD rank setup */
    CartesianRankSetup cartesianRankSetup;

    /* The DD state, used for reloading old states, during e.g. EM */
    /**< The global charge groups, this defines the DD state (except for the DLB state) */
    t_block cgs_gl = { };

    /* Charge group / atom sorting */
    /**< Data structure for cg/atom sorting */
    std::unique_ptr<gmx_domdec_sort_t> sort;

    //! Centers of mass of local update groups
    std::unique_ptr<gmx::UpdateGroupsCog> updateGroupsCog;

    /* Data for the optional filtering of communication of atoms for bonded interactions */
    /**< Links between cg's through bonded interactions */
    t_blocka *cglink = nullptr;
    /**< Local cg availability, TODO: remove when group scheme is removed */
    char     *bLocalCG = nullptr;

    /* The DLB state, possible values are defined above */
    DlbState dlbState;
    /* With dlbState=DlbState::offCanTurnOn, should we check whether to turn DLB on at the next DD? */
    gmx_bool bCheckWhetherToTurnDlbOn = false;
    /* The first DD count since we are running without DLB */
    int      ddPartioningCountFirstDlbOff = 0;

    /* Cell sizes for static load balancing, first index cartesian */
    real **slb_frac = nullptr;

    /**< Information about the simulated system */
    DDSystemInfo systemInfo;

    /* The width of the communicated boundaries */
    /**< Cut-off for multi-body interactions, also 2-body bonded when \p cutoff_mbody > \p cutoff */
    real     cutoff_mbody = 0;
    /**< The minimum guaranteed cell-size, Cartesian indexing */
    rvec     cellsize_min = { };
    /**< The minimum guaranteed cell-size with dlb=auto */
    rvec     cellsize_min_dlb = { };
    /**< The lower limit for the DD cell size with DLB */
    real     cellsize_limit = 0;
    /**< Effectively no NB cut-off limit with DLB for systems without PBC? */
    gmx_bool bVacDLBNoLimit = false;

    /** With PME load balancing we set limits on DLB */
    gmx_bool bPMELoadBalDLBLimits = false;
    /** DLB needs to take into account that we want to allow this maximum
     *  cut-off (for PME load balancing), this could limit cell boundaries.
     */
    real PMELoadBal_max_cutoff = 0;

    /**< tric_dir from \p gmx_ddbox_t is only stored here because dd_get_ns_ranges needs it */
    ivec tric_dir = { };
    /**< box lower corner, required with dim's without pbc and -gcom */
    rvec box0 = { };
    /**< box size, required with dim's without pbc and -gcom */
    rvec box_size = { };

    /**< The DD cell lower corner, in triclinic space */
    rvec cell_x0 = { };
    /**< The DD cell upper corner, in triclinic space */
    rvec cell_x1 = { };

    /**< The old \p cell_x0, to check cg displacements */
    rvec old_cell_x0 = { };
    /**< The old \p cell_x1, to check cg displacements */
    rvec old_cell_x1 = { };

    /** The communication setup and charge group boundaries for the zones */
    gmx_domdec_zones_t zones;

    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
     * cell boundaries of neighboring cells for staggered grids when using
     * dynamic load balancing.
     */
    /**< Zone limits for dim 1 with staggered grids */
    gmx_ddzone_t zone_d1[2];
    /**< Zone limits for dim 2 with staggered grids */
    gmx_ddzone_t zone_d2[2][2];

    /** The coordinate/force communication setup and indices */
    gmx_domdec_comm_dim_t cd[DIM];
    /** The maximum number of cells to communicate with in one dimension */
    int                   maxpulse = 0;

    /** Which cg distribution is stored on the master node,
     *  stored as DD partitioning call count.
     */
    int64_t master_cg_ddp_count = 0;

    /** The number of cg's received from the direct neighbors */
    int zone_ncg1[DD_MAXZONE] = {0};

    /** The atom ranges in the local state */
    DDAtomRanges atomRanges;

    /** Array for signalling if atoms have moved to another domain */
    std::vector<int> movedBuffer;

    /** Communication int buffer for general use */
    DDBuffer<int> intBuffer;

    /** Communication rvec buffer for general use */
    DDBuffer<gmx::RVec> rvecBuffer;

    /* Temporary storage for thread parallel communication setup */
    /**< Thread-local work data */
    std::vector<dd_comm_setup_work_t> dth;

    /* Communication buffer only used with multiple grid pulses */
    /**< Another rvec comm. buffer */
    DDBuffer<gmx::RVec> rvecBuffer2;

    /* Communication buffers for local redistribution */
    /**< Charge group flag comm. buffers */
    std::array<std::vector<int>, DIM*2>       cggl_flag;
    /**< Charge group center comm. buffers */
    std::array<std::vector<gmx::RVec>, DIM*2> cgcm_state;

    /* Cell sizes for dynamic load balancing */
    std::vector<DDCellsizesWithDlb> cellsizesWithDlb;

    /* Stuff for load communication */
    /**< The recorded load data */
    domdec_load_t *load = nullptr;
    /**< The number of MPI ranks sharing the GPU our rank is using */
    int            nrank_gpu_shared = 0;
#if GMX_MPI
    /**< The MPI load communicator */
    MPI_Comm *mpi_comm_load = nullptr;
    /**< The MPI load communicator for ranks sharing a GPU */
    MPI_Comm  mpi_comm_gpu_shared;
#endif

    /**< Struct for timing the force load balancing region */
    BalanceRegion *balanceRegion = nullptr;

    /* Cycle counters over nstlist steps */
    /**< Total cycles counted */
    float  cycl[ddCyclNr] = { };
    /**< The number of cycle recordings */
    int    cycl_n[ddCyclNr] = { };
    /**< The maximum cycle count */
    float  cycl_max[ddCyclNr] = { };
    /**< Total flops counted */
    double flop = 0.0;
    /**< The number of flop recordings */
    int    flop_n = 0;
    /** How many times did we have load measurements */
    int    n_load_have = 0;
    /** How many times have we collected the load measurements */
    int    n_load_collect = 0;

    /* Cycle count history for DLB checks */
    /**< The averaged cycles per step over the last nstlist steps before turning on DLB */
    float   cyclesPerStepBeforeDLB = 0;
    /**< The running average of the cycles per step during DLB */
    float   cyclesPerStepDlbExpAverage = 0;
    /**< Have we turned off DLB (after turning DLB on)? */
    bool    haveTurnedOffDlb = false;
    /**< The DD step at which we last measured that DLB off was faster than DLB on, 0 if there was no such step */
    int64_t dlbSlowerPartitioningCount = 0;

    /* Statistics for atoms */
    /**< The atoms per range, summed over the steps */
    double sum_nat[static_cast<int>(DDAtomRanges::Type::Number)] = { };

    /* Statistics for calls and times */
    /**< The number of partitioning calls */
    int    ndecomp = 0;
    /**< The number of load recordings */
    int    nload = 0;
    /**< Total MD step time */
    double load_step = 0.0;
    /**< Total PP force time */
    double load_sum = 0.0;
    /**< Max \p load_sum over the ranks */
    double load_max = 0.0;
    /**< Was load balancing limited, per DD dim */
    ivec   load_lim = { };
    /**< Total time on PP done during PME overlap time */
    double load_mdf = 0.0;
    /**< Total time on our PME-only rank */
    double load_pme = 0.0;

    /** The last partition step */
    int64_t partition_step = INT_MIN;
};
/*! \brief DD zone permutation
 *
 * Zone permutation from the Cartesian x-major/z-minor order to an order
 * that leads to consecutive charge groups for neighbor searching.
 * TODO: remove when the group scheme is removed
 */
static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };

/*! \brief DD zone reordering to Cartesian order
 *
 * Index to reorder the zones such that they end up in Cartesian order
 * with dimension index 0 major and dimension index 2 minor.
 */
static const int zone_reorder_cartesian[DD_MAXZONE] = { 0, 1, 3, 2, 5, 4, 6, 7 };

/* dd_zo and dd_zp3 are set up such that i zones with non-zero
 * components see only j zones with that component 0.
 */
/*! \brief Returns the DD cut-off distance for multi-body interactions */
real dd_cutoff_multibody(const gmx_domdec_t *dd);

/*! \brief Returns the DD cut-off distance for two-body interactions */
real dd_cutoff_twobody(const gmx_domdec_t *dd);

/*! \brief Returns the domain index given the number of domains and the domain coordinates
 *
 * This order is required to minimize the coordinate communication in PME
 * which uses decomposition in the x direction.
 */
static inline int dd_index(const ivec numDomains,
                           const ivec domainCoordinates)
{
    return ((domainCoordinates[XX]*numDomains[YY] + domainCoordinates[YY])*numDomains[ZZ]) + domainCoordinates[ZZ];
}
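
/* A hedged worked example: with numDomains = {4, 3, 2}, the domain at
 * coordinates {1, 2, 0} gets index ((1*3 + 2)*2) + 0 = 10. The ordering is
 * x-major/z-minor, so all domains sharing an x coordinate (the PME
 * decomposition direction) have consecutive indices.
 */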
/*! \brief Returns the size of the buffer to hold fractional cell boundaries for DD dimension index dimIndex */
static inline int ddCellFractionBufferSize(const gmx_domdec_t *dd,
                                           int                 dimIndex)
{
    return dd->nc[dd->dim[dimIndex]] + 1 + dimIndex*2 + 1 + dimIndex;
}
/*! \brief Maximum number of ranks for using send/recv for state scattering and gathering
 *
 * Use separate MPI send and receive commands
 * when #ranks <= c_maxNumRanksUseSendRecvForScatterAndGather
 * This saves memory (and some copying for small #ranks).
 * For high parallelization scatter and gather calls are used.
 */
static constexpr int c_maxNumRanksUseSendRecvForScatterAndGather = 4;

/*! \brief Make DD cells larger by this factor than the limit to avoid rounding issues */
static constexpr double DD_CELL_MARGIN = 1.0001;

/*! \brief Factor for checking DD cell size limitation during DLB, should be in between 1 and DD_CELL_MARGIN */
static constexpr double DD_CELL_MARGIN2 = 1.00005;

/*! \brief With pressure scaling, keep cell sizes 2% above the limit to allow for some scaling */
static constexpr double DD_PRES_SCALE_MARGIN = 1.02;

/*! \endcond */

#endif