2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Declares implementation functions and types for the domain
38 * decomposition module.
40 * \author Berk Hess <hess@kth.se>
41 * \ingroup module_domdec
43 #ifndef GMX_DOMDEC_DOMDEC_INTERNAL_H
44 #define GMX_DOMDEC_DOMDEC_INTERNAL_H
48 #include "gromacs/domdec/domdec.h"
49 #include "gromacs/domdec/domdec_struct.h"
50 #include "gromacs/mdlib/updategroupscog.h"
51 #include "gromacs/timing/cyclecounter.h"
52 #include "gromacs/topology/block.h"
58 #define DD_NLOAD_MAX 9
62 struct gmx_domdec_ind_t
64 /* The numbers of charge groups to send and receive for each cell
65 * that requires communication, the last entry contains the total
66 * number of atoms that needs to be communicated.
68 int nsend
[DD_MAXIZONE
+2];
69 int nrecv
[DD_MAXIZONE
+2];
70 /* The charge groups to send */
71 std::vector
<int> index
;
72 /* The atom range for non-in-place communication */
73 int cell2at0
[DD_MAXIZONE
];
74 int cell2at1
[DD_MAXIZONE
];
77 struct gmx_domdec_comm_dim_t
79 /* Returns the number of grid pulses (the number of domains in the halo along this dimension) */
85 int np_dlb
; /* For dlb, for use with edlbAUTO */
86 std::vector
<gmx_domdec_ind_t
> ind
; /* The indices to communicate, size np */
87 bool receiveInPlace
; /* Can we receive data in place? */
90 /*! \brief Load balancing data along a dim used on the master rank of that dim */
95 real cellFracLowerMax
; /**< State var.: max lower bound., incl. neighbors */
96 real cellFracUpperMin
; /**< State var.: min upper bound., incl. neighbors */
97 real boundMin
; /**< Temp. var.: lower limit for cell boundary */
98 real boundMax
; /**< Temp. var.: upper limit for cell boundary */
101 std::vector
<bool> isCellMin
; /**< Temp. var.: is this cell size at the limit */
102 std::vector
<real
> cellFrac
; /**< State var.: cell boundaries, box relative */
103 std::vector
<real
> oldCellFrac
; /**< Temp. var.: old cell size */
104 std::vector
<Bounds
> bounds
; /**< Cell bounds */
105 bool dlbIsLimited
; /**< State var.: is DLB limited in this row */
106 std::vector
<real
> buf_ncd
; /**< Temp. var. */
109 /*! \brief Struct for managing cell sizes with DLB along a dimension */
110 struct DDCellsizesWithDlb
112 /* Cell sizes for dynamic load balancing */
113 std::unique_ptr
<RowMaster
> rowMaster
; /**< Cell row root struct, only available on the first rank in a row */
114 std::vector
<real
> fracRow
; /**< The cell sizes, in fractions, along a row, not available on the first rank in a row */
115 real fracLower
; /**< The lower corner, in fractions, in triclinic space */
116 real fracUpper
; /**< The upper corner, in fractions, in triclinic space */
117 real fracLowerMax
; /**< The maximum lower corner among all our neighbors */
118 real fracUpperMin
; /**< The minimum upper corner among all our neighbors */
121 /*! \brief Struct for compute load commuication
123 * Here floats are accurate enough, since these variables
124 * only influence the load balancing, not the actual MD results.
128 int nload
; /**< The number of load recordings */
129 float *load
; /**< Scan of the sum of load over dimensions */
130 float sum
; /**< The sum of the load over the ranks up to our current dimension */
131 float max
; /**< The maximum over the ranks contributing to \p sum */
132 float sum_m
; /**< Like \p sum, but takes the maximum when the load balancing is limited */
133 float cvol_min
; /**< Minimum cell volume, relative to the box */
134 float mdf
; /**< The PP time during which PME can overlap */
135 float pme
; /**< The PME-only rank load */
136 int flags
; /**< Bit flags that tell if DLB was limited, per dimension */
139 /*! \brief Data needed to sort an atom to the desired location in the local state */
142 int nsc
; /**< Neighborsearch grid cell index */
143 int ind_gl
; /**< Global atom/charge group index */
144 int ind
; /**< Local atom/charge group index */
147 /*! \brief Temporary buffers for sorting atoms */
150 std::vector
<gmx_cgsort_t
> sorted
; /**< Sorted array of indices */
151 std::vector
<gmx_cgsort_t
> stationary
; /**< Array of stationary atom/charge group indices */
152 std::vector
<gmx_cgsort_t
> moved
; /**< Array of moved atom/charge group indices */
153 std::vector
<int> intBuffer
; /**< Integer buffer for sorting */
156 /*! \brief Manages atom ranges and order for the local state atom vectors */
160 /*! \brief The local state atom order
162 * This enum determines the order of the atoms in the local state.
163 * ddnatHOME and ddnatZONE should be first and second,
164 * the others can be ordered as wanted.
166 enum class Type
: int
168 Home
, /**< The home atoms */
169 Zones
, /**< All zones in the eighth shell */
170 Vsites
, /**< Atoms communicated for virtual sites */
171 Constraints
, /**< Atoms communicated for constraints */
172 Number
/**< Not a count, only present for convenience */
175 /*! \brief Returns the start atom index for range \p rangeType */
176 int start(Type rangeType
) const
178 if (rangeType
== Type::Home
)
184 return end_
[static_cast<int>(rangeType
) - 1];
188 /*! \brief Returns the end atom index for range \p rangeType */
189 int end(Type rangeType
) const
191 return end_
[static_cast<int>(rangeType
)];
194 /*! \brief Returns the number of home atoms */
195 int numHomeAtoms() const
197 return end_
[static_cast<int>(Type::Home
)];
200 /*! \brief Returns the total number of atoms */
201 int numAtomsTotal() const
203 return end_
[static_cast<int>(Type::Number
) - 1];
206 /*! \brief Sets the end index of range \p rangeType to \p end
208 * This should be called either with Type::Home or with a type
209 * that is larger than that passed in the previous call to setEnd.
210 * A release assertion for these conditions are present.
212 void setEnd(Type rangeType
,
215 GMX_RELEASE_ASSERT(rangeType
== Type::Home
|| rangeType
> lastTypeSet_
, "Can only set either home or a larger type than the last one");
217 for (int i
= static_cast<int>(rangeType
); i
< static_cast<int>(Type::Number
); i
++)
222 lastTypeSet_
= rangeType
;
226 /*! \brief The list of end atom indices */
227 std::array
<int, static_cast<int>(Type::Number
)> end_
;
228 Type lastTypeSet_
= Type::Number
;
231 /*! \brief Enum of dynamic load balancing states
233 * Allowed DLB states and transitions
234 * - intialization at startup:
235 * -> offUser ("-dlb no")
236 * -> onUser ("-dlb yes")
237 * -> offCanTurnOn ("-dlb auto")
239 * - in automatic mode (i.e. initial state offCanTurnOn):
240 * offCanTurnOn -> onCanTurnOff
241 * offCanTurnOn -> offForever
242 * offCanTurnOn -> offTemporarilyLocked
243 * offTemporarilyLocked -> offCanTurnOn
244 * onCanTurnOff -> offCanTurnOn
248 offUser
, /**< DLB is permanently off per user request */
249 offForever
, /**< DLB is off due to a runtime condition (not supported or causes performance loss) and will never be turned on */
250 offCanTurnOn
, /**< DLB is off and will turn on on imbalance */
251 offTemporarilyLocked
, /**< DLB is off and temporarily can't turn on */
252 onCanTurnOff
, /**< DLB is on and can turn off when slow */
253 onUser
, /**< DLB is permanently on per user request */
254 nr
/**< The number of DLB states */
257 /*! \brief The PME domain decomposition for one dimension */
260 int dim
; /**< The dimension */
261 gmx_bool dim_match
; /**< Tells if DD and PME dims match */
262 int nslab
; /**< The number of PME ranks/domains in this dimension */
263 real
*slb_dim_f
; /**< Cell sizes for determining the PME comm. with SLB */
264 int *pp_min
; /**< The minimum pp node location, size nslab */
265 int *pp_max
; /**< The maximum pp node location, size nslab */
266 int maxshift
; /**< The maximum shift for coordinate redistribution in PME */
271 real min0
; /* The minimum bottom of this zone */
272 real max1
; /* The maximum top of this zone */
273 real min1
; /* The minimum top of this zone */
274 real mch0
; /* The maximum bottom communicaton height for this zone */
275 real mch1
; /* The maximum top communicaton height for this zone */
276 real p1_0
; /* The bottom value of the first cell in this zone */
277 real p1_1
; /* The top value of the first cell in this zone */
278 real dataSet
; /* Bool disguised as a real, 1 when the above data has been set. 0 otherwise */
281 /*! \brief The number of reals in gmx_ddzone_t */
282 constexpr int c_ddzoneNumReals
= 8;
284 template<typename T
> class DDBufferAccess
;
286 /*! \brief Temporary storage container that minimizes (re)allocation and clearing
288 * This is only the storage, actual access happens through DDBufferAccess.
289 * All methods check if the buffer is (not) in use.
295 /*! \brief Returns a buffer of size \p numElements, the elements are undefined */
296 gmx::ArrayRef
<T
> resize(size_t numElements
)
298 GMX_ASSERT(isInUse_
, "Should only operate on acquired buffers");
300 if (numElements
> buffer_
.size())
302 buffer_
.resize(numElements
);
305 return gmx::arrayRefFromArray(buffer_
.data(), numElements
);
308 /*! \brief Acquire the buffer for use with size set to \p numElements, the elements are undefined */
309 gmx::ArrayRef
<T
> acquire(size_t numElements
)
311 GMX_RELEASE_ASSERT(!isInUse_
, "Should only request free buffers");
314 return resize(numElements
);
317 /*! \brief Releases the buffer, buffer_ should not be used after this */
320 GMX_RELEASE_ASSERT(isInUse_
, "Should only release buffers in use");
324 std::vector
<T
> buffer_
; /**< The actual memory buffer */
325 bool isInUse_
= false; /**< Flag that tells whether the buffer is in use */
327 friend class DDBufferAccess
<T
>;
330 /*! \brief Class that manages access to a temporary memory buffer */
335 /*! \brief Constructor, returns a buffer of size \p numElements, element values are undefined
337 * \note The actual memory buffer \p ddBuffer can not be used to
338 * create other DDBufferAccess objects until the one created
341 DDBufferAccess(DDBuffer
<T
> &ddBuffer
,
342 size_t numElements
) :
345 buffer
= ddBuffer_
.acquire(numElements
);
353 /*! \brief Resizes the buffer to \p numElements, new elements are undefined
355 * \note The buffer arrayref is updated after this call.
357 void resize(size_t numElements
)
359 buffer
= ddBuffer_
.resize(numElements
);
363 DDBuffer
<T
> &ddBuffer_
; /**< Reference to the storage class */
365 gmx::ArrayRef
<T
> buffer
; /**< The access to the memory buffer */
368 /*! brief Temporary buffer for setting up communiation over one pulse and all zones in the halo */
369 struct dd_comm_setup_work_t
371 std::vector
<int> localAtomGroupBuffer
; /**< The local atom group indices to send */
372 std::vector
<int> atomGroupBuffer
; /**< Buffer for collecting the global atom group indices to send */
373 std::vector
<gmx::RVec
> positionBuffer
; /**< Buffer for collecting the atom group positions to send */
374 int nat
; /**< The number of atoms contained in the atom groups to send */
375 int nsend_zone
; /**< The number of atom groups to send for the last zone */
378 /*! \brief Struct for domain decomposition communication
380 * This struct contains most information about domain decomposition
381 * communication setup, some communication buffers, some statistics
382 * and also the setup for the communication between particle-particle
383 * and PME only ranks.
385 * All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
386 * unless stated otherwise.
388 struct gmx_domdec_comm_t
// NOLINT (clang-analyzer-optin.performance.Padding)
390 /* PME and Cartesian communicator stuff */
391 int npmedecompdim
; /**< The number of decomposition dimensions for PME, 0: no PME */
392 int npmenodes
; /**< The number of ranks doing PME (PP/PME or only PME) */
393 int npmenodes_x
; /**< The number of PME ranks/domains along x */
394 int npmenodes_y
; /**< The number of PME ranks/domains along y */
395 gmx_bool bCartesianPP_PME
; /**< Use Cartesian communication between PP and PME ranks */
396 ivec ntot
; /**< Cartesian grid for combinted PP+PME ranks */
397 int cartpmedim
; /**< The number of dimensions for the PME setup that are Cartesian */
398 int *pmenodes
; /**< The PME ranks, size npmenodes */
399 int *ddindex2simnodeid
; /**< The Cartesian index to sim rank conversion, used with bCartesianPP_PME */
400 gmx_ddpme_t ddpme
[2]; /**< The 1D or 2D PME domain decomposition setup */
402 /* The DD particle-particle nodes only */
403 gmx_bool bCartesianPP
; /**< Use a Cartesian communicator for PP */
404 int *ddindex2ddnodeid
; /**< The Cartesian index to DD rank conversion, used with bCartesianPP */
406 /* The DLB state, used for reloading old states, during e.g. EM */
407 t_block cgs_gl
; /**< The global charge groups, this defined the DD state (except for the DLB state) */
409 /* Charge group / atom sorting */
410 std::unique_ptr
<gmx_domdec_sort_t
> sort
; /**< Data structure for cg/atom sorting */
412 //! True when update groups are used
413 bool useUpdateGroups
;
414 //! Update atom grouping for each molecule type
415 std::vector
<gmx::RangePartitioning
> updateGroupingPerMoleculetype
;
416 //! Centers of mass of local update groups
417 std::unique_ptr
<gmx::UpdateGroupsCog
> updateGroupsCog
;
419 /* Are there charge groups? */
420 gmx_bool bCGs
; /**< True when there are charge groups */
422 gmx_bool bInterCGBondeds
; /**< Are there inter-cg bonded interactions? */
423 gmx_bool bInterCGMultiBody
; /**< Are there inter-cg multi-body interactions? */
425 /* Data for the optional bonded interaction atom communication range */
426 gmx_bool bBondComm
; /**< Only communicate atoms beyond the non-bonded cut-off when they are involved in bonded interactions with non-local atoms */
427 t_blocka
*cglink
; /**< Links between cg's through bonded interactions */
428 char *bLocalCG
; /**< Local cg availability, TODO: remove when group scheme is removed */
430 /* The DLB state, possible values are defined above */
432 /* With dlbState=DlbState::offCanTurnOn, should we check if to DLB on at the next DD? */
433 gmx_bool bCheckWhetherToTurnDlbOn
;
434 /* The first DD count since we are running without DLB */
435 int ddPartioningCountFirstDlbOff
= 0;
437 /* Cell sizes for static load balancing, first index cartesian */
440 /* The width of the communicated boundaries */
441 real cutoff_mbody
; /**< Cut-off for multi-body interactions, also 2-body bonded when \p cutoff_mody > \p cutoff */
442 real cutoff
; /**< Cut-off for non-bonded/2-body interactions */
443 rvec cellsize_min
; /**< The minimum guaranteed cell-size, Cartesian indexing */
444 rvec cellsize_min_dlb
; /**< The minimum guaranteed cell-size with dlb=auto */
445 real cellsize_limit
; /**< The lower limit for the DD cell size with DLB */
446 gmx_bool bVacDLBNoLimit
; /**< Effectively no NB cut-off limit with DLB for systems without PBC? */
448 /** With PME load balancing we set limits on DLB */
449 gmx_bool bPMELoadBalDLBLimits
;
450 /** DLB needs to take into account that we want to allow this maximum
451 * cut-off (for PME load balancing), this could limit cell boundaries.
453 real PMELoadBal_max_cutoff
;
455 ivec tric_dir
; /**< tric_dir from \p gmx_ddbox_t is only stored here because dd_get_ns_ranges needs it */
456 rvec box0
; /**< box lower corner, required with dim's without pbc when avoiding communication */
457 rvec box_size
; /**< box size, required with dim's without pbc when avoiding communication */
459 rvec cell_x0
; /**< The DD cell lower corner, in triclinic space */
460 rvec cell_x1
; /**< The DD cell upper corner, in triclinic space */
462 rvec old_cell_x0
; /**< The old \p cell_x0, to check cg displacements */
463 rvec old_cell_x1
; /**< The old \p cell_x1, to check cg displacements */
465 /** The communication setup and charge group boundaries for the zones */
466 gmx_domdec_zones_t zones
;
468 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
469 * cell boundaries of neighboring cells for staggered grids when using
470 * dynamic load balancing.
472 gmx_ddzone_t zone_d1
[2]; /**< Zone limits for dim 1 with staggered grids */
473 gmx_ddzone_t zone_d2
[2][2]; /**< Zone limits for dim 2 with staggered grids */
475 /** The coordinate/force communication setup and indices */
476 gmx_domdec_comm_dim_t cd
[DIM
];
477 /** The maximum number of cells to communicate with in one dimension */
480 /** Which cg distribution is stored on the master node,
481 * stored as DD partitioning call count.
483 int64_t master_cg_ddp_count
;
485 /** The number of cg's received from the direct neighbors */
486 int zone_ncg1
[DD_MAXZONE
];
488 /** The atom ranges in the local state */
489 DDAtomRanges atomRanges
;
491 /** Array for signalling if atoms have moved to another domain */
492 std::vector
<int> movedBuffer
;
494 /** Communication int buffer for general use */
495 DDBuffer
<int> intBuffer
;
497 /** Communication rvec buffer for general use */
498 DDBuffer
<gmx::RVec
> rvecBuffer
;
500 /* Temporary storage for thread parallel communication setup */
501 std::vector
<dd_comm_setup_work_t
> dth
; /**< Thread-local work data */
503 /* Communication buffer only used with multiple grid pulses */
504 DDBuffer
<gmx::RVec
> rvecBuffer2
; /**< Another rvec comm. buffer */
506 /* Communication buffers for local redistribution */
507 std::array
<std::vector
<int>, DIM
*2> cggl_flag
; /**< Charge group flag comm. buffers */
508 std::array
<std::vector
<gmx::RVec
>, DIM
*2> cgcm_state
; /**< Charge group center comm. buffers */
510 /* Cell sizes for dynamic load balancing */
511 std::vector
<DDCellsizesWithDlb
> cellsizesWithDlb
;
513 /* Stuff for load communication */
514 gmx_bool bRecordLoad
; /**< Should we record the load */
515 domdec_load_t
*load
; /**< The recorded load data */
516 int nrank_gpu_shared
; /**< The number of MPI ranks sharing the GPU our rank is using */
518 MPI_Comm
*mpi_comm_load
; /**< The MPI load communicator */
519 MPI_Comm mpi_comm_gpu_shared
; /**< The MPI load communicator for ranks sharing a GPU */
522 /* Information for managing the dynamic load balancing */
523 int dlb_scale_lim
; /**< Maximum DLB scaling per load balancing step in percent */
525 BalanceRegion
*balanceRegion
; /**< Struct for timing the force load balancing region */
527 /* Cycle counters over nstlist steps */
528 float cycl
[ddCyclNr
]; /**< Total cycles counted */
529 int cycl_n
[ddCyclNr
]; /**< The number of cycle recordings */
530 float cycl_max
[ddCyclNr
]; /**< The maximum cycle count */
531 /** Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
533 double flop
; /**< Total flops counted */
534 int flop_n
; /**< The number of flop recordings */
535 /** How many times did we have load measurements */
537 /** How many times have we collected the load measurements */
540 /* Cycle count history for DLB checks */
541 float cyclesPerStepBeforeDLB
; /**< The averaged cycles per step over the last nstlist step before turning on DLB */
542 float cyclesPerStepDlbExpAverage
; /**< The running average of the cycles per step during DLB */
543 bool haveTurnedOffDlb
; /**< Have we turned off DLB (after turning DLB on)? */
544 int64_t dlbSlowerPartitioningCount
; /**< The DD step at which we last measured that DLB off was faster than DLB on, 0 if there was no such step */
546 /* Statistics for atoms */
547 double sum_nat
[static_cast<int>(DDAtomRanges::Type::Number
)]; /**< The atoms per range, summed over the steps */
549 /* Statistics for calls and times */
550 int ndecomp
; /**< The number of partioning calls */
551 int nload
; /**< The number of load recordings */
552 double load_step
; /**< Total MD step time */
553 double load_sum
; /**< Total PP force time */
554 double load_max
; /**< Max \p load_sum over the ranks */
555 ivec load_lim
; /**< Was load balancing limited, per DD dim */
556 double load_mdf
; /**< Total time on PP done during PME overlap time */
557 double load_pme
; /**< Total time on our PME-only rank */
559 /** The last partition step */
560 int64_t partition_step
;
563 int nstDDDump
; /**< Step interval for dumping the local+non-local atoms to pdb */
564 int nstDDDumpGrid
; /**< Step interval for duming the DD grid to pdb */
565 int DD_debug
; /**< DD debug print level: 0, 1, 2 */
568 /*! \brief DD zone permutation
570 * Zone permutation from the Cartesian x-major/z-minor order to an order
571 * that leads to consecutive charge groups for neighbor searching.
572 * TODO: remove when the group scheme is removed
574 static const int zone_perm
[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
576 /*! \brief DD zone reordering to Cartesian order
578 * Index to reorder the zone such that the end up in Cartesian order
579 * with dimension index 0 major and dimension index 2 minor.
581 static const int zone_reorder_cartesian
[DD_MAXZONE
] = { 0, 1, 3, 2, 5, 4, 6, 7 };
583 /* dd_zo and dd_zp3 is set up such that i zones with non-zero
584 * components see only j zones with that component 0.
587 /*! \brief Returns the DD cut-off distance for multi-body interactions */
588 real
dd_cutoff_multibody(const gmx_domdec_t
*dd
);
590 /*! \brief Returns the DD cut-off distance for two-body interactions */
591 real
dd_cutoff_twobody(const gmx_domdec_t
*dd
);
593 /*! \brief Returns the domain index given the number of domains and the domain coordinates
595 * This order is required to minimize the coordinate communication in PME
596 * which uses decomposition in the x direction.
598 static inline int dd_index(const ivec numDomains
,
599 const ivec domainCoordinates
)
601 return ((domainCoordinates
[XX
]*numDomains
[YY
] + domainCoordinates
[YY
])*numDomains
[ZZ
]) + domainCoordinates
[ZZ
];
604 /*! Returns the size of the buffer to hold fractional cell boundaries for DD dimension index dimIndex */
605 static inline int ddCellFractionBufferSize(const gmx_domdec_t
*dd
,
608 return dd
->nc
[dd
->dim
[dimIndex
]] + 1 + dimIndex
*2 + 1 + dimIndex
;
611 /*! \brief Maximum number of ranks for using send/recv for state scattering and gathering
613 * Use separate MPI send and receive commands
614 * when #ranks <= c_maxNumRanksUseSendRecvForScatterAndGather
615 * This saves memory (and some copying for small #ranks).
616 * For high parallelization scatter and gather calls are used.
618 static constexpr int c_maxNumRanksUseSendRecvForScatterAndGather
= 4;
620 /*! \brief Make DD cells larger by this factor than the limit to avoid rounding issues */
621 static constexpr double DD_CELL_MARGIN
= 1.0001;
623 /*! \brief Factor for checking DD cell size limitation during DLB, should be in between 1 and DD_CELL_MARGIN */
624 static constexpr double DD_CELL_MARGIN2
= 1.00005;
626 /*! \brief With pressure scaling, keep cell sizes 2% above the limit to allow for some scaling */
627 static constexpr double DD_PRES_SCALE_MARGIN
= 1.02;