/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2008,2009,2010,2011,2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines functions used by the domdec module
 * in its initial setup phase.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */
#include "gmxpre.h"

#include <cassert>
#include <cinttypes>
#include <cmath>
#include <cstdio>

#include <string>
#include <vector>

#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/perf_est.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"
/*! \brief Margin for setting up the DD grid */
#define DD_GRID_MARGIN_PRES_SCALE 1.05
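/* With pressure coupling the box can shrink during the run; optimize_ncells()
 * below multiplies the minimum cell size by this 5% margin so that the grid
 * chosen at step 0 still satisfies the cell-size limit under moderate box
 * scaling. */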
/*! \brief Factorize \p n.
 *
 * \param[in]    n    Value to factorize
 * \param[out]   fac  Vector of factors (to be allocated in this function)
 * \param[out]   mfac Vector with the number of times each factor repeats in the factorization (to be allocated in this function)
 */
static void factorize(int               n,
                      std::vector<int> *fac,
                      std::vector<int> *mfac)
{
    if (n <= 0)
    {
        gmx_fatal(FARGS, "Can only factorize positive integers.");
    }

    /* Decompose n in factors */
    fac->clear();
    mfac->clear();
    int d = 2;
    while (n > 1)
    {
        while (n % d == 0)
        {
            if (fac->empty() || fac->back() != d)
            {
                fac->push_back(d);
                mfac->push_back(1);
            }
            else
            {
                mfac->back()++;
            }
            n /= d;
        }
        d++;
    }
}
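/* Example: factorize(12, &fac, &mfac) gives fac = {2, 3} and mfac = {2, 1},
 * since 12 = 2^2 * 3. Factors are returned in increasing order, so fac.back()
 * is the largest prime factor. */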
/*! \brief Return the largest prime factor of \p n */
static int largest_divisor(int n)
{
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(n, &div, &mdiv);

    return div.back();
}
/*! \brief Compute largest common divisor of \p n1 and \p n2 */
static int lcd(int n1, int n2)
{
    int d, i;

    d = 1;
    for (i = 2; (i <= n1 && i <= n2); i++)
    {
        if (n1 % i == 0 && n2 % i == 0)
        {
            d = i;
        }
    }

    return d;
}
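/* Example: lcd(12, 18) returns 6, the greatest common divisor; for coprime
 * arguments the loop never updates d and 1 is returned. */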
/*! \brief Returns TRUE when there are enough PME ranks for the ratio */
static gmx_bool fits_pme_ratio(int nrank_tot, int nrank_pme, float ratio)
{
    return (static_cast<double>(nrank_pme)/static_cast<double>(nrank_tot) > 0.95*ratio);
}
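/* The 0.95 factor gives a 5% margin: a candidate PME rank count is accepted
 * when its fraction of all ranks exceeds 95% of the estimated relative PME
 * load, so slightly "underpowered" PME rank counts are still allowed. */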
/*! \brief Returns TRUE when npme out of ntot ranks doing PME is expected to give reasonable performance */
static gmx_bool fits_pp_pme_perf(int ntot, int npme, float ratio)
{
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(ntot - npme, &div, &mdiv);

    int npp_root3  = gmx::roundToInt(std::cbrt(ntot - npme));
    int npme_root2 = gmx::roundToInt(std::sqrt(static_cast<double>(npme)));

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP ranks,
     * factor 7 allowed at 49 or more PP ranks.
     */
    if (div.back() > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the number of PP and PME ranks have a reasonably sized
     * divisor in common, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(ntot - npme, npme)*2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(ntot, npme, ratio);
}
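/* Worked example: ntot = 16, npme = 4 gives 12 PP ranks with prime factors
 * {2, 3}; the largest factor 3 does not exceed 3 + round(cbrt(12)) = 5, and
 * lcd(12, 4)*2 = 8 is not below round(sqrt(4)) = 2, so the split is accepted
 * whenever the estimated relative PME load is below 0.25/0.95, about 0.26. */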
/*! \brief Make a guess for the number of PME ranks to use. */
static int guess_npme(const gmx::MDLogger &mdlog,
                      const gmx_mtop_t *mtop, const t_inputrec *ir,
                      const matrix box,
                      int nrank_tot)
{
    float ratio;
    int   npme;

    ratio = pme_load_estimate(mtop, ir, box);

    GMX_LOG(mdlog.info).appendTextFormatted(
            "Guess for relative PME load: %.2f", ratio);

    /* We assume the optimal rank ratio is close to the load ratio.
     * The communication load is neglected,
     * but (hopefully) this will balance out between PP and PME.
     */
    if (!fits_pme_ratio(nrank_tot, nrank_tot/2, ratio))
    {
        /* We would need more than nrank_tot/2 PME only nodes,
         * which is not possible. Since the PME load is very high,
         * we will not lose much performance when all ranks do PME.
         */
        return 0;
    }

    /* First try to find npme as a factor of nrank_tot up to nrank_tot/3.
     * We start with a minimum PME node fraction of 1/16
     * and avoid ratios which lead to large prime factors in nnodes-npme.
     */
    npme = (nrank_tot + 15)/16;
    while (npme <= nrank_tot/3)
    {
        if (nrank_tot % npme == 0)
        {
            /* Note that fits_perf might change the PME grid,
             * in the current implementation it does not.
             */
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
        }
        npme++;
    }
    if (npme > nrank_tot/3)
    {
        /* Try any possible number for npme */
        npme = 1;
        while (npme <= nrank_tot/2)
        {
            /* Note that fits_perf may change the PME grid */
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
            npme++;
        }
    }
    if (npme > nrank_tot/2)
    {
        gmx_fatal(FARGS, "Could not find an appropriate number of separate PME ranks, i.e. >= %5f*#ranks (%d) and <= #ranks/2 (%d), that also gives reasonable performance (grid_x=%d, grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of ranks or the PME grid dimensions, see the manual for details.",
                  ratio, gmx::roundToInt(0.95*ratio*nrank_tot), nrank_tot/2, ir->nkx, ir->nky);
    }
    else
    {
        GMX_LOG(mdlog.info).appendTextFormatted(
                "Will use %d particle-particle and %d PME only ranks\n"
                "This is a guess, check the performance at the end of the log file",
                nrank_tot - npme, npme);
    }

    return npme;
}
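/* Search order illustration: with 64 total ranks the scan starts at
 * npme = (64 + 15)/16 = 4 and first considers only divisors of 64 up to
 * 64/3 = 21; only if none of those passes fits_pp_pme_perf() is every value
 * up to 64/2 = 32 tried. */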
/*! \brief Return \p n divided by \p f rounded up to the next integer. */
static int div_up(int n, int f)
{
    return (n + f - 1)/f;
}
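/* Example: div_up(10, 4) returns 3, whereas plain integer division 10/4
 * would give 2. */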
real comm_box_frac(const ivec dd_nc, real cutoff, const gmx_ddbox_t *ddbox)
{
    int  i, j, k;
    rvec nw;
    real comm_vol;

    for (i = 0; i < DIM; i++)
    {
        real bt = ddbox->box_size[i]*ddbox->skew_fac[i];
        nw[i] = dd_nc[i]*cutoff/bt;
    }

    comm_vol = 0;
    for (i = 0; i < DIM; i++)
    {
        if (dd_nc[i] > 1)
        {
            comm_vol += nw[i];
            for (j = i+1; j < DIM; j++)
            {
                if (dd_nc[j] > 1)
                {
                    comm_vol += nw[i]*nw[j]*M_PI/4;
                    for (k = j+1; k < DIM; k++)
                    {
                        if (dd_nc[k] > 1)
                        {
                            comm_vol += nw[i]*nw[j]*nw[k]*M_PI/6;
                        }
                    }
                }
            }
        }
    }

    return comm_vol;
}
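/* Interpretation: nw[i] is the cut-off distance measured in cell widths along
 * dimension i. The sum estimates the fraction of a home cell's volume that has
 * to be communicated: planar slabs (nw[i]) for each decomposed dimension,
 * quarter-cylinder edge regions (pi/4 * nw[i]*nw[j]) and octant-sphere corner
 * regions (pi/6 * nw[i]*nw[j]*nw[k]), matching the communication volume of the
 * eighth-shell domain decomposition. */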
/*! \brief Return whether the DD is inhomogeneous in the z direction */
static gmx_bool inhomogeneous_z(const t_inputrec *ir)
{
    return ((EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) &&
            ir->ePBC == epbcXYZ && ir->ewald_geometry == eewg3DC);
}
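/* ewald-geometry=3dc (eewg3DC) is typically used for slab systems with a
 * vacuum gap along z, so the particle density along z is far from uniform;
 * decomposing such a system in z would give strongly imbalanced (or empty)
 * domains, which is why the code below refuses nc[ZZ] > 1 in this case. */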
/*! \brief Estimate cost of PME FFT communication
 *
 * This only takes the communication into account and not imbalance
 * in the calculation. But the imbalance in communication and calculation
 * are similar and therefore these formulas also prefer load balance
 * in the FFT and pme_solve calculation.
 */
static float comm_pme_cost_vol(int npme, int a, int b, int c)
{
    /* We use a float here, since an integer might overflow */
    float comm_vol;

    comm_vol  = npme - 1;
    comm_vol *= npme;
    comm_vol *= div_up(a, npme);
    comm_vol *= div_up(b, npme);
    comm_vol *= c;

    return comm_vol;
}
/*! \brief Estimate cost of communication for a possible domain decomposition. */
static float comm_cost_est(real limit, real cutoff,
                           const matrix box, const gmx_ddbox_t *ddbox,
                           int natoms, const t_inputrec *ir,
                           float pbcdxr,
                           int npme_tot, ivec nc)
{
    ivec  npme = {1, 1, 1};
    int   i, j, nk, overlap;
    rvec  bt;
    float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx;
    /* This is the cost of a pbc_dx call relative to the cost
     * of communicating the coordinate and force of an atom.
     * This will be machine dependent.
     * These factors are for x86 with SMP or Infiniband.
     */
    float pbcdx_rect_fac = 0.1;
    float pbcdx_tric_fac = 0.2;
    float temp;

    /* Check the DD algorithm restrictions */
    if ((ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1) ||
        (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1)))
    {
        return -1;
    }

    if (inhomogeneous_z(ir) && nc[ZZ] > 1)
    {
        return -1;
    }

    assert(ddbox->npbcdim <= DIM);

    /* Check if the triclinic requirements are met */
    for (i = 0; i < DIM; i++)
    {
        for (j = i+1; j < ddbox->npbcdim; j++)
        {
            if (box[j][i] != 0 || ir->deform[j][i] != 0 ||
                (ir->epc != epcNO && ir->compress[j][i] != 0))
            {
                if (nc[j] > 1 && nc[i] == 1)
                {
                    return -1;
                }
            }
        }
    }

    for (i = 0; i < DIM; i++)
    {
        bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i];

        /* Without PBC and with 2 cells, there are no lower limits on the cell size */
        if (!(i >= ddbox->npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit)
        {
            return -1;
        }

        /* With PBC, check if the cut-off fits in nc[i]-1 cells */
        if (i < ddbox->npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff)
        {
            return -1;
        }
    }

    if (npme_tot > 1)
    {
        /* The following choices should match those
         * in init_domain_decomposition in domdec.c.
         */
        if (nc[XX] == 1 && nc[YY] > 1)
        {
            npme[XX] = 1;
            npme[YY] = npme_tot;
        }
        else if (nc[YY] == 1)
        {
            npme[XX] = npme_tot;
            npme[YY] = 1;
        }
        else
        {
            /* Will we use 1D or 2D PME decomposition? */
            npme[XX] = (npme_tot % nc[XX] == 0) ? nc[XX] : npme_tot;
            npme[YY] = npme_tot/npme[XX];
        }
    }

    if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
    {
        /* Check the PME grid restrictions.
         * Currently these can only be invalid here with too few grid lines
         * along the x dimension per rank doing PME.
         */
        int npme_x = (npme_tot > 1 ? npme[XX] : nc[XX]);

        /* Currently we don't have the OpenMP thread count available here.
         * But with threads we have only tighter restrictions and it's
         * probably better anyhow to avoid settings where we need to reduce
         * grid lines over multiple ranks, as the thread check will do.
         */
        bool useThreads     = true;
        bool errorsAreFatal = false;
        if (!gmx_pme_check_restrictions(ir->pme_order, ir->nkx, ir->nky, ir->nkz,
                                        npme_x, useThreads, errorsAreFatal))
        {
            return -1;
        }
    }

    /* When two dimensions are (nearly) equal, use more cells
     * for the smallest index, so the decomposition does not
     * depend sensitively on the rounding of the box elements.
     */
    for (i = 0; i < DIM; i++)
    {
        for (j = i+1; j < DIM; j++)
        {
            /* Check if the box size is nearly identical,
             * in that case we prefer nx > ny and ny > nz.
             */
            if (std::fabs(bt[j] - bt[i]) < 0.01*bt[i] && nc[j] > nc[i])
            {
                /* The XX/YY check is a bit compact. If nc[YY]==npme[YY]
                 * this means the swapped nc has nc[XX]==npme[XX],
                 * and we can also swap X and Y for PME.
                 */
                /* Check if dimension i and j are equivalent for PME.
                 * For x/y: if nc[YY]!=npme[YY], we can not swap x/y
                 * For y/z: we can not have PME decomposition in z
                 */
                if (npme_tot <= 1 ||
                    !((i == XX && j == YY && nc[YY] != npme[YY]) ||
                      (i == YY && j == ZZ && npme[YY] > 1)))
                {
                    return -1;
                }
            }
        }
    }

    /* This function determines only half of the communication cost.
     * All PP, PME and PP-PME communication is symmetric
     * and the "back"-communication cost is identical to the forward cost.
     */

    comm_vol = comm_box_frac(nc, cutoff, ddbox);

    comm_pme = 0;
    for (i = 0; i < 2; i++)
    {
        /* Determine the largest volume for PME x/f redistribution */
        if (nc[i] % npme[i] != 0)
        {
            if (nc[i] > npme[i])
            {
                comm_vol_xf = (npme[i] == 2 ? 1.0/3.0 : 0.5);
            }
            else
            {
                comm_vol_xf = 1.0 - lcd(nc[i], npme[i])/static_cast<double>(npme[i]);
            }
            comm_pme += 3*natoms*comm_vol_xf;
        }

        /* Grid overlap communication */
        if (npme[i] > 1)
        {
            nk        = (i == 0 ? ir->nkx : ir->nky);
            overlap   = (nk % npme[i] == 0 ? ir->pme_order-1 : ir->pme_order);
            temp      = npme[i];
            temp     *= overlap;
            temp     *= ir->nkx;
            temp     *= ir->nky;
            temp     *= ir->nkz;
            temp     /= nk;
            comm_pme += temp;
/* Old line comm_pme += npme[i]*overlap*ir->nkx*ir->nky*ir->nkz/nk; */
        }
    }

    comm_pme += comm_pme_cost_vol(npme[YY], ir->nky, ir->nkz, ir->nkx);
    comm_pme += comm_pme_cost_vol(npme[XX], ir->nkx, ir->nky, ir->nkz);

    /* Add cost of pbc_dx for bondeds */
    cost_pbcdx = 0;
    if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir->ePBC != epbcXY))
    {
        if ((ddbox->tric_dir[XX] && nc[XX] == 1) ||
            (ddbox->tric_dir[YY] && nc[YY] == 1))
        {
            cost_pbcdx = pbcdxr*pbcdx_tric_fac;
        }
        else
        {
            cost_pbcdx = pbcdxr*pbcdx_rect_fac;
        }
    }

    if (debug)
    {
        fprintf(debug,
                "nc %2d %2d %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %9.3e tot %9.3e\n",
                nc[XX], nc[YY], nc[ZZ], npme[XX], npme[YY],
                comm_vol, cost_pbcdx, comm_pme/(3*natoms),
                comm_vol + cost_pbcdx + comm_pme/(3*natoms));
    }

    return 3*natoms*(comm_vol + cost_pbcdx) + comm_pme;
}
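/* The returned estimate counts communicated real values: the PP halo fraction
 * and the pbc_dx penalty are scaled by 3*natoms (x, y and z per atom), and the
 * PME terms count redistributed coordinates/forces and transferred FFT-grid
 * points. A negative return value marks a decomposition that violates one of
 * the restrictions above; assign_factors() below only keeps non-negative
 * costs. */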
/*! \brief Assign penalty factors to possible domain decompositions, based on the estimated communication costs. */
static void assign_factors(const gmx_domdec_t *dd,
                           real limit, real cutoff,
                           const matrix box, const gmx_ddbox_t *ddbox,
                           int natoms, const t_inputrec *ir,
                           float pbcdxr, int npme,
                           int ndiv, const int *div, const int *mdiv,
                           ivec ir_try, ivec opt)
{
    int   x, y, i;
    float ce;

    if (ndiv == 0)
    {
        ce = comm_cost_est(limit, cutoff, box, ddbox,
                           natoms, ir, pbcdxr, npme, ir_try);
        if (ce >= 0 && (opt[XX] == 0 ||
                        ce < comm_cost_est(limit, cutoff, box, ddbox,
                                           natoms, ir, pbcdxr, npme, opt)))
        {
            copy_ivec(ir_try, opt);
        }

        return;
    }

    for (x = mdiv[0]; x >= 0; x--)
    {
        for (i = 0; i < x; i++)
        {
            ir_try[XX] *= div[0];
        }
        for (y = mdiv[0]-x; y >= 0; y--)
        {
            for (i = 0; i < y; i++)
            {
                ir_try[YY] *= div[0];
            }
            for (i = 0; i < mdiv[0]-x-y; i++)
            {
                ir_try[ZZ] *= div[0];
            }

            /* Recurse with the remaining prime factors */
            assign_factors(dd, limit, cutoff, box, ddbox, natoms, ir, pbcdxr, npme,
                           ndiv-1, div+1, mdiv+1, ir_try, opt);

            for (i = 0; i < mdiv[0]-x-y; i++)
            {
                ir_try[ZZ] /= div[0];
            }
            for (i = 0; i < y; i++)
            {
                ir_try[YY] /= div[0];
            }
        }
        for (i = 0; i < x; i++)
        {
            ir_try[XX] /= div[0];
        }
    }
}
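/* The recursion enumerates every way of distributing the multiplicity of each
 * prime factor over x, y and z. For npp = 12 = 2^2 * 3 it therefore evaluates
 * all 18 ordered grids (nx, ny, nz) with nx*ny*nz = 12, keeping in opt the one
 * with the lowest comm_cost_est() that satisfies all restrictions. */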
/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks */
static real optimize_ncells(const gmx::MDLogger &mdlog,
                            int nnodes_tot, int npme_only,
                            gmx_bool bDynLoadBal, real dlb_scale,
                            const gmx_mtop_t *mtop,
                            const matrix box, const gmx_ddbox_t *ddbox,
                            const t_inputrec *ir,
                            gmx_domdec_t *dd,
                            real cellsize_limit, real cutoff,
                            const bool haveInterDomainBondeds,
                            ivec nc)
{
    int    npp, npme, d, nmax;
    double pbcdxr;
    real   limit;
    ivec   itry;

    limit = cellsize_limit;

    npp = nnodes_tot - npme_only;
    if (EEL_PME(ir->coulombtype))
    {
        npme = (npme_only > 0 ? npme_only : npp);
    }
    else
    {
        npme = 0;
    }

    if (haveInterDomainBondeds)
    {
        /* If we can skip PBC for distance calculations in plain-C bondeds,
         * we can save some time (e.g. 3D DD with pbc=xyz).
         * Here we ignore SIMD bondeds as they always do (fast) PBC.
         */
        count_bonded_distances(mtop, ir, &pbcdxr, nullptr);
        pbcdxr /= static_cast<double>(mtop->natoms);
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }

    /* Add a margin for DLB and/or pressure scaling */
    if (bDynLoadBal)
    {
        if (dlb_scale >= 1.0)
        {
            gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
        }
        GMX_LOG(mdlog.info).appendTextFormatted(
                "Scaling the initial minimum size with 1/%g (option -dds) = %g",
                dlb_scale, 1/dlb_scale);
        limit /= dlb_scale;
    }
    else if (ir->epc != epcNO)
    {
        GMX_LOG(mdlog.info).appendTextFormatted(
                "To account for pressure scaling, scaling the initial minimum size with %g",
                DD_GRID_MARGIN_PRES_SCALE);
        limit *= DD_GRID_MARGIN_PRES_SCALE;
    }

    GMX_LOG(mdlog.info).appendTextFormatted(
            "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm",
            npp, limit);

    if (inhomogeneous_z(ir))
    {
        GMX_LOG(mdlog.info).appendTextFormatted(
                "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.",
                eewg_names[ir->ewald_geometry]);
    }

    if (limit > 0)
    {
        std::string maximumCells = "The maximum allowed number of cells is:";
        for (d = 0; d < DIM; d++)
        {
            nmax = static_cast<int>(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
            if (d >= ddbox->npbcdim && nmax < 2)
            {
                nmax = 2;
            }
            if (d == ZZ && inhomogeneous_z(ir))
            {
                nmax = 1;
            }
            maximumCells += gmx::formatString(" %c %d", 'X' + d, nmax);
        }
        GMX_LOG(mdlog.info).appendText(maximumCells);
    }

    if (debug)
    {
        fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
    }

    /* Decompose npp in factors */
    std::vector<int> div;
    std::vector<int> mdiv;
    factorize(npp, &div, &mdiv);

    itry[XX] = 1;
    itry[YY] = 1;
    itry[ZZ] = 1;
    clear_ivec(nc);
    assign_factors(dd, limit, cutoff, box, ddbox, mtop->natoms, ir, pbcdxr,
                   npme, div.size(), div.data(), mdiv.data(), itry, nc);

    return limit;
}
real dd_choose_grid(const gmx::MDLogger &mdlog,
                    t_commrec *cr, gmx_domdec_t *dd,
                    const t_inputrec *ir,
                    const gmx_mtop_t *mtop,
                    const matrix box, const gmx_ddbox_t *ddbox,
                    int nPmeRanks,
                    gmx_bool bDynLoadBal, real dlb_scale,
                    real cellsize_limit, real cutoff_dd,
                    const bool haveInterDomainBondeds)
{
    int64_t nnodes_div, ldiv;
    real    limit;

    if (MASTER(cr))
    {
        nnodes_div = cr->nnodes;
        if (EEL_PME(ir->coulombtype))
        {
            if (nPmeRanks > 0)
            {
                if (nPmeRanks >= cr->nnodes)
                {
                    gmx_fatal(FARGS,
                              "Cannot have %d separate PME ranks with just %d total ranks",
                              nPmeRanks, cr->nnodes);
                }

                /* If the user purposely selected the number of PME nodes,
                 * only check for large primes in the PP node count.
                 */
                nnodes_div -= nPmeRanks;
            }
        }
        else
        {
            cr->npmenodes = 0;
        }

        if (nnodes_div > 12)
        {
            ldiv = largest_divisor(nnodes_div);
            /* Check if the largest divisor is more than nnodes^2/3 */
            if (ldiv*ldiv*ldiv > nnodes_div*nnodes_div)
            {
                gmx_fatal(FARGS, "The number of ranks you selected (%" PRId64 ") contains a large prime factor %" PRId64 ". In most cases this will lead to bad performance. Choose a number with smaller prime factors or set the decomposition (option -dd) manually.",
                          nnodes_div, ldiv);
            }
        }

        if (EEL_PME(ir->coulombtype))
        {
            if (nPmeRanks < 0)
            {
                /* Use PME nodes when the number of nodes is more than 16 */
                if (cr->nnodes <= 18)
                {
                    cr->npmenodes = 0;
                    GMX_LOG(mdlog.info).appendTextFormatted(
                            "Using %d separate PME ranks, as there are too few total\n"
                            " ranks for efficient splitting",
                            cr->npmenodes);
                }
                else
                {
                    cr->npmenodes = guess_npme(mdlog, mtop, ir, box, cr->nnodes);
                    GMX_LOG(mdlog.info).appendTextFormatted(
                            "Using %d separate PME ranks, as guessed by mdrun", cr->npmenodes);
                }
            }
            else
            {
                /* We checked above that nPmeRanks is a valid number */
                cr->npmenodes = nPmeRanks;
                GMX_LOG(mdlog.info).appendTextFormatted(
                        "Using %d separate PME ranks", cr->npmenodes);
                // TODO: there was a ", per user request" note here, but it's not correct anymore,
                // as with GPUs decision about nPmeRanks can be made in runner() as well.
                // Consider a single spot for setting nPmeRanks.
            }
        }

        limit = optimize_ncells(mdlog, cr->nnodes, cr->npmenodes,
                                bDynLoadBal, dlb_scale,
                                mtop, box, ddbox, ir, dd,
                                cellsize_limit, cutoff_dd,
                                haveInterDomainBondeds,
                                dd->nc);
    }
    else
    {
        limit = 0;
    }

    /* Communicate the information set by the master to all nodes */
    gmx_bcast(sizeof(dd->nc), dd->nc, cr);
    if (EEL_PME(ir->coulombtype))
    {
        gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
    }
    else
    {
        cr->npmenodes = 0;
    }

    return limit;
}