2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "dlbtiming.h"
42 #include "gromacs/domdec/domdec.h"
43 #include "gromacs/gmxlib/nrnb.h"
44 #include "gromacs/utility/gmxassert.h"
46 #include "domdec_internal.h"
48 /*! \brief Struct for timing the region for dynamic load balancing */
51 /*! \brief Constructor */
61 bool isOpen
; /**< Are we in an open balancing region? */
62 bool isOpenOnCpu
; /**< Is the, currently open, region still open on the CPU side? */
63 bool isOpenOnGpu
; /**< Is the, currently open, region open on the GPU side? */
64 gmx_cycles_t cyclesOpenCpu
; /**< Cycle count when opening the CPU region */
65 gmx_cycles_t cyclesLastCpu
; /**< Cycle count at the last call to \p ddCloseBalanceRegionCpu() */
68 BalanceRegion
* ddBalanceRegionAllocate()
70 return new BalanceRegion
;
73 /*! \brief Returns the pointer to the balance region.
75 * This should be replaced by a properly managed BalanceRegion class,
76 * but that requires a lot of refactoring in domdec.cpp.
78 static BalanceRegion
* getBalanceRegion(const gmx_domdec_t
* dd
)
80 GMX_ASSERT(dd
!= nullptr && dd
->comm
!= nullptr, "Balance regions should only be used with DD");
81 BalanceRegion
* region
= dd
->comm
->balanceRegion
;
82 GMX_ASSERT(region
!= nullptr, "Balance region should be initialized before use");
86 void DDBalanceRegionHandler::openRegionCpuImpl(DdAllowBalanceRegionReopen gmx_unused allowReopen
) const
88 BalanceRegion
* reg
= getBalanceRegion(dd_
);
89 if (dd_
->comm
->ddSettings
.recordLoad
)
91 GMX_ASSERT(allowReopen
== DdAllowBalanceRegionReopen::yes
|| !reg
->isOpen
,
92 "Should not open an already opened region");
94 reg
->cyclesOpenCpu
= gmx_cycles_read();
96 reg
->isOpenOnCpu
= true;
97 reg
->isOpenOnGpu
= false;
101 void DDBalanceRegionHandler::openRegionGpuImpl() const
103 BalanceRegion
* reg
= getBalanceRegion(dd_
);
104 GMX_ASSERT(reg
->isOpen
, "Can only open a GPU region inside an open CPU region");
105 GMX_ASSERT(!reg
->isOpenOnGpu
, "Can not re-open a GPU balance region");
106 reg
->isOpenOnGpu
= true;
109 void ddReopenBalanceRegionCpu(const gmx_domdec_t
* dd
)
111 BalanceRegion
* reg
= getBalanceRegion(dd
);
112 /* If the GPU is busy, don't reopen as we are overlapping with work */
113 if (reg
->isOpen
&& !reg
->isOpenOnGpu
)
115 reg
->cyclesOpenCpu
= gmx_cycles_read();
119 void DDBalanceRegionHandler::closeRegionCpuImpl() const
121 BalanceRegion
* reg
= getBalanceRegion(dd_
);
122 if (reg
->isOpen
&& reg
->isOpenOnCpu
)
124 GMX_ASSERT(reg
->isOpenOnCpu
, "Can only close an open region");
125 gmx_cycles_t cycles
= gmx_cycles_read();
126 reg
->isOpenOnCpu
= false;
128 if (reg
->isOpenOnGpu
)
130 /* Store the cycles for estimating the GPU/CPU overlap time */
131 reg
->cyclesLastCpu
= cycles
;
135 /* We can close the region */
136 float cyclesCpu
= cycles
- reg
->cyclesOpenCpu
;
137 dd_cycles_add(dd_
, cyclesCpu
, ddCyclF
);
143 void DDBalanceRegionHandler::closeRegionGpuImpl(float waitGpuCyclesInCpuRegion
,
144 DdBalanceRegionWaitedForGpu waitedForGpu
) const
146 BalanceRegion
* reg
= getBalanceRegion(dd_
);
149 GMX_ASSERT(reg
->isOpenOnGpu
, "Can not close a non-open GPU balance region");
150 GMX_ASSERT(!reg
->isOpenOnCpu
,
151 "The GPU region should be closed after closing the CPU region");
153 float waitGpuCyclesEstimate
= gmx_cycles_read() - reg
->cyclesLastCpu
;
154 if (waitedForGpu
== DdBalanceRegionWaitedForGpu::no
)
156 /* The actual time could be anywhere between 0 and
157 * waitCyclesEstimate. Using half is the best we can do.
159 const float unknownWaitEstimateFactor
= 0.5F
;
160 waitGpuCyclesEstimate
*= unknownWaitEstimateFactor
;
163 float cyclesCpu
= reg
->cyclesLastCpu
- reg
->cyclesOpenCpu
;
164 dd_cycles_add(dd_
, cyclesCpu
+ waitGpuCyclesEstimate
, ddCyclF
);
166 /* Register the total GPU wait time, to redistribute with GPU sharing */
167 dd_cycles_add(dd_
, waitGpuCyclesInCpuRegion
+ waitGpuCyclesEstimate
, ddCyclWaitGPU
);
169 /* Close the region */
170 reg
->isOpenOnGpu
= false;
175 //! Accumulates flop counts for force calculations.
176 static double force_flop_count(const t_nrnb
* nrnb
)
183 for (i
= 0; i
< eNR_NBKERNEL_FREE_ENERGY
; i
++)
185 /* To get closer to the real timings, we half the count
186 * for the normal loops and again half it for water loops.
189 if (strstr(name
, "W3") != nullptr || strstr(name
, "W4") != nullptr)
191 sum
+= nrnb
->n
[i
] * 0.25 * cost_nrnb(i
);
195 sum
+= nrnb
->n
[i
] * 0.50 * cost_nrnb(i
);
198 for (i
= eNR_NBKERNEL_FREE_ENERGY
; i
<= eNR_NB14
; i
++)
201 if (strstr(name
, "W3") != nullptr || strstr(name
, "W4") != nullptr)
203 sum
+= nrnb
->n
[i
] * cost_nrnb(i
);
206 for (i
= eNR_BONDS
; i
<= eNR_WALLS
; i
++)
208 sum
+= nrnb
->n
[i
] * cost_nrnb(i
);
214 void dd_force_flop_start(gmx_domdec_t
* dd
, t_nrnb
* nrnb
)
216 if (dd
->comm
->ddSettings
.eFlop
)
218 dd
->comm
->flop
-= force_flop_count(nrnb
);
222 void dd_force_flop_stop(gmx_domdec_t
* dd
, t_nrnb
* nrnb
)
224 if (dd
->comm
->ddSettings
.eFlop
)
226 dd
->comm
->flop
+= force_flop_count(nrnb
);
231 void clear_dd_cycle_counts(gmx_domdec_t
* dd
)
235 for (i
= 0; i
< ddCyclNr
; i
++)
237 dd
->comm
->cycl
[i
] = 0;
238 dd
->comm
->cycl_n
[i
] = 0;
239 dd
->comm
->cycl_max
[i
] = 0;
242 dd
->comm
->flop_n
= 0;