Update instructions in containers.rst
[gromacs.git] / src / gromacs / mdrunutility / multisim.cpp
blob70aba55b948f4cfa45462e09ab322756c5385a27
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \internal \file
37 * \brief Implements the multi-simulation support routines.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_mdrunutility
42 #include "gmxpre.h"
44 #include "multisim.h"
46 #include "config.h"
48 #include "gromacs/gmxlib/network.h"
49 #include "gromacs/mdtypes/commrec.h"
50 #include "gromacs/utility/exceptions.h"
51 #include "gromacs/utility/fatalerror.h"
52 #include "gromacs/utility/futil.h"
53 #include "gromacs/utility/gmxassert.h"
54 #include "gromacs/utility/logger.h"
55 #include "gromacs/utility/smalloc.h"
57 std::unique_ptr<gmx_multisim_t> buildMultiSimulation(MPI_Comm worldComm,
58 gmx::ArrayRef<const std::string> multidirs)
60 if (multidirs.empty())
62 return nullptr;
65 if (!GMX_LIB_MPI && !multidirs.empty())
67 GMX_THROW(gmx::NotImplementedError(
68 "Multi-simulations are only supported when GROMACS has been "
69 "configured with a proper external MPI library."));
72 if (multidirs.size() == 1)
74 /* NOTE: It would be nice if this special case worked, but this requires checks/tests. */
75 GMX_THROW(gmx::NotImplementedError(
76 "To run mdrun in multi-simulation mode, more then one "
77 "actual simulation is required. The single simulation case is not supported."));
80 #if GMX_LIB_MPI
81 int numRanks;
82 MPI_Comm_size(worldComm, &numRanks);
83 if (numRanks % multidirs.size() != 0)
85 auto message = gmx::formatString(
86 "The number of ranks (%d) is not a multiple of the number of simulations (%td)",
87 numRanks, multidirs.ssize());
88 GMX_THROW(gmx::InconsistentInputError(message));
91 int numRanksPerSimulation = numRanks / multidirs.size();
92 int rankWithinWorldComm;
93 MPI_Comm_rank(worldComm, &rankWithinWorldComm);
95 if (debug)
97 fprintf(debug, "We have %td simulations, %d ranks per simulation, local simulation is %d\n",
98 multidirs.ssize(), numRanksPerSimulation, rankWithinWorldComm / numRanksPerSimulation);
101 int numSimulations = multidirs.size();
102 // Create a communicator for the master ranks of each simulation
103 std::vector<int> ranksOfMasters(numSimulations);
104 for (int i = 0; i < numSimulations; i++)
106 ranksOfMasters[i] = i * numRanksPerSimulation;
108 MPI_Group worldGroup;
109 // No need to free worldGroup later, we didn't create it.
110 MPI_Comm_group(worldComm, &worldGroup);
112 MPI_Group mastersGroup = MPI_GROUP_NULL;
113 MPI_Group_incl(worldGroup, numSimulations, ranksOfMasters.data(), &mastersGroup);
114 MPI_Comm mastersComm = MPI_COMM_NULL;
115 MPI_Comm_create(worldComm, mastersGroup, &mastersComm);
116 if (mastersGroup != MPI_GROUP_NULL)
118 MPI_Group_free(&mastersGroup);
121 int simulationIndex = rankWithinWorldComm / numRanksPerSimulation;
122 MPI_Comm simulationComm = MPI_COMM_NULL;
123 MPI_Comm_split(worldComm, simulationIndex, rankWithinWorldComm, &simulationComm);
127 gmx_chdir(multidirs[simulationIndex].c_str());
129 catch (gmx::GromacsException& e)
131 e.prependContext("While changing directory for multi-simulation to " + multidirs[simulationIndex]);
132 throw;
134 return std::make_unique<gmx_multisim_t>(numSimulations, simulationIndex, mastersComm, simulationComm);
135 #else
136 GMX_UNUSED_VALUE(worldComm);
137 return nullptr;
138 #endif
141 gmx_multisim_t::gmx_multisim_t(int numSimulations, int simulationIndex, MPI_Comm mastersComm, MPI_Comm simulationComm) :
142 numSimulations_(numSimulations),
143 simulationIndex_(simulationIndex),
144 mastersComm_(mastersComm),
145 simulationComm_(simulationComm)
149 gmx_multisim_t::~gmx_multisim_t()
151 #if GMX_LIB_MPI
152 // TODO This would work better if the result of MPI_Comm_split was
153 // put into an RAII-style guard, such as gmx::unique_cptr.
154 if (mastersComm_ != MPI_COMM_NULL && mastersComm_ != MPI_COMM_WORLD)
156 MPI_Comm_free(&mastersComm_);
158 if (simulationComm_ != MPI_COMM_NULL && simulationComm_ != MPI_COMM_WORLD)
160 MPI_Comm_free(&simulationComm_);
162 #endif
165 #if GMX_MPI
166 static void gmx_sumd_comm(int nr, double r[], MPI_Comm mpi_comm)
168 # if MPI_IN_PLACE_EXISTS
169 MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
170 # else
171 /* this function is only used in code that is not performance critical,
172 (during setup, when comm_rec is not the appropriate communication
173 structure), so this isn't as bad as it looks. */
174 double* buf;
175 int i;
177 snew(buf, nr);
178 MPI_Allreduce(r, buf, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
179 for (i = 0; i < nr; i++)
181 r[i] = buf[i];
183 sfree(buf);
184 # endif
186 #endif
188 #if GMX_MPI
189 static void gmx_sumf_comm(int nr, float r[], MPI_Comm mpi_comm)
191 # if MPI_IN_PLACE_EXISTS
192 MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
193 # else
194 /* this function is only used in code that is not performance critical,
195 (during setup, when comm_rec is not the appropriate communication
196 structure), so this isn't as bad as it looks. */
197 float* buf;
198 int i;
200 snew(buf, nr);
201 MPI_Allreduce(r, buf, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
202 for (i = 0; i < nr; i++)
204 r[i] = buf[i];
206 sfree(buf);
207 # endif
209 #endif
211 void gmx_sumd_sim(int gmx_unused nr, double gmx_unused r[], const gmx_multisim_t gmx_unused* ms)
213 #if !GMX_MPI
214 GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumd_sim");
215 #else
216 gmx_sumd_comm(nr, r, ms->mastersComm_);
217 #endif
220 void gmx_sumf_sim(int gmx_unused nr, float gmx_unused r[], const gmx_multisim_t gmx_unused* ms)
222 #if !GMX_MPI
223 GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumf_sim");
224 #else
225 gmx_sumf_comm(nr, r, ms->mastersComm_);
226 #endif
229 void gmx_sumi_sim(int gmx_unused nr, int gmx_unused r[], const gmx_multisim_t gmx_unused* ms)
231 #if !GMX_MPI
232 GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumi_sim");
233 #else
234 # if MPI_IN_PLACE_EXISTS
235 MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, ms->mastersComm_);
236 # else
237 /* this is thread-unsafe, but it will do for now: */
238 ms->intBuffer.resize(nr);
239 MPI_Allreduce(r, ms->intBuffer.data(), ms->intBuffer.size(), MPI_INT, MPI_SUM, ms->mastersComm_);
240 std::copy(std::begin(ms->intBuffer), std::end(ms->intBuffer), r);
241 # endif
242 #endif
245 void gmx_sumli_sim(int gmx_unused nr, int64_t gmx_unused r[], const gmx_multisim_t gmx_unused* ms)
247 #if !GMX_MPI
248 GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumli_sim");
249 #else
250 # if MPI_IN_PLACE_EXISTS
251 MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, ms->mastersComm_);
252 # else
253 /* this is thread-unsafe, but it will do for now: */
254 ms->int64Buffer.resize(nr);
255 MPI_Allreduce(r, ms->int64Buffer.data(), ms->int64Buffer.size(), MPI_INT64_T, MPI_SUM, ms->mastersComm_);
256 std::copy(std::begin(ms->int64Buffer), std::end(ms->int64Buffer), r);
257 # endif
258 #endif
261 std::vector<int> gatherIntFromMultiSimulation(const gmx_multisim_t* ms, const int localValue)
263 std::vector<int> valuesFromAllRanks;
264 #if GMX_MPI
265 if (ms != nullptr)
267 valuesFromAllRanks.resize(ms->numSimulations_);
268 valuesFromAllRanks[ms->simulationIndex_] = localValue;
269 gmx_sumi_sim(ms->numSimulations_, valuesFromAllRanks.data(), ms);
271 else
273 valuesFromAllRanks.emplace_back(localValue);
275 #else
276 GMX_UNUSED_VALUE(ms);
277 valuesFromAllRanks.emplace_back(localValue);
278 #endif
279 return valuesFromAllRanks;
282 void check_multi_int(FILE* log, const gmx_multisim_t* ms, int val, const char* name, gmx_bool bQuiet)
284 int * ibuf, p;
285 gmx_bool bCompatible;
287 if (nullptr != log && !bQuiet)
289 fprintf(log, "Multi-checking %s ... ", name);
292 if (ms == nullptr)
294 gmx_fatal(FARGS, "check_multi_int called with a NULL communication pointer");
297 snew(ibuf, ms->numSimulations_);
298 ibuf[ms->simulationIndex_] = val;
299 gmx_sumi_sim(ms->numSimulations_, ibuf, ms);
301 bCompatible = TRUE;
302 for (p = 1; p < ms->numSimulations_; p++)
304 bCompatible = bCompatible && (ibuf[p - 1] == ibuf[p]);
307 if (bCompatible)
309 if (nullptr != log && !bQuiet)
311 fprintf(log, "OK\n");
314 else
316 if (nullptr != log)
318 fprintf(log, "\n%s is not equal for all subsystems\n", name);
319 for (p = 0; p < ms->numSimulations_; p++)
321 fprintf(log, " subsystem %d: %d\n", p, ibuf[p]);
324 gmx_fatal(FARGS, "The %d subsystems are not compatible\n", ms->numSimulations_);
327 sfree(ibuf);
330 void check_multi_int64(FILE* log, const gmx_multisim_t* ms, int64_t val, const char* name, gmx_bool bQuiet)
332 int64_t* ibuf;
333 int p;
334 gmx_bool bCompatible;
336 if (nullptr != log && !bQuiet)
338 fprintf(log, "Multi-checking %s ... ", name);
341 if (ms == nullptr)
343 gmx_fatal(FARGS, "check_multi_int called with a NULL communication pointer");
346 snew(ibuf, ms->numSimulations_);
347 ibuf[ms->simulationIndex_] = val;
348 gmx_sumli_sim(ms->numSimulations_, ibuf, ms);
350 bCompatible = TRUE;
351 for (p = 1; p < ms->numSimulations_; p++)
353 bCompatible = bCompatible && (ibuf[p - 1] == ibuf[p]);
356 if (bCompatible)
358 if (nullptr != log && !bQuiet)
360 fprintf(log, "OK\n");
363 else
365 // TODO Part of this error message would also be good to go to
366 // stderr (from one rank of one sim only)
367 if (nullptr != log)
369 fprintf(log, "\n%s is not equal for all subsystems\n", name);
370 for (p = 0; p < ms->numSimulations_; p++)
372 char strbuf[255];
373 /* first make the format string */
374 snprintf(strbuf, 255, " subsystem %%d: %s\n", "%" PRId64);
375 fprintf(log, strbuf, p, ibuf[p]);
378 gmx_fatal(FARGS, "The %d subsystems are not compatible\n", ms->numSimulations_);
381 sfree(ibuf);
384 bool findIsSimulationMasterRank(const gmx_multisim_t* ms, MPI_Comm communicator)
386 if (GMX_LIB_MPI)
388 // Ranks of multi-simulations know whether they are a master
389 // rank. Ranks of non-multi simulation do not know until a
390 // t_commrec is available.
391 if ((ms != nullptr) && (ms->numSimulations_ > 1))
393 return ms->mastersComm_ != MPI_COMM_NULL;
395 else
397 int rank = 0;
398 #if GMX_LIB_MPI
399 MPI_Comm_rank(communicator, &rank);
400 #endif
401 return (rank == 0);
404 else if (GMX_THREAD_MPI)
406 GMX_RELEASE_ASSERT(communicator == MPI_COMM_NULL || communicator == MPI_COMM_WORLD,
407 "Invalid communicator");
408 // Spawned threads have MPI_COMM_WORLD upon creation, so if
409 // the communicator is MPI_COMM_NULL this is not a spawned thread,
410 // ie is the master thread
411 return (communicator == MPI_COMM_NULL);
413 else
415 // No MPI means it must be the master (and only) rank.
416 return true;
420 bool isMasterSim(const gmx_multisim_t* ms)
422 return !isMultiSim(ms) || ms->simulationIndex_ == 0;
425 bool isMasterSimMasterRank(const gmx_multisim_t* ms, const bool isMaster)
427 return (isMaster && isMasterSim(ms));
430 static bool multisim_int_all_are_equal(const gmx_multisim_t* ms, int64_t value)
432 bool allValuesAreEqual = true;
433 int64_t* buf;
435 GMX_RELEASE_ASSERT(ms, "Invalid use of multi-simulation pointer");
437 snew(buf, ms->numSimulations_);
438 /* send our value to all other master ranks, receive all of theirs */
439 buf[ms->simulationIndex_] = value;
440 gmx_sumli_sim(ms->numSimulations_, buf, ms);
442 for (int s = 0; s < ms->numSimulations_; s++)
444 if (buf[s] != value)
446 allValuesAreEqual = false;
447 break;
451 sfree(buf);
453 return allValuesAreEqual;
456 void logInitialMultisimStatus(const gmx_multisim_t* ms,
457 const t_commrec* cr,
458 const gmx::MDLogger& mdlog,
459 const bool simulationsShareState,
460 const int numSteps,
461 const int initialStep)
463 if (!multisim_int_all_are_equal(ms, numSteps))
465 GMX_LOG(mdlog.warning)
466 .appendText(
467 "Note: The number of steps is not consistent across multi "
468 "simulations,\n"
469 "but we are proceeding anyway!");
471 if (!multisim_int_all_are_equal(ms, initialStep))
473 if (simulationsShareState)
475 if (MASTER(cr))
477 gmx_fatal(FARGS,
478 "The initial step is not consistent across multi simulations which "
479 "share the state");
481 gmx_barrier(cr->mpi_comm_mygroup);
483 else
485 GMX_LOG(mdlog.warning)
486 .appendText(
487 "Note: The initial step is not consistent across multi "
488 "simulations,\n"
489 "but we are proceeding anyway!");