Remove unused function generate_excls and make clean_excls static
[gromacs.git] / src / gromacs / mdrunutility / threadaffinity.cpp
blob09434f885f797b59f6ea135b09e97450c8082d9b
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 #include "threadaffinity.h"
39 #include "config.h"
41 #include <cerrno>
42 #include <cstdio>
43 #include <cstring>
45 #if HAVE_SCHED_AFFINITY
46 # include <sched.h>
47 # include <sys/syscall.h>
48 #endif
50 #include "thread_mpi/threads.h"
52 #include "gromacs/hardware/hardwaretopology.h"
53 #include "gromacs/hardware/hw_info.h"
54 #include "gromacs/mdtypes/commrec.h"
55 #include "gromacs/utility/basenetwork.h"
56 #include "gromacs/utility/cstringutil.h"
57 #include "gromacs/utility/exceptions.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/gmxassert.h"
60 #include "gromacs/utility/gmxomp.h"
61 #include "gromacs/utility/logger.h"
62 #include "gromacs/utility/physicalnodecommunicator.h"
63 #include "gromacs/utility/programcontext.h"
64 #include "gromacs/utility/smalloc.h"
65 #include "gromacs/utility/unique_cptr.h"
67 namespace
70 class DefaultThreadAffinityAccess : public gmx::IThreadAffinityAccess
72 public:
73 bool isThreadAffinitySupported() const override
75 return tMPI_Thread_setaffinity_support() == TMPI_SETAFFINITY_SUPPORT_YES;
77 bool setCurrentThreadAffinityToCore(int core) override
79 const int ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);
80 return ret == 0;
84 //! Global instance of DefaultThreadAffinityAccess
85 DefaultThreadAffinityAccess g_defaultAffinityAccess;
87 } // namespace
89 gmx::IThreadAffinityAccess::~IThreadAffinityAccess()
93 static bool invalidWithinSimulation(const t_commrec *cr, bool invalidLocally)
95 #if GMX_MPI
96 if (cr->nnodes > 1)
98 int value = invalidLocally ? 1 : 0;
99 int globalValue;
100 MPI_Reduce(&value, &globalValue, 1, MPI_INT, MPI_LOR, MASTERRANK(cr),
101 cr->mpi_comm_mysim);
102 return SIMMASTER(cr) ? (globalValue != 0) : invalidLocally;
104 #else
105 GMX_UNUSED_VALUE(cr);
106 #endif
107 return invalidLocally;
110 static bool
111 get_thread_affinity_layout(const gmx::MDLogger &mdlog,
112 const t_commrec *cr,
113 const gmx::HardwareTopology &hwTop,
114 int threads,
115 bool affinityIsAutoAndNumThreadsIsNotAuto,
116 int pin_offset, int * pin_stride,
117 int **localityOrder,
118 bool *issuedWarning)
120 int hwThreads;
121 int hwThreadsPerCore = 0;
122 bool bPickPinStride;
123 bool haveTopology;
124 bool invalidValue;
126 haveTopology = (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic);
128 if (pin_offset < 0)
130 gmx_fatal(FARGS, "Negative thread pinning offset requested");
132 if (*pin_stride < 0)
134 gmx_fatal(FARGS, "Negative thread pinning stride requested");
137 if (haveTopology)
139 hwThreads = hwTop.machine().logicalProcessorCount;
140 // Just use the value for the first core
141 hwThreadsPerCore = hwTop.machine().sockets[0].cores[0].hwThreads.size();
142 snew(*localityOrder, hwThreads);
143 int i = 0;
144 for (auto &s : hwTop.machine().sockets)
146 for (auto &c : s.cores)
148 for (auto &t : c.hwThreads)
150 (*localityOrder)[i++] = t.logicalProcessorId;
155 else
157 /* topology information not available or invalid, ignore it */
158 hwThreads = hwTop.machine().logicalProcessorCount;
159 *localityOrder = nullptr;
161 // Only warn about the first problem per node. Otherwise, the first test
162 // failing would essentially always cause also the other problems get
163 // reported, leading to bogus warnings. The order in the conditionals
164 // with this variable is important, since the MPI_Reduce() in
165 // invalidWithinSimulation() needs to always happen.
166 bool alreadyWarned = false;
167 invalidValue = (hwThreads <= 0);
168 if (invalidWithinSimulation(cr, invalidValue))
170 /* We don't know anything about the hardware, don't pin */
171 GMX_LOG(mdlog.warning).asParagraph().appendText(
172 "NOTE: No information on available cores, thread pinning disabled.");
173 alreadyWarned = true;
175 bool validLayout = !invalidValue;
177 if (affinityIsAutoAndNumThreadsIsNotAuto)
179 invalidValue = (threads != hwThreads);
180 bool warn = (invalidValue && threads > 1 && threads < hwThreads);
181 if (invalidWithinSimulation(cr, warn) && !alreadyWarned)
183 GMX_LOG(mdlog.warning).asParagraph().appendText(
184 "NOTE: The number of threads is not equal to the number of (logical) cores\n"
185 " and the -pin option is set to auto: will not pin threads to cores.\n"
186 " This can lead to significant performance degradation.\n"
187 " Consider using -pin on (and -pinoffset in case you run multiple jobs).");
188 alreadyWarned = true;
190 validLayout = validLayout && !invalidValue;
193 invalidValue = (threads > hwThreads);
194 if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
196 GMX_LOG(mdlog.warning).asParagraph().appendText(
197 "NOTE: Oversubscribing the CPU, will not pin threads");
198 alreadyWarned = true;
200 validLayout = validLayout && !invalidValue;
202 invalidValue = (pin_offset + threads > hwThreads);
203 if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
205 GMX_LOG(mdlog.warning).asParagraph().appendText(
206 "WARNING: Requested offset too large for available cores, thread pinning disabled.");
207 alreadyWarned = true;
210 validLayout = validLayout && !invalidValue;
212 invalidValue = false;
213 /* do we need to choose the pinning stride? */
214 bPickPinStride = (*pin_stride == 0);
216 if (bPickPinStride)
218 if (haveTopology && pin_offset + threads*hwThreadsPerCore <= hwThreads)
220 /* Put one thread on each physical core */
221 *pin_stride = hwThreadsPerCore;
223 else
225 /* We don't know if we have SMT, and if we do, we don't know
226 * if hw threads in the same physical core are consecutive.
227 * Without SMT the pinning layout should not matter too much.
228 * so we assume a consecutive layout and maximally spread out"
229 * the threads at equal threads per core.
230 * Note that IBM is the major non-x86 case with cpuid support
231 * and probably threads are already pinned by the queuing system,
232 * so we wouldn't end up here in the first place.
234 *pin_stride = (hwThreads - pin_offset)/threads;
237 else
239 /* Check the placement of the thread with the largest index to make sure
240 * that the offset & stride doesn't cause pinning beyond the last hardware thread. */
241 invalidValue = (pin_offset + (threads-1)*(*pin_stride) >= hwThreads);
243 if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
245 /* We are oversubscribing, don't pin */
246 GMX_LOG(mdlog.warning).asParagraph().appendText(
247 "WARNING: Requested stride too large for available cores, thread pinning disabled.");
248 alreadyWarned = true;
250 validLayout = validLayout && !invalidValue;
252 if (validLayout)
254 GMX_LOG(mdlog.info).appendTextFormatted(
255 "Pinning threads with a%s logical core stride of %d",
256 bPickPinStride ? "n auto-selected" : " user-specified",
257 *pin_stride);
260 *issuedWarning = alreadyWarned;
262 return validLayout;
265 static bool set_affinity(const t_commrec *cr, int nthread_local, int intraNodeThreadOffset,
266 int offset, int core_pinning_stride, const int *localityOrder,
267 gmx::IThreadAffinityAccess *affinityAccess)
269 // Set the per-thread affinity. In order to be able to check the success
270 // of affinity settings, we will set nth_affinity_set to 1 on threads
271 // where the affinity setting succeded and to 0 where it failed.
272 // Reducing these 0/1 values over the threads will give the total number
273 // of threads on which we succeeded.
275 // To avoid warnings from the static analyzer we initialize nth_affinity_set
276 // to zero outside the OpenMP block, and then add to it inside the block.
277 // The value will still always be 0 or 1 from each thread.
278 int nth_affinity_set = 0;
279 #pragma omp parallel num_threads(nthread_local) reduction(+:nth_affinity_set)
283 int thread_id, thread_id_node;
284 int index, core;
286 thread_id = gmx_omp_get_thread_num();
287 thread_id_node = intraNodeThreadOffset + thread_id;
288 index = offset + thread_id_node*core_pinning_stride;
289 if (localityOrder != nullptr)
291 core = localityOrder[index];
293 else
295 core = index;
298 const bool ret = affinityAccess->setCurrentThreadAffinityToCore(core);
300 /* store the per-thread success-values of the setaffinity */
301 nth_affinity_set += (ret ? 1 : 0);
303 if (debug)
305 fprintf(debug, "On rank %2d, thread %2d, index %2d, core %2d the affinity setting returned %d\n",
306 cr->nodeid, gmx_omp_get_thread_num(), index, core, ret ? 1 : 0);
309 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
312 if (nth_affinity_set > nthread_local)
314 char msg[STRLEN];
316 sprintf(msg, "Looks like we have set affinity for more threads than "
317 "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
318 gmx_incons(msg);
321 /* check & warn if some threads failed to set their affinities */
322 const bool allAffinitiesSet = (nth_affinity_set == nthread_local);
323 if (!allAffinitiesSet)
325 char sbuf1[STRLEN], sbuf2[STRLEN];
327 /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
328 sbuf1[0] = sbuf2[0] = '\0';
329 /* Only add rank info if we have more than one rank. */
330 if (cr->nnodes > 1)
332 #if GMX_MPI
333 #if GMX_THREAD_MPI
334 sprintf(sbuf1, "In tMPI thread #%d: ", cr->nodeid);
335 #else /* GMX_LIB_MPI */
336 sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
337 #endif
338 #endif /* GMX_MPI */
341 if (nthread_local > 1)
343 sprintf(sbuf2, "for %d/%d thread%s ",
344 nthread_local - nth_affinity_set, nthread_local,
345 nthread_local > 1 ? "s" : "");
348 // TODO: This output should also go through mdlog.
349 fprintf(stderr, "NOTE: %sAffinity setting %sfailed.\n", sbuf1, sbuf2);
351 return allAffinitiesSet;
354 void analyzeThreadsOnThisNode(const gmx::PhysicalNodeCommunicator &physicalNodeComm,
355 int numThreadsOnThisRank,
356 int *numThreadsOnThisNode,
357 int *intraNodeThreadOffset)
359 *intraNodeThreadOffset = 0;
360 *numThreadsOnThisNode = numThreadsOnThisRank;
361 #if GMX_MPI
362 if (physicalNodeComm.size_ > 1)
364 /* We need to determine a scan of the thread counts in this
365 * compute node. */
366 MPI_Scan(&numThreadsOnThisRank, intraNodeThreadOffset, 1, MPI_INT, MPI_SUM, physicalNodeComm.comm_);
367 /* MPI_Scan is inclusive, but here we need exclusive */
368 *intraNodeThreadOffset -= numThreadsOnThisRank;
369 /* Get the total number of threads on this physical node */
370 MPI_Allreduce(&numThreadsOnThisRank, numThreadsOnThisNode, 1, MPI_INT, MPI_SUM, physicalNodeComm.comm_);
372 #else
373 GMX_UNUSED_VALUE(physicalNodeComm);
374 #endif
378 /* Set CPU affinity. Can be important for performance.
379 On some systems (e.g. Cray) CPU Affinity is set by default.
380 But default assigning doesn't work (well) with only some ranks
381 having threads. This causes very low performance.
382 External tools have cumbersome syntax for setting affinity
383 in the case that only some ranks have threads.
384 Thus it is important that GROMACS sets the affinity internally
385 if only PME is using threads.
387 void
388 gmx_set_thread_affinity(const gmx::MDLogger &mdlog,
389 const t_commrec *cr,
390 const gmx_hw_opt_t *hw_opt,
391 const gmx::HardwareTopology &hwTop,
392 int numThreadsOnThisRank,
393 int numThreadsOnThisNode,
394 int intraNodeThreadOffset,
395 gmx::IThreadAffinityAccess *affinityAccess)
397 int *localityOrder = nullptr;
399 if (hw_opt->threadAffinity == ThreadAffinity::Off)
401 /* Nothing to do */
402 return;
405 if (affinityAccess == nullptr)
407 affinityAccess = &g_defaultAffinityAccess;
410 /* If the tMPI thread affinity setting is not supported encourage the user
411 * to report it as it's either a bug or an exotic platform which we might
412 * want to support. */
413 if (!affinityAccess->isThreadAffinitySupported())
415 /* we know Mac OS does not support setting thread affinity, so there's
416 no point in warning the user in that case. In any other case
417 the user might be able to do something about it. */
418 #if !defined(__APPLE__)
419 GMX_LOG(mdlog.warning).asParagraph().appendText(
420 "NOTE: Cannot set thread affinities on the current platform.");
421 #endif /* __APPLE__ */
422 return;
425 int offset = hw_opt->core_pinning_offset;
426 int core_pinning_stride = hw_opt->core_pinning_stride;
427 if (offset != 0)
429 GMX_LOG(mdlog.warning).appendTextFormatted("Applying core pinning offset %d", offset);
432 bool affinityIsAutoAndNumThreadsIsNotAuto =
433 (hw_opt->threadAffinity == ThreadAffinity::Auto &&
434 !hw_opt->totNumThreadsIsAuto);
435 bool issuedWarning;
436 bool validLayout
437 = get_thread_affinity_layout(mdlog, cr, hwTop, numThreadsOnThisNode,
438 affinityIsAutoAndNumThreadsIsNotAuto,
439 offset, &core_pinning_stride, &localityOrder,
440 &issuedWarning);
441 const gmx::sfree_guard localityOrderGuard(localityOrder);
443 bool allAffinitiesSet;
444 if (validLayout)
446 allAffinitiesSet = set_affinity(cr, numThreadsOnThisRank, intraNodeThreadOffset,
447 offset, core_pinning_stride, localityOrder,
448 affinityAccess);
450 else
452 // Produce the warning if any rank fails.
453 allAffinitiesSet = false;
455 if (invalidWithinSimulation(cr, !allAffinitiesSet) && !issuedWarning)
457 GMX_LOG(mdlog.warning).asParagraph().appendText("NOTE: Thread affinity was not set.");
461 /* Detects and returns whether we have the default affinity mask
463 * Returns true when we can query thread affinities and CPU count is
464 * consistent and we have default affinity mask on all ranks.
466 * Should be called simultaneously by all MPI ranks.
468 static bool detectDefaultAffinityMask(const int nthreads_hw_avail)
470 bool detectedDefaultAffinityMask = true;
472 #if HAVE_SCHED_AFFINITY
473 cpu_set_t mask_current;
474 CPU_ZERO(&mask_current);
475 int ret;
476 if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
478 /* failed to query affinity mask, will just return */
479 if (debug)
481 fprintf(debug, "Failed to query affinity mask (error %d)", ret);
483 detectedDefaultAffinityMask = false;
486 /* Before proceeding with the actual check, make sure that the number of
487 * detected CPUs is >= the CPUs in the current set.
488 * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
489 #ifdef CPU_COUNT
490 if (detectedDefaultAffinityMask &&
491 nthreads_hw_avail < CPU_COUNT(&mask_current))
493 if (debug)
495 fprintf(debug, "%d hardware threads detected, but %d was returned by CPU_COUNT",
496 nthreads_hw_avail, CPU_COUNT(&mask_current));
498 detectedDefaultAffinityMask = false;
500 #endif /* CPU_COUNT */
502 if (detectedDefaultAffinityMask)
504 /* Here we check whether all bits of the affinity mask are set.
505 * Note that this mask can change while the program is executing,
506 * e.g. by the system dynamically enabling or disabling cores or
507 * by some scheduler changing the affinities. Thus we can not assume
508 * that the result is the same on all ranks within a node
509 * when running this code at different times.
511 bool allBitsAreSet = true;
512 for (int i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
514 allBitsAreSet = allBitsAreSet && (CPU_ISSET(i, &mask_current) != 0);
516 if (debug)
518 fprintf(debug, "%s affinity mask found\n",
519 allBitsAreSet ? "Default" : "Non-default");
521 if (!allBitsAreSet)
523 detectedDefaultAffinityMask = false;
526 #else
527 GMX_UNUSED_VALUE(nthreads_hw_avail);
528 #endif /* HAVE_SCHED_AFFINITY */
530 #if GMX_MPI
531 int mpiIsInitialized;
532 MPI_Initialized(&mpiIsInitialized);
533 if (mpiIsInitialized)
535 bool detectedDefaultAffinityMaskOnAllRanks;
536 MPI_Allreduce(&detectedDefaultAffinityMask,
537 &detectedDefaultAffinityMaskOnAllRanks,
538 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD);
539 detectedDefaultAffinityMask = detectedDefaultAffinityMaskOnAllRanks;
541 #endif
543 return detectedDefaultAffinityMask;
546 /* Check the process affinity mask and if it is found to be non-zero,
547 * will honor it and disable mdrun internal affinity setting.
548 * Note that this will only work on Linux as we use a GNU feature.
550 void
551 gmx_check_thread_affinity_set(const gmx::MDLogger &mdlog,
552 gmx_hw_opt_t *hw_opt,
553 int gmx_unused nthreads_hw_avail,
554 gmx_bool bAfterOpenmpInit)
556 GMX_RELEASE_ASSERT(hw_opt, "hw_opt must be a non-NULL pointer");
558 if (!bAfterOpenmpInit)
560 /* Check for externally set OpenMP affinity and turn off internal
561 * pinning if any is found. We need to do this check early to tell
562 * thread-MPI whether it should do pinning when spawning threads.
563 * TODO: the above no longer holds, we should move these checks later
565 if (hw_opt->threadAffinity != ThreadAffinity::Off)
567 char *message;
568 if (!gmx_omp_check_thread_affinity(&message))
570 /* We only pin automatically with totNumThreadsIsAuto=true */
571 if (hw_opt->threadAffinity == ThreadAffinity::On ||
572 hw_opt->totNumThreadsIsAuto)
574 GMX_LOG(mdlog.warning).asParagraph().appendText(message);
576 sfree(message);
577 hw_opt->threadAffinity = ThreadAffinity::Off;
582 if (!detectDefaultAffinityMask(nthreads_hw_avail))
584 if (hw_opt->threadAffinity == ThreadAffinity::Auto)
586 if (!bAfterOpenmpInit)
588 GMX_LOG(mdlog.warning).asParagraph().appendText(
589 "Non-default thread affinity set, disabling internal thread affinity");
591 else
593 GMX_LOG(mdlog.warning).asParagraph().appendText(
594 "Non-default thread affinity set probably by the OpenMP library,\n"
595 "disabling internal thread affinity");
597 hw_opt->threadAffinity = ThreadAffinity::Off;
599 else
601 /* Only warn once, at the last check (bAfterOpenmpInit==TRUE) */
602 if (bAfterOpenmpInit)
604 GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
605 "Overriding thread affinity set outside %s",
606 gmx::getProgramContext().displayName());