From 6d99d7eaa33b77a703aabbe3bce070cf1a261787 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Thu, 7 Jul 2016 16:01:13 +0200 Subject: [PATCH] Hardware detection clean-up and pre/post-processing This change cleans up the various tests and hacks we have had in different places to remove a number of false warnings and errors. It also unifies all this processing to two small routines that are called just before/after the hardware is detected, so the user can choose whether it should be done or not in other places. - Rather than trying to guess when we should or shouldn't override the number of cores online, the preprocessing uses a piece of code that allows sleeping cores to come online automatically by running a small C++11 thread loop before doing the hardware topology detection. This way we can remove all ARM-specific paths. To avoid wasting a second on systems where SMT is disabled, we avoid calling it on x86. - All SMT warnings are handled in the post-processing call, but only as notes in the log file to avoid writing warnings on stderr. - The check for OpenMP thread mismatch has been removed since it caused incorrect warnings by comparing the number of threads configured for each OpenMP process with the total number of cores in the entire system. We will have to rewrite this later as a test in the MPI/OpenMP parallelization setup instead. By sticking with the hwloc/sysconf-online detection, we should now also handle all special cases where cores have been taken offline manually in a correct way without using hardware-specific paths. 
Change-Id: I37edb3eada3f4c8c0906c641c7041cc0270985e8 --- cmake/gmxTestCXX11.cmake | 9 +- docs/doxygen/includesorter.py | 4 +- docs/user-guide/mdrun-performance.rst | 15 +- src/gromacs/hardware/detecthardware.cpp | 186 +++++++++++++++++++++--- src/gromacs/hardware/detecthardware.h | 14 +- src/gromacs/hardware/hardwaretopology.cpp | 75 +--------- src/gromacs/hardware/hardwaretopology.h | 10 +- src/gromacs/hardware/tests/hardwaretopology.cpp | 8 +- src/programs/mdrun/runner.cpp | 2 - 9 files changed, 208 insertions(+), 115 deletions(-) diff --git a/cmake/gmxTestCXX11.cmake b/cmake/gmxTestCXX11.cmake index e32b00b99d..65a1598463 100644 --- a/cmake/gmxTestCXX11.cmake +++ b/cmake/gmxTestCXX11.cmake @@ -118,14 +118,21 @@ int main() { set(CMAKE_REQUIRED_FLAGS "${CXX11_CXX_FLAG} ${${STDLIB_CXX_FLAG_NAME}}") set(CMAKE_REQUIRED_LIBRARIES "${${STDLIB_LIBRARIES_NAME}}") check_cxx_source_compiles( -"#include <map> +"#include <chrono> +#include <map> #include <memory> +#include <thread> #include <utility> int main() { typedef std::unique_ptr<int> intPointer; intPointer p(new int(10)); std::map<int, std::unique_ptr<int>> m; m.insert(std::make_pair(5, std::move(p))); + auto start = std::chrono::system_clock::now(); + if (std::chrono::system_clock::now() - start < std::chrono::seconds(2)) + { + std::thread t; + } }" CXX11_STDLIB_PRESENT) if(NOT CXX11_STDLIB_PRESENT) message(FATAL_ERROR "This version of GROMACS requires C++11-compatible standard library. Please use a newer compiler, or a newer standard library, or use the GROMACS 5.1.x release. 
See the installation guide for details.") diff --git a/docs/doxygen/includesorter.py b/docs/doxygen/includesorter.py index 29d87200a8..f3f8b77a7b 100755 --- a/docs/doxygen/includesorter.py +++ b/docs/doxygen/includesorter.py @@ -104,11 +104,11 @@ class GroupedSorter(object): 'stddef.h', 'stdint.h', 'stdio.h', 'stdlib.h', 'string.h', 'time.h'] _std_c_cpp_headers = ['c' + x[:-2] for x in _std_c_headers] - _std_cpp_headers = ['algorithm', 'array', 'deque', 'exception', 'fstream', + _std_cpp_headers = ['algorithm', 'array', 'chrono', 'deque', 'exception', 'fstream', 'functional', 'iomanip', 'ios', 'iosfwd', 'iostream', 'istream', 'iterator', 'limits', 'list', 'map', 'memory', 'new', 'numeric', 'ostream', 'random', 'regex', 'set', 'sstream', 'stdexcept', 'streambuf', 'string', 'strstream', - 'tuple', 'type_traits', 'typeindex', 'typeinfo', 'vector', 'utility'] + 'thread', 'tuple', 'type_traits', 'typeindex', 'typeinfo', 'vector', 'utility'] def __init__(self, style='pub-priv', absolute=False): """Initialize a sorted with the given style.""" diff --git a/docs/user-guide/mdrun-performance.rst b/docs/user-guide/mdrun-performance.rst index 70e3e4971e..39004299e5 100644 --- a/docs/user-guide/mdrun-performance.rst +++ b/docs/user-guide/mdrun-performance.rst @@ -45,8 +45,19 @@ definitions. Experienced HPC users can skip this section. spreading computation over multiple threads, such as OpenMP, pthreads, winthreads, CUDA, OpenCL, and OpenACC. Some kinds of hardware can map more than one software thread to a core; on - Intel x86 processors this is called "hyper-threading." - Normally, :ref:`gmx mdrun` will not benefit from such mapping. + Intel x86 processors this is called "hyper-threading", while + the more general concept is often called SMT for + "simultaneous multi-threading". IBM Power8 can for instance use + up to 8 hardware threads per core. 
+ This feature can usually be enabled or disabled either in + the hardware bios or through a setting in the Linux operating + system. GROMACS can typically make use of this, for a moderate + free performance boost. In most cases it will be + enabled by default e.g. on new x86 processors, but in some cases + the system administrators might have disabled it. If that is the + case, ask if they can re-enable it for you. If you are not sure + if it is enabled, check the output of the CPU information in + the log file and compare with CPU specifications you find online. thread affinity (pinning) By default, most operating systems allow software threads to migrate diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp index 16f76407c7..5dccfa37d4 100644 --- a/src/gromacs/hardware/detecthardware.cpp +++ b/src/gromacs/hardware/detecthardware.cpp @@ -43,7 +43,9 @@ #include #include +#include #include +#include #include #include "thread_mpi/threads.h" @@ -66,14 +68,41 @@ #include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" -#include "gromacs/utility/gmxomp.h" #include "gromacs/utility/programcontext.h" #include "gromacs/utility/smalloc.h" #include "gromacs/utility/stringutil.h" #include "gromacs/utility/sysinfo.h" +#ifdef HAVE_UNISTD_H +# include // sysconf() +#endif + +//! Convenience macro to help us avoid ifdefs each time we use sysconf +#if !defined(_SC_NPROCESSORS_ONLN) && defined(_SC_NPROC_ONLN) +# define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN +#endif + +//! Convenience macro to help us avoid ifdefs each time we use sysconf +#if !defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROC_CONF) +# define _SC_NPROCESSORS_CONF _SC_NPROC_CONF +#endif + +#if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64) +//! Constant used to help minimize preprocessed code +static const bool isX86 = true; +#else +//! 
Constant used to help minimize preprocessed code +static const bool isX86 = false; +#endif + +#if defined __powerpc__ || defined __ppc__ || defined __PPC__ +static const bool isPowerPC = true; +#else +static const bool isPowerPC = false; +#endif + +//! Constant used to help minimize preprocessed code static const bool bGPUBinary = GMX_GPU != GMX_GPU_NONE; -static const bool bHasOmpSupport = GMX_OPENMP; /* Note that some of the following arrays must match the "GPU support * enumeration" in src/config.h.cmakein, so that GMX_GPU looks up an @@ -815,6 +844,130 @@ static void gmx_collect_hardware_mpi(const gmx::CpuInfo &cpuInfo) #endif } +/*! \brief Utility that does dummy computing for max 2 seconds to spin up cores + * + * This routine will check the number of cores configured and online + * (using sysconf), and then spins doing dummy compute operations for up to + * 2 seconds, or until all cores have come online. This can be used prior to + * hardware detection for platforms that take unused processors offline. + * + * This routine will not throw exceptions. + */ +static void +spinUpCore() noexcept +{ +#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN) + // steady_clock is better than system_clock, but unsupported in gcc-4.6.4. + // For release-2017 we can retire gcc-4.6 support and move to steady_clock. + float dummy = 0.1; + int countConfigured = sysconf(_SC_NPROCESSORS_CONF); // noexcept + auto start = std::chrono::system_clock::now(); // noexcept + + while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured && + std::chrono::system_clock::now() - start < std::chrono::seconds(2)) + { + for (int i = 1; i < 10000; i++) + { + dummy /= i; + } + } + + if (dummy < 0) + { + printf("This cannot happen, but prevents loop from being optimized away."); + } +#endif +} + +/*! 
\brief Prepare the system before hardware topology detection + * + * This routine should perform any actions we want to put the system in a state + * where we want it to be before detecting the hardware topology. For most + * processors there is nothing to do, but some architectures (in particular ARM) + * have support for taking configured cores offline, which will make them disappear + * from the online processor count. + * + * This routine checks if there is a mismatch between the number of cores + * configured and online, and in that case we issue a small workload that + * attempts to wake sleeping cores before doing the actual detection. + * + * This type of mismatch can also occur for x86 or PowerPC on Linux, if SMT has only + * been disabled in the kernel (rather than bios). Since those cores will never + * come online automatically, we currently skip this test for x86 & PowerPC to + * avoid wasting 2 seconds. We also skip the test if there is no thread support. + * + * \note Cores will sleep relatively quickly again, so it's important to issue + * the real detection code directly after this routine. + */ +static void +hardwareTopologyPrepareDetection() +{ +#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && \ + (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS)) + + // Modify this conditional when/if x86 or PowerPC starts to sleep some cores + if (!isX86 && !isPowerPC) + { + int countConfigured = sysconf(_SC_NPROCESSORS_CONF); + std::vector<std::thread> workThreads(countConfigured > 0 ? countConfigured : 0); // sysconf() returns -1 on error + + for (auto &t : workThreads) + { + t = std::thread(spinUpCore); + } + + for (auto &t : workThreads) + { + t.join(); + } + } +#endif +} + +/*! \brief Sanity check hardware topology and optionally print some notes to log + * + * \param fplog Log file pointer. This can be NULL, but then the routine + * will not do anything. + * \param hardwareTopology Reference to hardwareTopology object. 
+ */ +static void +hardwareTopologyDoubleCheckDetection(FILE gmx_unused *fplog, + const gmx::HardwareTopology gmx_unused &hardwareTopology) +{ +#if defined HAVE_SYSCONF && defined(_SC_NPROCESSORS_CONF) + if (fplog == NULL || + hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount) + { + return; + } + + int countFromDetection = hardwareTopology.machine().logicalProcessorCount; + int countConfigured = sysconf(_SC_NPROCESSORS_CONF); + + /* BIOS, kernel or user actions can take physical processors + * offline. We already cater for some of the cases inside the hardwareTopology + * by trying to spin up cores just before we detect, but there could be other + * cases where it is worthwhile to hint that there might be more resources available. + */ + if (countConfigured >= 0 && countConfigured != countFromDetection) + { + fprintf(fplog, "Note: %d CPUs configured, but only %d were detected to be online.\n", countConfigured, countFromDetection); + + if (isX86 && countConfigured == 2*countFromDetection) + { + fprintf(fplog, " X86 Hyperthreading is likely disabled; enable it for better performance.\n"); + } + // For PowerPC (likely Power8) it is possible to set SMT to either 2,4, or 8-way hardware threads. + // We only warn if it is completely disabled since default performance drops with SMT8. 
+ if (isPowerPC && countConfigured == 8*countFromDetection) + { + fprintf(fplog, " PowerPC SMT is likely disabled; enable SMT2/SMT4 for better performance.\n"); + } + } +#endif +} + + gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr, gmx_bool bDetectGPUs) { @@ -833,7 +986,15 @@ gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr, snew(hwinfo_g, 1); hwinfo_g->cpuInfo = new gmx::CpuInfo(gmx::CpuInfo::detect()); - hwinfo_g->hardwareTopology = new gmx::HardwareTopology(gmx::HardwareTopology::detect(fplog, cr)); + + hardwareTopologyPrepareDetection(); + hwinfo_g->hardwareTopology = new gmx::HardwareTopology(gmx::HardwareTopology::detect()); + + // If we detected the topology on this system, double-check that it makes sense + if (hwinfo_g->hardwareTopology->isThisSystem()) + { + hardwareTopologyDoubleCheckDetection(fplog, *(hwinfo_g->hardwareTopology)); + } // TODO: Get rid of this altogether. hwinfo_g->nthreads_hw_avail = hwinfo_g->hardwareTopology->machine().logicalProcessorCount; @@ -1135,25 +1296,6 @@ void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr, check_use_of_rdtscp_on_this_cpu(fplog, cr, cpuInfo); } -void checkLogicalProcessorCountIsConsistentWithOpenmp(FILE *fplog, const t_commrec *cr, - const gmx::HardwareTopology *hardwareTopology) -{ - if (bHasOmpSupport && - hardwareTopology->supportLevel() >= - gmx::HardwareTopology::SupportLevel::LogicalProcessorCount) - { - int countFromDetection = hardwareTopology->machine().logicalProcessorCount; - int countFromOpenmp = gmx_omp_get_num_procs(); - if (countFromDetection != countFromOpenmp) - { - md_print_warn(cr, fplog, - "Number of logical cores detected (%d) does not match the number reported by OpenMP (%d).\n" - "Consider setting the launch configuration manually!", - countFromDetection, countFromOpenmp); - } - } -} - //! 
\brief Return if any GPU ID (e.g in a user-supplied string) is repeated static gmx_bool anyGpuIdIsRepeated(const gmx_gpu_opt_t *gpu_opt) { diff --git a/src/gromacs/hardware/detecthardware.h b/src/gromacs/hardware/detecthardware.h index 4cea398e20..e002d47d16 100644 --- a/src/gromacs/hardware/detecthardware.h +++ b/src/gromacs/hardware/detecthardware.h @@ -64,8 +64,14 @@ gmx_bool gmx_multiple_gpu_per_node_supported(); * example. */ gmx_bool gmx_gpu_sharing_supported(); -/* Construct the global hwinfo structure and return a pointer to - it. Caller is responsible for freeing this pointer. */ +/*! \brief Run detection, consistency checks, and make available on all ranks. + * + * This routine constructs the global hwinfo structure and returns a pointer to + * it. It will run a preamble before executing cpu and hardware checks, and + * then run consistency checks afterwards. The results will also be made + * available on all nodes. + * Caller is responsible for freeing this pointer. + */ gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr, gmx_bool bDetectGPUs); @@ -75,10 +81,6 @@ gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr, void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr, const gmx_hw_info_t *hwinfo); -//! Warn the user if the OpenMP system doesn't agree with the hardware detection about the number of logical processors. 
-void checkLogicalProcessorCountIsConsistentWithOpenmp(FILE *fplog, const t_commrec *cr, - const gmx::HardwareTopology *hardwareTopology); - void gmx_hardware_info_free(gmx_hw_info_t *hwinfo); void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt); diff --git a/src/gromacs/hardware/hardwaretopology.cpp b/src/gromacs/hardware/hardwaretopology.cpp index c23ff0f306..84b8e3cb38 100644 --- a/src/gromacs/hardware/hardwaretopology.cpp +++ b/src/gromacs/hardware/hardwaretopology.cpp @@ -56,7 +56,6 @@ # include #endif -#include "gromacs/gmxlib/md_logging.h" #include "gromacs/hardware/cpuinfo.h" #include "gromacs/utility/gmxassert.h" @@ -67,12 +66,9 @@ # include // GetSystemInfo() #endif -#if defined(_M_ARM) || defined(__arm__) || defined(__ARM_ARCH) || defined (__aarch64__) -//! Constant used to help minimize preprocessed code -static const bool isArm = true; -#else -//! Constant used to help minimize preprocessed code -static const bool isArm = false; +//! Convenience macro to help us avoid ifdefs each time we use sysconf +#if !defined(_SC_NPROCESSORS_ONLN) && defined(_SC_NPROC_ONLN) +# define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN #endif namespace gmx @@ -558,7 +554,7 @@ parseHwLoc(HardwareTopology::Machine * machine, * \return The number of hardware processing units, or 0 if it fails. */ int -detectLogicalProcessorCount(FILE *fplog, const t_commrec *cr) +detectLogicalProcessorCount() { int count = 0; @@ -568,78 +564,21 @@ detectLogicalProcessorCount(FILE *fplog, const t_commrec *cr) SYSTEM_INFO sysinfo; GetSystemInfo( &sysinfo ); count = sysinfo.dwNumberOfProcessors; -#elif defined HAVE_SYSCONF +#elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) // We are probably on Unix. Check if we have the argument to use before executing any calls -# if defined(_SC_NPROCESSORS_CONF) - count = sysconf(_SC_NPROCESSORS_CONF); -# if defined(_SC_NPROCESSORS_ONLN) - /* On e.g. 
Arm, the Linux kernel can use advanced power saving features where - * processors are brought online/offline dynamically. This will cause - * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this - * reason we now warn if this mismatches with the detected core count. */ - int countOnline = sysconf(_SC_NPROCESSORS_ONLN); - if (count != countOnline) - { - /* We assume that this scenario means that the kernel has - disabled threads or cores, and that the only safe course is - to assume that _SC_NPROCESSORS_ONLN should be used. Even - this may not be valid if running in a containerized - environment, such system calls may read from - /sys/devices/system/cpu and report what the OS sees, rather - than what the container cgroup is supposed to set up as - limits. But we're not sure right now whether there's any - (standard-ish) way to handle that. - - On ARM, the kernel may have powered down the cores, - which we'll warn the user about now. On x86, this - means HT is disabled by the kernel, not in the - BIOS. We're not sure what it means on other - architectures, or even if it is possible, because - sysconf is rather non-standardized. */ - if (isArm) - { - md_print_warn(cr, fplog, - "%d CPUs configured, but only %d of them are online.\n" - "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n" - "off to save power, and will turn them back on later when the load increases.\n" - "However, this will likely mean GROMACS cannot pin threads to those cores. You\n" - "will likely see much better performance by forcing all cores to be online, and\n" - "making sure they run at their full clock frequency.", count, countOnline); - } - else - { - md_print_warn(cr, fplog, - "Note: %d CPUs configured, but only %d of them are online, so GROMACS will use the latter.", - count, countOnline); - // We use the online count to avoid (potential) oversubscription. 
- count = countOnline; - } - } -# endif -# elif defined(_SC_NPROC_CONF) - count = sysconf(_SC_NPROC_CONF); -# elif defined(_SC_NPROCESSORS_ONLN) count = sysconf(_SC_NPROCESSORS_ONLN); -# elif defined(_SC_NPROC_ONLN) - count = sysconf(_SC_NPROC_ONLN); -# else -# warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!" -# endif // End of check for sysconf argument values - #else count = 0; // Neither windows nor Unix. #endif } - GMX_UNUSED_VALUE(cr); - GMX_UNUSED_VALUE(fplog); return count; } } // namespace anonymous // static -HardwareTopology HardwareTopology::detect(FILE *fplog, const t_commrec *cr) +HardwareTopology HardwareTopology::detect() { HardwareTopology result; @@ -665,7 +604,7 @@ HardwareTopology HardwareTopology::detect(FILE *fplog, const t_commrec *cr) if (result.supportLevel_ == SupportLevel::None) { // No topology information; try to detect the number of logical processors at least - result.machine_.logicalProcessorCount = detectLogicalProcessorCount(fplog, cr); + result.machine_.logicalProcessorCount = detectLogicalProcessorCount(); if (result.machine_.logicalProcessorCount > 0) { result.supportLevel_ = SupportLevel::LogicalProcessorCount; diff --git a/src/gromacs/hardware/hardwaretopology.h b/src/gromacs/hardware/hardwaretopology.h index ca16710964..f766a4d695 100644 --- a/src/gromacs/hardware/hardwaretopology.h +++ b/src/gromacs/hardware/hardwaretopology.h @@ -44,12 +44,9 @@ #define GMX_HARDWARE_HARDWARETOPOLOGY_H #include -#include #include -struct t_commrec; - namespace gmx { @@ -190,11 +187,8 @@ class HardwareTopology public: - /*! \brief Detects the hardware topology. - * - * Writes any warnings to stderr, and \c fplog if it is not nullptr. - */ - static HardwareTopology detect(FILE *fplog, const t_commrec *cr); + /*! \brief Detects the hardware topology. */ + static HardwareTopology detect(); /*! 
\brief Check what topology information that is available and valid * diff --git a/src/gromacs/hardware/tests/hardwaretopology.cpp b/src/gromacs/hardware/tests/hardwaretopology.cpp index ac473627b3..ed7897b015 100644 --- a/src/gromacs/hardware/tests/hardwaretopology.cpp +++ b/src/gromacs/hardware/tests/hardwaretopology.cpp @@ -67,7 +67,7 @@ TEST(HardwareTopologyTest, Execute) // depends on the architecture, but we can at least make sure that it // works to execute the tests - gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect(nullptr, nullptr)); + gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect()); // If we cannot even find the number of logical processors we want to flag it EXPECT_GT(hwTop.supportLevel(), gmx::HardwareTopology::SupportLevel::None) @@ -80,7 +80,7 @@ TEST(HardwareTopologyTest, Execute) TEST(HardwareTopologyTest, HwlocExecute) { #if defined(__linux__) - gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect(nullptr, nullptr)); + gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect()); // On Linux with hwloc support we should be able to get at least basic information EXPECT_GE(hwTop.supportLevel(), gmx::HardwareTopology::SupportLevel::Basic) @@ -93,7 +93,7 @@ TEST(HardwareTopologyTest, HwlocExecute) TEST(HardwareTopologyTest, ProcessorSelfconsistency) { - gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect(nullptr, nullptr)); + gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect()); if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic) { @@ -129,7 +129,7 @@ TEST(HardwareTopologyTest, ProcessorSelfconsistency) TEST(HardwareTopologyTest, NumaCacheSelfconsistency) { - gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect(nullptr, nullptr)); + gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect()); if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Full) { diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp index abe5f47ddd..ddc5895c95 100644 --- 
a/src/programs/mdrun/runner.cpp +++ b/src/programs/mdrun/runner.cpp @@ -1111,8 +1111,6 @@ int mdrunner(gmx_hw_opt_t *hw_opt, hw_opt->gpu_opt.n_dev_use = 0; } - checkLogicalProcessorCountIsConsistentWithOpenmp(fplog, cr, hwinfo->hardwareTopology); - /* check consistency across ranks of things like SIMD * support and number of GPUs selected */ gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU); -- 2.11.4.GIT