clang-tidy: google tests applicable
[gromacs.git] / src / gromacs / hardware / hardwaretopology.cpp
blob9fb24b97e47f61901c93bf3745c0c3372046589c
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \internal \file
37 * \brief
38 * Implements gmx::HardwareTopology.
40 * \author Erik Lindahl <erik.lindahl@gmail.com>
41 * \ingroup module_hardware
44 #include "gmxpre.h"
46 #include "hardwaretopology.h"
48 #include "config.h"
50 #include <cstdio>
52 #include <algorithm>
53 #include <functional>
54 #include <limits>
55 #include <utility>
56 #include <vector>
58 #if GMX_USE_HWLOC
59 # include <hwloc.h>
60 #endif
62 #include "gromacs/hardware/cpuinfo.h"
63 #include "gromacs/utility/gmxassert.h"
65 #ifdef HAVE_UNISTD_H
66 # include <unistd.h> // sysconf()
67 #endif
68 #if GMX_NATIVE_WINDOWS
69 # include <windows.h> // GetSystemInfo()
70 #endif
72 //! Convenience macro to help us avoid ifdefs each time we use sysconf
73 #if !defined(_SC_NPROCESSORS_ONLN) && defined(_SC_NPROC_ONLN)
74 # define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN
75 #endif
77 namespace gmx
80 namespace
83 /*****************************************************************************
84 * *
85 * Utility functions for extracting hardware topology from CpuInfo object *
86 * *
87 *****************************************************************************/
89 /*! \brief Initialize machine data from basic information in cpuinfo
91 * \param machine Machine tree structure where information will be assigned
92 * if the cpuinfo object contains topology information.
93 * \param supportLevel If topology information is available in CpuInfo,
94 * this will be updated to reflect the amount of
95 * information written to the machine structure.
97 void
98 parseCpuInfo(HardwareTopology::Machine * machine,
99 HardwareTopology::SupportLevel * supportLevel)
101 CpuInfo cpuInfo(CpuInfo::detect());
103 if (!cpuInfo.logicalProcessors().empty())
105 int nSockets = 0;
106 int nCores = 0;
107 int nHwThreads = 0;
109 // Copy the logical processor information from cpuinfo
110 for (auto &l : cpuInfo.logicalProcessors())
112 machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
113 nSockets = std::max(nSockets, l.socketRankInMachine);
114 nCores = std::max(nCores, l.coreRankInSocket);
115 nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
118 // Fill info form sockets/cores/hwthreads
119 int socketId = 0;
120 int coreId = 0;
121 int hwThreadId = 0;
123 machine->sockets.resize(nSockets + 1);
124 for (auto &s : machine->sockets)
126 s.id = socketId++;
127 s.cores.resize(nCores + 1);
128 for (auto &c : s.cores)
130 c.id = coreId++;
131 c.numaNodeId = -1; // No numa information
132 c.hwThreads.resize(nHwThreads + 1);
133 for (auto &t : c.hwThreads)
135 t.id = hwThreadId++;
136 t.logicalProcessorId = -1; // set as unassigned for now
141 // Fill the logical processor id in the right place
142 for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
144 const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
145 machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
147 machine->logicalProcessorCount = machine->logicalProcessors.size();
148 *supportLevel = HardwareTopology::SupportLevel::Basic;
150 else
152 *supportLevel = HardwareTopology::SupportLevel::None;
156 #if GMX_USE_HWLOC
158 #if HWLOC_API_VERSION < 0x00010b00
159 # define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
160 # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
161 #endif
163 // Preprocessor variable for if hwloc api is version 1.x.x or 2.x.x
164 #if HWLOC_API_VERSION >= 0x00020000
165 # define GMX_HWLOC_API_VERSION_IS_2XX 1
166 #else
167 # define GMX_HWLOC_API_VERSION_IS_2XX 0
168 #endif
170 /*****************************************************************************
172 * Utility functions for extracting hardware topology from hwloc library *
174 *****************************************************************************/
176 // Compatibility function for accessing hwloc_obj_t object memory with different API versions of hwloc
177 std::size_t
178 getHwLocObjectMemory(const hwloc_obj_t obj)
180 #if GMX_HWLOC_API_VERSION_IS_2XX
181 return obj->total_memory;
182 #else
183 return obj->memory.total_memory;
184 #endif
187 /*! \brief Return vector of all descendants of a given type in hwloc topology
189 * \param topo hwloc topology handle that has been initialized and loaded
190 * \param obj Non-null hwloc object.
191 * \param type hwloc object type to find. The routine will only search
192 * on levels below obj.
194 * \return vector containing all the objects of given type that are
195 * descendants of the provided object. If no objects of this type
196 * were found, the vector will be empty.
198 const std::vector<hwloc_obj_t>
199 getHwLocDescendantsByType(const hwloc_topology_t topo, const hwloc_obj_t obj, const hwloc_obj_type_t type)
201 GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
203 std::vector<hwloc_obj_t> v;
205 if (obj->type == type)
207 v.push_back(obj);
209 // Go through children; if this object has no children obj->arity is 0,
210 // and we'll return an empty vector.
211 hwloc_obj_t tempNode = NULL;
212 while ((tempNode = hwloc_get_next_child(topo, obj, tempNode)) != NULL)
214 std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(topo, tempNode, type);
215 v.insert(v.end(), v2.begin(), v2.end());
217 return v;
220 /*! \brief Read information about sockets, cores and threads from hwloc topology
222 * \param topo hwloc topology handle that has been initialized and loaded
223 * \param machine Pointer to the machine structure in the HardwareTopology
224 * class, where the tree of sockets/cores/threads will be written.
226 * \return If all the data is found the return value is 0, otherwise non-zero.
229 parseHwLocSocketsCoresThreads(hwloc_topology_t topo,
230 HardwareTopology::Machine * machine)
232 const hwloc_obj_t root = hwloc_get_root_obj(topo);
233 std::vector<hwloc_obj_t> hwlocSockets = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PACKAGE);
235 machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
236 machine->logicalProcessors.resize(machine->logicalProcessorCount);
237 machine->sockets.resize(hwlocSockets.size());
239 bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
241 for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
243 // Assign information about this socket
244 machine->sockets[i].id = hwlocSockets[i]->logical_index;
246 // Get children (cores)
247 std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(topo, hwlocSockets[i], HWLOC_OBJ_CORE);
248 machine->sockets[i].cores.resize(hwlocCores.size());
250 topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
252 // Loop over child cores
253 for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
255 // Assign information about this core
256 machine->sockets[i].cores[j].id = hwlocCores[j]->logical_index;
257 machine->sockets[i].cores[j].numaNodeId = -1;
259 // Get children (hwthreads)
260 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocCores[j], HWLOC_OBJ_PU);
261 machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
263 topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
265 // Loop over child hwthreads
266 for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
268 // Assign information about this hwthread
269 std::size_t logicalProcessorId = hwlocPUs[k]->os_index;
270 machine->sockets[i].cores[j].hwThreads[k].id = hwlocPUs[k]->logical_index;
271 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
273 if (logicalProcessorId < machine->logicalProcessors.size())
275 // Cross-assign data for this hwthread to the logicalprocess vector
276 machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
277 machine->logicalProcessors[logicalProcessorId].coreRankInSocket = static_cast<int>(j);
278 machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore = static_cast<int>(k);
279 machine->logicalProcessors[logicalProcessorId].numaNodeId = -1;
281 else
283 topologyOk = false;
289 if (topologyOk)
291 return 0;
293 else
295 machine->logicalProcessors.clear();
296 machine->sockets.clear();
297 return -1;
301 /*! \brief Read cache information from hwloc topology
303 * \param topo hwloc topology handle that has been initialized and loaded
304 * \param machine Pointer to the machine structure in the HardwareTopology
305 * class, where cache data will be filled.
307 * \return If any cache data is found the return value is 0, otherwise non-zero.
310 parseHwLocCache(hwloc_topology_t topo,
311 HardwareTopology::Machine * machine)
313 // Parse caches up to L5
314 for (int cachelevel : { 1, 2, 3, 4, 5})
316 int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
318 if (depth >= 0)
320 hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, nullptr);
321 if (cache != nullptr)
323 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(topo, cache, HWLOC_OBJ_PU);
325 machine->caches.push_back( {
326 static_cast<int>(cache->attr->cache.depth),
327 static_cast<std::size_t>(cache->attr->cache.size),
328 static_cast<int>(cache->attr->cache.linesize),
329 static_cast<int>(cache->attr->cache.associativity),
330 std::max(static_cast<int>(hwThreads.size()), 1)
331 } );
335 return machine->caches.empty();
339 /*! \brief Read numa information from hwloc topology
341 * \param topo hwloc topology handle that has been initialized and loaded
342 * \param machine Pointer to the machine structure in the HardwareTopology
343 * class, where numa information will be filled.
345 * Hwloc should virtually always be able to detect numa information, but if
346 * there is only a single numa node in the system it is not reported at all.
347 * In this case we create a single numa node covering all cores.
349 * This function uses the basic socket/core/thread information detected by
350 * parseHwLocSocketsCoresThreads(), which means that routine must have
351 * completed successfully before calling this one. If this is not the case,
352 * you will get an error return code.
354 * \return If the data found makes sense (either in the numa node or the
355 * entire machine) the return value is 0, otherwise non-zero.
358 parseHwLocNuma(hwloc_topology_t topo,
359 HardwareTopology::Machine * machine)
361 const hwloc_obj_t root = hwloc_get_root_obj(topo);
362 std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_NUMANODE);
363 bool topologyOk = true;
365 if (!hwlocNumaNodes.empty())
367 machine->numa.nodes.resize(hwlocNumaNodes.size());
369 for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
371 machine->numa.nodes[i].id = hwlocNumaNodes[i]->logical_index;
372 machine->numa.nodes[i].memory = getHwLocObjectMemory(hwlocNumaNodes[i]);;
373 machine->numa.nodes[i].logicalProcessorId.clear();
375 // Get list of PUs in this numa node. Get from numa node if v1.x.x, get from numa node's parent if 2.x.x
376 #if GMX_HWLOC_API_VERSION_IS_2XX
377 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocNumaNodes[i]->parent, HWLOC_OBJ_PU);
378 #else
379 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocNumaNodes[i], HWLOC_OBJ_PU);
380 #endif
381 for (auto &p : hwlocPUs)
383 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
385 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
387 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
388 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
389 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
391 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
392 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
393 // Set numaNodeId in core too
394 machine->sockets[s].cores[c].numaNodeId = i;
397 // Getting the distance matrix
398 #if GMX_HWLOC_API_VERSION_IS_2XX
399 // with hwloc api v. 2.x.x, distances are no longer directly accessible. Need to retrieve and release hwloc_distances_s object
400 // In addition, there can now be multiple types of distances, ie latency, bandwidth. We look only for latency, but have to check
401 // if multiple distance matrices are returned.
403 // If only 1 numa node exists, the v2.x.x hwloc api won't have a distances matrix, set manually
404 if (hwlocNumaNodes.size() == 1)
406 machine->numa.relativeLatency = { { 1.0 } };
408 else
410 hwloc_distances_s** dist = new hwloc_distances_s*;
411 // Set the number of distance matrices to return (1 in our case, but hwloc 2.x.x allows
412 // for multiple distances types and therefore multiple distance matrices)
413 unsigned nr = 1;
414 hwloc_distances_get(topo, &nr, dist, HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0);
415 // If no distances were found, nr will be 0, otherwise distances will be populated with 1
416 // hwloc_distances_s object
417 if (nr > 0 && dist[0]->nbobjs == hwlocNumaNodes.size())
420 machine->numa.relativeLatency.resize(dist[0]->nbobjs);
421 for (std::size_t i = 0; i < dist[0]->nbobjs; i++)
423 machine->numa.relativeLatency[i].resize(dist[0]->nbobjs);
424 for (std::size_t j = 0; j < dist[0]->nbobjs; j++)
426 machine->numa.relativeLatency[i][j] = dist[0]->values[i*dist[0]->nbobjs+j];
430 else
432 topologyOk = false;
434 hwloc_distances_release(topo, dist[0]);
437 // hwloc-2.x provides latencies as integers, but to make things more similar to the case of a single
438 // numa node as well as hwloc-1.x, we rescale to relative floating-point values and also set the
439 // largest relative latency value.
441 // find smallest value in matrix
442 float minLatency = std::numeric_limits<float>::max(); // large number
443 float maxLatency = std::numeric_limits<float>::min(); // 0.0
444 for (const auto &v : machine->numa.relativeLatency)
446 auto result = std::minmax_element(v.begin(), v.end());
447 minLatency = std::min(minLatency, *result.first);
448 maxLatency = std::max(maxLatency, *result.second);
451 // assign stuff
452 for (auto &v : machine->numa.relativeLatency)
454 std::transform(v.begin(), v.end(), v.begin(), std::bind(std::multiplies<float>(), std::placeholders::_1, 1.0/minLatency));
456 machine->numa.baseLatency = 1.0; // latencies still do not have any units in hwloc-2.x
457 machine->numa.maxRelativeLatency = maxLatency/minLatency;
459 #else // GMX_HWLOC_API_VERSION_IS_2XX == false, hwloc api is 1.x.x
460 int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
461 const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
462 if (dist != nullptr && dist->nbobjs == hwlocNumaNodes.size())
464 machine->numa.baseLatency = dist->latency_base;
465 machine->numa.maxRelativeLatency = dist->latency_max;
466 machine->numa.relativeLatency.resize(dist->nbobjs);
467 for (std::size_t i = 0; i < dist->nbobjs; i++)
469 machine->numa.relativeLatency[i].resize(dist->nbobjs);
470 for (std::size_t j = 0; j < dist->nbobjs; j++)
472 machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
476 else
478 topologyOk = false;
480 #endif // end GMX_HWLOC_API_VERSION_IS_2XX == false
482 else
483 // Deals with the case of no numa nodes found.
484 #if GMX_HWLOC_API_VERSION_IS_2XX
485 // If the hwloc version is 2.x.x, and there is no numa node, something went wrong
487 topologyOk = false;
489 #else
491 // No numa nodes found. Use the entire machine as a numa node.
492 // Note that this should only be the case with hwloc api v 1.x.x,
493 // a numa node is assigned to the machine by default in v 2.x.x
494 const hwloc_obj*const hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, nullptr);
496 if (hwlocMachine != nullptr)
498 machine->numa.nodes.resize(1);
499 machine->numa.nodes[0].id = 0;
500 machine->numa.nodes[0].memory = hwlocMachine->memory.total_memory;
501 machine->numa.baseLatency = 10;
502 machine->numa.maxRelativeLatency = 1;
503 machine->numa.relativeLatency = { { 1.0 } };
505 for (int i = 0; i < machine->logicalProcessorCount; i++)
507 machine->numa.nodes[0].logicalProcessorId.push_back(i);
509 for (auto &l : machine->logicalProcessors)
511 l.numaNodeId = 0;
513 for (auto &s : machine->sockets)
515 for (auto &c : s.cores)
517 c.numaNodeId = 0;
521 else
523 topologyOk = false;
526 #endif // end if not GMX_HWLOC_API_VERSION_IS_2XX
527 if (topologyOk)
529 return 0;
531 else
533 machine->numa.nodes.clear();
534 return -1;
539 /*! \brief Read PCI device information from hwloc topology
541 * \param topo hwloc topology handle that has been initialized and loaded
542 * \param machine Pointer to the machine structure in the HardwareTopology
543 * class, where PCI device information will be filled.
545 * \return If any devices were found the return value is 0, otherwise non-zero.
548 parseHwLocDevices(hwloc_topology_t topo,
549 HardwareTopology::Machine * machine)
551 const hwloc_obj_t root = hwloc_get_root_obj(topo);
552 std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PCI_DEVICE);
554 for (auto &p : pcidevs)
556 #if GMX_HWLOC_API_VERSION_IS_2XX
557 const hwloc_obj * ancestor = nullptr;
558 // Numa nodes not directly part of tree. Walk up the tree until we find an ancestor with a numa node
559 hwloc_obj_t parent = p->parent;
560 while (parent && !parent->memory_arity)
562 parent = parent->parent;
564 if (parent)
566 ancestor = parent->memory_first_child;
568 #else // GMX_HWLOC_API_VERSION_IS_2XX = false, api v 1.x.x
569 // numa nodes are normal part of tree, can use hwloc ancestor function
570 const hwloc_obj * const ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
571 #endif // end if GMX_HWLOC_API_VERSION_IS_2XX
572 int numaId;
573 if (ancestor != nullptr)
575 numaId = ancestor->logical_index;
577 else
579 // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
580 numaId = (machine->numa.nodes.size() == 1) ? 0 : -1;
583 GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
585 machine->devices.push_back( {
586 p->attr->pcidev.vendor_id,
587 p->attr->pcidev.device_id,
588 p->attr->pcidev.class_id,
589 p->attr->pcidev.domain,
590 p->attr->pcidev.bus,
591 p->attr->pcidev.dev,
592 p->attr->pcidev.func,
593 numaId
594 } );
596 return pcidevs.empty();
599 void
600 parseHwLoc(HardwareTopology::Machine * machine,
601 HardwareTopology::SupportLevel * supportLevel,
602 bool * isThisSystem)
604 hwloc_topology_t topo;
606 // Initialize a hwloc object, set flags to request IO device information too,
607 // try to load the topology, and get the root object. If either step fails,
608 // return that we do not have any support at all from hwloc.
609 if (hwloc_topology_init(&topo) != 0)
611 hwloc_topology_destroy(topo);
612 return; // SupportLevel::None.
615 // Flags to look for io devices
616 #if GMX_HWLOC_API_VERSION_IS_2XX
617 hwloc_topology_set_io_types_filter(topo, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
618 #else
619 hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
620 #endif
622 if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == nullptr)
624 hwloc_topology_destroy(topo);
625 return; // SupportLevel::None.
628 // If we get here, we can get a valid root object for the topology
629 *isThisSystem = hwloc_topology_is_thissystem(topo);
631 // Parse basic information about sockets, cores, and hardware threads
632 if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
634 *supportLevel = HardwareTopology::SupportLevel::Basic;
636 else
638 hwloc_topology_destroy(topo);
639 return; // SupportLevel::None.
642 // Get information about cache and numa nodes
643 if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
645 *supportLevel = HardwareTopology::SupportLevel::Full;
647 else
649 hwloc_topology_destroy(topo);
650 return; // SupportLevel::Basic.
653 // PCI devices
654 if (parseHwLocDevices(topo, machine) == 0)
656 *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
659 hwloc_topology_destroy(topo);
660 // SupportLevel::Full or SupportLevel::FullWithDevices.
663 #endif
665 /*! \brief Try to detect the number of logical processors.
667 * \return The number of hardware processing units, or 0 if it fails.
670 detectLogicalProcessorCount()
672 int count = 0;
675 #if GMX_NATIVE_WINDOWS
676 // Windows
677 SYSTEM_INFO sysinfo;
678 GetSystemInfo( &sysinfo );
679 count = sysinfo.dwNumberOfProcessors;
680 #elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
681 // We are probably on Unix. Check if we have the argument to use before executing any calls
682 count = sysconf(_SC_NPROCESSORS_ONLN);
683 #else
684 count = 0; // Neither windows nor Unix.
685 #endif
688 return count;
691 } // namespace
693 // static
694 HardwareTopology HardwareTopology::detect()
696 HardwareTopology result;
698 #if GMX_USE_HWLOC
699 parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
700 #endif
702 // If something went wrong in hwloc (or if it was not present) we might
703 // have more information in cpuInfo
704 if (result.supportLevel_ < SupportLevel::Basic)
706 // There might be topology information in cpuInfo
707 parseCpuInfo(&result.machine_, &result.supportLevel_);
709 // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
710 if (result.supportLevel_ == SupportLevel::None)
712 // No topology information; try to detect the number of logical processors at least
713 result.machine_.logicalProcessorCount = detectLogicalProcessorCount();
714 if (result.machine_.logicalProcessorCount > 0)
716 result.supportLevel_ = SupportLevel::LogicalProcessorCount;
719 return result;
722 HardwareTopology::Machine::Machine()
724 logicalProcessorCount = 0;
725 numa.baseLatency = 0.0;
726 numa.maxRelativeLatency = 0.0;
730 HardwareTopology::HardwareTopology()
731 : supportLevel_(SupportLevel::None),
732 machine_(),
733 isThisSystem_(true)
737 HardwareTopology::HardwareTopology(int logicalProcessorCount)
738 : supportLevel_(SupportLevel::None),
739 machine_(),
740 isThisSystem_(true)
742 if (logicalProcessorCount > 0)
744 machine_.logicalProcessorCount = logicalProcessorCount;
745 supportLevel_ = SupportLevel::LogicalProcessorCount;
749 int HardwareTopology::numberOfCores() const
751 if (supportLevel() >= SupportLevel::Basic)
753 // We assume all sockets have the same number of cores as socket 0.
754 // Since topology information is present, we can assume there is at least one socket.
755 return machine().sockets.size() * machine().sockets[0].cores.size();
757 else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
759 return machine().logicalProcessorCount;
761 else
763 return 0;
767 } // namespace gmx