2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \libinternal \file
37 * Declares gmx::HardwareTopology
39 * \author Erik Lindahl <erik.lindahl@gmail.com>
41 * \ingroup module_hardware
43 #ifndef GMX_HARDWARE_HARDWARETOPOLOGY_H
44 #define GMX_HARDWARE_HARDWARETOPOLOGY_H
53 /*! \libinternal \brief Information about sockets, cores, threads, numa, caches
55 * This class is the main GROMACS interface to provide information about the
56 * hardware of the system we are running on. Internally, it uses either
57 * hwloc for full or almost-full information, or a fallback implementation
58 * that relies on CpuInfo on x86.
60 * You should always use this class (rather than CpuInfo directly) to query
61 * the hardware layout in user code. Note that you cannot rely on any
62 * information being present, but you must check with the supportLevel()
63 * method before trying to access any information.
65 class HardwareTopology
68 /*! \brief Amount of topology information present (incremental) */
69 enum class SupportLevel
71 None
, //!< No hardware information whatsoever. Sorry.
72 LogicalProcessorCount
, //!< Only machine().logicalProcessorCount is valid
73 Basic
, //!< Socket, core and hardware thread info
74 Full
, //!< Cache, memory and numa node info
75 FullWithDevices
//!< Information about devices on the PCI bus
78 /*! \libinternal \brief Information about a single cache level */
81 int level
; //!< Level relative to core (starts at 1)
82 std::size_t size
; //!< size in bytes, 0 if unknown
83 int linesize
; //!< size of each cache line in bytes, 0 if unknown
84 int associativity
; //!< associativity, -1 means fully associative
85 int shared
; //!< Number of logical processors sharing this cache
88 /*! \libinternal \brief Information about a single hardware thread in a core
90 * The id of the thread typically increases continuously as you walk
91 * through sockets and cores in order of their ids. In general, this can
92 * be different from the logical processor id provided by the operating
93 * system. To achieve better load balancing when using SMT, Linux
94 * typically assigns logical processors in a round-robin fashion over all cores.
99 int id
; //!< Absolute id of this thread in hardware topology
100 int logicalProcessorId
; //!< Id of the operating system logical processor
103 /*! \libinternal \brief Information about a single core in a socket */
106 int id
; //!< Absolute id of this core in hardware topology
107 int numaNodeId
; //!< id of the numa node of this core
108 std::vector
<HWThread
> hwThreads
; //!< All the hardware threads in this core
111 /*! \libinternal \brief Information about a single socket in the system */
114 int id
; //!< Absolute id of this socket in hardware topology
115 std::vector
<Core
> cores
; //!< All the cores in this socket
118 /*! \libinternal \brief Information about a single numa node in the system */
121 int id
; //!< Absolute id of numa node in hardware topology
122 std::size_t memory
; //!< Total detected memory in bytes
123 std::vector
<int> logicalProcessorId
; //!< Vector of all the logical processors in this node
126 /*! \libinternal \brief Information about all numa nodes in the system and the latencies between them */
129 std::vector
<NumaNode
> nodes
; //!< Information about each numa node
130 float baseLatency
; //!< Scale factor for relative latencies
131 std::vector
<std::vector
<float>> relativeLatency
; //!< 2D matrix of relative latencies between nodes
132 float maxRelativeLatency
; //!< Largest relative latency
135 /*! \libinternal \brief Information about a single PCI device.
137 * \note On many systems the PCI bus is not directly connected to any numa node.
138 * For these systems the numaNodeId will be -1, so you cannot rely on this
139 * number reflecting a specific numa node.
143 std::uint16_t vendorId
; //!< Vendor identification
144 std::uint16_t deviceId
; //!< Vendor-specific device identification
145 std::uint16_t classId
; //!< class (high 8 bits) and subclass (low 8 bits)
146 std::uint16_t domain
; //!< Domain, usually 0 for PCI bus
147 std::uint8_t bus
; //!< Bus number in domain
148 std::uint8_t dev
; //!< Device on bus
149 std::uint8_t func
; //!< Function id for multi-function devices
150 int numaNodeId
; //!< Numa node, -1 if the bus is not located inside a node
153 /*! \libinternal \brief Information about socket, core and hwthread for a logical processor */
154 struct LogicalProcessor
156 int socketRankInMachine
; //!< Index of socket in machine
157 int coreRankInSocket
; //!< Index of core in socket
158 int hwThreadRankInCore
; //!< Index of hardware thread in core
159 int numaNodeId
; //!< Index of numa node
162 /*! \libinternal \brief Hardware topology information about the entire machine
164 * The machine structure is a tree with top-down information about all
165 * sockets, cores, and hardware threads in the system. For example, an
166 * operating system logical processor index can be found as
167 * machine.sockets[0].cores[1].hwThreads[2].logicalProcessorId.
168 * In some cases you might need the opposite lookup, i.e. the physical
169 * hardware data for a specific logical processor. This is present in the
170 * logicalProcessors vector for convenience.
172 * \note The logicalProcessors vector will only have non-zero length if the
173 * support level is SupportLevel::Basic or higher. You cannot use the
174 * size of this vector to query the number of logical processors on
175 * lower support levels.
181 int logicalProcessorCount
; //!< Number of logical processors in system
182 std::vector
<LogicalProcessor
> logicalProcessors
; //!< Map logical processors to socket/core
183 std::vector
<Socket
> sockets
; //!< All the sockets in the system
184 std::vector
<Cache
> caches
; //!< Caches in increasing level order
185 Numa numa
; //!< Structure with all numa information
186 std::vector
<Device
> devices
; //!< Devices on PCI bus
190 /*! \brief Detects the hardware topology. */
191 static HardwareTopology
detect();
193 /*! \brief Creates a topology with a given number of logical processors.
195 * The support level will be either None or LogicalProcessorCount.
197 * Intended for testing of code that uses the hardware topology.
199 explicit HardwareTopology(int logicalProcessorCount
);
201 /*! \brief Check what topology information is available and valid
203 * The amount of hardware topology information that can be detected depends
204 * on both the hardware and whether GROMACS was linked with the external
205 * hwloc library. You cannot assume that any information is present,
206 * although we can almost always provide the number of logical processors.
207 * On x86 we can usually get basic information about how sockets, cores
208 * and hardware threads are ordered even without hwloc.
209 * With the hwloc library we can usually also get information about cache,
210 * memory and concepts such as core groups and ccNUMA nodes.
211 * Finally, if hwloc was built with support for libpci we can also
212 * detect how the PCI devices are connected.
214 SupportLevel
supportLevel() const { return supportLevel_
; }
216 /*! \brief Return true if we actually detected hardware.
218 * \return This method will normally return true, when we actually ran
219 * the hardware detection as part of this process to construct
220 * the object. It will be false when the object was constructed
221 * by reading a cached XML file, or possibly generated from synthetic data.
224 bool isThisSystem() const { return isThisSystem_
; }
226 /*! \brief Return the machine topology tree
228 * You can always call this routine, but be aware that some or all contents
229 * will not be valid unless supportLevel() returns a sufficient level.
231 * - With SupportLevel::LogicalProcessorCount, only the field
232 * machine.logicalProcessorCount is valid.
233 * - With SupportLevel::Basic, you can access the vectors of sockets,
234 * cores, and hardware threads, and query which logicalProcessorId
235 * each hardware thread corresponds to.
236 * - SupportLevel::Full adds cache, memory and ccNUMA information.
237 * - SupportLevel::FullWithDevices also adds the PCI express bus.
239 * While data that is not valid has been initialized to special values,
240 * you should not rely on those but query the supportLevel() method before accessing it.
243 const Machine
& machine() const { return machine_
; }
245 /*! \brief Returns the number of cores.
247 * You can always call this routine, but if sufficient support is not
248 * available, it may return the logical processor count or zero instead
249 * of the physical core count.
251 int numberOfCores() const;
256 SupportLevel supportLevel_
; //!< Available topology information
257 Machine machine_
; //!< The machine map
258 bool isThisSystem_
; //!< Machine map is real (vs. cached/synthetic)
263 #endif // GMX_HARDWARE_HARDWARETOPOLOGY_H