128-bit AVX2 SIMD for AMD Ryzen
[gromacs.git] / src / gromacs / hardware / cpuinfo.h
blob676321a4a7dd71e012ed5e29cede0683991de6b1
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \libinternal \file
36 * \brief
37 * Declares gmx::CpuInfo
39 * \author Erik Lindahl <erik.lindahl@gmail.com>
40 * \inlibraryapi
41 * \ingroup module_hardware
43 #ifndef GMX_HARDWARE_CPUINFO_H
44 #define GMX_HARDWARE_CPUINFO_H
46 #include <map>
47 #include <set>
48 #include <string>
49 #include <vector>
namespace gmx
{

/*! \libinternal \brief Detect CPU capabilities and basic logical processor info
 *
 * This class provides a lot of information about x86 CPUs, and some very
 * limited information about other hardware. The logical processor information
 * is only available on x86, and is used as a fallback implementation in
 * the HardwareTopology class.
 * If you actually need information about the hardware topology, use the much
 * more general implementation in the HardwareTopology class instead, since
 * that will both be more portable and contain more information.
 *
 * \ingroup module_hardware
 */
class CpuInfo
{
public:
    /*! \brief Amount of cpu information present (incremental) */
    enum class SupportLevel
    {
        None,                //!< No cpu information whatsoever. Sorry.
        Name,                //!< Only vendor and/or brand is set
        Features,            //!< Some features are set
        LogicalProcessorInfo //!< Everything including logical processor information
    };

    /*! \brief Processor/system vendors */
    enum class Vendor
    {
        Unknown, //!< Unidentified
        Intel,   //!< GenuineIntel
        Amd,     //!< AuthenticAMD
        Fujitsu, //!< Only works on Linux (parsed from /proc/cpuinfo)
        Ibm,     //!< Only works on Linux (parsed from /proc/cpuinfo)
        Arm      //!< Only works on Linux (parsed from /proc/cpuinfo)
    };

    /*! \brief List of CPU features
     *
     * These values can be used as arguments to the feature() method
     * to check whether a specific feature was found on the CPU we are
     * running on.
     */
    enum class Feature
    {
        X86_Aes,         //!< x86 advanced encryption standard accel.
        X86_Amd,         //!< This is an AMD x86 processor
        X86_Apic,        //!< APIC support
        X86_Avx,         //!< Advanced vector extensions
        X86_Avx2,        //!< AVX2 including gather support (not used yet)
        X86_Avx512F,     //!< Foundation AVX-512 instructions
        X86_Avx512PF,    //!< Extended gather/scatter for AVX-512
        X86_Avx512ER,    //!< AVX-512 exponential and reciprocal extensions
        X86_Avx512CD,    //!< Memory conflict-detection for AVX-512
        X86_Avx512BW,    //!< AVX-512 byte and word instructions
        X86_Avx512VL,    //!< AVX-512 vector length extensions
        X86_Clfsh,       //!< Supports CLFLUSH instruction
        X86_Cmov,        //!< Conditional move insn support
        X86_Cx8,         //!< Supports CMPXCHG8B (8-byte compare-exchange)
        X86_Cx16,        //!< Supports CMPXCHG16B (16-byte compare-exchg)
        X86_F16C,        //!< Supports 16-bit FP conversion instructions
        X86_Fma,         //!< Fused-multiply add support (mainly for AVX)
        X86_Fma4,        //!< 4-operand FMA, only on AMD for now
        X86_Hle,         //!< Hardware lock elision
        X86_Htt,         //!< Hyper-Threading supported (but maybe not enabled)
        X86_Intel,       //!< This is an Intel x86 processor
        X86_Lahf,        //!< LAHF/SAHF support in 64 bits
        X86_MisalignSse, //!< Support for misaligned SSE data instructions
        X86_Mmx,         //!< MMX registers and instructions
        X86_Msr,         //!< Supports Intel model-specific-registers
        X86_NonstopTsc,  //!< Invariant TSC (constant rate in ACPI states)
        X86_Pcid,        //!< Process context identifier support
        X86_Pclmuldq,    //!< Carry-less 64-bit multiplication supported
        X86_Pdcm,        //!< Perfmon and Debug Capability
        X86_PDPE1GB,     //!< Support for 1GB pages
        X86_Popcnt,      //!< Supports the POPCNT (population count) insn
        X86_Pse,         //!< Supports 4MB-pages (page size extension)
        X86_Rdrnd,       //!< RDRAND high-quality hardware random numbers
        X86_Rdtscp,      //!< Serializing rdtscp instruction available
        X86_Rtm,         //!< Restricted transactional memory
        X86_Sha,         //!< Intel SHA extensions
        X86_Sse2,        //!< SSE 2
        X86_Sse3,        //!< SSE 3
        X86_Sse4A,       //!< SSE 4A
        X86_Sse4_1,      //!< SSE 4.1
        X86_Sse4_2,      //!< SSE 4.2
        X86_Ssse3,       //!< Supplemental SSE3
        X86_Tdt,         //!< TSC deadline timer
        X86_X2Apic,      //!< Extended xAPIC Support
        X86_Xop,         //!< AMD extended instructions, only AMD for now
        Arm_Neon,        //!< 32-bit ARM NEON
        Arm_NeonAsimd,   //!< 64-bit ARM AArch64 Advanced SIMD
        Ibm_Qpx,         //!< IBM QPX SIMD (BlueGene/Q and later)
        Ibm_Vmx,         //!< IBM VMX SIMD (Altivec on Power6 and later)
        Ibm_Vsx,         //!< IBM VSX SIMD (Power7 and later)
        Fujitsu_HpcAce   //!< Fujitsu Sparc64 HPC-ACE
    };

    /*! \libinternal \brief Entry with basic information for a single logical processor */
    struct LogicalProcessor
    {
        int socketRankInMachine; //!< Relative rank of the current socket in the system
        int coreRankInSocket;    //!< Relative rank of the current core in its socket
        int hwThreadRankInCore;  //!< Relative rank of logical processor in its core
    };

    /*! \brief Perform detection and construct a CpuInfo class from the results.
     *
     * \note The detection should generally be performed again in different
     *       contexts. This might seem like overkill, but there
     *       are systems (e.g. Arm) where processors can go completely offline
     *       during deep sleep, so at least in theory it is good to have a
     *       possibility of forcing re-detection if necessary.
     */
    static CpuInfo detect();

    /*! \brief Check what cpu information is available
     *
     * The amount of cpu information that can be detected depends on the
     * OS, compiler, and CPU, and on non-x86 platforms it can be fragile.
     * Before basing decisions on the output or warning the user about
     * optimizations, you want to check whether it was possible to detect
     * the information you need.
     */
    SupportLevel supportLevel() const { return supportLevel_; }

    /*! \brief Enumerated value for vendor */
    Vendor vendor() const { return vendor_; }

    /*! \brief String description of vendor
     *
     * \throws std::out_of_range if the vendor is not present in the internal
     *         map of vendor names. This can only happen if we extend the enum
     *         type but forget to add the string with the vendor name.
     */
    const std::string &vendorString() const
    {
        return s_vendorStrings_.at(vendor_);
    }

    /*! \brief String description of processor */
    const std::string &brandString() const { return brandString_; }

    /*! \brief Major version/generation of the processor */
    int family() const { return family_; }

    /*! \brief Middle version of the processor */
    int model() const { return model_; }

    /*! \brief Minor version of the processor */
    int stepping() const { return stepping_; }

    /*! \brief Check for availability of specific feature
     *
     * \param f feature to query support for
     *
     * \return True if the feature is available, otherwise false.
     */
    bool feature(Feature f) const
    {
        // If the entry is present in the set it is supported
        return (features_.count(f) != 0);
    }

    /*! \brief String description of a specific feature
     *
     * \param f feature to look up the name of
     *
     * \throws std::out_of_range if the feature is not present in the internal
     *         map of feature names. This can only happen if we extend the enum
     *         type but forget to add the string with the feature name.
     */
    static const std::string &featureString(Feature f)
    {
        return s_featureStrings_.at(f);
    }

    /*! \brief Set of all supported features on this processor
     *
     * This is only intended for logfiles, debugging or similar output when we
     * need a full list of all the features available on the CPU.
     */
    const std::set<Feature> &featureSet() const
    {
        return features_;
    }

    /*! \brief Reference to processing unit topology
     *
     * Only a few systems (x86) provide logical processor information in cpuinfo.
     * This method returns a reference to a vector, whose length will either be
     * zero (if topology information is not available) or the number of enabled
     * processing units, as defined by the operating system. In the latter
     * case, each entry will contain information about the relative rank in the
     * core and socket of this hardware thread.
     *
     * This is only meant to be used as a fallback implementation for our
     * HardwareTopology class; any user code that needs access to hardware
     * topology information should use that class instead.
     *
     * \note For clarity, it is likely better to use the supportLevel()
     *       method to check if this information is available rather than
     *       relying on the length of the vector.
     */
    const std::vector<LogicalProcessor> &logicalProcessors() const { return logicalProcessors_; }

private:
    // Private so that instances are only obtained through detect().
    CpuInfo();

    SupportLevel                  supportLevel_;      //!< Available cpuinfo information
    Vendor                        vendor_;            //!< Value of vendor for current cpu
    std::string                   brandString_;       //!< Text description of cpu
    int                           family_;            //!< Major version of current cpu
    int                           model_;             //!< Middle version of current cpu
    int                           stepping_;          //!< Minor version of current cpu
    std::set<Feature>             features_;          //!< Set of features supported on this cpu
    std::vector<LogicalProcessor> logicalProcessors_; //!< Simple logical processor topology

    static const std::map<Vendor, std::string>  s_vendorStrings_;  //!< Text description of each vendor
    static const std::map<Feature, std::string> s_featureStrings_; //!< Text description of each feature
}; // class CpuInfo

/*! \brief Return true if the CPU is an Intel x86 Nehalem
 *
 * \param cpuInfo Object with cpu information
 *
 * \returns True if running on Nehalem CPU
 */
bool cpuIsX86Nehalem(const CpuInfo &cpuInfo);

} // namespace gmx
296 #endif // GMX_HARDWARE_CPUINFO_H