2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
45 } /* fixes auto-indentation problems */
49 /* Currently identifiable CPU Vendors */
52 GMX_CPUID_VENDOR_CANNOTDETECT
, /* Should only be used if something fails */
53 GMX_CPUID_VENDOR_UNKNOWN
,
54 GMX_CPUID_VENDOR_INTEL
,
56 GMX_CPUID_VENDOR_FUJITSU
,
63 /* CPU feature/property list, to be used as indices into the feature array of the
64 * gmxcpuid_t data structure.
66 * To facilitate looking things up, we keep this list alphabetical.
67 * The list is NOT exhaustive - we have basically added stuff that might be
68 * useful in an application like Gromacs.
70 * AMD and Intel tend to share most architectural elements, and even if the
71 * flags might have to be detected in different ways (different cpuid registers),
72 * once the flag is present the functions should be identical. Unfortunately the
73 * trend right now (2012) seems to be that they are diverging. This means that
74 * we need to use specific flags to the compiler to maximize performance, and
75 * then the binaries might not be portable between Intel and AMD as they were
76 * before when we only needed to check for SSE and/or SSE2 support in Gromacs.
78 enum gmx_cpuid_feature
80 GMX_CPUID_FEATURE_CANNOTDETECT
, /* Flag set if we could not detect on this CPU */
81 GMX_CPUID_FEATURE_X86_AES
, /* x86 advanced encryption standard accel. */
82 GMX_CPUID_FEATURE_X86_APIC
, /* APIC support */
83 GMX_CPUID_FEATURE_X86_AVX
, /* Advanced vector extensions */
84 GMX_CPUID_FEATURE_X86_AVX2
, /* AVX2 including gather support (not used yet) */
85 GMX_CPUID_FEATURE_X86_AVX_512F
, /* Foundation AVX-512 instructions */
86 GMX_CPUID_FEATURE_X86_AVX_512PF
, /* Extended gather/scatter for AVX-512 */
87 GMX_CPUID_FEATURE_X86_AVX_512ER
, /* Extended-range 1/x and 1/sqrt(x) for AVX-512 */
88 GMX_CPUID_FEATURE_X86_AVX_512CD
, /* Memory conflict-detection for AVX-512 */
89 GMX_CPUID_FEATURE_X86_CLFSH
, /* Supports CLFLUSH instruction */
90 GMX_CPUID_FEATURE_X86_CMOV
, /* Conditional move insn support */
91 GMX_CPUID_FEATURE_X86_CX8
, /* Supports CMPXCHG8B (8-byte compare-exchange) */
92 GMX_CPUID_FEATURE_X86_CX16
, /* Supports CMPXCHG16B (16-byte compare-exchg) */
93 GMX_CPUID_FEATURE_X86_F16C
, /* Supports 16-bit FP conversion instructions */
94 GMX_CPUID_FEATURE_X86_FMA
, /* Fused-multiply add support (mainly for AVX) */
95 GMX_CPUID_FEATURE_X86_FMA4
, /* 4-operand FMA, only on AMD for now */
96 GMX_CPUID_FEATURE_X86_HTT
, /* Hyper-Threading supported */
97 GMX_CPUID_FEATURE_X86_LAHF_LM
, /* LAHF/SAHF support in 64 bits */
98 GMX_CPUID_FEATURE_X86_MISALIGNSSE
, /* Support for misaligned SSE data instructions */
99 GMX_CPUID_FEATURE_X86_MMX
, /* MMX registers and instructions */
100 GMX_CPUID_FEATURE_X86_MSR
, /* Supports Intel model-specific-registers */
101 GMX_CPUID_FEATURE_X86_NONSTOP_TSC
, /* Invariant TSC (constant rate in ACPI states) */
102 GMX_CPUID_FEATURE_X86_PCID
, /* Process context identifier support */
103 GMX_CPUID_FEATURE_X86_PCLMULDQ
, /* Carry-less 64-bit multiplication supported */
104 GMX_CPUID_FEATURE_X86_PDCM
, /* Perfmon and Debug Capability */
105 GMX_CPUID_FEATURE_X86_PDPE1GB
, /* Support for 1GB pages */
106 GMX_CPUID_FEATURE_X86_POPCNT
, /* Supports the POPCNT (population count) insn */
107 GMX_CPUID_FEATURE_X86_PSE
, /* Supports 4MB-pages (page size extension) */
108 GMX_CPUID_FEATURE_X86_RDRND
, /* RDRAND high-quality hardware random numbers */
109 GMX_CPUID_FEATURE_X86_RDTSCP
, /* Serializing rdtscp instruction available */
110 GMX_CPUID_FEATURE_X86_SSE2
, /* SSE 2 */
111 GMX_CPUID_FEATURE_X86_SSE3
, /* SSE 3 */
112 GMX_CPUID_FEATURE_X86_SSE4A
, /* SSE 4A */
113 GMX_CPUID_FEATURE_X86_SSE4_1
, /* SSE 4.1 */
114 GMX_CPUID_FEATURE_X86_SSE4_2
, /* SSE 4.2 */
115 GMX_CPUID_FEATURE_X86_SSSE3
, /* Supplemental SSE3 */
116 GMX_CPUID_FEATURE_X86_TDT
, /* TSC deadline timer */
117 GMX_CPUID_FEATURE_X86_X2APIC
, /* Extended xAPIC Support */
118 GMX_CPUID_FEATURE_X86_XOP
, /* AMD extended instructions, only AMD for now */
119 GMX_CPUID_FEATURE_ARM_NEON
, /* 32-bit ARM NEON */
120 GMX_CPUID_FEATURE_ARM_NEON_ASIMD
, /* 64-bit ARM AArch64 Advanced SIMD */
121 GMX_CPUID_FEATURE_IBM_QPX
, /* IBM QPX SIMD (BlueGene/Q and later) */
122 GMX_CPUID_FEATURE_IBM_VMX
, /* IBM VMX SIMD (Altivec on Power6 and later) */
123 GMX_CPUID_FEATURE_IBM_VSX
, /* IBM VSX SIMD (Power7 and later) */
128 /* Currently supported SIMD instruction sets, intrinsics or other similar combinations
129 * in Gromacs. There is not always a 1-to-1 correspondence with feature flags; on some AMD
130 * hardware we prefer to use 128bit AVX instructions (although 256-bit ones could be executed),
131 * and we still haven't written the AVX2 kernels.
135 GMX_CPUID_SIMD_CANNOTDETECT
, /* Should only be used if something fails */
137 GMX_CPUID_SIMD_REFERENCE
,
138 GMX_CPUID_SIMD_X86_SSE2
,
139 GMX_CPUID_SIMD_X86_SSE4_1
,
140 GMX_CPUID_SIMD_X86_AVX_128_FMA
,
141 GMX_CPUID_SIMD_X86_AVX_256
,
142 GMX_CPUID_SIMD_X86_AVX2_256
,
143 GMX_CPUID_SIMD_X86_AVX_512F
,
144 GMX_CPUID_SIMD_X86_AVX_512ER
,
145 GMX_CPUID_SIMD_SPARC64_HPC_ACE
,
146 GMX_CPUID_SIMD_IBM_QPX
,
147 GMX_CPUID_SIMD_IBM_VMX
,
148 GMX_CPUID_SIMD_IBM_VSX
,
149 GMX_CPUID_SIMD_ARM_NEON
,
150 GMX_CPUID_SIMD_ARM_NEON_ASIMD
,
154 /* Text strings corresponding to CPU vendors */
156 gmx_cpuid_vendor_string
[GMX_CPUID_NVENDORS
];
158 /* Text strings for CPU feature indices */
160 gmx_cpuid_feature_string
[GMX_CPUID_NFEATURES
];
162 /* Text strings for Gromacs SIMD instruction sets */
164 gmx_cpuid_simd_string
[GMX_CPUID_NSIMD
];
167 /* Abstract data type with CPU detection information. Set by gmx_cpuid_init(). */
168 typedef struct gmx_cpuid
*
172 /* Fill the data structure by using CPU detection instructions.
173 * Return 0 on success, 1 if something bad happened.
176 gmx_cpuid_init (gmx_cpuid_t
* cpuid
);
179 /* Return the vendor id as enumerated type. Use gmx_cpuid_vendor_string[]
180 * to get the corresponding text string.
182 enum gmx_cpuid_vendor
183 gmx_cpuid_vendor (gmx_cpuid_t cpuid
);
186 /* Return a constant pointer to the processor brand string. */
188 gmx_cpuid_brand (gmx_cpuid_t cpuid
);
191 /* Return processor family version. For a chip of version 1.2.3, this is 1. */
193 gmx_cpuid_family (gmx_cpuid_t cpuid
);
195 /* Return processor model version. For a chip of version 1.2.3, this is 2. */
197 gmx_cpuid_model (gmx_cpuid_t cpuid
);
199 /* Return processor stepping version. For a chip of version 1.2.3, this is 3. */
201 gmx_cpuid_stepping (gmx_cpuid_t cpuid
);
204 /* Check whether a particular CPUID feature is set.
205 * Returns 0 if flag "feature" is not set, 1 if the flag is set. We cannot use
206 * gmx_bool here since it must be possible to compile this file without simple.h.
209 gmx_cpuid_feature (gmx_cpuid_t cpuid
,
210 enum gmx_cpuid_feature feature
);
213 /* Check whether the CPU is an Intel with Nehalem microarchitecture.
214 * Return 0 if not Intel Nehalem, 1 if Intel Nehalem.
217 gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid
);
220 /* Return pointers to cpu topology information.
222 * Important: CPU topology requires more OS support than most other
223 * functions in this file, including support for thread pinning to hardware.
224 * This means it will not work on some platforms, including e.g. Mac OS X.
225 * Thus, it is IMPERATIVE that you check the return value from this routine
226 * before doing anything with the information. It is only if the return
227 * value is zero that the data is valid.
229 * For the returned values we have:
230 * - nprocessors Total number of logical processors reported by OS
231 * - npackages Usually number of CPU sockets
232 * - ncores_per_package Number of cores in each package
233 * - nhwthreads_per_core Number of hardware threads per core; 2 for hyperthreading.
234 * - package_id Array with the package index for each logical cpu
235 * - core_id Array with local core index for each logical cpu
236 * - hwthread_id Array with local hwthread index for each logical cpu
237 * - locality_order Array with logical cpu numbers, sorted in order
238 * of physical and logical locality in the system.
240 * All arrays are of length nprocessors.
243 gmx_cpuid_topology(gmx_cpuid_t cpuid
,
246 int * ncores_per_package
,
247 int * nhwthreads_per_core
,
248 const int ** package_id
,
249 const int ** core_id
,
250 const int ** hwthread_id
,
251 const int ** locality_order
);
253 /* Enumerated values for x86 SMT enabled-status. Note that this does not refer
254 * to Hyper-Threading support (that is the flag GMX_CPUID_FEATURE_X86_HTT), but
255 * whether Hyper-Threading is _enabled_ and _used_ in the BIOS right now.
257 enum gmx_cpuid_x86_smt
259 GMX_CPUID_X86_SMT_CANNOTDETECT
,
260 GMX_CPUID_X86_SMT_DISABLED
,
261 GMX_CPUID_X86_SMT_ENABLED
264 /* Returns the status of x86 SMT support. IMPORTANT: There are non-zero
265 * return values for this routine that still do not indicate supported and
266 * enabled smt/Hyper-Threading. You need to carefully check the return value
267 * against the enumerated type values to see what you are getting.
269 * Long-term, this functionality will move to a new hardware topology detection
270 * layer, but that will require a lot of new code and a working interface to the
271 * hwloc library. Surprisingly, there is no simple way to find out that
272 * Hyper-Threading is actually turned on without fully enumerating and checking
273 * all the cores, which we presently can only do on Linux. This means a couple of things:
276 * 1) If you want to know whether your CPU _supports_ Hyper-Threading in the
277 * first place, check the GMX_CPUID_FEATURE_X86_HTT flag instead!
278 * 2) There are several scenarios where this routine will say that it cannot
279 * detect whether SMT is enabled and used right now.
280 * 3) If you need support on non-Linux x86, you have to write it :-)
281 * 4) Don't invest too much effort, since this will be replaced with
282 * full hardware topology detection in the future.
283 * 5) Don't worry if the detection does not work. It is not a catastrophe, but
284 * we get slightly better performance on x86 if we use Hyper-Threading
285 * cores in direct space, but not reciprocal space.
287 * Since this routine presently only supports Hyper-Threading we say X86_SMT
288 * in order not to give the impression we can detect any SMT. We haven't
289 * even tested the performance on other SMT implementations, so it is not
290 * obvious we shouldn't use SMT there.
292 * Note that you can get more complete topology information from
293 * gmx_cpuid_topology(), although that requires slightly more OS support.
295 enum gmx_cpuid_x86_smt
296 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid
);
299 /* Formats a text string (up to n characters) from the data structure.
300 * The output will have max 80 chars between newline characters.
303 gmx_cpuid_formatstring (gmx_cpuid_t cpuid
,
308 /* Suggests a suitable gromacs SIMD based on the support in the hardware.
312 gmx_cpuid_simd_suggest (gmx_cpuid_t cpuid
);
315 /* Check if this binary was compiled with the same SIMD instructions as we
316 * would suggest for the current hardware. Always print stats to the log file
317 * if it is non-NULL, and if we don't have a match, print a warning in log
318 * (if non-NULL) and if print_to_stderr!=0 also to stderr.
321 gmx_cpuid_simd_check (gmx_cpuid_t cpuid
,
323 int print_to_stderr
);
326 /* Release resources used by data structure. Note that the pointer to the
327 * CPU brand string will no longer be valid once this routine has been called.
330 gmx_cpuid_done (gmx_cpuid_t cpuid
);
340 #endif /* GMX_CPUID_H_ */