2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012-2018, The GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements gmx::CpuInfo.
41 * We need to be able to compile this file in stand-alone mode to use basic
42 * CPU feature detection to set the SIMD acceleration and similar things in
43 * CMake, while we still want to use more features that enable topology
44 * detection when config.h is present.
46 * We solve this by skipping the advanced stuff when the preprocessor
47 * macro GMX_CPUINFO_STANDALONE is defined. In this case you likely also need to
48 * define GMX_X86_GCC_INLINE_ASM if you are on x86; without inline assembly
49 * support it is not possible to perform the actual detection on Linux/Mac.
50 * Since these macros are specific to this file, they do not use the GMX prefix.
52 * The remaining defines (GMX_NATIVE_WINDOWS,HAVE_UNISTD_H,HAVE_SCHED_H,
53 * HAVE_SYSCONF, HAVE_SCHED_AFFINITY) are only used to determine the topology on
54 * x86, and for this we rely on including config.h.
56 * \author Erik Lindahl <erik.lindahl@gmail.com>
57 * \ingroup module_hardware
60 #ifndef GMX_CPUINFO_STANDALONE
66 #ifndef GMX_CPUINFO_STANDALONE
69 # define GMX_NATIVE_WINDOWS 0
73 # include <intrin.h> // __cpuid()
76 #if GMX_NATIVE_WINDOWS
77 # include <windows.h> // sysinfo(), necessary for topology stuff
81 # include <sched.h> // sched_getaffinity(), sched_setaffinity()
84 # include <unistd.h> // sysconf()
88 #include <cstdint> // uint32_t in X86 processor name code
98 #ifdef GMX_CPUINFO_STANDALONE
101 # include "gromacs/utility/basedefinitions.h"
104 #include "architecture.h"
112 /*! \cond internal */
114 /******************************************************************************
116 * Utility functions to make this file independent of the GROMACS library *
118 ******************************************************************************/
/*! \brief Remove initial and trailing whitespace from string
 *
 * Whitespace is whatever std::isspace() classifies as such. A string that
 * contains nothing but whitespace ends up empty.
 *
 * \param s  Pointer to string where whitespace will be removed
 */
void trimString(std::string* s)
{
    // std::isspace() has undefined behavior for negative arguments (other than
    // EOF), so a plain and possibly signed char must go through unsigned char.
    const auto isNotSpace = [](char c) -> bool {
        return std::isspace(static_cast<unsigned char>(c)) == 0;
    };

    // heading
    s->erase(s->begin(), std::find_if(s->begin(), s->end(), isNotSpace));
    // trailing
    s->erase(std::find_if(s->rbegin(), s->rend(), isNotSpace).base(), s->end());
}
137 /******************************************************************************
139 * x86 detection functions *
141 ******************************************************************************/
143 /*! \brief execute x86 cpuid instructions with custom level and extended level
145 * \param level The main cpuid level (input argument for eax register)
146 * \param ecxval Extended level (input argument for ecx register)
147 * \param eax Output in eax register
148 * \param ebx Output in ebx register
149 * \param ecx Output in ecx register
150 * \param edx Output in edx register
152 * \return 0 on success, or non-zero if the instruction could not execute.
154 int executeX86CpuID(unsigned int gmx_unused level
,
155 unsigned int gmx_unused ecxval
,
161 if (c_architecture
== Architecture::X86
)
163 #if defined __GNUC__ || GMX_X86_GCC_INLINE_ASM
165 // any compiler that understands gcc inline assembly
171 # if GMX_IS_X86_32 && defined(__PIC__)
172 // Avoid clobbering the global offset table in 32-bit pic code (ebx register)
173 __asm__
__volatile__(
174 "xchgl %%ebx, %1 \n\t"
176 "xchgl %%ebx, %1 \n\t"
177 : "+a"(*eax
), "+r"(*ebx
), "+c"(*ecx
), "+d"(*edx
));
179 // i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want
180 __asm__
__volatile__("cpuid \n\t"
181 : "+a"(*eax
), "+b"(*ebx
), "+c"(*ecx
), "+d"(*edx
));
183 // Not a normal x86, which could happen when a compiler
184 // targetting non-x86 pretends to be GCC.
188 #elif defined _MSC_VER
190 // MSVC (and icc on windows) on ia32 or x86-64
192 __cpuidex(cpuInfo
, level
, ecxval
);
193 *eax
= static_cast<unsigned int>(cpuInfo
[0]);
194 *ebx
= static_cast<unsigned int>(cpuInfo
[1]);
195 *ecx
= static_cast<unsigned int>(cpuInfo
[2]);
196 *edx
= static_cast<unsigned int>(cpuInfo
[3]);
201 // We are on x86, but without compiler support for cpuid if we get here
208 #endif // check for inline asm on x86
222 /*! \brief Detect x86 vendors by using the cpuid assembly instructions
224 * If support for the cpuid instruction is present, we check for Intel,
225 * AMD or Hygon vendors
227 * \return gmx::CpuInfo::Vendor::Intel, gmx::CpuInfo::Vendor::Amd,
228 * gmx::CpuInfl::Vendor::Hygon, . If neither Intel, Amd nor
229 * Hygon can be identified, or if the code fails to execute,
230 * gmx::CpuInfo::Vendor::Unknown is returned.
232 CpuInfo::Vendor
detectX86Vendor()
234 unsigned int eax
, ebx
, ecx
, edx
;
235 CpuInfo::Vendor v
= CpuInfo::Vendor::Unknown
;
237 if (executeX86CpuID(0x0, 0, &eax
, &ebx
, &ecx
, &edx
) == 0)
239 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
241 v
= CpuInfo::Vendor::Intel
; // ebx=='uneG', ecx=='letn', edx=='Ieni'
243 else if (ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
245 v
= CpuInfo::Vendor::Amd
; // ebx=='htuA', ecx=='DMAc', edx=='itne'
247 else if (ebx
== 0x6f677948 && ecx
== 0x656e6975 && edx
== 0x6e65476e)
249 v
= CpuInfo::Vendor::Hygon
; // ebx=='ogyH', ecx=='eniu', edx=='neGn'
/*! \brief Detect second AVX-512 FMA from the processor name
 *
 * Should only be called for processors already determined to support AVX-512.
 * The checks rely on fixed character positions in the brand string, i.e. on
 * names of the form "Intel(R) Xeon(R) <series> <number> ...".
 *
 *  \param [in] brand     x86 processor name
 *  \param [in] model     x86 model
 *  \return               True if second FMA present
 */
bool detectProcCpuInfoSecondAvx512FMA(const std::string& brand, int model)
{
    // Skylake server
    if (model == 0x55)
    {
        // detect Xeon
        if (brand.find("Xeon") == 9)
        {
            // detect Silver or Bronze or specific models
            if (brand.find("Silver") == 17 || brand.find("Bronze") == 17
                || (brand.find('W') == 17 && brand.find('0') == 21)   // detect Xeon W 210x
                || (brand.find('D') == 17 && brand.find("21") == 19)) // detect Xeon D 2xxx
            {
                return false;
            }
            // detect Gold 5xxx - can be corrected once Cooper Lake is added
            else if (brand.find("Gold") == 17 && brand.find('5') == 22)
            {
                // Match at fixed positions rather than at the first occurrence:
                // for "5222" the first "22" is at position 23, so an unanchored
                // search would miss it even though 5[12]22 is what we look for.
                return (brand.find("53", 22) == 22 || // detect Cooper Lake
                        brand.find("22", 24) == 24);  // detect 5[12]22
            }
        }
        return true;
    }
    // Cannon Lake client
    if (model == 0x66)
    {
        return false;
    }
    // Ice Lake client
    if (model == 0x7d || model == 0x7e)
    {
        return false;
    }
    // This is the right default...
    return true;
}
301 /*! \brief Simple utility function to set/clear feature in a set
303 * \param featureSet Pointer to the feature set to update
304 * \param feature The specific feature to set/clear
305 * \param registerValue Register value (returned from cpuid)
306 * \param bit Bit to check in registerValue. The feature will be
307 * added to the featureSet if this bit is set.
309 * \note Nothing is done if the bit is not set. In particular, this will not
310 * erase anything if the feature already exists in the set.
312 void setFeatureFromBit(std::set
<CpuInfo::Feature
>* featureSet
,
313 CpuInfo::Feature feature
,
314 unsigned int registerValue
,
317 if (registerValue
& (1 << bit
))
319 featureSet
->insert(feature
);
323 /*! \brief Process x86 cpuinfo features that are common to Intel and AMD CPUs
325 * \param[out] brand String where to write the x86 brand string
326 * \param[out] family Major version of processor
327 * \param[out] model Middle version of processor
328 * \param[out] stepping Minor version of processor
329 * \param[out] features Feature set where supported features are inserted
331 void detectX86Features(std::string
* brand
, int* family
, int* model
, int* stepping
, std::set
<CpuInfo::Feature
>* features
)
333 unsigned int eax
, ebx
, ecx
, edx
;
335 // Return if we cannot execute any levels
336 if (executeX86CpuID(0x0, 0, &eax
, &ebx
, &ecx
, &edx
) != 0)
340 unsigned int maxStdLevel
= eax
;
342 if (maxStdLevel
>= 0x1)
344 executeX86CpuID(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
346 *family
= ((eax
& 0x0ff00000) >> 20) + ((eax
& 0x00000f00) >> 8);
347 *model
= ((eax
& 0x000f0000) >> 12) + ((eax
& 0x000000f0) >> 4);
348 *stepping
= (eax
& 0x0000000f);
350 setFeatureFromBit(features
, CpuInfo::Feature::X86_Sse3
, ecx
, 0);
351 setFeatureFromBit(features
, CpuInfo::Feature::X86_Pclmuldq
, ecx
, 1);
352 setFeatureFromBit(features
, CpuInfo::Feature::X86_Ssse3
, ecx
, 9);
353 setFeatureFromBit(features
, CpuInfo::Feature::X86_Fma
, ecx
, 12);
354 setFeatureFromBit(features
, CpuInfo::Feature::X86_Cx16
, ecx
, 13);
355 setFeatureFromBit(features
, CpuInfo::Feature::X86_Pdcm
, ecx
, 15);
356 setFeatureFromBit(features
, CpuInfo::Feature::X86_Pcid
, ecx
, 17);
357 setFeatureFromBit(features
, CpuInfo::Feature::X86_Sse4_1
, ecx
, 19);
358 setFeatureFromBit(features
, CpuInfo::Feature::X86_Sse4_2
, ecx
, 20);
359 setFeatureFromBit(features
, CpuInfo::Feature::X86_X2Apic
, ecx
, 21);
360 setFeatureFromBit(features
, CpuInfo::Feature::X86_Popcnt
, ecx
, 23);
361 setFeatureFromBit(features
, CpuInfo::Feature::X86_Tdt
, ecx
, 24);
362 setFeatureFromBit(features
, CpuInfo::Feature::X86_Aes
, ecx
, 25);
363 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx
, ecx
, 28);
364 setFeatureFromBit(features
, CpuInfo::Feature::X86_F16C
, ecx
, 29);
365 setFeatureFromBit(features
, CpuInfo::Feature::X86_Rdrnd
, ecx
, 30);
367 setFeatureFromBit(features
, CpuInfo::Feature::X86_Pse
, edx
, 3);
368 setFeatureFromBit(features
, CpuInfo::Feature::X86_Msr
, edx
, 5);
369 setFeatureFromBit(features
, CpuInfo::Feature::X86_Cx8
, edx
, 8);
370 setFeatureFromBit(features
, CpuInfo::Feature::X86_Apic
, edx
, 9);
371 setFeatureFromBit(features
, CpuInfo::Feature::X86_Cmov
, edx
, 15);
372 setFeatureFromBit(features
, CpuInfo::Feature::X86_Clfsh
, edx
, 19);
373 setFeatureFromBit(features
, CpuInfo::Feature::X86_Mmx
, edx
, 23);
374 setFeatureFromBit(features
, CpuInfo::Feature::X86_Sse2
, edx
, 26);
375 setFeatureFromBit(features
, CpuInfo::Feature::X86_Htt
, edx
, 28);
378 // Check whether Hyper-threading is really possible to enable in the hardware,
379 // not just technically supported by this generation of processors
380 if ((features
->count(CpuInfo::Feature::X86_Htt
) != 0U) && maxStdLevel
>= 0x4)
382 executeX86CpuID(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
383 unsigned int maxLogicalCores
= (ebx
>> 16) & 0x0ff;
384 executeX86CpuID(0x4, 0, &eax
, &ebx
, &ecx
, &edx
);
385 unsigned int maxPhysicalCores
= ((eax
>> 26) & 0x3f) + 1;
386 if (maxLogicalCores
/ maxPhysicalCores
< 2)
388 features
->erase(CpuInfo::Feature::X86_Htt
);
392 if (executeX86CpuID(0x80000000, 0, &eax
, &ebx
, &ecx
, &edx
) != 0)
394 // No point in continuing if we don't support any extended levels
397 unsigned int maxExtLevel
= eax
;
399 if (maxExtLevel
>= 0x80000001)
401 executeX86CpuID(0x80000001, 0, &eax
, &ebx
, &ecx
, &edx
);
403 setFeatureFromBit(features
, CpuInfo::Feature::X86_Lahf
, ecx
, 0);
404 setFeatureFromBit(features
, CpuInfo::Feature::X86_Sse4A
, ecx
, 6);
405 setFeatureFromBit(features
, CpuInfo::Feature::X86_MisalignSse
, ecx
, 7);
406 setFeatureFromBit(features
, CpuInfo::Feature::X86_Xop
, ecx
, 11);
407 setFeatureFromBit(features
, CpuInfo::Feature::X86_Fma4
, ecx
, 16);
408 setFeatureFromBit(features
, CpuInfo::Feature::X86_PDPE1GB
, edx
, 26);
409 setFeatureFromBit(features
, CpuInfo::Feature::X86_Rdtscp
, edx
, 27);
412 if (maxExtLevel
>= 0x80000005)
414 // Get the x86 CPU brand string (3 levels, 16 bytes in each)
416 for (unsigned int level
= 0x80000002; level
< 0x80000005; level
++)
418 executeX86CpuID(level
, 0, &eax
, &ebx
, &ecx
, &edx
);
419 // Add eax, ebx, ecx, edx contents as 4 chars each to the brand string
420 brand
->append(reinterpret_cast<const char*>(&eax
), sizeof(eax
));
421 brand
->append(reinterpret_cast<const char*>(&ebx
), sizeof(ebx
));
422 brand
->append(reinterpret_cast<const char*>(&ecx
), sizeof(ecx
));
423 brand
->append(reinterpret_cast<const char*>(&edx
), sizeof(edx
));
428 if (maxStdLevel
>= 0x7)
430 executeX86CpuID(0x7, 0, &eax
, &ebx
, &ecx
, &edx
);
432 setFeatureFromBit(features
, CpuInfo::Feature::X86_Hle
, ebx
, 4);
433 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx2
, ebx
, 5);
434 setFeatureFromBit(features
, CpuInfo::Feature::X86_Rtm
, ebx
, 11);
435 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx512F
, ebx
, 16);
436 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx512PF
, ebx
, 26);
437 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx512ER
, ebx
, 27);
438 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx512CD
, ebx
, 28);
439 setFeatureFromBit(features
, CpuInfo::Feature::X86_Sha
, ebx
, 29);
440 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx512BW
, ebx
, 30);
441 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx512VL
, ebx
, 31);
443 executeX86CpuID(0x7, 0x1, &eax
, &ebx
, &ecx
, &edx
);
444 setFeatureFromBit(features
, CpuInfo::Feature::X86_Avx512BF16
, eax
, 5);
446 if (features
->count(CpuInfo::Feature::X86_Avx512F
) != 0)
448 // Only checking if the CPU supports AVX-512. There is no CPUID bit for this.
449 if (detectProcCpuInfoSecondAvx512FMA(*brand
, *model
))
451 features
->insert(CpuInfo::Feature::X86_Avx512secondFMA
);
457 if (maxExtLevel
>= 0x80000007)
459 executeX86CpuID(0x80000007, 0, &eax
, &ebx
, &ecx
, &edx
);
461 setFeatureFromBit(features
, CpuInfo::Feature::X86_NonstopTsc
, edx
, 8);
466 /*! \brief Return a vector with x86 APIC IDs for all threads
468 * \param haveX2Apic True if the processors supports x2APIC, otherwise vanilla APIC.
470 * \returns A new std::vector of unsigned integer APIC IDs, one for each
471 * logical processor in the system.
473 std::vector
<unsigned int> detectX86ApicIDs(bool gmx_unused haveX2Apic
)
475 std::vector
<unsigned int> apicID
;
477 // We cannot just ask for all APIC IDs, but must force execution on each
478 // hardware thread and extract the APIC id there.
479 #if HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF
480 unsigned int eax
, ebx
, ecx
, edx
;
481 unsigned int nApic
= sysconf(_SC_NPROCESSORS_ONLN
);
482 cpu_set_t saveCpuSet
;
484 sched_getaffinity(0, sizeof(cpu_set_t
), &saveCpuSet
);
486 for (unsigned int i
= 0; i
< nApic
; i
++)
489 sched_setaffinity(0, sizeof(cpu_set_t
), &cpuSet
);
492 executeX86CpuID(0xb, 0, &eax
, &ebx
, &ecx
, &edx
);
493 apicID
.push_back(edx
);
497 executeX86CpuID(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
498 apicID
.push_back(ebx
>> 24);
502 sched_setaffinity(0, sizeof(cpu_set_t
), &saveCpuSet
);
503 #elif GMX_NATIVE_WINDOWS
504 unsigned int eax
, ebx
, ecx
, edx
;
506 GetSystemInfo(&sysinfo
);
507 unsigned int nApic
= sysinfo
.dwNumberOfProcessors
;
508 unsigned int saveAffinity
= SetThreadAffinityMask(GetCurrentThread(), 1);
509 for (DWORD_PTR i
= 0; i
< nApic
; i
++)
511 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR
)1) << i
));
515 executeX86CpuID(0xb, 0, &eax
, &ebx
, &ecx
, &edx
);
516 apicID
.push_back(edx
);
520 executeX86CpuID(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
521 apicID
.push_back(ebx
>> 24);
524 SetThreadAffinityMask(GetCurrentThread(), saveAffinity
);
/*! \brief Utility to renumber indices extracted from APIC IDs
 *
 * \param v  Vector with unsigned integer indices
 *
 * Each element is replaced, in place, by the rank of its value among the
 * unique values present in the vector, starting from 0. For example, the
 * vector {0,1,2,8,9,10,8,9,10,0,1,2} will be rewritten to
 * {0,1,2,3,4,5,3,4,5,0,1,2}. Nothing is returned; an empty vector is left
 * unchanged.
 */
void renumberIndex(std::vector<unsigned int>* v)
{
    std::vector<unsigned int> uniqueSortedV(*v);
    std::sort(uniqueSortedV.begin(), uniqueSortedV.end());
    uniqueSortedV.erase(std::unique(uniqueSortedV.begin(), uniqueSortedV.end()), uniqueSortedV.end());

    // Every element of v is present in uniqueSortedV, so the binary search
    // always lands exactly on it and its position is the new index.
    for (unsigned int& value : *v)
    {
        const auto pos = std::lower_bound(uniqueSortedV.begin(), uniqueSortedV.end(), value);
        value          = static_cast<unsigned int>(std::distance(uniqueSortedV.begin(), pos));
    }
}
/*! \brief Describes how an APIC ID is split into bit fields
 *
 * The hardware-thread field occupies the least significant bits and the core
 * field follows directly above it.
 */
struct ApicIdLayout
{
    unsigned int hwThreadBits; //!< Width in bits of the hw-thread field (least significant bits)
    unsigned int coreBits;     //!< Width in bits of the core field that follows the hw-thread bits
};
563 /*! \brief Detect the APIC ID layout for x2APIC
565 ApicIdLayout
detectX2ApicIdLayout()
573 executeX86CpuID(0xb, 0, &eax
, &ebx
, &ecx
, &edx
);
574 layout
.hwThreadBits
= eax
& 0x1f;
575 executeX86CpuID(0xb, 1, &eax
, &ebx
, &ecx
, &edx
);
576 layout
.coreBits
= (eax
& 0x1f) - layout
.hwThreadBits
;
581 /*! \brief Detect the APIC ID layout for standard APIC or xAPIC on AMD
583 * \param[in] maxExtLevel The largest CPUID extended function input value supported by the processor implementation
585 ApicIdLayout
detectAmdApicIdLayout(unsigned int maxExtLevel
)
593 executeX86CpuID(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
594 int family
= ((eax
& 0x0ff00000) >> 20) + ((eax
& 0x00000f00) >> 8);
595 executeX86CpuID(0x80000001, 0, &eax
, &ebx
, &ecx
, &edx
);
596 bool haveExtendedTopology
= (ecx
& (1 << 22)) != 0U;
598 // NOTE: Here we assume 1 thread per core, unless we have family >= 17h
599 layout
.hwThreadBits
= 0;
600 if (family
>= 0x17 && haveExtendedTopology
&& maxExtLevel
>= 0x8000001e)
602 executeX86CpuID(0x8000001e, 1, &eax
, &ebx
, &ecx
, &edx
);
603 int numThreadsPerCore
= ((ebx
>> 8) & 0xff) + 1;
604 // NOTE: The AMD documentation only specifies the layout of apicid
605 // when we have 1 or 2 threads per core.
606 while (numThreadsPerCore
> (1 << layout
.hwThreadBits
))
608 layout
.hwThreadBits
++;
612 // Get number of core bits in apic ID - try modern extended method first
613 executeX86CpuID(0x80000008, 0, &eax
, &ebx
, &ecx
, &edx
);
614 layout
.coreBits
= (ecx
>> 12) & 0xf;
615 if (layout
.coreBits
== 0)
617 // Legacy method for old single/dual core AMD CPUs
619 while (i
>> layout
.coreBits
)
628 /*! \brief Try to detect basic CPU topology information using x86 cpuid
630 * If x2APIC support is present, this is our first choice, otherwise we
631 * attempt to use old vanilla APIC.
633 * \return A new vector of entries with socket, core, hwthread information
634 * for each logical processor.
636 std::vector
<CpuInfo::LogicalProcessor
> detectX86LogicalProcessors()
642 unsigned int maxStdLevel
;
643 unsigned int maxExtLevel
;
647 std::vector
<CpuInfo::LogicalProcessor
> logicalProcessors
;
649 // Find largest standard & extended level input values allowed
650 executeX86CpuID(0x0, 0, &eax
, &ebx
, &ecx
, &edx
);
652 executeX86CpuID(0x80000000, 0, &eax
, &ebx
, &ecx
, &edx
);
655 if (maxStdLevel
>= 0x1)
657 executeX86CpuID(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
658 haveX2Apic
= ((ecx
& (1 << 21)) != 0U) && maxStdLevel
>= 0xb;
659 haveApic
= ((edx
& (1 << 9)) != 0U) && maxExtLevel
>= 0x80000008;
667 if (haveX2Apic
|| haveApic
)
670 // Get bits for cores and hardware threads
673 layout
= detectX2ApicIdLayout();
677 if (detectX86Vendor() == CpuInfo::Vendor::Amd
|| detectX86Vendor() == CpuInfo::Vendor::Hygon
)
679 layout
= detectAmdApicIdLayout(maxExtLevel
);
681 if (layout
.hwThreadBits
> 1)
683 // At the time of writing this code we do not know what
684 // to do with more than 2 threads, so return empty.
685 return logicalProcessors
;
690 // We do not know the APIC ID layout, return empty.
691 return logicalProcessors
;
695 std::vector
<unsigned int> apicID
= detectX86ApicIDs(haveX2Apic
);
699 // APIC IDs can be buggy, and it is always a mess. Typically more bits are
700 // reserved than needed, and the numbers might not increment by 1 even in
701 // a single socket or core. Extract, renumber, and check that things make sense.
702 unsigned int hwThreadMask
= (1 << layout
.hwThreadBits
) - 1;
703 unsigned int coreMask
= (1 << layout
.coreBits
) - 1;
704 std::vector
<unsigned int> hwThreadRanks
;
705 std::vector
<unsigned int> coreRanks
;
706 std::vector
<unsigned int> socketRanks
;
708 for (auto a
: apicID
)
710 hwThreadRanks
.push_back(static_cast<int>(a
& hwThreadMask
));
711 coreRanks
.push_back(static_cast<int>((a
>> layout
.hwThreadBits
) & coreMask
));
712 socketRanks
.push_back(static_cast<int>(a
>> (layout
.coreBits
+ layout
.hwThreadBits
)));
715 renumberIndex(&hwThreadRanks
);
716 renumberIndex(&coreRanks
);
717 renumberIndex(&socketRanks
);
719 unsigned int hwThreadRankSize
=
720 1 + *std::max_element(hwThreadRanks
.begin(), hwThreadRanks
.end());
721 unsigned int coreRankSize
= 1 + *std::max_element(coreRanks
.begin(), coreRanks
.end());
722 unsigned int socketRankSize
= 1 + *std::max_element(socketRanks
.begin(), socketRanks
.end());
724 if (socketRankSize
* coreRankSize
* hwThreadRankSize
== apicID
.size())
726 // Alright, everything looks consistent, so put it in the result
727 for (std::size_t i
= 0; i
< apicID
.size(); i
++)
729 // While the internal APIC IDs are always unsigned integers, we also cast to
730 // plain integers for the externally exposed vectors, since that will make
731 // it possible to use '-1' for invalid entries in the future.
732 logicalProcessors
.push_back(
733 { int(socketRanks
[i
]), int(coreRanks
[i
]), int(hwThreadRanks
[i
]) });
738 return logicalProcessors
; // Will only have contents if everything worked
742 /******************************************************************************
744 * Generic Linux detection by parsing /proc/cpuinfo *
746 ******************************************************************************/
748 /*! \brief Parse /proc/cpuinfo into a simple string map
750 * This routine will read the contents of /proc/cpuinfo, and for each
751 * line that is not empty we will assign the (trimmed) string to the right of
752 * the colon as a key, and the left-hand side as the value in the map.
753 * For multi-processor systems where lines are repeated the latter lines will
754 * overwrite the first occurrence.
756 * \return New map with the contents. If the file is not available, the returned
759 std::map
<std::string
, std::string
> parseProcCpuInfo()
761 std::ifstream
procCpuInfo("/proc/cpuinfo");
763 std::map
<std::string
, std::string
> cpuInfo
;
765 while (std::getline(procCpuInfo
, line
))
769 std::stringstream
iss(line
);
772 std::getline(iss
, key
, ':'); // part before colon
773 std::getline(iss
, val
); // part after colon
776 // put it in the map. This will overwrite previous processors, but we don't care.
784 /*! \brief Try to detect vendor from /proc/cpuinfo
786 * \param cpuInfo Map returned from parseProcCpuinfo()
788 * This routine tries to match a few common labels in /proc/cpuinfo to see if
789 * they begin with the name of a standard vendor. If the file cannot be read
790 * or if no match is found, we return gmx::CpuInfo::Vendor::Unknown.
792 CpuInfo::Vendor
detectProcCpuInfoVendor(const std::map
<std::string
, std::string
>& cpuInfo
)
794 const std::map
<std::string
, CpuInfo::Vendor
> testVendors
= {
795 { "GenuineIntel", CpuInfo::Vendor::Intel
},
796 { "Intel", CpuInfo::Vendor::Intel
},
797 { "AuthenticAmd", CpuInfo::Vendor::Amd
},
798 { "AMD", CpuInfo::Vendor::Amd
},
799 { "ARM", CpuInfo::Vendor::Arm
},
800 { "AArch64", CpuInfo::Vendor::Arm
},
801 { "Fujitsu", CpuInfo::Vendor::Fujitsu
},
802 { "IBM", CpuInfo::Vendor::Ibm
},
803 { "POWER", CpuInfo::Vendor::Ibm
},
804 { "Oracle", CpuInfo::Vendor::Oracle
},
805 { "HygonGenuine", CpuInfo::Vendor::Hygon
},
806 { "Hygon", CpuInfo::Vendor::Hygon
},
809 // For each label in /proc/cpuinfo, compare the value to the name in the
810 // testNames map above, and if it's a match return the vendor.
811 for (auto& l
: { "vendor_id", "vendor", "manufacture", "model", "processor", "cpu" })
813 if (cpuInfo
.count(l
) != 0U)
815 // there was a line with this left-hand side in /proc/cpuinfo
816 const std::string
& s1
= cpuInfo
.at(l
);
818 for (auto& t
: testVendors
)
820 const std::string
& s2
= t
.first
;
822 // If the entire name we are testing (s2) matches the first part of
823 // the string after the colon in /proc/cpuinfo (s1) we found our vendor
824 if (std::equal(s2
.begin(), s2
.end(), s1
.begin(), [](const char& x
, const char& y
) -> bool {
825 return tolower(x
) == tolower(y
);
833 return CpuInfo::Vendor::Unknown
;
837 /*! \brief Detect IBM processor name and features from /proc/cpuinfo
839 * \param cpuInfo Map returned from parseProcCpuinfo()
840 * \param[out] brand String where to write the brand string
841 * \param[out] features Feature set where supported features are inserted
843 * This routine tries to match a few common labels in /proc/cpuinfo to see if
844 * we can find the processor name and features. It is likely fragile.
846 void detectProcCpuInfoIbm(const std::map
<std::string
, std::string
>& cpuInfo
,
848 std::set
<CpuInfo::Feature
>* features
)
850 // Get brand string from 'cpu' label if present, otherwise 'Processor'
851 if (cpuInfo
.count("cpu") != 0U)
853 *brand
= cpuInfo
.at("cpu");
855 else if (cpuInfo
.count("Processor") != 0U)
857 *brand
= cpuInfo
.at("Processor");
860 if (brand
->find("A2") != std::string::npos
)
862 // If the processor identification contains "A2", this is BlueGene/Q with QPX
863 features
->insert(CpuInfo::Feature::Ibm_Qpx
);
866 for (auto& l
: { "model name", "model", "Processor", "cpu" })
868 if (cpuInfo
.count(l
) != 0U)
870 std::string s1
= cpuInfo
.at(l
);
871 std::transform(s1
.begin(), s1
.end(), s1
.begin(), ::tolower
);
873 if (s1
.find("altivec") != std::string::npos
)
875 features
->insert(CpuInfo::Feature::Ibm_Vmx
);
876 // If this is a power6, we only have VMX. All later processors have VSX.
877 if (s1
.find("power6") == std::string::npos
)
879 features
->insert(CpuInfo::Feature::Ibm_Vsx
);
887 /*! \brief Detect ARM processor name and features from /proc/cpuinfo
889 * \param cpuInfo Map returned from parseProcCpuinfo()
890 * \param[out] brand String where to write the brand string
891 * \param[out] family Major version of processor
892 * \param[out] model Middle version of processor
893 * \param[out] stepping Minor version of processor
894 * \param[out] features Feature set where supported features are inserted
896 * This routine tries to match a few common labels in /proc/cpuinfo to see if
897 * we can find the processor name and features. It is likely fragile.
899 void detectProcCpuInfoArm(const std::map
<std::string
, std::string
>& cpuInfo
,
904 std::set
<CpuInfo::Feature
>* features
)
906 if (cpuInfo
.count("Processor") != 0U)
908 *brand
= cpuInfo
.at("Processor");
910 else if (cpuInfo
.count("model name") != 0U)
912 *brand
= cpuInfo
.at("model name");
915 if (cpuInfo
.count("CPU architecture") != 0U)
917 *family
= std::strtol(cpuInfo
.at("CPU architecture").c_str(), nullptr, 10);
918 // For some 64-bit CPUs it appears to say 'AArch64' instead
919 if (*family
== 0 && cpuInfo
.at("CPU architecture").find("AArch64") != std::string::npos
)
921 *family
= 8; // fragile - no idea how a future ARMv9 will be represented in this case
924 if (cpuInfo
.count("CPU variant") != 0U)
926 *model
= std::strtol(cpuInfo
.at("CPU variant").c_str(), nullptr, 16);
928 if (cpuInfo
.count("CPU revision") != 0U)
930 *stepping
= std::strtol(cpuInfo
.at("CPU revision").c_str(), nullptr, 10);
933 if (cpuInfo
.count("Features") != 0U)
935 const std::string
& s
= cpuInfo
.at("Features");
936 if (s
.find("neon") != std::string::npos
)
938 features
->insert(CpuInfo::Feature::Arm_Neon
);
940 if (s
.find("asimd") != std::string::npos
)
942 // At least Jetson TX1 runs a 32-bit environment by default, although
943 // the kernel is 64-bits, and reports asimd feature flags. We cannot
944 // use Neon-asimd in this case, so make sure we are on a 64-bit platform.
945 if (sizeof(void*) == 8)
947 features
->insert(CpuInfo::Feature::Arm_NeonAsimd
);
950 if (s
.find("sve") != std::string::npos
)
952 features
->insert(CpuInfo::Feature::Arm_Sve
);
958 /*! \brief Try to detect vendor, cpu and features from /proc/cpuinfo
960 * \param[out] vendor Detected hardware vendor
961 * \param[out] brand String where to write the brand string
962 * \param[out] family Major version of processor
963 * \param[out] model Middle version of processor
964 * \param[out] stepping Minor version of processor
965 * \param[out] features Feature set where supported features are inserted
967 * This routine reads the /proc/cpuinfo file into a map and calls subroutines
968 * that attempt to parse by matching keys and values to known strings. It is
969 * much more fragile than our x86 detection, but it does not depend on
970 * specific system calls, intrinsics or assembly instructions.
972 void detectProcCpuInfo(CpuInfo::Vendor
* vendor
,
977 std::set
<CpuInfo::Feature
>* features
)
979 std::map
<std::string
, std::string
> cpuInfo
= parseProcCpuInfo();
981 if (*vendor
== CpuInfo::Vendor::Unknown
)
983 *vendor
= detectProcCpuInfoVendor(cpuInfo
);
986 // Unfortunately there is no standard for contents in /proc/cpuinfo. We cannot
987 // indiscriminately look for e.g. 'cpu' since it could be either name or an index.
988 // To handle this slightly better we use one subroutine per vendor.
991 case CpuInfo::Vendor::Ibm
: detectProcCpuInfoIbm(cpuInfo
, brand
, features
); break;
993 case CpuInfo::Vendor::Arm
:
994 detectProcCpuInfoArm(cpuInfo
, brand
, family
, model
, stepping
, features
);
998 // We only have a single check for fujitsu for now
1000 features
->insert(CpuInfo::Feature::Fujitsu_HpcAce
);
1010 CpuInfo
CpuInfo::detect()
1014 if (c_architecture
== Architecture::X86
)
1016 result
.vendor_
= detectX86Vendor();
1018 if (result
.vendor_
== CpuInfo::Vendor::Intel
)
1020 result
.features_
.insert(CpuInfo::Feature::X86_Intel
);
1022 else if (result
.vendor_
== CpuInfo::Vendor::Amd
)
1024 result
.features_
.insert(CpuInfo::Feature::X86_Amd
);
1026 else if (result
.vendor_
== CpuInfo::Vendor::Hygon
)
1028 result
.features_
.insert(CpuInfo::Feature::X86_Hygon
);
1030 detectX86Features(&result
.brandString_
, &result
.family_
, &result
.model_
, &result
.stepping_
,
1032 result
.logicalProcessors_
= detectX86LogicalProcessors();
1037 if (c_architecture
== Architecture::Arm
)
1039 result
.vendor_
= CpuInfo::Vendor::Arm
;
1041 else if (c_architecture
== Architecture::PowerPC
)
1043 result
.vendor_
= CpuInfo::Vendor::Ibm
;
1046 #if defined __aarch64__ || (defined _M_ARM && _M_ARM >= 8)
1047 result
.features_
.insert(Feature::Arm_Neon
); // ARMv8 always has Neon
1048 result
.features_
.insert(Feature::Arm_NeonAsimd
); // ARMv8 always has Neon-asimd
1050 #if defined __arch64__ && defined __ARM_FEATURE_SVE
1051 result
.features_
.insert(Feature::Arm_Sve
);
1055 result
.vendor_
= CpuInfo::Vendor::Oracle
;
1058 // On Linux we might be able to find information in /proc/cpuinfo. If vendor or brand
1059 // is set to a known value this routine will not overwrite it.
1060 detectProcCpuInfo(&result
.vendor_
, &result
.brandString_
, &result
.family_
, &result
.model_
,
1061 &result
.stepping_
, &result
.features_
);
1064 if (!result
.logicalProcessors_
.empty())
1066 result
.supportLevel_
= CpuInfo::SupportLevel::LogicalProcessorInfo
;
1068 else if (!result
.features_
.empty())
1070 result
.supportLevel_
= CpuInfo::SupportLevel::Features
;
1072 else if (result
.vendor_
!= CpuInfo::Vendor::Unknown
1073 || result
.brandString_
!= "Unknown CPU brand")
1075 result
.supportLevel_
= CpuInfo::SupportLevel::Name
;
1079 result
.supportLevel_
= CpuInfo::SupportLevel::None
;
// Default constructor: starts from an unknown vendor and the placeholder brand
// string "Unknown CPU brand". CpuInfo::detect() compares against exactly these
// two values when deciding whether at least a CPU name was found, so the
// literal here must stay in sync with the one used there.
1085 CpuInfo::CpuInfo() :
1086 vendor_(CpuInfo::Vendor::Unknown
),
1087 brandString_("Unknown CPU brand"),
1094 const std::string
& CpuInfo::vendorString() const
1096 static const std::map
<Vendor
, std::string
> vendorStrings
= {
1097 { Vendor::Unknown
, "Unknown vendor" }, { Vendor::Intel
, "Intel" }, { Vendor::Amd
, "AMD" },
1098 { Vendor::Fujitsu
, "Fujitsu" }, { Vendor::Ibm
, "IBM" }, { Vendor::Arm
, "ARM" },
1099 { Vendor::Oracle
, "Oracle" }, { Vendor::Hygon
, "Hygon" },
1102 return vendorStrings
.at(vendor_
);
1106 const std::string
& CpuInfo::featureString(Feature f
)
1108 static const std::map
<Feature
, std::string
> featureStrings
= {
1109 { Feature::X86_Aes
, "aes" },
1110 { Feature::X86_Amd
, "amd" },
1111 { Feature::X86_Apic
, "apic" },
1112 { Feature::X86_Avx
, "avx" },
1113 { Feature::X86_Avx2
, "avx2" },
1114 { Feature::X86_Avx512F
, "avx512f" },
1115 { Feature::X86_Avx512PF
, "avx512pf" },
1116 { Feature::X86_Avx512ER
, "avx512er" },
1117 { Feature::X86_Avx512CD
, "avx512cd" },
1118 { Feature::X86_Avx512BW
, "avx512bw" },
1119 { Feature::X86_Avx512VL
, "avx512vl" },
1120 { Feature::X86_Avx512BF16
, "avx512bf16" },
1121 { Feature::X86_Avx512secondFMA
, "avx512secondFMA" },
1122 { Feature::X86_Clfsh
, "clfsh" },
1123 { Feature::X86_Cmov
, "cmov" },
1124 { Feature::X86_Cx8
, "cx8" },
1125 { Feature::X86_Cx16
, "cx16" },
1126 { Feature::X86_F16C
, "f16c" },
1127 { Feature::X86_Fma
, "fma" },
1128 { Feature::X86_Fma4
, "fma4" },
1129 { Feature::X86_Hle
, "hle" },
1130 { Feature::X86_Htt
, "htt" },
1131 { Feature::X86_Intel
, "intel" },
1132 { Feature::X86_Lahf
, "lahf" },
1133 { Feature::X86_MisalignSse
, "misalignsse" },
1134 { Feature::X86_Mmx
, "mmx" },
1135 { Feature::X86_Msr
, "msr" },
1136 { Feature::X86_NonstopTsc
, "nonstop_tsc" },
1137 { Feature::X86_Pcid
, "pcid" },
1138 { Feature::X86_Pclmuldq
, "pclmuldq" },
1139 { Feature::X86_Pdcm
, "pdcm" },
1140 { Feature::X86_PDPE1GB
, "pdpe1gb" },
1141 { Feature::X86_Popcnt
, "popcnt" },
1142 { Feature::X86_Pse
, "pse" },
1143 { Feature::X86_Rdrnd
, "rdrnd" },
1144 { Feature::X86_Rdtscp
, "rdtscp" },
1145 { Feature::X86_Rtm
, "rtm" },
1146 { Feature::X86_Sha
, "sha" },
1147 { Feature::X86_Sse2
, "sse2" },
1148 { Feature::X86_Sse3
, "sse3" },
1149 { Feature::X86_Sse4A
, "sse4a" },
1150 { Feature::X86_Sse4_1
, "sse4.1" },
1151 { Feature::X86_Sse4_2
, "sse4.2" },
1152 { Feature::X86_Ssse3
, "ssse3" },
1153 { Feature::X86_Tdt
, "tdt" },
1154 { Feature::X86_X2Apic
, "x2apic" },
1155 { Feature::X86_Xop
, "xop" },
1156 { Feature::Arm_Neon
, "neon" },
1157 { Feature::Arm_NeonAsimd
, "neon_asimd" },
1158 { Feature::Arm_Sve
, "sve" },
1159 { Feature::Ibm_Qpx
, "qpx" },
1160 { Feature::Ibm_Vmx
, "vmx" },
1161 { Feature::Ibm_Vsx
, "vsx" },
1162 { Feature::Fujitsu_HpcAce
, "hpc-ace" },
1163 { Feature::X86_Hygon
, "hygon" }
1165 return featureStrings
.at(f
);
1169 bool cpuIsX86Nehalem(const CpuInfo
& cpuInfo
)
1171 return (cpuInfo
.vendor() == CpuInfo::Vendor::Intel
&& cpuInfo
.family() == 6
1172 && (cpuInfo
.model() == 0x2E || cpuInfo
.model() == 0x1A || cpuInfo
.model() == 0x1E
1173 || cpuInfo
.model() == 0x2F || cpuInfo
.model() == 0x2C || cpuInfo
.model() == 0x25));
1176 bool cpuIsAmdZen1(const CpuInfo
& cpuInfo
)
1178 /* Both Zen/Zen+/Zen2 have family==23
1179 * Model numbers for Zen:
1180 * 1) Naples, Whitehaven, Summit Ridge, and Snowy Owl;
1182 * Model numbers for Zen+:
1183 * 8) Pinnacle Ridge;
1185 * Hygon got license for Zen1, but not Zen2 (https://www.tomshardware.com/news/amd-zen-china-x86-ip-license,39573.html)
1187 return (cpuInfo
.vendor() == CpuInfo::Vendor::Amd
&& cpuInfo
.family() == 23
1188 && (cpuInfo
.model() == 1 || cpuInfo
.model() == 17 || cpuInfo
.model() == 8
1189 || cpuInfo
.model() == 24))
1190 || (cpuInfo
.vendor() == CpuInfo::Vendor::Hygon
);
1195 #ifdef GMX_CPUINFO_STANDALONE
1196 int main(int argc
, char** argv
)
1201 "Usage:\n\n%s [flags]\n\n"
1202 "Available flags:\n"
1203 "-vendor Print CPU vendor.\n"
1204 "-brand Print CPU brand string.\n"
1205 "-family Print CPU family version.\n"
1206 "-model Print CPU model version.\n"
1207 "-stepping Print CPU stepping version.\n"
1208 "-features Print CPU feature flags.\n",
1213 std::string
arg(argv
[1]);
1214 gmx::CpuInfo
cpuInfo(gmx::CpuInfo::detect());
1216 if (arg
== "-vendor")
1218 printf("%s\n", cpuInfo
.vendorString().c_str());
1220 else if (arg
== "-brand")
1222 printf("%s\n", cpuInfo
.brandString().c_str());
1224 else if (arg
== "-family")
1226 printf("%d\n", cpuInfo
.family());
1228 else if (arg
== "-model")
1230 printf("%d\n", cpuInfo
.model());
1232 else if (arg
== "-stepping")
1234 printf("%d\n", cpuInfo
.stepping());
1236 else if (arg
== "-features")
1238 // Separate the feature strings with spaces. Note that in the
1239 // GROMACS cmake code, surrounding whitespace is first
1240 // stripped by the CPU detection routine, and then added back
1241 // in the code for making the SIMD suggestion.
1242 for (auto& f
: cpuInfo
.featureSet())
1244 printf("%s ", cpuInfo
.featureString(f
).c_str());
1248 else if (arg
== "-topology")
1250 // Undocumented debug option, usually not present in standalone version
1251 for (auto& t
: cpuInfo
.logicalProcessors())
1253 printf("%3u %3u %3u\n", t
.socketRankInMachine
, t
.coreRankInSocket
, t
.hwThreadRankInCore
);