1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is based on LLVM's lib/Support/Host.cpp.
10 // It implements the operating system Host concept and builtin
11 // __cpu_model for the compiler_rt library for x86 and
12 // __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64.
14 //===----------------------------------------------------------------------===//
16 #ifndef __has_attribute
17 #define __has_attribute(attr) 0
20 #if __has_attribute(constructor)
22 // Ordinarily init priorities below 101 are disallowed as they are reserved for the
23 // implementation. However, we are the implementation, so silence the diagnostic,
24 // since it doesn't apply to us.
25 #pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
27 // We're choosing init priority 90 to force our constructors to run before any
28 // constructors in the end user application (starting at priority 101). This value
29 // matches the libgcc choice for the same functions.
30 #define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
32 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
33 // this runs during initialization.
34 #define CONSTRUCTOR_ATTRIBUTE
37 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
39 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
51 enum VendorSignatures
{
52 SIG_INTEL
= 0x756e6547, // Genu
53 SIG_AMD
= 0x68747541, // Auth
56 enum ProcessorVendors
{
85 enum ProcessorSubtypes
{
86 INTEL_COREI7_NEHALEM
= 1,
87 INTEL_COREI7_WESTMERE
,
88 INTEL_COREI7_SANDYBRIDGE
,
97 INTEL_COREI7_IVYBRIDGE
,
99 INTEL_COREI7_BROADWELL
,
100 INTEL_COREI7_SKYLAKE
,
101 INTEL_COREI7_SKYLAKE_AVX512
,
102 INTEL_COREI7_CANNONLAKE
,
103 INTEL_COREI7_ICELAKE_CLIENT
,
104 INTEL_COREI7_ICELAKE_SERVER
,
106 INTEL_COREI7_CASCADELAKE
,
107 INTEL_COREI7_TIGERLAKE
,
108 INTEL_COREI7_COOPERLAKE
,
109 INTEL_COREI7_SAPPHIRERAPIDS
,
110 INTEL_COREI7_ALDERLAKE
,
112 INTEL_COREI7_ROCKETLAKE
,
113 ZHAOXIN_FAM7H_LUJIAZUI
,
115 INTEL_COREI7_GRANITERAPIDS
,
119 enum ProcessorFeatures
{
148 FEATURE_AVX5124VNNIW
,
149 FEATURE_AVX5124FMAPS
,
150 FEATURE_AVX512VPOPCNTDQ
,
155 FEATURE_AVX512BITALG
,
157 FEATURE_AVX512VP2INTERSECT
,
161 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
162 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
163 // support. Consequently, for i386, the presence of CPUID is checked first
164 // via the corresponding eflags bit.
165 static bool isCpuIdSupported(void) {
166 #if defined(__GNUC__) || defined(__clang__)
167 #if defined(__i386__)
168 int __cpuid_supported
;
171 " movl %%eax,%%ecx\n"
172 " xorl $0x00200000,%%eax\n"
178 " cmpl %%eax,%%ecx\n"
182 : "=r"(__cpuid_supported
)
185 if (!__cpuid_supported
)
193 // This code is copied from lib/Support/Host.cpp.
194 // Changes to either file should be mirrored in the other.
196 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
197 /// the specified arguments. If we can't run cpuid on the host, return true.
198 static bool getX86CpuIDAndInfo(unsigned value
, unsigned *rEAX
, unsigned *rEBX
,
199 unsigned *rECX
, unsigned *rEDX
) {
200 #if defined(__GNUC__) || defined(__clang__)
201 #if defined(__x86_64__)
202 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
203 // FIXME: should we save this for Clang?
204 __asm__("movq\t%%rbx, %%rsi\n\t"
206 "xchgq\t%%rbx, %%rsi\n\t"
207 : "=a"(*rEAX
), "=S"(*rEBX
), "=c"(*rECX
), "=d"(*rEDX
)
210 #elif defined(__i386__)
211 __asm__("movl\t%%ebx, %%esi\n\t"
213 "xchgl\t%%ebx, %%esi\n\t"
214 : "=a"(*rEAX
), "=S"(*rEBX
), "=c"(*rECX
), "=d"(*rEDX
)
220 #elif defined(_MSC_VER)
221 // The MSVC intrinsic is portable across x86 and x64.
223 __cpuid(registers
, value
);
224 *rEAX
= registers
[0];
225 *rEBX
= registers
[1];
226 *rECX
= registers
[2];
227 *rEDX
= registers
[3];
234 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
235 /// the 4 values in the specified arguments. If we can't run cpuid on the host,
237 static bool getX86CpuIDAndInfoEx(unsigned value
, unsigned subleaf
,
238 unsigned *rEAX
, unsigned *rEBX
, unsigned *rECX
,
240 #if defined(__GNUC__) || defined(__clang__)
241 #if defined(__x86_64__)
242 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
243 // FIXME: should we save this for Clang?
244 __asm__("movq\t%%rbx, %%rsi\n\t"
246 "xchgq\t%%rbx, %%rsi\n\t"
247 : "=a"(*rEAX
), "=S"(*rEBX
), "=c"(*rECX
), "=d"(*rEDX
)
248 : "a"(value
), "c"(subleaf
));
250 #elif defined(__i386__)
251 __asm__("movl\t%%ebx, %%esi\n\t"
253 "xchgl\t%%ebx, %%esi\n\t"
254 : "=a"(*rEAX
), "=S"(*rEBX
), "=c"(*rECX
), "=d"(*rEDX
)
255 : "a"(value
), "c"(subleaf
));
260 #elif defined(_MSC_VER)
262 __cpuidex(registers
, value
, subleaf
);
263 *rEAX
= registers
[0];
264 *rEBX
= registers
[1];
265 *rECX
= registers
[2];
266 *rEDX
= registers
[3];
273 // Read extended control register 0 (XCR0). Used to detect features such as AVX.
274 static bool getX86XCR0(unsigned *rEAX
, unsigned *rEDX
) {
275 #if defined(__GNUC__) || defined(__clang__)
276 // Check xgetbv; this uses a .byte sequence instead of the instruction
277 // directly because older assemblers do not include support for xgetbv and
278 // there is no easy way to conditionally compile based on the assembler used.
279 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX
), "=d"(*rEDX
) : "c"(0));
281 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
282 unsigned long long Result
= _xgetbv(_XCR_XFEATURE_ENABLED_MASK
);
284 *rEDX
= Result
>> 32;
291 static void detectX86FamilyModel(unsigned EAX
, unsigned *Family
,
293 *Family
= (EAX
>> 8) & 0xf; // Bits 8 - 11
294 *Model
= (EAX
>> 4) & 0xf; // Bits 4 - 7
295 if (*Family
== 6 || *Family
== 0xf) {
297 // Examine extended family ID if family ID is F.
298 *Family
+= (EAX
>> 20) & 0xff; // Bits 20 - 27
299 // Examine extended model ID if family ID is 6 or F.
300 *Model
+= ((EAX
>> 16) & 0xf) << 4; // Bits 16 - 19
305 getIntelProcessorTypeAndSubtype(unsigned Family
, unsigned Model
,
306 const unsigned *Features
,
307 unsigned *Type
, unsigned *Subtype
) {
// Evaluates to 1 when feature bit F is set in the Features word array that is
// in scope at the point of use, and to 0 otherwise. The mask is built from an
// unsigned one (matching setFeature) so that testing bit 31 of a word never
// shifts into the sign bit, and the whole expansion is parenthesized so the
// macro can be used safely inside larger expressions.
#define testFeature(F)                                                         \
  ((Features[(F) / 32] & (1U << ((F) % 32))) != 0)
311 // We select CPU strings to match the code in Host.cpp, but we don't use them
318 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
319 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
320 // mobile processor, Intel Core 2 Extreme processor, Intel
321 // Pentium Dual-Core processor, Intel Xeon processor, model
322 // 0Fh. All processors are manufactured using the 65 nm process.
323 case 0x16: // Intel Celeron processor model 16h. All processors are
324 // manufactured using the 65 nm process
328 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
329 // 17h. All processors are manufactured using the 45 nm process.
331 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
332 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
333 // the 45 nm process.
337 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
338 // processors are manufactured using the 45 nm process.
339 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
340 // As found in a Summer 2010 model iMac.
342 case 0x2e: // Nehalem EX
344 *Type
= INTEL_COREI7
;
345 *Subtype
= INTEL_COREI7_NEHALEM
;
347 case 0x25: // Intel Core i7, laptop version.
348 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
349 // processors are manufactured using the 32 nm process.
350 case 0x2f: // Westmere EX
352 *Type
= INTEL_COREI7
;
353 *Subtype
= INTEL_COREI7_WESTMERE
;
355 case 0x2a: // Intel Core i7 processor. All processors are manufactured
356 // using the 32 nm process.
359 *Type
= INTEL_COREI7
;
360 *Subtype
= INTEL_COREI7_SANDYBRIDGE
;
363 case 0x3e: // Ivy Bridge EP
365 *Type
= INTEL_COREI7
;
366 *Subtype
= INTEL_COREI7_IVYBRIDGE
;
375 *Type
= INTEL_COREI7
;
376 *Subtype
= INTEL_COREI7_HASWELL
;
385 *Type
= INTEL_COREI7
;
386 *Subtype
= INTEL_COREI7_BROADWELL
;
390 case 0x4e: // Skylake mobile
391 case 0x5e: // Skylake desktop
392 case 0x8e: // Kaby Lake mobile
393 case 0x9e: // Kaby Lake desktop
394 case 0xa5: // Comet Lake-H/S
395 case 0xa6: // Comet Lake-U
397 *Type
= INTEL_COREI7
;
398 *Subtype
= INTEL_COREI7_SKYLAKE
;
404 *Type
= INTEL_COREI7
;
405 *Subtype
= INTEL_COREI7_ROCKETLAKE
;
410 *Type
= INTEL_COREI7
;
411 if (testFeature(FEATURE_AVX512BF16
)) {
413 *Subtype
= INTEL_COREI7_COOPERLAKE
;
414 } else if (testFeature(FEATURE_AVX512VNNI
)) {
416 *Subtype
= INTEL_COREI7_CASCADELAKE
;
418 CPU
= "skylake-avx512";
419 *Subtype
= INTEL_COREI7_SKYLAKE_AVX512
;
426 *Type
= INTEL_COREI7
;
427 *Subtype
= INTEL_COREI7_CANNONLAKE
;
433 CPU
= "icelake-client";
434 *Type
= INTEL_COREI7
;
435 *Subtype
= INTEL_COREI7_ICELAKE_CLIENT
;
442 *Type
= INTEL_COREI7
;
443 *Subtype
= INTEL_COREI7_TIGERLAKE
;
455 *Type
= INTEL_COREI7
;
456 *Subtype
= INTEL_COREI7_ALDERLAKE
;
462 CPU
= "icelake-server";
463 *Type
= INTEL_COREI7
;
464 *Subtype
= INTEL_COREI7_ICELAKE_SERVER
;
471 CPU
= "sapphirerapids";
472 *Type
= INTEL_COREI7
;
473 *Subtype
= INTEL_COREI7_SAPPHIRERAPIDS
;
479 CPU
= "graniterapids";
480 *Type
= INTEL_COREI7
;
481 *Subtype
= INTEL_COREI7_GRANITERAPIDS
;
484 case 0x1c: // Most 45 nm Intel Atom processors
485 case 0x26: // 45 nm Atom Lincroft
486 case 0x27: // 32 nm Atom Medfield
487 case 0x35: // 32 nm Atom Midview
488 case 0x36: // 32 nm Atom Midview
490 *Type
= INTEL_BONNELL
;
493 // Atom Silvermont codes from the Intel software optimization guide.
499 case 0x4c: // really airmont
501 *Type
= INTEL_SILVERMONT
;
504 case 0x5c: // Apollo Lake
505 case 0x5f: // Denverton
507 *Type
= INTEL_GOLDMONT
;
510 CPU
= "goldmont-plus";
511 *Type
= INTEL_GOLDMONT_PLUS
;
515 *Type
= INTEL_TREMONT
;
520 CPU
= "sierraforest";
521 *Type
= INTEL_SIERRAFOREST
;
527 *Type
= INTEL_GRANDRIDGE
;
540 default: // Unknown family 6 CPU.
552 getAMDProcessorTypeAndSubtype(unsigned Family
, unsigned Model
,
553 const unsigned *Features
,
554 unsigned *Type
, unsigned *Subtype
) {
555 // We select CPU strings to match the code in Host.cpp, but we don't use them
565 *Subtype
= AMDFAM10H_BARCELONA
;
568 *Subtype
= AMDFAM10H_SHANGHAI
;
571 *Subtype
= AMDFAM10H_ISTANBUL
;
582 if (Model
>= 0x60 && Model
<= 0x7f) {
584 *Subtype
= AMDFAM15H_BDVER4
;
585 break; // 60h-7Fh: Excavator
587 if (Model
>= 0x30 && Model
<= 0x3f) {
589 *Subtype
= AMDFAM15H_BDVER3
;
590 break; // 30h-3Fh: Steamroller
592 if ((Model
>= 0x10 && Model
<= 0x1f) || Model
== 0x02) {
594 *Subtype
= AMDFAM15H_BDVER2
;
595 break; // 02h, 10h-1Fh: Piledriver
598 *Subtype
= AMDFAM15H_BDVER1
;
599 break; // 00h-0Fh: Bulldozer
609 if ((Model
>= 0x30 && Model
<= 0x3f) || Model
== 0x71) {
611 *Subtype
= AMDFAM17H_ZNVER2
;
612 break; // 30h-3fh, 71h: Zen2
615 *Subtype
= AMDFAM17H_ZNVER1
;
616 break; // 00h-0Fh: Zen1
622 if (Model
<= 0x0f || (Model
>= 0x20 && Model
<= 0x5f)) {
623 // Family 19h Models 00h-0Fh - Zen3
624 // Family 19h Models 20h-2Fh - Zen3
625 // Family 19h Models 30h-3Fh - Zen3
626 // Family 19h Models 40h-4Fh - Zen3+
627 // Family 19h Models 50h-5Fh - Zen3+
628 *Subtype
= AMDFAM19H_ZNVER3
;
631 if ((Model
>= 0x10 && Model
<= 0x1f) ||
632 (Model
>= 0x60 && Model
<= 0x74) ||
633 (Model
>= 0x78 && Model
<= 0x7b) ||
634 (Model
>= 0xA0 && Model
<= 0xAf)) {
636 *Subtype
= AMDFAM19H_ZNVER4
;
641 break; // Unknown AMD CPU.
647 static void getAvailableFeatures(unsigned ECX
, unsigned EDX
, unsigned MaxLeaf
,
648 unsigned *Features
) {
649 unsigned EAX
= 0, EBX
= 0;
// Marks feature F as available by setting bit (F % 32) of word (F / 32) in
// the Features array that is in scope at the point of use. The argument and
// the expansion are parenthesized so that compound arguments (e.g. A + 1)
// select the intended word and bit.
#define setFeature(F)                                                          \
  (Features[(F) / 32] |= 1U << ((F) % 32))
655 setFeature(FEATURE_CMOV
);
657 setFeature(FEATURE_MMX
);
659 setFeature(FEATURE_SSE
);
661 setFeature(FEATURE_SSE2
);
664 setFeature(FEATURE_SSE3
);
666 setFeature(FEATURE_PCLMUL
);
668 setFeature(FEATURE_SSSE3
);
670 setFeature(FEATURE_FMA
);
672 setFeature(FEATURE_SSE4_1
);
674 setFeature(FEATURE_SSE4_2
);
676 setFeature(FEATURE_POPCNT
);
678 setFeature(FEATURE_AES
);
680 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
681 // indicates that the AVX registers will be saved and restored on context
682 // switch, then we have full AVX support.
683 const unsigned AVXBits
= (1 << 27) | (1 << 28);
684 bool HasAVX
= ((ECX
& AVXBits
) == AVXBits
) && !getX86XCR0(&EAX
, &EDX
) &&
685 ((EAX
& 0x6) == 0x6);
686 #if defined(__APPLE__)
687 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
688 // save the AVX512 context if we use AVX512 instructions, even if the bit is not
690 bool HasAVX512Save
= true;
692 // AVX512 requires additional context to be saved by the OS.
693 bool HasAVX512Save
= HasAVX
&& ((EAX
& 0xe0) == 0xe0);
697 setFeature(FEATURE_AVX
);
700 MaxLeaf
>= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX
, &EBX
, &ECX
, &EDX
);
702 if (HasLeaf7
&& ((EBX
>> 3) & 1))
703 setFeature(FEATURE_BMI
);
704 if (HasLeaf7
&& ((EBX
>> 5) & 1) && HasAVX
)
705 setFeature(FEATURE_AVX2
);
706 if (HasLeaf7
&& ((EBX
>> 8) & 1))
707 setFeature(FEATURE_BMI2
);
708 if (HasLeaf7
&& ((EBX
>> 16) & 1) && HasAVX512Save
)
709 setFeature(FEATURE_AVX512F
);
710 if (HasLeaf7
&& ((EBX
>> 17) & 1) && HasAVX512Save
)
711 setFeature(FEATURE_AVX512DQ
);
712 if (HasLeaf7
&& ((EBX
>> 21) & 1) && HasAVX512Save
)
713 setFeature(FEATURE_AVX512IFMA
);
714 if (HasLeaf7
&& ((EBX
>> 26) & 1) && HasAVX512Save
)
715 setFeature(FEATURE_AVX512PF
);
716 if (HasLeaf7
&& ((EBX
>> 27) & 1) && HasAVX512Save
)
717 setFeature(FEATURE_AVX512ER
);
718 if (HasLeaf7
&& ((EBX
>> 28) & 1) && HasAVX512Save
)
719 setFeature(FEATURE_AVX512CD
);
720 if (HasLeaf7
&& ((EBX
>> 30) & 1) && HasAVX512Save
)
721 setFeature(FEATURE_AVX512BW
);
722 if (HasLeaf7
&& ((EBX
>> 31) & 1) && HasAVX512Save
)
723 setFeature(FEATURE_AVX512VL
);
725 if (HasLeaf7
&& ((ECX
>> 1) & 1) && HasAVX512Save
)
726 setFeature(FEATURE_AVX512VBMI
);
727 if (HasLeaf7
&& ((ECX
>> 6) & 1) && HasAVX512Save
)
728 setFeature(FEATURE_AVX512VBMI2
);
729 if (HasLeaf7
&& ((ECX
>> 8) & 1))
730 setFeature(FEATURE_GFNI
);
731 if (HasLeaf7
&& ((ECX
>> 10) & 1) && HasAVX
)
732 setFeature(FEATURE_VPCLMULQDQ
);
733 if (HasLeaf7
&& ((ECX
>> 11) & 1) && HasAVX512Save
)
734 setFeature(FEATURE_AVX512VNNI
);
735 if (HasLeaf7
&& ((ECX
>> 12) & 1) && HasAVX512Save
)
736 setFeature(FEATURE_AVX512BITALG
);
737 if (HasLeaf7
&& ((ECX
>> 14) & 1) && HasAVX512Save
)
738 setFeature(FEATURE_AVX512VPOPCNTDQ
);
740 if (HasLeaf7
&& ((EDX
>> 2) & 1) && HasAVX512Save
)
741 setFeature(FEATURE_AVX5124VNNIW
);
742 if (HasLeaf7
&& ((EDX
>> 3) & 1) && HasAVX512Save
)
743 setFeature(FEATURE_AVX5124FMAPS
);
744 if (HasLeaf7
&& ((EDX
>> 8) & 1) && HasAVX512Save
)
745 setFeature(FEATURE_AVX512VP2INTERSECT
);
747 bool HasLeaf7Subleaf1
=
748 MaxLeaf
>= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX
, &EBX
, &ECX
, &EDX
);
749 if (HasLeaf7Subleaf1
&& ((EAX
>> 5) & 1) && HasAVX512Save
)
750 setFeature(FEATURE_AVX512BF16
);
752 unsigned MaxExtLevel
;
753 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel
, &EBX
, &ECX
, &EDX
);
755 bool HasExtLeaf1
= MaxExtLevel
>= 0x80000001 &&
756 !getX86CpuIDAndInfo(0x80000001, &EAX
, &EBX
, &ECX
, &EDX
);
757 if (HasExtLeaf1
&& ((ECX
>> 6) & 1))
758 setFeature(FEATURE_SSE4_A
);
759 if (HasExtLeaf1
&& ((ECX
>> 11) & 1))
760 setFeature(FEATURE_XOP
);
761 if (HasExtLeaf1
&& ((ECX
>> 16) & 1))
762 setFeature(FEATURE_FMA4
);
767 __attribute__((visibility("hidden")))
769 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE
;
772 __attribute__((visibility("hidden")))
774 struct __processor_model
{
775 unsigned int __cpu_vendor
;
776 unsigned int __cpu_type
;
777 unsigned int __cpu_subtype
;
778 unsigned int __cpu_features
[1];
779 } __cpu_model
= {0, 0, 0, {0}};
782 __attribute__((visibility("hidden")))
784 unsigned int __cpu_features2
= 0;
786 // A constructor function that sets __cpu_model and __cpu_features2 with
787 // the right values. This needs to run only once. This constructor is
788 // given the highest priority and it should run before constructors without
789 // the priority set. However, it still runs after ifunc initializers and
790 // needs to be called explicitly there.
792 int CONSTRUCTOR_ATTRIBUTE
__cpu_indicator_init(void) {
793 unsigned EAX
, EBX
, ECX
, EDX
;
794 unsigned MaxLeaf
= 5;
796 unsigned Model
, Family
;
797 unsigned Features
[(CPU_FEATURE_MAX
+ 31) / 32] = {0};
799 // This function needs to run just once.
800 if (__cpu_model
.__cpu_vendor
)
803 if (!isCpuIdSupported() ||
804 getX86CpuIDAndInfo(0, &MaxLeaf
, &Vendor
, &ECX
, &EDX
) || MaxLeaf
< 1) {
805 __cpu_model
.__cpu_vendor
= VENDOR_OTHER
;
809 getX86CpuIDAndInfo(1, &EAX
, &EBX
, &ECX
, &EDX
);
810 detectX86FamilyModel(EAX
, &Family
, &Model
);
812 // Find available features.
813 getAvailableFeatures(ECX
, EDX
, MaxLeaf
, &Features
[0]);
815 assert((sizeof(Features
)/sizeof(Features
[0])) == 2);
816 __cpu_model
.__cpu_features
[0] = Features
[0];
817 __cpu_features2
= Features
[1];
819 if (Vendor
== SIG_INTEL
) {
821 getIntelProcessorTypeAndSubtype(Family
, Model
, &Features
[0],
822 &(__cpu_model
.__cpu_type
),
823 &(__cpu_model
.__cpu_subtype
));
824 __cpu_model
.__cpu_vendor
= VENDOR_INTEL
;
825 } else if (Vendor
== SIG_AMD
) {
827 getAMDProcessorTypeAndSubtype(Family
, Model
, &Features
[0],
828 &(__cpu_model
.__cpu_type
),
829 &(__cpu_model
.__cpu_subtype
));
830 __cpu_model
.__cpu_vendor
= VENDOR_AMD
;
832 __cpu_model
.__cpu_vendor
= VENDOR_OTHER
;
834 assert(__cpu_model
.__cpu_vendor
< VENDOR_MAX
);
835 assert(__cpu_model
.__cpu_type
< CPU_TYPE_MAX
);
836 assert(__cpu_model
.__cpu_subtype
< CPU_SUBTYPE_MAX
);
840 #elif defined(__aarch64__)
846 #define HWCAP_CPUID (1 << 11)
849 #define HWCAP_FP (1 << 0)
852 #define HWCAP_ASIMD (1 << 1)
855 #define HWCAP_AES (1 << 3)
858 #define HWCAP_PMULL (1 << 4)
861 #define HWCAP_SHA1 (1 << 5)
864 #define HWCAP_SHA2 (1 << 6)
866 #ifndef HWCAP_ATOMICS
867 #define HWCAP_ATOMICS (1 << 8)
870 #define HWCAP_FPHP (1 << 9)
872 #ifndef HWCAP_ASIMDHP
873 #define HWCAP_ASIMDHP (1 << 10)
875 #ifndef HWCAP_ASIMDRDM
876 #define HWCAP_ASIMDRDM (1 << 12)
879 #define HWCAP_JSCVT (1 << 13)
882 #define HWCAP_FCMA (1 << 14)
885 #define HWCAP_LRCPC (1 << 15)
888 #define HWCAP_DCPOP (1 << 16)
891 #define HWCAP_SHA3 (1 << 17)
894 #define HWCAP_SM3 (1 << 18)
897 #define HWCAP_SM4 (1 << 19)
899 #ifndef HWCAP_ASIMDDP
900 #define HWCAP_ASIMDDP (1 << 20)
903 #define HWCAP_SHA512 (1 << 21)
906 #define HWCAP_SVE (1 << 22)
908 #ifndef HWCAP_ASIMDFHM
909 #define HWCAP_ASIMDFHM (1 << 23)
912 #define HWCAP_DIT (1 << 24)
915 #define HWCAP_ILRCPC (1 << 26)
918 #define HWCAP_FLAGM (1 << 27)
921 #define HWCAP_SSBS (1 << 28)
924 #define HWCAP_SB (1 << 29)
927 #ifndef HWCAP2_DCPODP
928 #define HWCAP2_DCPODP (1 << 0)
931 #define HWCAP2_SVE2 (1 << 1)
933 #ifndef HWCAP2_SVEAES
934 #define HWCAP2_SVEAES (1 << 2)
936 #ifndef HWCAP2_SVEPMULL
937 #define HWCAP2_SVEPMULL (1 << 3)
939 #ifndef HWCAP2_SVEBITPERM
940 #define HWCAP2_SVEBITPERM (1 << 4)
942 #ifndef HWCAP2_SVESHA3
943 #define HWCAP2_SVESHA3 (1 << 5)
945 #ifndef HWCAP2_SVESM4
946 #define HWCAP2_SVESM4 (1 << 6)
948 #ifndef HWCAP2_FLAGM2
949 #define HWCAP2_FLAGM2 (1 << 7)
952 #define HWCAP2_FRINT (1 << 8)
954 #ifndef HWCAP2_SVEI8MM
955 #define HWCAP2_SVEI8MM (1 << 9)
957 #ifndef HWCAP2_SVEF32MM
958 #define HWCAP2_SVEF32MM (1 << 10)
960 #ifndef HWCAP2_SVEF64MM
961 #define HWCAP2_SVEF64MM (1 << 11)
963 #ifndef HWCAP2_SVEBF16
964 #define HWCAP2_SVEBF16 (1 << 12)
967 #define HWCAP2_I8MM (1 << 13)
970 #define HWCAP2_BF16 (1 << 14)
973 #define HWCAP2_DGH (1 << 15)
976 #define HWCAP2_RNG (1 << 16)
979 #define HWCAP2_BTI (1 << 17)
982 #define HWCAP2_MTE (1 << 18)
985 #define HWCAP2_RPRES (1 << 21)
988 #define HWCAP2_MTE3 (1 << 22)
991 #define HWCAP2_SME (1 << 23)
993 #ifndef HWCAP2_SME_I16I64
994 #define HWCAP2_SME_I16I64 (1 << 24)
996 #ifndef HWCAP2_SME_F64F64
997 #define HWCAP2_SME_F64F64 (1 << 25)
1000 #define HWCAP2_WFXT (1UL << 31)
1002 #ifndef HWCAP2_EBF16
1003 #define HWCAP2_EBF16 (1UL << 32)
1005 #ifndef HWCAP2_SVE_EBF16
1006 #define HWCAP2_SVE_EBF16 (1UL << 33)
1009 // LSE support detection for out-of-line atomics
1010 // using HWCAP and Auxiliary vector
1011 _Bool __aarch64_have_lse_atomics
1012 __attribute__((visibility("hidden"), nocommon
));
1014 #if defined(__has_include)
1015 #if __has_include(<sys/auxv.h>)
1016 #include <sys/auxv.h>
1017 #if __has_include(<asm/hwcap.h>)
1018 #include <asm/hwcap.h>
1020 #if defined(__ANDROID__)
1022 #include <sys/system_properties.h>
1023 #elif defined(__Fuchsia__)
1024 #include <zircon/features.h>
1025 #include <zircon/syscalls.h>
1028 // Detect Exynos 9810 CPU
1029 #define IF_EXYNOS9810 \
1030 char arch[PROP_VALUE_MAX]; \
1031 if (__system_property_get("ro.arch", arch) > 0 && \
1032 strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0)
1034 static void CONSTRUCTOR_ATTRIBUTE
init_have_lse_atomics(void) {
1035 #if defined(__FreeBSD__)
1036 unsigned long hwcap
;
1037 int result
= elf_aux_info(AT_HWCAP
, &hwcap
, sizeof hwcap
);
1038 __aarch64_have_lse_atomics
= result
== 0 && (hwcap
& HWCAP_ATOMICS
) != 0;
1039 #elif defined(__Fuchsia__)
1040 // This ensures the vDSO is a direct link-time dependency of anything that
1041 // needs this initializer code.
1042 #pragma comment(lib, "zircon")
1044 zx_status_t status
= _zx_system_get_features(ZX_FEATURE_KIND_CPU
, &features
);
1045 __aarch64_have_lse_atomics
=
1046 status
== ZX_OK
&& (features
& ZX_ARM64_FEATURE_ISA_ATOMICS
) != 0;
1048 unsigned long hwcap
= getauxval(AT_HWCAP
);
1049 _Bool result
= (hwcap
& HWCAP_ATOMICS
) != 0;
1050 #if defined(__ANDROID__)
1052 // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
1053 // only the former support LSE atomics. However, the kernel in the
1054 // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
1055 // reported the feature as being supported.
1057 // The kernel appears to have been corrected to mark it unsupported as of
1058 // the Android 9.0 release on those devices, and this issue has not been
1059 // observed anywhere else. Thus, this workaround may be removed if
1060 // compiler-rt ever drops support for Android 8.0.
1061 IF_EXYNOS9810 result
= false;
1063 #endif // defined(__ANDROID__)
1064 __aarch64_have_lse_atomics
= result
;
1065 #endif // defined(__FreeBSD__)
1068 #if !defined(DISABLE_AARCH64_FMV)
1069 // CPUFeatures must correspond to the same AArch64 features in
1070 // AArch64TargetParser.h
1133 // Architecture features used
1134 // in Function Multi Versioning
1136 unsigned long long features
;
1137 // As the feature set grows, new fields could be added
1138 } __aarch64_cpu_features
__attribute__((visibility("hidden"), nocommon
));
1140 void init_cpu_features_resolver(unsigned long hwcap
, unsigned long hwcap2
) {
// Records AArch64 feature F by setting bit F of
// __aarch64_cpu_features.features. The argument is parenthesized so that a
// compound expression is used as the shift count as a whole.
#define setCPUFeature(F) (__aarch64_cpu_features.features |= 1ULL << (F))
1142 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
// Yields the `number`-bit wide field of `val` whose least significant bit is
// at position `start`, shifted down to bit 0. Every argument and the whole
// expansion are parenthesized so the macro composes correctly with
// surrounding operators. `number` must be less than 64.
#define extractBits(val, start, number)                                        \
  (((val) & (((1ULL << (number)) - 1ULL) << (start))) >> (start))
1145 if (hwcap
& HWCAP_CRC32
)
1146 setCPUFeature(FEAT_CRC
);
1147 if (hwcap
& HWCAP_PMULL
)
1148 setCPUFeature(FEAT_PMULL
);
1149 if (hwcap
& HWCAP_FLAGM
)
1150 setCPUFeature(FEAT_FLAGM
);
1151 if (hwcap2
& HWCAP2_FLAGM2
) {
1152 setCPUFeature(FEAT_FLAGM
);
1153 setCPUFeature(FEAT_FLAGM2
);
1155 if (hwcap
& HWCAP_SM3
&& hwcap
& HWCAP_SM4
)
1156 setCPUFeature(FEAT_SM4
);
1157 if (hwcap
& HWCAP_ASIMDDP
)
1158 setCPUFeature(FEAT_DOTPROD
);
1159 if (hwcap
& HWCAP_ASIMDFHM
)
1160 setCPUFeature(FEAT_FP16FML
);
1161 if (hwcap
& HWCAP_FPHP
) {
1162 setCPUFeature(FEAT_FP16
);
1163 setCPUFeature(FEAT_FP
);
1165 if (hwcap
& HWCAP_DIT
)
1166 setCPUFeature(FEAT_DIT
);
1167 if (hwcap
& HWCAP_ASIMDRDM
)
1168 setCPUFeature(FEAT_RDM
);
1169 if (hwcap
& HWCAP_ILRCPC
)
1170 setCPUFeature(FEAT_RCPC2
);
1171 if (hwcap
& HWCAP_AES
)
1172 setCPUFeature(FEAT_AES
);
1173 if (hwcap
& HWCAP_SHA1
)
1174 setCPUFeature(FEAT_SHA1
);
1175 if (hwcap
& HWCAP_SHA2
)
1176 setCPUFeature(FEAT_SHA2
);
1177 if (hwcap
& HWCAP_JSCVT
)
1178 setCPUFeature(FEAT_JSCVT
);
1179 if (hwcap
& HWCAP_FCMA
)
1180 setCPUFeature(FEAT_FCMA
);
1181 if (hwcap
& HWCAP_SB
)
1182 setCPUFeature(FEAT_SB
);
1183 if (hwcap
& HWCAP_SSBS
)
1184 setCPUFeature(FEAT_SSBS2
);
1185 if (hwcap2
& HWCAP2_MTE
) {
1186 setCPUFeature(FEAT_MEMTAG
);
1187 setCPUFeature(FEAT_MEMTAG2
);
1189 if (hwcap2
& HWCAP2_MTE3
) {
1190 setCPUFeature(FEAT_MEMTAG
);
1191 setCPUFeature(FEAT_MEMTAG2
);
1192 setCPUFeature(FEAT_MEMTAG3
);
1194 if (hwcap2
& HWCAP2_SVEAES
)
1195 setCPUFeature(FEAT_SVE_AES
);
1196 if (hwcap2
& HWCAP2_SVEPMULL
) {
1197 setCPUFeature(FEAT_SVE_AES
);
1198 setCPUFeature(FEAT_SVE_PMULL128
);
1200 if (hwcap2
& HWCAP2_SVEBITPERM
)
1201 setCPUFeature(FEAT_SVE_BITPERM
);
1202 if (hwcap2
& HWCAP2_SVESHA3
)
1203 setCPUFeature(FEAT_SVE_SHA3
);
1204 if (hwcap2
& HWCAP2_SVESM4
)
1205 setCPUFeature(FEAT_SVE_SM4
);
1206 if (hwcap2
& HWCAP2_DCPODP
)
1207 setCPUFeature(FEAT_DPB2
);
1208 if (hwcap
& HWCAP_ATOMICS
)
1209 setCPUFeature(FEAT_LSE
);
1210 if (hwcap2
& HWCAP2_RNG
)
1211 setCPUFeature(FEAT_RNG
);
1212 if (hwcap2
& HWCAP2_I8MM
)
1213 setCPUFeature(FEAT_I8MM
);
1214 if (hwcap2
& HWCAP2_EBF16
)
1215 setCPUFeature(FEAT_EBF16
);
1216 if (hwcap2
& HWCAP2_SVE_EBF16
)
1217 setCPUFeature(FEAT_SVE_EBF16
);
1218 if (hwcap2
& HWCAP2_DGH
)
1219 setCPUFeature(FEAT_DGH
);
1220 if (hwcap2
& HWCAP2_FRINT
)
1221 setCPUFeature(FEAT_FRINTTS
);
1222 if (hwcap2
& HWCAP2_SVEI8MM
)
1223 setCPUFeature(FEAT_SVE_I8MM
);
1224 if (hwcap2
& HWCAP2_SVEF32MM
)
1225 setCPUFeature(FEAT_SVE_F32MM
);
1226 if (hwcap2
& HWCAP2_SVEF64MM
)
1227 setCPUFeature(FEAT_SVE_F64MM
);
1228 if (hwcap2
& HWCAP2_BTI
)
1229 setCPUFeature(FEAT_BTI
);
1230 if (hwcap2
& HWCAP2_RPRES
)
1231 setCPUFeature(FEAT_RPRES
);
1232 if (hwcap2
& HWCAP2_WFXT
)
1233 setCPUFeature(FEAT_WFXT
);
1234 if (hwcap2
& HWCAP2_SME
)
1235 setCPUFeature(FEAT_SME
);
1236 if (hwcap2
& HWCAP2_SME_I16I64
)
1237 setCPUFeature(FEAT_SME_I64
);
1238 if (hwcap2
& HWCAP2_SME_F64F64
)
1239 setCPUFeature(FEAT_SME_F64
);
1240 if (hwcap
& HWCAP_CPUID
) {
1242 getCPUFeature(ID_AA64PFR1_EL1
, ftr
);
1243 // ID_AA64PFR1_EL1.MTE >= 0b0001
1244 if (extractBits(ftr
, 8, 4) >= 0x1)
1245 setCPUFeature(FEAT_MEMTAG
);
1246 // ID_AA64PFR1_EL1.SSBS == 0b0001
1247 if (extractBits(ftr
, 4, 4) == 0x1)
1248 setCPUFeature(FEAT_SSBS
);
1249 // ID_AA64PFR1_EL1.SME == 0b0010
1250 if (extractBits(ftr
, 24, 4) == 0x2)
1251 setCPUFeature(FEAT_SME2
);
1252 getCPUFeature(ID_AA64PFR0_EL1
, ftr
);
1253 // ID_AA64PFR0_EL1.FP != 0b1111
1254 if (extractBits(ftr
, 16, 4) != 0xF) {
1255 setCPUFeature(FEAT_FP
);
1256 // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
1257 setCPUFeature(FEAT_SIMD
);
1259 // ID_AA64PFR0_EL1.SVE != 0b0000
1260 if (extractBits(ftr
, 32, 4) != 0x0) {
1261 // Read ID_AA64ZFR0_EL1 through its encoded name (S3_0_C0_C4_4); the
1262 // symbolic register name is supported only if SVE is enabled.
1263 getCPUFeature(S3_0_C0_C4_4
, ftr
);
1264 // ID_AA64ZFR0_EL1.SVEver == 0b0000
1265 if (extractBits(ftr
, 0, 4) == 0x0)
1266 setCPUFeature(FEAT_SVE
);
1267 // ID_AA64ZFR0_EL1.SVEver == 0b0001
1268 if (extractBits(ftr
, 0, 4) == 0x1)
1269 setCPUFeature(FEAT_SVE2
);
1270 // ID_AA64ZFR0_EL1.BF16 != 0b0000
1271 if (extractBits(ftr
, 20, 4) != 0x0)
1272 setCPUFeature(FEAT_SVE_BF16
);
1274 getCPUFeature(ID_AA64ISAR0_EL1
, ftr
);
1275 // ID_AA64ISAR0_EL1.SHA3 != 0b0000
1276 if (extractBits(ftr
, 32, 4) != 0x0)
1277 setCPUFeature(FEAT_SHA3
);
1278 getCPUFeature(ID_AA64ISAR1_EL1
, ftr
);
1279 // ID_AA64ISAR1_EL1.DPB >= 0b0001
1280 if (extractBits(ftr
, 0, 4) >= 0x1)
1281 setCPUFeature(FEAT_DPB
);
1282 // ID_AA64ISAR1_EL1.LRCPC != 0b0000
1283 if (extractBits(ftr
, 20, 4) != 0x0)
1284 setCPUFeature(FEAT_RCPC
);
1285 // ID_AA64ISAR1_EL1.SPECRES == 0b0010 (TODO: confirm; 0b0001 already indicates FEAT_SPECRES)
1286 if (extractBits(ftr
, 40, 4) == 0x2)
1287 setCPUFeature(FEAT_PREDRES
);
1288 // ID_AA64ISAR1_EL1.BF16 != 0b0000
1289 if (extractBits(ftr
, 44, 4) != 0x0)
1290 setCPUFeature(FEAT_BF16
);
1291 // ID_AA64ISAR1_EL1.LS64 >= 0b0001
1292 if (extractBits(ftr
, 60, 4) >= 0x1)
1293 setCPUFeature(FEAT_LS64
);
1294 // ID_AA64ISAR1_EL1.LS64 >= 0b0010
1295 if (extractBits(ftr
, 60, 4) >= 0x2)
1296 setCPUFeature(FEAT_LS64_V
);
1297 // ID_AA64ISAR1_EL1.LS64 >= 0b0011
1298 if (extractBits(ftr
, 60, 4) >= 0x3)
1299 setCPUFeature(FEAT_LS64_ACCDATA
);
1301 // Set some features in case of no CPUID support
1302 if (hwcap
& (HWCAP_FP
| HWCAP_FPHP
)) {
1303 setCPUFeature(FEAT_FP
);
1304 // FP and AdvSIMD fields have the same value
1305 setCPUFeature(FEAT_SIMD
);
1307 if (hwcap
& HWCAP_DCPOP
|| hwcap2
& HWCAP2_DCPODP
)
1308 setCPUFeature(FEAT_DPB
);
1309 if (hwcap
& HWCAP_LRCPC
|| hwcap
& HWCAP_ILRCPC
)
1310 setCPUFeature(FEAT_RCPC
);
1311 if (hwcap2
& HWCAP2_BF16
|| hwcap2
& HWCAP2_EBF16
)
1312 setCPUFeature(FEAT_BF16
);
1313 if (hwcap2
& HWCAP2_SVEBF16
)
1314 setCPUFeature(FEAT_SVE_BF16
);
1315 if (hwcap2
& HWCAP2_SVE2
&& hwcap
& HWCAP_SVE
)
1316 setCPUFeature(FEAT_SVE2
);
1317 if (hwcap
& HWCAP_SHA3
)
1318 setCPUFeature(FEAT_SHA3
);
1322 void CONSTRUCTOR_ATTRIBUTE
init_cpu_features(void) {
1323 unsigned long hwcap
;
1324 unsigned long hwcap2
;
1325 // CPU features already initialized.
1326 if (__aarch64_cpu_features
.features
)
1328 setCPUFeature(FEAT_MAX
);
1329 #if defined(__FreeBSD__)
1331 res
= elf_aux_info(AT_HWCAP
, &hwcap
, sizeof hwcap
);
1332 res
|= elf_aux_info(AT_HWCAP2
, &hwcap2
, sizeof hwcap2
);
1336 #if defined(__ANDROID__)
1337 // Don't set any CPU features,
1338 // detection could be wrong on Exynos 9810.
1339 IF_EXYNOS9810
return;
1340 #endif // defined(__ANDROID__)
1341 hwcap
= getauxval(AT_HWCAP
);
1342 hwcap2
= getauxval(AT_HWCAP2
);
1343 #endif // defined(__FreeBSD__)
1344 init_cpu_features_resolver(hwcap
, hwcap2
);
1346 #undef getCPUFeature
1347 #undef setCPUFeature
1348 #undef IF_EXYNOS9810
1350 #endif // !defined(DISABLE_AARCH64_FMV)
1351 #endif // defined(__has_include)
1352 #endif // __has_include(<sys/auxv.h>)
1353 #endif // __has_include(<asm/hwcap.h>)
1354 #endif // defined(__aarch64__)