// Source provenance: llvm-project.git / compiler-rt/lib/builtins/cpu_model.c
// blob aefa56abcdd9535bce8157c7517808316da2275c
//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements the operating system Host concept and builtin
// __cpu_model for the compiler_rt library for x86 and
// __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64.
//
//===----------------------------------------------------------------------===//
// Provide a no-op fallback so __has_attribute can be used unconditionally on
// compilers that do not implement it.
#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif

#if __has_attribute(constructor)
#if __GNUC__ >= 9
// Ordinarily init priorities below 101 are disallowed as they are reserved for the
// implementation. However, we are the implementation, so silence the diagnostic,
// since it doesn't apply to us.
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
// We're choosing init priority 90 to force our constructors to run before any
// constructors in the end user application (starting at priority 101). This value
// matches the libgcc choice for the same functions.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
37 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
38 defined(_M_X64)) && \
39 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
41 #include <assert.h>
43 #define bool int
44 #define true 1
45 #define false 0
47 #ifdef _MSC_VER
48 #include <intrin.h>
49 #endif
// CPUID leaf 0 vendor signatures (value of EBX for "GenuineIntel" /
// "AuthenticAMD").
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};
// Values stored in __cpu_model.__cpu_vendor. Must match the compiler's
// __builtin_cpu_is numbering (libgcc ABI), so entries start at 1.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};
// Values stored in __cpu_model.__cpu_type. ABI order is fixed (libgcc
// compatibility): new entries may only be appended before CPU_TYPE_MAX.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  ZHAOXIN_FAM7H,
  INTEL_SIERRAFOREST,
  INTEL_GRANDRIDGE,
  INTEL_CLEARWATERFOREST,
  CPU_TYPE_MAX
};
// Values stored in __cpu_model.__cpu_subtype. ABI order is fixed (libgcc
// compatibility): new entries may only be appended before CPU_SUBTYPE_MAX.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  INTEL_COREI7_ROCKETLAKE,
  ZHAOXIN_FAM7H_LUJIAZUI,
  AMDFAM19H_ZNVER4,
  INTEL_COREI7_GRANITERAPIDS,
  INTEL_COREI7_GRANITERAPIDS_D,
  INTEL_COREI7_ARROWLAKE,
  INTEL_COREI7_ARROWLAKE_S,
  INTEL_COREI7_PANTHERLAKE,
  CPU_SUBTYPE_MAX
};
// Feature bit indices into __cpu_model.__cpu_features / __cpu_features2.
// The numbering is part of the libgcc ABI; the explicit "= N" jumps reserve
// indices used by GCC for features compiler-rt does not (yet) detect.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2,
  FEATURE_GFNI,
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,

  FEATURE_CMPXCHG16B = 46,
  FEATURE_F16C = 49,
  FEATURE_LAHF_LM = 54,
  FEATURE_LM,
  FEATURE_WP,
  FEATURE_LZCNT,
  FEATURE_MOVBE,

  FEATURE_X86_64_BASELINE = 95,
  FEATURE_X86_64_V2,
  FEATURE_X86_64_V3,
  FEATURE_X86_64_V4,
  CPU_FEATURE_MAX
};
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
//
// Returns true when the CPUID instruction may be executed safely. On
// x86-64 (and on unknown compilers) CPUID is assumed to exist.
static bool isCpuIdSupported(void) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  // Toggle the ID bit (0x00200000) in EFLAGS; if the change sticks, the CPU
  // implements CPUID.
  int __cpuid_supported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  return true;
}
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
// Read control register 0 (XCR0). Used to detect features such as AVX.
// NOTE(review): callers must have verified CPUID.1:ECX.OSXSAVE beforehand;
// executing xgetbv without OS XSAVE support raises #UD.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
/// Decode the family and model fields from CPUID leaf 1 EAX, applying the
/// extended family/model adjustments Intel and AMD specify for family 6/0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}
322 static const char *
323 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
324 const unsigned *Features,
325 unsigned *Type, unsigned *Subtype) {
326 #define testFeature(F) \
327 (Features[F / 32] & (1 << (F % 32))) != 0
329 // We select CPU strings to match the code in Host.cpp, but we don't use them
330 // in compiler-rt.
331 const char *CPU = 0;
333 switch (Family) {
334 case 6:
335 switch (Model) {
336 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
337 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
338 // mobile processor, Intel Core 2 Extreme processor, Intel
339 // Pentium Dual-Core processor, Intel Xeon processor, model
340 // 0Fh. All processors are manufactured using the 65 nm process.
341 case 0x16: // Intel Celeron processor model 16h. All processors are
342 // manufactured using the 65 nm process
343 CPU = "core2";
344 *Type = INTEL_CORE2;
345 break;
346 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
347 // 17h. All processors are manufactured using the 45 nm process.
349 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
350 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
351 // the 45 nm process.
352 CPU = "penryn";
353 *Type = INTEL_CORE2;
354 break;
355 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
356 // processors are manufactured using the 45 nm process.
357 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
358 // As found in a Summer 2010 model iMac.
359 case 0x1f:
360 case 0x2e: // Nehalem EX
361 CPU = "nehalem";
362 *Type = INTEL_COREI7;
363 *Subtype = INTEL_COREI7_NEHALEM;
364 break;
365 case 0x25: // Intel Core i7, laptop version.
366 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
367 // processors are manufactured using the 32 nm process.
368 case 0x2f: // Westmere EX
369 CPU = "westmere";
370 *Type = INTEL_COREI7;
371 *Subtype = INTEL_COREI7_WESTMERE;
372 break;
373 case 0x2a: // Intel Core i7 processor. All processors are manufactured
374 // using the 32 nm process.
375 case 0x2d:
376 CPU = "sandybridge";
377 *Type = INTEL_COREI7;
378 *Subtype = INTEL_COREI7_SANDYBRIDGE;
379 break;
380 case 0x3a:
381 case 0x3e: // Ivy Bridge EP
382 CPU = "ivybridge";
383 *Type = INTEL_COREI7;
384 *Subtype = INTEL_COREI7_IVYBRIDGE;
385 break;
387 // Haswell:
388 case 0x3c:
389 case 0x3f:
390 case 0x45:
391 case 0x46:
392 CPU = "haswell";
393 *Type = INTEL_COREI7;
394 *Subtype = INTEL_COREI7_HASWELL;
395 break;
397 // Broadwell:
398 case 0x3d:
399 case 0x47:
400 case 0x4f:
401 case 0x56:
402 CPU = "broadwell";
403 *Type = INTEL_COREI7;
404 *Subtype = INTEL_COREI7_BROADWELL;
405 break;
407 // Skylake:
408 case 0x4e: // Skylake mobile
409 case 0x5e: // Skylake desktop
410 case 0x8e: // Kaby Lake mobile
411 case 0x9e: // Kaby Lake desktop
412 case 0xa5: // Comet Lake-H/S
413 case 0xa6: // Comet Lake-U
414 CPU = "skylake";
415 *Type = INTEL_COREI7;
416 *Subtype = INTEL_COREI7_SKYLAKE;
417 break;
419 // Rocketlake:
420 case 0xa7:
421 CPU = "rocketlake";
422 *Type = INTEL_COREI7;
423 *Subtype = INTEL_COREI7_ROCKETLAKE;
424 break;
426 // Skylake Xeon:
427 case 0x55:
428 *Type = INTEL_COREI7;
429 if (testFeature(FEATURE_AVX512BF16)) {
430 CPU = "cooperlake";
431 *Subtype = INTEL_COREI7_COOPERLAKE;
432 } else if (testFeature(FEATURE_AVX512VNNI)) {
433 CPU = "cascadelake";
434 *Subtype = INTEL_COREI7_CASCADELAKE;
435 } else {
436 CPU = "skylake-avx512";
437 *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
439 break;
441 // Cannonlake:
442 case 0x66:
443 CPU = "cannonlake";
444 *Type = INTEL_COREI7;
445 *Subtype = INTEL_COREI7_CANNONLAKE;
446 break;
448 // Icelake:
449 case 0x7d:
450 case 0x7e:
451 CPU = "icelake-client";
452 *Type = INTEL_COREI7;
453 *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
454 break;
456 // Tigerlake:
457 case 0x8c:
458 case 0x8d:
459 CPU = "tigerlake";
460 *Type = INTEL_COREI7;
461 *Subtype = INTEL_COREI7_TIGERLAKE;
462 break;
464 // Alderlake:
465 case 0x97:
466 case 0x9a:
467 // Raptorlake:
468 case 0xb7:
469 case 0xba:
470 case 0xbf:
471 // Meteorlake:
472 case 0xaa:
473 case 0xac:
474 // Gracemont:
475 case 0xbe:
476 CPU = "alderlake";
477 *Type = INTEL_COREI7;
478 *Subtype = INTEL_COREI7_ALDERLAKE;
479 break;
481 // Arrowlake:
482 case 0xc5:
483 CPU = "arrowlake";
484 *Type = INTEL_COREI7;
485 *Subtype = INTEL_COREI7_ARROWLAKE;
486 break;
488 // Arrowlake S:
489 case 0xc6:
490 // Lunarlake:
491 case 0xbd:
492 CPU = "arrowlake-s";
493 *Type = INTEL_COREI7;
494 *Subtype = INTEL_COREI7_ARROWLAKE_S;
495 break;
497 // Pantherlake:
498 case 0xcc:
499 CPU = "pantherlake";
500 *Type = INTEL_COREI7;
501 *Subtype = INTEL_COREI7_PANTHERLAKE;
502 break;
504 // Icelake Xeon:
505 case 0x6a:
506 case 0x6c:
507 CPU = "icelake-server";
508 *Type = INTEL_COREI7;
509 *Subtype = INTEL_COREI7_ICELAKE_SERVER;
510 break;
512 // Emerald Rapids:
513 case 0xcf:
514 // Sapphire Rapids:
515 case 0x8f:
516 CPU = "sapphirerapids";
517 *Type = INTEL_COREI7;
518 *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
519 break;
521 // Granite Rapids:
522 case 0xad:
523 CPU = "graniterapids";
524 *Type = INTEL_COREI7;
525 *Subtype = INTEL_COREI7_GRANITERAPIDS;
526 break;
528 // Granite Rapids D:
529 case 0xae:
530 CPU = "graniterapids-d";
531 *Type = INTEL_COREI7;
532 *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
533 break;
535 case 0x1c: // Most 45 nm Intel Atom processors
536 case 0x26: // 45 nm Atom Lincroft
537 case 0x27: // 32 nm Atom Medfield
538 case 0x35: // 32 nm Atom Midview
539 case 0x36: // 32 nm Atom Midview
540 CPU = "bonnell";
541 *Type = INTEL_BONNELL;
542 break;
544 // Atom Silvermont codes from the Intel software optimization guide.
545 case 0x37:
546 case 0x4a:
547 case 0x4d:
548 case 0x5a:
549 case 0x5d:
550 case 0x4c: // really airmont
551 CPU = "silvermont";
552 *Type = INTEL_SILVERMONT;
553 break;
554 // Goldmont:
555 case 0x5c: // Apollo Lake
556 case 0x5f: // Denverton
557 CPU = "goldmont";
558 *Type = INTEL_GOLDMONT;
559 break; // "goldmont"
560 case 0x7a:
561 CPU = "goldmont-plus";
562 *Type = INTEL_GOLDMONT_PLUS;
563 break;
564 case 0x86:
565 case 0x8a: // Lakefield
566 case 0x96: // Elkhart Lake
567 case 0x9c: // Jasper Lake
568 CPU = "tremont";
569 *Type = INTEL_TREMONT;
570 break;
572 // Sierraforest:
573 case 0xaf:
574 CPU = "sierraforest";
575 *Type = INTEL_SIERRAFOREST;
576 break;
578 // Grandridge:
579 case 0xb6:
580 CPU = "grandridge";
581 *Type = INTEL_GRANDRIDGE;
582 break;
584 // Clearwaterforest:
585 case 0xdd:
586 CPU = "clearwaterforest";
587 *Type = INTEL_COREI7;
588 *Subtype = INTEL_CLEARWATERFOREST;
589 break;
591 case 0x57:
592 CPU = "knl";
593 *Type = INTEL_KNL;
594 break;
596 case 0x85:
597 CPU = "knm";
598 *Type = INTEL_KNM;
599 break;
601 default: // Unknown family 6 CPU.
602 break;
604 break;
605 default:
606 break; // Unknown.
609 return CPU;
612 static const char *
613 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
614 const unsigned *Features,
615 unsigned *Type, unsigned *Subtype) {
616 // We select CPU strings to match the code in Host.cpp, but we don't use them
617 // in compiler-rt.
618 const char *CPU = 0;
620 switch (Family) {
621 case 16:
622 CPU = "amdfam10";
623 *Type = AMDFAM10H;
624 switch (Model) {
625 case 2:
626 *Subtype = AMDFAM10H_BARCELONA;
627 break;
628 case 4:
629 *Subtype = AMDFAM10H_SHANGHAI;
630 break;
631 case 8:
632 *Subtype = AMDFAM10H_ISTANBUL;
633 break;
635 break;
636 case 20:
637 CPU = "btver1";
638 *Type = AMD_BTVER1;
639 break;
640 case 21:
641 CPU = "bdver1";
642 *Type = AMDFAM15H;
643 if (Model >= 0x60 && Model <= 0x7f) {
644 CPU = "bdver4";
645 *Subtype = AMDFAM15H_BDVER4;
646 break; // 60h-7Fh: Excavator
648 if (Model >= 0x30 && Model <= 0x3f) {
649 CPU = "bdver3";
650 *Subtype = AMDFAM15H_BDVER3;
651 break; // 30h-3Fh: Steamroller
653 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
654 CPU = "bdver2";
655 *Subtype = AMDFAM15H_BDVER2;
656 break; // 02h, 10h-1Fh: Piledriver
658 if (Model <= 0x0f) {
659 *Subtype = AMDFAM15H_BDVER1;
660 break; // 00h-0Fh: Bulldozer
662 break;
663 case 22:
664 CPU = "btver2";
665 *Type = AMD_BTVER2;
666 break;
667 case 23:
668 CPU = "znver1";
669 *Type = AMDFAM17H;
670 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
671 CPU = "znver2";
672 *Subtype = AMDFAM17H_ZNVER2;
673 break; // 30h-3fh, 71h: Zen2
675 if (Model <= 0x0f) {
676 *Subtype = AMDFAM17H_ZNVER1;
677 break; // 00h-0Fh: Zen1
679 break;
680 case 25:
681 CPU = "znver3";
682 *Type = AMDFAM19H;
683 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
684 // Family 19h Models 00h-0Fh - Zen3
685 // Family 19h Models 20h-2Fh - Zen3
686 // Family 19h Models 30h-3Fh - Zen3
687 // Family 19h Models 40h-4Fh - Zen3+
688 // Family 19h Models 50h-5Fh - Zen3+
689 *Subtype = AMDFAM19H_ZNVER3;
690 break;
692 if ((Model >= 0x10 && Model <= 0x1f) ||
693 (Model >= 0x60 && Model <= 0x74) ||
694 (Model >= 0x78 && Model <= 0x7b) ||
695 (Model >= 0xA0 && Model <= 0xAf)) {
696 CPU = "znver4";
697 *Subtype = AMDFAM19H_ZNVER4;
698 break; // "znver4"
700 break;
701 default:
702 break; // Unknown AMD CPU.
705 return CPU;
708 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
709 unsigned *Features) {
710 unsigned EAX = 0, EBX = 0;
712 #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
713 #define setFeature(F) \
714 Features[F / 32] |= 1U << (F % 32)
716 if ((EDX >> 15) & 1)
717 setFeature(FEATURE_CMOV);
718 if ((EDX >> 23) & 1)
719 setFeature(FEATURE_MMX);
720 if ((EDX >> 25) & 1)
721 setFeature(FEATURE_SSE);
722 if ((EDX >> 26) & 1)
723 setFeature(FEATURE_SSE2);
725 if ((ECX >> 0) & 1)
726 setFeature(FEATURE_SSE3);
727 if ((ECX >> 1) & 1)
728 setFeature(FEATURE_PCLMUL);
729 if ((ECX >> 9) & 1)
730 setFeature(FEATURE_SSSE3);
731 if ((ECX >> 12) & 1)
732 setFeature(FEATURE_FMA);
733 if ((ECX >> 13) & 1)
734 setFeature(FEATURE_CMPXCHG16B);
735 if ((ECX >> 19) & 1)
736 setFeature(FEATURE_SSE4_1);
737 if ((ECX >> 20) & 1)
738 setFeature(FEATURE_SSE4_2);
739 if ((ECX >> 22) & 1)
740 setFeature(FEATURE_MOVBE);
741 if ((ECX >> 23) & 1)
742 setFeature(FEATURE_POPCNT);
743 if ((ECX >> 25) & 1)
744 setFeature(FEATURE_AES);
745 if ((ECX >> 29) & 1)
746 setFeature(FEATURE_F16C);
748 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
749 // indicates that the AVX registers will be saved and restored on context
750 // switch, then we have full AVX support.
751 const unsigned AVXBits = (1 << 27) | (1 << 28);
752 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
753 ((EAX & 0x6) == 0x6);
754 #if defined(__APPLE__)
755 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
756 // save the AVX512 context if we use AVX512 instructions, even the bit is not
757 // set right now.
758 bool HasAVX512Save = true;
759 #else
760 // AVX512 requires additional context to be saved by the OS.
761 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
762 #endif
764 if (HasAVX)
765 setFeature(FEATURE_AVX);
767 bool HasLeaf7 =
768 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
770 if (HasLeaf7 && ((EBX >> 3) & 1))
771 setFeature(FEATURE_BMI);
772 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
773 setFeature(FEATURE_AVX2);
774 if (HasLeaf7 && ((EBX >> 8) & 1))
775 setFeature(FEATURE_BMI2);
776 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
777 setFeature(FEATURE_AVX512F);
778 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
779 setFeature(FEATURE_AVX512DQ);
780 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
781 setFeature(FEATURE_AVX512IFMA);
782 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
783 setFeature(FEATURE_AVX512PF);
784 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
785 setFeature(FEATURE_AVX512ER);
786 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
787 setFeature(FEATURE_AVX512CD);
788 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
789 setFeature(FEATURE_AVX512BW);
790 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
791 setFeature(FEATURE_AVX512VL);
793 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
794 setFeature(FEATURE_AVX512VBMI);
795 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
796 setFeature(FEATURE_AVX512VBMI2);
797 if (HasLeaf7 && ((ECX >> 8) & 1))
798 setFeature(FEATURE_GFNI);
799 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
800 setFeature(FEATURE_VPCLMULQDQ);
801 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
802 setFeature(FEATURE_AVX512VNNI);
803 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
804 setFeature(FEATURE_AVX512BITALG);
805 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
806 setFeature(FEATURE_AVX512VPOPCNTDQ);
808 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
809 setFeature(FEATURE_AVX5124VNNIW);
810 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
811 setFeature(FEATURE_AVX5124FMAPS);
812 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
813 setFeature(FEATURE_AVX512VP2INTERSECT);
815 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
816 // return all 0s for invalid subleaves so check the limit.
817 bool HasLeaf7Subleaf1 =
818 HasLeaf7 && EAX >= 1 &&
819 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
820 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
821 setFeature(FEATURE_AVX512BF16);
823 unsigned MaxExtLevel;
824 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
826 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
827 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
828 if (HasExtLeaf1) {
829 if (ECX & 1)
830 setFeature(FEATURE_LAHF_LM);
831 if ((ECX >> 5) & 1)
832 setFeature(FEATURE_LZCNT);
833 if (((ECX >> 6) & 1))
834 setFeature(FEATURE_SSE4_A);
835 if (((ECX >> 11) & 1))
836 setFeature(FEATURE_XOP);
837 if (((ECX >> 16) & 1))
838 setFeature(FEATURE_FMA4);
839 if (((EDX >> 29) & 1))
840 setFeature(FEATURE_LM);
843 if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
844 setFeature(FEATURE_X86_64_BASELINE);
845 if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
846 hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
847 setFeature(FEATURE_X86_64_V2);
848 if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
849 hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
850 hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
851 hasFeature(FEATURE_MOVBE)) {
852 setFeature(FEATURE_X86_64_V3);
853 if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
854 hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
855 setFeature(FEATURE_X86_64_V4);
860 #undef hasFeature
861 #undef setFeature
864 #ifndef _WIN32
865 __attribute__((visibility("hidden")))
866 #endif
867 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
869 #ifndef _WIN32
870 __attribute__((visibility("hidden")))
871 #endif
872 struct __processor_model {
873 unsigned int __cpu_vendor;
874 unsigned int __cpu_type;
875 unsigned int __cpu_subtype;
876 unsigned int __cpu_features[1];
877 } __cpu_model = {0, 0, 0, {0}};
879 #ifndef _WIN32
880 __attribute__((visibility("hidden")))
881 #endif
882 unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
884 // A constructor function that is sets __cpu_model and __cpu_features2 with
885 // the right values. This needs to run only once. This constructor is
886 // given the highest priority and it should run before constructors without
887 // the priority set. However, it still runs after ifunc initializers and
888 // needs to be called explicitly there.
890 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
891 unsigned EAX, EBX, ECX, EDX;
892 unsigned MaxLeaf = 5;
893 unsigned Vendor;
894 unsigned Model, Family;
895 unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
896 static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
897 static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
899 // This function needs to run just once.
900 if (__cpu_model.__cpu_vendor)
901 return 0;
903 if (!isCpuIdSupported() ||
904 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
905 __cpu_model.__cpu_vendor = VENDOR_OTHER;
906 return -1;
909 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
910 detectX86FamilyModel(EAX, &Family, &Model);
912 // Find available features.
913 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
915 __cpu_model.__cpu_features[0] = Features[0];
916 __cpu_features2[0] = Features[1];
917 __cpu_features2[1] = Features[2];
918 __cpu_features2[2] = Features[3];
920 if (Vendor == SIG_INTEL) {
921 // Get CPU type.
922 getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
923 &(__cpu_model.__cpu_type),
924 &(__cpu_model.__cpu_subtype));
925 __cpu_model.__cpu_vendor = VENDOR_INTEL;
926 } else if (Vendor == SIG_AMD) {
927 // Get CPU type.
928 getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
929 &(__cpu_model.__cpu_type),
930 &(__cpu_model.__cpu_subtype));
931 __cpu_model.__cpu_vendor = VENDOR_AMD;
932 } else
933 __cpu_model.__cpu_vendor = VENDOR_OTHER;
935 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
936 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
937 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
939 return 0;
941 #elif defined(__aarch64__)
943 // LSE support detection for out-of-line atomics
944 // using HWCAP and Auxiliary vector
945 _Bool __aarch64_have_lse_atomics
946 __attribute__((visibility("hidden"), nocommon));
948 #if defined(__has_include)
949 #if __has_include(<sys/auxv.h>)
950 #include <sys/auxv.h>
952 #if __has_include(<sys/ifunc.h>)
953 #include <sys/ifunc.h>
954 #else
955 typedef struct __ifunc_arg_t {
956 unsigned long _size;
957 unsigned long _hwcap;
958 unsigned long _hwcap2;
959 } __ifunc_arg_t;
960 #endif // __has_include(<sys/ifunc.h>)
962 #if __has_include(<asm/hwcap.h>)
963 #include <asm/hwcap.h>
965 #if defined(__ANDROID__)
966 #include <string.h>
967 #include <sys/system_properties.h>
968 #elif defined(__Fuchsia__)
969 #include <zircon/features.h>
970 #include <zircon/syscalls.h>
971 #endif
// Fallback definitions of the AArch64 Linux HWCAP/HWCAP2 bits, for building
// against older kernel headers that predate them. Values match the kernel's
// uapi definitions.
#ifndef _IFUNC_ARG_HWCAP
#define _IFUNC_ARG_HWCAP (1ULL << 62)
#endif
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_CPUID
#define HWCAP_CPUID (1 << 11)
#endif
#ifndef HWCAP_FP
#define HWCAP_FP (1 << 0)
#endif
#ifndef HWCAP_ASIMD
#define HWCAP_ASIMD (1 << 1)
#endif
#ifndef HWCAP_AES
#define HWCAP_AES (1 << 3)
#endif
#ifndef HWCAP_PMULL
#define HWCAP_PMULL (1 << 4)
#endif
#ifndef HWCAP_SHA1
#define HWCAP_SHA1 (1 << 5)
#endif
#ifndef HWCAP_SHA2
#define HWCAP_SHA2 (1 << 6)
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
#ifndef HWCAP_FPHP
#define HWCAP_FPHP (1 << 9)
#endif
#ifndef HWCAP_ASIMDHP
#define HWCAP_ASIMDHP (1 << 10)
#endif
#ifndef HWCAP_ASIMDRDM
#define HWCAP_ASIMDRDM (1 << 12)
#endif
#ifndef HWCAP_JSCVT
#define HWCAP_JSCVT (1 << 13)
#endif
#ifndef HWCAP_FCMA
#define HWCAP_FCMA (1 << 14)
#endif
#ifndef HWCAP_LRCPC
#define HWCAP_LRCPC (1 << 15)
#endif
#ifndef HWCAP_DCPOP
#define HWCAP_DCPOP (1 << 16)
#endif
#ifndef HWCAP_SHA3
#define HWCAP_SHA3 (1 << 17)
#endif
#ifndef HWCAP_SM3
#define HWCAP_SM3 (1 << 18)
#endif
#ifndef HWCAP_SM4
#define HWCAP_SM4 (1 << 19)
#endif
#ifndef HWCAP_ASIMDDP
#define HWCAP_ASIMDDP (1 << 20)
#endif
#ifndef HWCAP_SHA512
#define HWCAP_SHA512 (1 << 21)
#endif
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif
#ifndef HWCAP_ASIMDFHM
#define HWCAP_ASIMDFHM (1 << 23)
#endif
#ifndef HWCAP_DIT
#define HWCAP_DIT (1 << 24)
#endif
#ifndef HWCAP_ILRCPC
#define HWCAP_ILRCPC (1 << 26)
#endif
#ifndef HWCAP_FLAGM
#define HWCAP_FLAGM (1 << 27)
#endif
#ifndef HWCAP_SSBS
#define HWCAP_SSBS (1 << 28)
#endif
#ifndef HWCAP_SB
#define HWCAP_SB (1 << 29)
#endif

#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif
#ifndef HWCAP2_DCPODP
#define HWCAP2_DCPODP (1 << 0)
#endif
#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 (1 << 1)
#endif
#ifndef HWCAP2_SVEAES
#define HWCAP2_SVEAES (1 << 2)
#endif
#ifndef HWCAP2_SVEPMULL
#define HWCAP2_SVEPMULL (1 << 3)
#endif
#ifndef HWCAP2_SVEBITPERM
#define HWCAP2_SVEBITPERM (1 << 4)
#endif
#ifndef HWCAP2_SVESHA3
#define HWCAP2_SVESHA3 (1 << 5)
#endif
#ifndef HWCAP2_SVESM4
#define HWCAP2_SVESM4 (1 << 6)
#endif
#ifndef HWCAP2_FLAGM2
#define HWCAP2_FLAGM2 (1 << 7)
#endif
#ifndef HWCAP2_FRINT
#define HWCAP2_FRINT (1 << 8)
#endif
#ifndef HWCAP2_SVEI8MM
#define HWCAP2_SVEI8MM (1 << 9)
#endif
#ifndef HWCAP2_SVEF32MM
#define HWCAP2_SVEF32MM (1 << 10)
#endif
#ifndef HWCAP2_SVEF64MM
#define HWCAP2_SVEF64MM (1 << 11)
#endif
#ifndef HWCAP2_SVEBF16
#define HWCAP2_SVEBF16 (1 << 12)
#endif
#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif
#ifndef HWCAP2_BF16
#define HWCAP2_BF16 (1 << 14)
#endif
#ifndef HWCAP2_DGH
#define HWCAP2_DGH (1 << 15)
#endif
#ifndef HWCAP2_RNG
#define HWCAP2_RNG (1 << 16)
#endif
#ifndef HWCAP2_BTI
#define HWCAP2_BTI (1 << 17)
#endif
#ifndef HWCAP2_MTE
#define HWCAP2_MTE (1 << 18)
#endif
#ifndef HWCAP2_RPRES
#define HWCAP2_RPRES (1 << 21)
#endif
#ifndef HWCAP2_MTE3
#define HWCAP2_MTE3 (1 << 22)
#endif
#ifndef HWCAP2_SME
#define HWCAP2_SME (1 << 23)
#endif
#ifndef HWCAP2_SME_I16I64
#define HWCAP2_SME_I16I64 (1 << 24)
#endif
#ifndef HWCAP2_SME_F64F64
#define HWCAP2_SME_F64F64 (1 << 25)
#endif
#ifndef HWCAP2_WFXT
#define HWCAP2_WFXT (1UL << 31)
#endif
#ifndef HWCAP2_EBF16
#define HWCAP2_EBF16 (1UL << 32)
#endif
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1UL << 33)
#endif
// Detect Exynos 9810 CPU (Android-only; relies on the "ro.arch" system
// property). Expands to a guarded if-statement prefix.
#define IF_EXYNOS9810                                                          \
  char arch[PROP_VALUE_MAX];                                                   \
  if (__system_property_get("ro.arch", arch) > 0 &&                            \
      strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0)
1152 static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
1153 #if defined(__FreeBSD__)
1154 unsigned long hwcap;
1155 int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
1156 __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0;
1157 #elif defined(__Fuchsia__)
1158 // This ensures the vDSO is a direct link-time dependency of anything that
1159 // needs this initializer code.
1160 #pragma comment(lib, "zircon")
1161 uint32_t features;
1162 zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
1163 __aarch64_have_lse_atomics =
1164 status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
1165 #else
1166 unsigned long hwcap = getauxval(AT_HWCAP);
1167 _Bool result = (hwcap & HWCAP_ATOMICS) != 0;
1168 #if defined(__ANDROID__)
1169 if (result) {
1170 // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
1171 // only the former support LSE atomics. However, the kernel in the
1172 // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
1173 // reported the feature as being supported.
1175 // The kernel appears to have been corrected to mark it unsupported as of
1176 // the Android 9.0 release on those devices, and this issue has not been
1177 // observed anywhere else. Thus, this workaround may be removed if
1178 // compiler-rt ever drops support for Android 8.0.
1179 IF_EXYNOS9810 result = false;
1181 #endif // defined(__ANDROID__)
1182 __aarch64_have_lse_atomics = result;
1183 #endif // defined(__FreeBSD__)
1186 #if !defined(DISABLE_AARCH64_FMV)
1187 // CPUFeatures must correspond to the same AArch64 features in
1188 // AArch64TargetParser.h
1189 enum CPUFeatures {
1190 FEAT_RNG,
1191 FEAT_FLAGM,
1192 FEAT_FLAGM2,
1193 FEAT_FP16FML,
1194 FEAT_DOTPROD,
1195 FEAT_SM4,
1196 FEAT_RDM,
1197 FEAT_LSE,
1198 FEAT_FP,
1199 FEAT_SIMD,
1200 FEAT_CRC,
1201 FEAT_SHA1,
1202 FEAT_SHA2,
1203 FEAT_SHA3,
1204 FEAT_AES,
1205 FEAT_PMULL,
1206 FEAT_FP16,
1207 FEAT_DIT,
1208 FEAT_DPB,
1209 FEAT_DPB2,
1210 FEAT_JSCVT,
1211 FEAT_FCMA,
1212 FEAT_RCPC,
1213 FEAT_RCPC2,
1214 FEAT_FRINTTS,
1215 FEAT_DGH,
1216 FEAT_I8MM,
1217 FEAT_BF16,
1218 FEAT_EBF16,
1219 FEAT_RPRES,
1220 FEAT_SVE,
1221 FEAT_SVE_BF16,
1222 FEAT_SVE_EBF16,
1223 FEAT_SVE_I8MM,
1224 FEAT_SVE_F32MM,
1225 FEAT_SVE_F64MM,
1226 FEAT_SVE2,
1227 FEAT_SVE_AES,
1228 FEAT_SVE_PMULL128,
1229 FEAT_SVE_BITPERM,
1230 FEAT_SVE_SHA3,
1231 FEAT_SVE_SM4,
1232 FEAT_SME,
1233 FEAT_MEMTAG,
1234 FEAT_MEMTAG2,
1235 FEAT_MEMTAG3,
1236 FEAT_SB,
1237 FEAT_PREDRES,
1238 FEAT_SSBS,
1239 FEAT_SSBS2,
1240 FEAT_BTI,
1241 FEAT_LS64,
1242 FEAT_LS64_V,
1243 FEAT_LS64_ACCDATA,
1244 FEAT_WFXT,
1245 FEAT_SME_F64,
1246 FEAT_SME_I64,
1247 FEAT_SME2,
1248 FEAT_RCPC3,
1249 FEAT_MAX,
1250 FEAT_EXT = 62, // Reserved to indicate presence of additional features field
1251 // in __aarch64_cpu_features
1252 FEAT_INIT // Used as flag of features initialization completion
1255 // Architecture features used
1256 // in Function Multi Versioning
1257 struct {
1258 unsigned long long features;
1259 // As features grows new fields could be added
1260 } __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
1262 static void __init_cpu_features_constructor(unsigned long hwcap,
1263 const __ifunc_arg_t *arg) {
1264 #define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
1265 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
1266 #define extractBits(val, start, number) \
1267 (val & ((1ULL << number) - 1ULL) << start) >> start
1268 unsigned long hwcap2 = 0;
1269 if (hwcap & _IFUNC_ARG_HWCAP)
1270 hwcap2 = arg->_hwcap2;
1271 if (hwcap & HWCAP_CRC32)
1272 setCPUFeature(FEAT_CRC);
1273 if (hwcap & HWCAP_PMULL)
1274 setCPUFeature(FEAT_PMULL);
1275 if (hwcap & HWCAP_FLAGM)
1276 setCPUFeature(FEAT_FLAGM);
1277 if (hwcap2 & HWCAP2_FLAGM2) {
1278 setCPUFeature(FEAT_FLAGM);
1279 setCPUFeature(FEAT_FLAGM2);
1281 if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
1282 setCPUFeature(FEAT_SM4);
1283 if (hwcap & HWCAP_ASIMDDP)
1284 setCPUFeature(FEAT_DOTPROD);
1285 if (hwcap & HWCAP_ASIMDFHM)
1286 setCPUFeature(FEAT_FP16FML);
1287 if (hwcap & HWCAP_FPHP) {
1288 setCPUFeature(FEAT_FP16);
1289 setCPUFeature(FEAT_FP);
1291 if (hwcap & HWCAP_DIT)
1292 setCPUFeature(FEAT_DIT);
1293 if (hwcap & HWCAP_ASIMDRDM)
1294 setCPUFeature(FEAT_RDM);
1295 if (hwcap & HWCAP_ILRCPC)
1296 setCPUFeature(FEAT_RCPC2);
1297 if (hwcap & HWCAP_AES)
1298 setCPUFeature(FEAT_AES);
1299 if (hwcap & HWCAP_SHA1)
1300 setCPUFeature(FEAT_SHA1);
1301 if (hwcap & HWCAP_SHA2)
1302 setCPUFeature(FEAT_SHA2);
1303 if (hwcap & HWCAP_JSCVT)
1304 setCPUFeature(FEAT_JSCVT);
1305 if (hwcap & HWCAP_FCMA)
1306 setCPUFeature(FEAT_FCMA);
1307 if (hwcap & HWCAP_SB)
1308 setCPUFeature(FEAT_SB);
1309 if (hwcap & HWCAP_SSBS)
1310 setCPUFeature(FEAT_SSBS2);
1311 if (hwcap2 & HWCAP2_MTE) {
1312 setCPUFeature(FEAT_MEMTAG);
1313 setCPUFeature(FEAT_MEMTAG2);
1315 if (hwcap2 & HWCAP2_MTE3) {
1316 setCPUFeature(FEAT_MEMTAG);
1317 setCPUFeature(FEAT_MEMTAG2);
1318 setCPUFeature(FEAT_MEMTAG3);
1320 if (hwcap2 & HWCAP2_SVEAES)
1321 setCPUFeature(FEAT_SVE_AES);
1322 if (hwcap2 & HWCAP2_SVEPMULL) {
1323 setCPUFeature(FEAT_SVE_AES);
1324 setCPUFeature(FEAT_SVE_PMULL128);
1326 if (hwcap2 & HWCAP2_SVEBITPERM)
1327 setCPUFeature(FEAT_SVE_BITPERM);
1328 if (hwcap2 & HWCAP2_SVESHA3)
1329 setCPUFeature(FEAT_SVE_SHA3);
1330 if (hwcap2 & HWCAP2_SVESM4)
1331 setCPUFeature(FEAT_SVE_SM4);
1332 if (hwcap2 & HWCAP2_DCPODP)
1333 setCPUFeature(FEAT_DPB2);
1334 if (hwcap & HWCAP_ATOMICS)
1335 setCPUFeature(FEAT_LSE);
1336 if (hwcap2 & HWCAP2_RNG)
1337 setCPUFeature(FEAT_RNG);
1338 if (hwcap2 & HWCAP2_I8MM)
1339 setCPUFeature(FEAT_I8MM);
1340 if (hwcap2 & HWCAP2_EBF16)
1341 setCPUFeature(FEAT_EBF16);
1342 if (hwcap2 & HWCAP2_SVE_EBF16)
1343 setCPUFeature(FEAT_SVE_EBF16);
1344 if (hwcap2 & HWCAP2_DGH)
1345 setCPUFeature(FEAT_DGH);
1346 if (hwcap2 & HWCAP2_FRINT)
1347 setCPUFeature(FEAT_FRINTTS);
1348 if (hwcap2 & HWCAP2_SVEI8MM)
1349 setCPUFeature(FEAT_SVE_I8MM);
1350 if (hwcap2 & HWCAP2_SVEF32MM)
1351 setCPUFeature(FEAT_SVE_F32MM);
1352 if (hwcap2 & HWCAP2_SVEF64MM)
1353 setCPUFeature(FEAT_SVE_F64MM);
1354 if (hwcap2 & HWCAP2_BTI)
1355 setCPUFeature(FEAT_BTI);
1356 if (hwcap2 & HWCAP2_RPRES)
1357 setCPUFeature(FEAT_RPRES);
1358 if (hwcap2 & HWCAP2_WFXT)
1359 setCPUFeature(FEAT_WFXT);
1360 if (hwcap2 & HWCAP2_SME)
1361 setCPUFeature(FEAT_SME);
1362 if (hwcap2 & HWCAP2_SME_I16I64)
1363 setCPUFeature(FEAT_SME_I64);
1364 if (hwcap2 & HWCAP2_SME_F64F64)
1365 setCPUFeature(FEAT_SME_F64);
1366 if (hwcap & HWCAP_CPUID) {
1367 unsigned long ftr;
1368 getCPUFeature(ID_AA64PFR1_EL1, ftr);
1369 // ID_AA64PFR1_EL1.MTE >= 0b0001
1370 if (extractBits(ftr, 8, 4) >= 0x1)
1371 setCPUFeature(FEAT_MEMTAG);
1372 // ID_AA64PFR1_EL1.SSBS == 0b0001
1373 if (extractBits(ftr, 4, 4) == 0x1)
1374 setCPUFeature(FEAT_SSBS);
1375 // ID_AA64PFR1_EL1.SME == 0b0010
1376 if (extractBits(ftr, 24, 4) == 0x2)
1377 setCPUFeature(FEAT_SME2);
1378 getCPUFeature(ID_AA64PFR0_EL1, ftr);
1379 // ID_AA64PFR0_EL1.FP != 0b1111
1380 if (extractBits(ftr, 16, 4) != 0xF) {
1381 setCPUFeature(FEAT_FP);
1382 // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
1383 setCPUFeature(FEAT_SIMD);
1385 // ID_AA64PFR0_EL1.SVE != 0b0000
1386 if (extractBits(ftr, 32, 4) != 0x0) {
1387 // get ID_AA64ZFR0_EL1, that name supported
1388 // if sve enabled only
1389 getCPUFeature(S3_0_C0_C4_4, ftr);
1390 // ID_AA64ZFR0_EL1.SVEver == 0b0000
1391 if (extractBits(ftr, 0, 4) == 0x0)
1392 setCPUFeature(FEAT_SVE);
1393 // ID_AA64ZFR0_EL1.SVEver == 0b0001
1394 if (extractBits(ftr, 0, 4) == 0x1)
1395 setCPUFeature(FEAT_SVE2);
1396 // ID_AA64ZFR0_EL1.BF16 != 0b0000
1397 if (extractBits(ftr, 20, 4) != 0x0)
1398 setCPUFeature(FEAT_SVE_BF16);
1400 getCPUFeature(ID_AA64ISAR0_EL1, ftr);
1401 // ID_AA64ISAR0_EL1.SHA3 != 0b0000
1402 if (extractBits(ftr, 32, 4) != 0x0)
1403 setCPUFeature(FEAT_SHA3);
1404 getCPUFeature(ID_AA64ISAR1_EL1, ftr);
1405 // ID_AA64ISAR1_EL1.DPB >= 0b0001
1406 if (extractBits(ftr, 0, 4) >= 0x1)
1407 setCPUFeature(FEAT_DPB);
1408 // ID_AA64ISAR1_EL1.LRCPC != 0b0000
1409 if (extractBits(ftr, 20, 4) != 0x0)
1410 setCPUFeature(FEAT_RCPC);
1411 // ID_AA64ISAR1_EL1.LRCPC == 0b0011
1412 if (extractBits(ftr, 20, 4) == 0x3)
1413 setCPUFeature(FEAT_RCPC3);
1414 // ID_AA64ISAR1_EL1.SPECRES == 0b0001
1415 if (extractBits(ftr, 40, 4) == 0x2)
1416 setCPUFeature(FEAT_PREDRES);
1417 // ID_AA64ISAR1_EL1.BF16 != 0b0000
1418 if (extractBits(ftr, 44, 4) != 0x0)
1419 setCPUFeature(FEAT_BF16);
1420 // ID_AA64ISAR1_EL1.LS64 >= 0b0001
1421 if (extractBits(ftr, 60, 4) >= 0x1)
1422 setCPUFeature(FEAT_LS64);
1423 // ID_AA64ISAR1_EL1.LS64 >= 0b0010
1424 if (extractBits(ftr, 60, 4) >= 0x2)
1425 setCPUFeature(FEAT_LS64_V);
1426 // ID_AA64ISAR1_EL1.LS64 >= 0b0011
1427 if (extractBits(ftr, 60, 4) >= 0x3)
1428 setCPUFeature(FEAT_LS64_ACCDATA);
1429 } else {
1430 // Set some features in case of no CPUID support
1431 if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
1432 setCPUFeature(FEAT_FP);
1433 // FP and AdvSIMD fields have the same value
1434 setCPUFeature(FEAT_SIMD);
1436 if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
1437 setCPUFeature(FEAT_DPB);
1438 if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
1439 setCPUFeature(FEAT_RCPC);
1440 if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
1441 setCPUFeature(FEAT_BF16);
1442 if (hwcap2 & HWCAP2_SVEBF16)
1443 setCPUFeature(FEAT_SVE_BF16);
1444 if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
1445 setCPUFeature(FEAT_SVE2);
1446 if (hwcap & HWCAP_SHA3)
1447 setCPUFeature(FEAT_SHA3);
1449 setCPUFeature(FEAT_INIT);
1452 void __init_cpu_features_resolver(unsigned long hwcap,
1453 const __ifunc_arg_t *arg) {
1454 if (__aarch64_cpu_features.features)
1455 return;
1456 #if defined(__ANDROID__)
1457 // ifunc resolvers don't have hwcaps in arguments on Android API lower
1458 // than 30. If so, set feature detection done and keep all CPU features
1459 // unsupported (zeros). To detect this case in runtime we check existence
1460 // of memfd_create function from Standard C library which was introduced in
1461 // Android API 30.
1462 int memfd_create(const char *, unsigned int) __attribute__((weak));
1463 if (!memfd_create)
1464 return;
1465 #endif // defined(__ANDROID__)
1466 __init_cpu_features_constructor(hwcap, arg);
1469 void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
1470 unsigned long hwcap;
1471 unsigned long hwcap2;
1472 // CPU features already initialized.
1473 if (__aarch64_cpu_features.features)
1474 return;
1475 #if defined(__FreeBSD__)
1476 int res = 0;
1477 res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
1478 res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
1479 if (res)
1480 return;
1481 #else
1482 #if defined(__ANDROID__)
1483 // Don't set any CPU features,
1484 // detection could be wrong on Exynos 9810.
1485 IF_EXYNOS9810 return;
1486 #endif // defined(__ANDROID__)
1487 hwcap = getauxval(AT_HWCAP);
1488 hwcap2 = getauxval(AT_HWCAP2);
1489 #endif // defined(__FreeBSD__)
1490 __ifunc_arg_t arg;
1491 arg._size = sizeof(__ifunc_arg_t);
1492 arg._hwcap = hwcap;
1493 arg._hwcap2 = hwcap2;
1494 __init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
1495 #undef extractBits
1496 #undef getCPUFeature
1497 #undef setCPUFeature
1498 #undef IF_EXYNOS9810
1500 #endif // !defined(DISABLE_AARCH64_FMV)
1501 #endif // defined(__has_include)
1502 #endif // __has_include(<sys/auxv.h>)
1503 #endif // __has_include(<asm/hwcap.h>)
1504 #endif // defined(__aarch64__)