Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / lib / TargetParser / Host.cpp
blobaba2ebf3175807348825f220457934683f963576
1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the operating system Host detection.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/TargetParser/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Config/llvm-config.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/TargetParser/Triple.h"
22 #include "llvm/TargetParser/X86TargetParser.h"
23 #include <string.h>
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
54 #define DEBUG_TYPE "host-detection"
56 //===----------------------------------------------------------------------===//
58 // Implementations of the CPU detection routines
60 //===----------------------------------------------------------------------===//
62 using namespace llvm;
64 static std::unique_ptr<llvm::MemoryBuffer>
65 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68 if (std::error_code EC = Text.getError()) {
69 llvm::errs() << "Can't read "
70 << "/proc/cpuinfo: " << EC.message() << "\n";
71 return nullptr;
73 return std::move(*Text);
76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77 // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78 // and so we must use an operating-system interface to determine the current
79 // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80 const char *generic = "generic";
82 // The cpu line is second (after the 'processor: 0' line), so if this
83 // buffer is too small then something has changed (or is wrong).
84 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
87 StringRef::const_iterator CIP = CPUInfoStart;
89 StringRef::const_iterator CPUStart = nullptr;
90 size_t CPULen = 0;
92 // We need to find the first line which starts with cpu, spaces, and a colon.
93 // After the colon, there may be some additional spaces and then the cpu type.
94 while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95 if (CIP < CPUInfoEnd && *CIP == '\n')
96 ++CIP;
98 if (CIP < CPUInfoEnd && *CIP == 'c') {
99 ++CIP;
100 if (CIP < CPUInfoEnd && *CIP == 'p') {
101 ++CIP;
102 if (CIP < CPUInfoEnd && *CIP == 'u') {
103 ++CIP;
104 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105 ++CIP;
107 if (CIP < CPUInfoEnd && *CIP == ':') {
108 ++CIP;
109 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110 ++CIP;
112 if (CIP < CPUInfoEnd) {
113 CPUStart = CIP;
114 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115 *CIP != ',' && *CIP != '\n'))
116 ++CIP;
117 CPULen = CIP - CPUStart;
124 if (CPUStart == nullptr)
125 while (CIP < CPUInfoEnd && *CIP != '\n')
126 ++CIP;
129 if (CPUStart == nullptr)
130 return generic;
132 return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133 .Case("604e", "604e")
134 .Case("604", "604")
135 .Case("7400", "7400")
136 .Case("7410", "7400")
137 .Case("7447", "7400")
138 .Case("7455", "7450")
139 .Case("G4", "g4")
140 .Case("POWER4", "970")
141 .Case("PPC970FX", "970")
142 .Case("PPC970MP", "970")
143 .Case("G5", "g5")
144 .Case("POWER5", "g5")
145 .Case("A2", "a2")
146 .Case("POWER6", "pwr6")
147 .Case("POWER7", "pwr7")
148 .Case("POWER8", "pwr8")
149 .Case("POWER8E", "pwr8")
150 .Case("POWER8NVL", "pwr8")
151 .Case("POWER9", "pwr9")
152 .Case("POWER10", "pwr10")
153 // FIXME: If we get a simulator or machine with the capabilities of
154 // mcpu=future, we should revisit this and add the name reported by the
155 // simulator/machine.
156 .Default(generic);
159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160 // The cpuid register on arm is not accessible from user space. On Linux,
161 // it is exposed through the /proc/cpuinfo file.
163 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164 // in all cases.
165 SmallVector<StringRef, 32> Lines;
166 ProcCpuinfoContent.split(Lines, "\n");
168 // Look for the CPU implementer line.
169 StringRef Implementer;
170 StringRef Hardware;
171 StringRef Part;
172 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173 if (Lines[I].startswith("CPU implementer"))
174 Implementer = Lines[I].substr(15).ltrim("\t :");
175 if (Lines[I].startswith("Hardware"))
176 Hardware = Lines[I].substr(8).ltrim("\t :");
177 if (Lines[I].startswith("CPU part"))
178 Part = Lines[I].substr(8).ltrim("\t :");
181 if (Implementer == "0x41") { // ARM Ltd.
182 // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183 // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
185 return "cortex-a53";
188 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189 // values correspond to the "Part number" in the CP15/c0 register. The
190 // contents are specified in the various processor manuals.
191 // This corresponds to the Main ID Register in Technical Reference Manuals.
192 // and is used in programs like sys-utils
193 return StringSwitch<const char *>(Part)
194 .Case("0x926", "arm926ej-s")
195 .Case("0xb02", "mpcore")
196 .Case("0xb36", "arm1136j-s")
197 .Case("0xb56", "arm1156t2-s")
198 .Case("0xb76", "arm1176jz-s")
199 .Case("0xc08", "cortex-a8")
200 .Case("0xc09", "cortex-a9")
201 .Case("0xc0f", "cortex-a15")
202 .Case("0xc20", "cortex-m0")
203 .Case("0xc23", "cortex-m3")
204 .Case("0xc24", "cortex-m4")
205 .Case("0xd22", "cortex-m55")
206 .Case("0xd02", "cortex-a34")
207 .Case("0xd04", "cortex-a35")
208 .Case("0xd03", "cortex-a53")
209 .Case("0xd05", "cortex-a55")
210 .Case("0xd46", "cortex-a510")
211 .Case("0xd07", "cortex-a57")
212 .Case("0xd08", "cortex-a72")
213 .Case("0xd09", "cortex-a73")
214 .Case("0xd0a", "cortex-a75")
215 .Case("0xd0b", "cortex-a76")
216 .Case("0xd0d", "cortex-a77")
217 .Case("0xd41", "cortex-a78")
218 .Case("0xd47", "cortex-a710")
219 .Case("0xd4d", "cortex-a715")
220 .Case("0xd44", "cortex-x1")
221 .Case("0xd4c", "cortex-x1c")
222 .Case("0xd48", "cortex-x2")
223 .Case("0xd4e", "cortex-x3")
224 .Case("0xd0c", "neoverse-n1")
225 .Case("0xd49", "neoverse-n2")
226 .Case("0xd40", "neoverse-v1")
227 .Case("0xd4f", "neoverse-v2")
228 .Default("generic");
231 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
232 return StringSwitch<const char *>(Part)
233 .Case("0x516", "thunderx2t99")
234 .Case("0x0516", "thunderx2t99")
235 .Case("0xaf", "thunderx2t99")
236 .Case("0x0af", "thunderx2t99")
237 .Case("0xa1", "thunderxt88")
238 .Case("0x0a1", "thunderxt88")
239 .Default("generic");
242 if (Implementer == "0x46") { // Fujitsu Ltd.
243 return StringSwitch<const char *>(Part)
244 .Case("0x001", "a64fx")
245 .Default("generic");
248 if (Implementer == "0x4e") { // NVIDIA Corporation
249 return StringSwitch<const char *>(Part)
250 .Case("0x004", "carmel")
251 .Default("generic");
254 if (Implementer == "0x48") // HiSilicon Technologies, Inc.
255 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
256 // values correspond to the "Part number" in the CP15/c0 register. The
257 // contents are specified in the various processor manuals.
258 return StringSwitch<const char *>(Part)
259 .Case("0xd01", "tsv110")
260 .Default("generic");
262 if (Implementer == "0x51") // Qualcomm Technologies, Inc.
263 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
264 // values correspond to the "Part number" in the CP15/c0 register. The
265 // contents are specified in the various processor manuals.
266 return StringSwitch<const char *>(Part)
267 .Case("0x06f", "krait") // APQ8064
268 .Case("0x201", "kryo")
269 .Case("0x205", "kryo")
270 .Case("0x211", "kryo")
271 .Case("0x800", "cortex-a73") // Kryo 2xx Gold
272 .Case("0x801", "cortex-a73") // Kryo 2xx Silver
273 .Case("0x802", "cortex-a75") // Kryo 3xx Gold
274 .Case("0x803", "cortex-a75") // Kryo 3xx Silver
275 .Case("0x804", "cortex-a76") // Kryo 4xx Gold
276 .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
277 .Case("0xc00", "falkor")
278 .Case("0xc01", "saphira")
279 .Default("generic");
280 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
281 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
282 // any predictive pattern across variants and parts.
283 unsigned Variant = 0, Part = 0;
285 // Look for the CPU variant line, whose value is a 1 digit hexadecimal
286 // number, corresponding to the Variant bits in the CP15/C0 register.
287 for (auto I : Lines)
288 if (I.consume_front("CPU variant"))
289 I.ltrim("\t :").getAsInteger(0, Variant);
291 // Look for the CPU part line, whose value is a 3 digit hexadecimal
292 // number, corresponding to the PartNum bits in the CP15/C0 register.
293 for (auto I : Lines)
294 if (I.consume_front("CPU part"))
295 I.ltrim("\t :").getAsInteger(0, Part);
297 unsigned Exynos = (Variant << 12) | Part;
298 switch (Exynos) {
299 default:
300 // Default by falling through to Exynos M3.
301 [[fallthrough]];
302 case 0x1002:
303 return "exynos-m3";
304 case 0x1003:
305 return "exynos-m4";
309 if (Implementer == "0xc0") { // Ampere Computing
310 return StringSwitch<const char *>(Part)
311 .Case("0xac3", "ampere1")
312 .Case("0xac4", "ampere1a")
313 .Default("generic");
316 return "generic";
319 namespace {
320 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
321 switch (Id) {
322 case 2064: // z900 not supported by LLVM
323 case 2066:
324 case 2084: // z990 not supported by LLVM
325 case 2086:
326 case 2094: // z9-109 not supported by LLVM
327 case 2096:
328 return "generic";
329 case 2097:
330 case 2098:
331 return "z10";
332 case 2817:
333 case 2818:
334 return "z196";
335 case 2827:
336 case 2828:
337 return "zEC12";
338 case 2964:
339 case 2965:
340 return HaveVectorSupport? "z13" : "zEC12";
341 case 3906:
342 case 3907:
343 return HaveVectorSupport? "z14" : "zEC12";
344 case 8561:
345 case 8562:
346 return HaveVectorSupport? "z15" : "zEC12";
347 case 3931:
348 case 3932:
349 default:
350 return HaveVectorSupport? "z16" : "zEC12";
353 } // end anonymous namespace
355 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
356 // STIDP is a privileged operation, so use /proc/cpuinfo instead.
358 // The "processor 0:" line comes after a fair amount of other information,
359 // including a cache breakdown, but this should be plenty.
360 SmallVector<StringRef, 32> Lines;
361 ProcCpuinfoContent.split(Lines, "\n");
363 // Look for the CPU features.
364 SmallVector<StringRef, 32> CPUFeatures;
365 for (unsigned I = 0, E = Lines.size(); I != E; ++I)
366 if (Lines[I].startswith("features")) {
367 size_t Pos = Lines[I].find(':');
368 if (Pos != StringRef::npos) {
369 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
370 break;
374 // We need to check for the presence of vector support independently of
375 // the machine type, since we may only use the vector register set when
376 // supported by the kernel (and hypervisor).
377 bool HaveVectorSupport = false;
378 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
379 if (CPUFeatures[I] == "vx")
380 HaveVectorSupport = true;
383 // Now check the processor machine type.
384 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
385 if (Lines[I].startswith("processor ")) {
386 size_t Pos = Lines[I].find("machine = ");
387 if (Pos != StringRef::npos) {
388 Pos += sizeof("machine = ") - 1;
389 unsigned int Id;
390 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
391 return getCPUNameFromS390Model(Id, HaveVectorSupport);
393 break;
397 return "generic";
400 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
401 // There are 24 lines in /proc/cpuinfo
402 SmallVector<StringRef> Lines;
403 ProcCpuinfoContent.split(Lines, "\n");
405 // Look for uarch line to determine cpu name
406 StringRef UArch;
407 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
408 if (Lines[I].startswith("uarch")) {
409 UArch = Lines[I].substr(5).ltrim("\t :");
410 break;
414 return StringSwitch<const char *>(UArch)
415 .Case("sifive,u74-mc", "sifive-u74")
416 .Case("sifive,bullet0", "sifive-u74")
417 .Default("generic");
420 StringRef sys::detail::getHostCPUNameForBPF() {
421 #if !defined(__linux__) || !defined(__x86_64__)
422 return "generic";
423 #else
424 uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
425 /* BPF_MOV64_IMM(BPF_REG_0, 0) */
426 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
427 /* BPF_MOV64_IMM(BPF_REG_2, 1) */
428 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
429 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
430 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
431 /* BPF_MOV64_IMM(BPF_REG_0, 1) */
432 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
433 /* BPF_EXIT_INSN() */
434 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
436 uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
437 /* BPF_MOV64_IMM(BPF_REG_0, 0) */
438 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
439 /* BPF_MOV64_IMM(BPF_REG_2, 1) */
440 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
441 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
442 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
443 /* BPF_MOV64_IMM(BPF_REG_0, 1) */
444 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
445 /* BPF_EXIT_INSN() */
446 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
448 struct bpf_prog_load_attr {
449 uint32_t prog_type;
450 uint32_t insn_cnt;
451 uint64_t insns;
452 uint64_t license;
453 uint32_t log_level;
454 uint32_t log_size;
455 uint64_t log_buf;
456 uint32_t kern_version;
457 uint32_t prog_flags;
458 } attr = {};
459 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
460 attr.insn_cnt = 5;
461 attr.insns = (uint64_t)v3_insns;
462 attr.license = (uint64_t)"DUMMY";
464 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
465 sizeof(attr));
466 if (fd >= 0) {
467 close(fd);
468 return "v3";
471 /* Clear the whole attr in case its content changed by syscall. */
472 memset(&attr, 0, sizeof(attr));
473 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
474 attr.insn_cnt = 5;
475 attr.insns = (uint64_t)v2_insns;
476 attr.license = (uint64_t)"DUMMY";
477 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
478 if (fd >= 0) {
479 close(fd);
480 return "v2";
482 return "v1";
483 #endif
486 #if defined(__i386__) || defined(_M_IX86) || \
487 defined(__x86_64__) || defined(_M_X64)
489 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
490 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
491 // support. Consequently, for i386, the presence of CPUID is checked first
492 // via the corresponding eflags bit.
493 // Removal of cpuid.h header motivated by PR30384
494 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
495 // or test-suite, but are used in external projects e.g. libstdcxx
/// Report whether the CPUID instruction can be executed.
///
/// Only 32-bit x86 builds with a GNU-compatible compiler perform a real
/// check: they try to toggle bit 21 (0x00200000) of EFLAGS and see whether
/// the change sticks. Every other configuration reports true.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int HasCpuId;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(HasCpuId)
          :
          : "eax", "ecx");
  return HasCpuId != 0;
#else
  return true;
#endif
}
/// Run CPUID for leaf \p value and store the resulting EAX/EBX/ECX/EDX in the
/// four output arguments.
///
/// \returns false when the instruction was executed, true when cpuid cannot
/// be run in this build configuration (the outputs are then left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
562 namespace llvm {
563 namespace sys {
564 namespace detail {
565 namespace x86 {
567 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
568 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
569 if (MaxLeaf == nullptr)
570 MaxLeaf = &EAX;
571 else
572 *MaxLeaf = 0;
574 if (!isCpuIdSupported())
575 return VendorSignatures::UNKNOWN;
577 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
578 return VendorSignatures::UNKNOWN;
580 // "Genu ineI ntel"
581 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
582 return VendorSignatures::GENUINE_INTEL;
584 // "Auth enti cAMD"
585 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
586 return VendorSignatures::AUTHENTIC_AMD;
588 return VendorSignatures::UNKNOWN;
591 } // namespace x86
592 } // namespace detail
593 } // namespace sys
594 } // namespace llvm
596 using namespace llvm::sys::detail::x86;
/// Run CPUID for leaf \p value and sub-leaf \p subleaf and store the resulting
/// EAX/EBX/ECX/EDX in the four output arguments.
///
/// \returns false when the instruction was executed, true when cpuid cannot
/// be run in this build configuration (the outputs are then left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
/// Read extended control register 0 (XCR0), which is used to detect features
/// such as AVX.
///
/// \param rEAX receives the low 32 bits of XCR0.
/// \param rEDX receives the high 32 bits of XCR0.
/// \returns false when the register was read, true when this build
/// configuration has no way to read it.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic because older
  // assemblers do not support the instruction and there is no easy way to
  // compile conditionally on the assembler in use.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
/// Decode the display family and model from the EAX value of CPUID leaf 1.
///
/// \param EAX the raw EAX value.
/// \param Family receives bits 8-11, plus the extended family (bits 20-27)
/// when the base family is 0xF.
/// \param Model receives bits 4-7, with the extended model (bits 16-19) as
/// the high nibble when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  unsigned DisplayFamily = BaseFamily;
  unsigned DisplayModel = BaseModel;

  // The extended family only applies when the family ID is F.
  if (BaseFamily == 0xf)
    DisplayFamily += ExtFamily;
  // The extended model applies when the family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DisplayModel += ExtModel << 4;

  *Family = DisplayFamily;
  *Model = DisplayModel;
}
668 static StringRef
669 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
670 const unsigned *Features,
671 unsigned *Type, unsigned *Subtype) {
672 auto testFeature = [&](unsigned F) {
673 return (Features[F / 32] & (1U << (F % 32))) != 0;
676 StringRef CPU;
678 switch (Family) {
679 case 3:
680 CPU = "i386";
681 break;
682 case 4:
683 CPU = "i486";
684 break;
685 case 5:
686 if (testFeature(X86::FEATURE_MMX)) {
687 CPU = "pentium-mmx";
688 break;
690 CPU = "pentium";
691 break;
692 case 6:
693 switch (Model) {
694 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
695 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
696 // mobile processor, Intel Core 2 Extreme processor, Intel
697 // Pentium Dual-Core processor, Intel Xeon processor, model
698 // 0Fh. All processors are manufactured using the 65 nm process.
699 case 0x16: // Intel Celeron processor model 16h. All processors are
700 // manufactured using the 65 nm process
701 CPU = "core2";
702 *Type = X86::INTEL_CORE2;
703 break;
704 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
705 // 17h. All processors are manufactured using the 45 nm process.
707 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
708 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
709 // the 45 nm process.
710 CPU = "penryn";
711 *Type = X86::INTEL_CORE2;
712 break;
713 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
714 // processors are manufactured using the 45 nm process.
715 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
716 // As found in a Summer 2010 model iMac.
717 case 0x1f:
718 case 0x2e: // Nehalem EX
719 CPU = "nehalem";
720 *Type = X86::INTEL_COREI7;
721 *Subtype = X86::INTEL_COREI7_NEHALEM;
722 break;
723 case 0x25: // Intel Core i7, laptop version.
724 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
725 // processors are manufactured using the 32 nm process.
726 case 0x2f: // Westmere EX
727 CPU = "westmere";
728 *Type = X86::INTEL_COREI7;
729 *Subtype = X86::INTEL_COREI7_WESTMERE;
730 break;
731 case 0x2a: // Intel Core i7 processor. All processors are manufactured
732 // using the 32 nm process.
733 case 0x2d:
734 CPU = "sandybridge";
735 *Type = X86::INTEL_COREI7;
736 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
737 break;
738 case 0x3a:
739 case 0x3e: // Ivy Bridge EP
740 CPU = "ivybridge";
741 *Type = X86::INTEL_COREI7;
742 *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
743 break;
745 // Haswell:
746 case 0x3c:
747 case 0x3f:
748 case 0x45:
749 case 0x46:
750 CPU = "haswell";
751 *Type = X86::INTEL_COREI7;
752 *Subtype = X86::INTEL_COREI7_HASWELL;
753 break;
755 // Broadwell:
756 case 0x3d:
757 case 0x47:
758 case 0x4f:
759 case 0x56:
760 CPU = "broadwell";
761 *Type = X86::INTEL_COREI7;
762 *Subtype = X86::INTEL_COREI7_BROADWELL;
763 break;
765 // Skylake:
766 case 0x4e: // Skylake mobile
767 case 0x5e: // Skylake desktop
768 case 0x8e: // Kaby Lake mobile
769 case 0x9e: // Kaby Lake desktop
770 case 0xa5: // Comet Lake-H/S
771 case 0xa6: // Comet Lake-U
772 CPU = "skylake";
773 *Type = X86::INTEL_COREI7;
774 *Subtype = X86::INTEL_COREI7_SKYLAKE;
775 break;
777 // Rocketlake:
778 case 0xa7:
779 CPU = "rocketlake";
780 *Type = X86::INTEL_COREI7;
781 *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
782 break;
784 // Skylake Xeon:
785 case 0x55:
786 *Type = X86::INTEL_COREI7;
787 if (testFeature(X86::FEATURE_AVX512BF16)) {
788 CPU = "cooperlake";
789 *Subtype = X86::INTEL_COREI7_COOPERLAKE;
790 } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
791 CPU = "cascadelake";
792 *Subtype = X86::INTEL_COREI7_CASCADELAKE;
793 } else {
794 CPU = "skylake-avx512";
795 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
797 break;
799 // Cannonlake:
800 case 0x66:
801 CPU = "cannonlake";
802 *Type = X86::INTEL_COREI7;
803 *Subtype = X86::INTEL_COREI7_CANNONLAKE;
804 break;
806 // Icelake:
807 case 0x7d:
808 case 0x7e:
809 CPU = "icelake-client";
810 *Type = X86::INTEL_COREI7;
811 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
812 break;
814 // Tigerlake:
815 case 0x8c:
816 case 0x8d:
817 CPU = "tigerlake";
818 *Type = X86::INTEL_COREI7;
819 *Subtype = X86::INTEL_COREI7_TIGERLAKE;
820 break;
822 // Alderlake:
823 case 0x97:
824 case 0x9a:
825 // Gracemont
826 case 0xbe:
827 // Raptorlake:
828 case 0xb7:
829 case 0xba:
830 case 0xbf:
831 // Meteorlake:
832 case 0xaa:
833 case 0xac:
834 CPU = "alderlake";
835 *Type = X86::INTEL_COREI7;
836 *Subtype = X86::INTEL_COREI7_ALDERLAKE;
837 break;
839 // Arrowlake:
840 case 0xc5:
841 CPU = "arrowlake";
842 *Type = X86::INTEL_COREI7;
843 *Subtype = X86::INTEL_COREI7_ARROWLAKE;
844 break;
846 // Arrowlake S:
847 case 0xc6:
848 // Lunarlake:
849 case 0xbd:
850 CPU = "arrowlake-s";
851 *Type = X86::INTEL_COREI7;
852 *Subtype = X86::INTEL_COREI7_ARROWLAKE_S;
853 break;
855 // Pantherlake:
856 case 0xcc:
857 CPU = "pantherlake";
858 *Type = X86::INTEL_COREI7;
859 *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
860 break;
862 // Graniterapids:
863 case 0xad:
864 CPU = "graniterapids";
865 *Type = X86::INTEL_COREI7;
866 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
867 break;
869 // Granite Rapids D:
870 case 0xae:
871 CPU = "graniterapids-d";
872 *Type = X86::INTEL_COREI7;
873 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
874 break;
876 // Icelake Xeon:
877 case 0x6a:
878 case 0x6c:
879 CPU = "icelake-server";
880 *Type = X86::INTEL_COREI7;
881 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
882 break;
884 // Emerald Rapids:
885 case 0xcf:
886 // Sapphire Rapids:
887 case 0x8f:
888 CPU = "sapphirerapids";
889 *Type = X86::INTEL_COREI7;
890 *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
891 break;
893 case 0x1c: // Most 45 nm Intel Atom processors
894 case 0x26: // 45 nm Atom Lincroft
895 case 0x27: // 32 nm Atom Medfield
896 case 0x35: // 32 nm Atom Midview
897 case 0x36: // 32 nm Atom Midview
898 CPU = "bonnell";
899 *Type = X86::INTEL_BONNELL;
900 break;
902 // Atom Silvermont codes from the Intel software optimization guide.
903 case 0x37:
904 case 0x4a:
905 case 0x4d:
906 case 0x5a:
907 case 0x5d:
908 case 0x4c: // really airmont
909 CPU = "silvermont";
910 *Type = X86::INTEL_SILVERMONT;
911 break;
912 // Goldmont:
913 case 0x5c: // Apollo Lake
914 case 0x5f: // Denverton
915 CPU = "goldmont";
916 *Type = X86::INTEL_GOLDMONT;
917 break;
918 case 0x7a:
919 CPU = "goldmont-plus";
920 *Type = X86::INTEL_GOLDMONT_PLUS;
921 break;
922 case 0x86:
923 case 0x8a: // Lakefield
924 case 0x96: // Elkhart Lake
925 case 0x9c: // Jasper Lake
926 CPU = "tremont";
927 *Type = X86::INTEL_TREMONT;
928 break;
930 // Sierraforest:
931 case 0xaf:
932 CPU = "sierraforest";
933 *Type = X86::INTEL_SIERRAFOREST;
934 break;
936 // Grandridge:
937 case 0xb6:
938 CPU = "grandridge";
939 *Type = X86::INTEL_GRANDRIDGE;
940 break;
942 // Clearwaterforest:
943 case 0xdd:
944 CPU = "clearwaterforest";
945 *Type = X86::INTEL_CLEARWATERFOREST;
946 break;
948 // Xeon Phi (Knights Landing + Knights Mill):
949 case 0x57:
950 CPU = "knl";
951 *Type = X86::INTEL_KNL;
952 break;
953 case 0x85:
954 CPU = "knm";
955 *Type = X86::INTEL_KNM;
956 break;
958 default: // Unknown family 6 CPU, try to guess.
959 // Don't both with Type/Subtype here, they aren't used by the caller.
960 // They're used above to keep the code in sync with compiler-rt.
961 // TODO detect tigerlake host from model
962 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
963 CPU = "tigerlake";
964 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
965 CPU = "icelake-client";
966 } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
967 CPU = "cannonlake";
968 } else if (testFeature(X86::FEATURE_AVX512BF16)) {
969 CPU = "cooperlake";
970 } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
971 CPU = "cascadelake";
972 } else if (testFeature(X86::FEATURE_AVX512VL)) {
973 CPU = "skylake-avx512";
974 } else if (testFeature(X86::FEATURE_AVX512ER)) {
975 CPU = "knl";
976 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
977 if (testFeature(X86::FEATURE_SHA))
978 CPU = "goldmont";
979 else
980 CPU = "skylake";
981 } else if (testFeature(X86::FEATURE_ADX)) {
982 CPU = "broadwell";
983 } else if (testFeature(X86::FEATURE_AVX2)) {
984 CPU = "haswell";
985 } else if (testFeature(X86::FEATURE_AVX)) {
986 CPU = "sandybridge";
987 } else if (testFeature(X86::FEATURE_SSE4_2)) {
988 if (testFeature(X86::FEATURE_MOVBE))
989 CPU = "silvermont";
990 else
991 CPU = "nehalem";
992 } else if (testFeature(X86::FEATURE_SSE4_1)) {
993 CPU = "penryn";
994 } else if (testFeature(X86::FEATURE_SSSE3)) {
995 if (testFeature(X86::FEATURE_MOVBE))
996 CPU = "bonnell";
997 else
998 CPU = "core2";
999 } else if (testFeature(X86::FEATURE_64BIT)) {
1000 CPU = "core2";
1001 } else if (testFeature(X86::FEATURE_SSE3)) {
1002 CPU = "yonah";
1003 } else if (testFeature(X86::FEATURE_SSE2)) {
1004 CPU = "pentium-m";
1005 } else if (testFeature(X86::FEATURE_SSE)) {
1006 CPU = "pentium3";
1007 } else if (testFeature(X86::FEATURE_MMX)) {
1008 CPU = "pentium2";
1009 } else {
1010 CPU = "pentiumpro";
1012 break;
1014 break;
1015 case 15: {
1016 if (testFeature(X86::FEATURE_64BIT)) {
1017 CPU = "nocona";
1018 break;
1020 if (testFeature(X86::FEATURE_SSE3)) {
1021 CPU = "prescott";
1022 break;
1024 CPU = "pentium4";
1025 break;
1027 default:
1028 break; // Unknown.
1031 return CPU;
1034 static StringRef
1035 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
1036 const unsigned *Features,
1037 unsigned *Type, unsigned *Subtype) {
1038 auto testFeature = [&](unsigned F) {
1039 return (Features[F / 32] & (1U << (F % 32))) != 0;
1042 StringRef CPU;
1044 switch (Family) {
1045 case 4:
1046 CPU = "i486";
1047 break;
1048 case 5:
1049 CPU = "pentium";
1050 switch (Model) {
1051 case 6:
1052 case 7:
1053 CPU = "k6";
1054 break;
1055 case 8:
1056 CPU = "k6-2";
1057 break;
1058 case 9:
1059 case 13:
1060 CPU = "k6-3";
1061 break;
1062 case 10:
1063 CPU = "geode";
1064 break;
1066 break;
1067 case 6:
1068 if (testFeature(X86::FEATURE_SSE)) {
1069 CPU = "athlon-xp";
1070 break;
1072 CPU = "athlon";
1073 break;
1074 case 15:
1075 if (testFeature(X86::FEATURE_SSE3)) {
1076 CPU = "k8-sse3";
1077 break;
1079 CPU = "k8";
1080 break;
1081 case 16:
1082 CPU = "amdfam10";
1083 *Type = X86::AMDFAM10H; // "amdfam10"
1084 switch (Model) {
1085 case 2:
1086 *Subtype = X86::AMDFAM10H_BARCELONA;
1087 break;
1088 case 4:
1089 *Subtype = X86::AMDFAM10H_SHANGHAI;
1090 break;
1091 case 8:
1092 *Subtype = X86::AMDFAM10H_ISTANBUL;
1093 break;
1095 break;
1096 case 20:
1097 CPU = "btver1";
1098 *Type = X86::AMD_BTVER1;
1099 break;
1100 case 21:
1101 CPU = "bdver1";
1102 *Type = X86::AMDFAM15H;
1103 if (Model >= 0x60 && Model <= 0x7f) {
1104 CPU = "bdver4";
1105 *Subtype = X86::AMDFAM15H_BDVER4;
1106 break; // 60h-7Fh: Excavator
1108 if (Model >= 0x30 && Model <= 0x3f) {
1109 CPU = "bdver3";
1110 *Subtype = X86::AMDFAM15H_BDVER3;
1111 break; // 30h-3Fh: Steamroller
1113 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1114 CPU = "bdver2";
1115 *Subtype = X86::AMDFAM15H_BDVER2;
1116 break; // 02h, 10h-1Fh: Piledriver
1118 if (Model <= 0x0f) {
1119 *Subtype = X86::AMDFAM15H_BDVER1;
1120 break; // 00h-0Fh: Bulldozer
1122 break;
1123 case 22:
1124 CPU = "btver2";
1125 *Type = X86::AMD_BTVER2;
1126 break;
1127 case 23:
1128 CPU = "znver1";
1129 *Type = X86::AMDFAM17H;
1130 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1131 CPU = "znver2";
1132 *Subtype = X86::AMDFAM17H_ZNVER2;
1133 break; // 30h-3fh, 71h: Zen2
1135 if (Model <= 0x0f) {
1136 *Subtype = X86::AMDFAM17H_ZNVER1;
1137 break; // 00h-0Fh: Zen1
1139 break;
1140 case 25:
1141 CPU = "znver3";
1142 *Type = X86::AMDFAM19H;
1143 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
1144 // Family 19h Models 00h-0Fh - Zen3
1145 // Family 19h Models 20h-2Fh - Zen3
1146 // Family 19h Models 30h-3Fh - Zen3
1147 // Family 19h Models 40h-4Fh - Zen3+
1148 // Family 19h Models 50h-5Fh - Zen3+
1149 *Subtype = X86::AMDFAM19H_ZNVER3;
1150 break;
1152 if ((Model >= 0x10 && Model <= 0x1f) ||
1153 (Model >= 0x60 && Model <= 0x74) ||
1154 (Model >= 0x78 && Model <= 0x7b) ||
1155 (Model >= 0xA0 && Model <= 0xAf)) {
1156 CPU = "znver4";
1157 *Subtype = X86::AMDFAM19H_ZNVER4;
1158 break; // "znver4"
1160 break; // family 19h
1161 default:
1162 break; // Unknown AMD CPU.
1165 return CPU;
1168 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1169 unsigned *Features) {
1170 unsigned EAX, EBX;
1172 auto setFeature = [&](unsigned F) {
1173 Features[F / 32] |= 1U << (F % 32);
1176 if ((EDX >> 15) & 1)
1177 setFeature(X86::FEATURE_CMOV);
1178 if ((EDX >> 23) & 1)
1179 setFeature(X86::FEATURE_MMX);
1180 if ((EDX >> 25) & 1)
1181 setFeature(X86::FEATURE_SSE);
1182 if ((EDX >> 26) & 1)
1183 setFeature(X86::FEATURE_SSE2);
1185 if ((ECX >> 0) & 1)
1186 setFeature(X86::FEATURE_SSE3);
1187 if ((ECX >> 1) & 1)
1188 setFeature(X86::FEATURE_PCLMUL);
1189 if ((ECX >> 9) & 1)
1190 setFeature(X86::FEATURE_SSSE3);
1191 if ((ECX >> 12) & 1)
1192 setFeature(X86::FEATURE_FMA);
1193 if ((ECX >> 19) & 1)
1194 setFeature(X86::FEATURE_SSE4_1);
1195 if ((ECX >> 20) & 1) {
1196 setFeature(X86::FEATURE_SSE4_2);
1197 setFeature(X86::FEATURE_CRC32);
1199 if ((ECX >> 23) & 1)
1200 setFeature(X86::FEATURE_POPCNT);
1201 if ((ECX >> 25) & 1)
1202 setFeature(X86::FEATURE_AES);
1204 if ((ECX >> 22) & 1)
1205 setFeature(X86::FEATURE_MOVBE);
1207 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1208 // indicates that the AVX registers will be saved and restored on context
1209 // switch, then we have full AVX support.
1210 const unsigned AVXBits = (1 << 27) | (1 << 28);
1211 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1212 ((EAX & 0x6) == 0x6);
1213 #if defined(__APPLE__)
1214 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1215 // save the AVX512 context if we use AVX512 instructions, even the bit is not
1216 // set right now.
1217 bool HasAVX512Save = true;
1218 #else
1219 // AVX512 requires additional context to be saved by the OS.
1220 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1221 #endif
1223 if (HasAVX)
1224 setFeature(X86::FEATURE_AVX);
1226 bool HasLeaf7 =
1227 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1229 if (HasLeaf7 && ((EBX >> 3) & 1))
1230 setFeature(X86::FEATURE_BMI);
1231 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1232 setFeature(X86::FEATURE_AVX2);
1233 if (HasLeaf7 && ((EBX >> 8) & 1))
1234 setFeature(X86::FEATURE_BMI2);
1235 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1236 setFeature(X86::FEATURE_AVX512F);
1237 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1238 setFeature(X86::FEATURE_AVX512DQ);
1239 if (HasLeaf7 && ((EBX >> 19) & 1))
1240 setFeature(X86::FEATURE_ADX);
1241 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1242 setFeature(X86::FEATURE_AVX512IFMA);
1243 if (HasLeaf7 && ((EBX >> 23) & 1))
1244 setFeature(X86::FEATURE_CLFLUSHOPT);
1245 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1246 setFeature(X86::FEATURE_AVX512PF);
1247 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1248 setFeature(X86::FEATURE_AVX512ER);
1249 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1250 setFeature(X86::FEATURE_AVX512CD);
1251 if (HasLeaf7 && ((EBX >> 29) & 1))
1252 setFeature(X86::FEATURE_SHA);
1253 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1254 setFeature(X86::FEATURE_AVX512BW);
1255 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1256 setFeature(X86::FEATURE_AVX512VL);
1258 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1259 setFeature(X86::FEATURE_AVX512VBMI);
1260 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1261 setFeature(X86::FEATURE_AVX512VBMI2);
1262 if (HasLeaf7 && ((ECX >> 8) & 1))
1263 setFeature(X86::FEATURE_GFNI);
1264 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1265 setFeature(X86::FEATURE_VPCLMULQDQ);
1266 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1267 setFeature(X86::FEATURE_AVX512VNNI);
1268 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1269 setFeature(X86::FEATURE_AVX512BITALG);
1270 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1271 setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1273 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1274 setFeature(X86::FEATURE_AVX5124VNNIW);
1275 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1276 setFeature(X86::FEATURE_AVX5124FMAPS);
1277 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1278 setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1280 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1281 // return all 0s for invalid subleaves so check the limit.
1282 bool HasLeaf7Subleaf1 =
1283 HasLeaf7 && EAX >= 1 &&
1284 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1285 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1286 setFeature(X86::FEATURE_AVX512BF16);
1288 unsigned MaxExtLevel;
1289 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1291 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1292 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1293 if (HasExtLeaf1 && ((ECX >> 6) & 1))
1294 setFeature(X86::FEATURE_SSE4_A);
1295 if (HasExtLeaf1 && ((ECX >> 11) & 1))
1296 setFeature(X86::FEATURE_XOP);
1297 if (HasExtLeaf1 && ((ECX >> 16) & 1))
1298 setFeature(X86::FEATURE_FMA4);
1300 if (HasExtLeaf1 && ((EDX >> 29) & 1))
1301 setFeature(X86::FEATURE_64BIT);
1304 StringRef sys::getHostCPUName() {
1305 unsigned MaxLeaf = 0;
1306 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1307 if (Vendor == VendorSignatures::UNKNOWN)
1308 return "generic";
1310 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1311 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1313 unsigned Family = 0, Model = 0;
1314 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1315 detectX86FamilyModel(EAX, &Family, &Model);
1316 getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1318 // These aren't consumed in this file, but we try to keep some source code the
1319 // same or similar to compiler-rt.
1320 unsigned Type = 0;
1321 unsigned Subtype = 0;
1323 StringRef CPU;
1325 if (Vendor == VendorSignatures::GENUINE_INTEL) {
1326 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1327 &Subtype);
1328 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1329 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1330 &Subtype);
1333 if (!CPU.empty())
1334 return CPU;
1336 return "generic";
1339 #elif defined(__APPLE__) && defined(__powerpc__)
1340 StringRef sys::getHostCPUName() {
1341 host_basic_info_data_t hostInfo;
1342 mach_msg_type_number_t infoCount;
1344 infoCount = HOST_BASIC_INFO_COUNT;
1345 mach_port_t hostPort = mach_host_self();
1346 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1347 &infoCount);
1348 mach_port_deallocate(mach_task_self(), hostPort);
1350 if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1351 return "generic";
1353 switch (hostInfo.cpu_subtype) {
1354 case CPU_SUBTYPE_POWERPC_601:
1355 return "601";
1356 case CPU_SUBTYPE_POWERPC_602:
1357 return "602";
1358 case CPU_SUBTYPE_POWERPC_603:
1359 return "603";
1360 case CPU_SUBTYPE_POWERPC_603e:
1361 return "603e";
1362 case CPU_SUBTYPE_POWERPC_603ev:
1363 return "603ev";
1364 case CPU_SUBTYPE_POWERPC_604:
1365 return "604";
1366 case CPU_SUBTYPE_POWERPC_604e:
1367 return "604e";
1368 case CPU_SUBTYPE_POWERPC_620:
1369 return "620";
1370 case CPU_SUBTYPE_POWERPC_750:
1371 return "750";
1372 case CPU_SUBTYPE_POWERPC_7400:
1373 return "7400";
1374 case CPU_SUBTYPE_POWERPC_7450:
1375 return "7450";
1376 case CPU_SUBTYPE_POWERPC_970:
1377 return "970";
1378 default:;
1381 return "generic";
1383 #elif defined(__linux__) && defined(__powerpc__)
1384 StringRef sys::getHostCPUName() {
1385 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1386 StringRef Content = P ? P->getBuffer() : "";
1387 return detail::getHostCPUNameForPowerPC(Content);
1389 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1390 StringRef sys::getHostCPUName() {
1391 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1392 StringRef Content = P ? P->getBuffer() : "";
1393 return detail::getHostCPUNameForARM(Content);
1395 #elif defined(__linux__) && defined(__s390x__)
1396 StringRef sys::getHostCPUName() {
1397 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1398 StringRef Content = P ? P->getBuffer() : "";
1399 return detail::getHostCPUNameForS390x(Content);
1401 #elif defined(__MVS__)
1402 StringRef sys::getHostCPUName() {
1403 // Get pointer to Communications Vector Table (CVT).
1404 // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1405 // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1406 int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1407 // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1408 // of address.
1409 int ReadValue = *StartToCVTOffset;
1410 // Explicitly clear the high order bit.
1411 ReadValue = (ReadValue & 0x7FFFFFFF);
1412 char *CVT = reinterpret_cast<char *>(ReadValue);
1413 // The model number is located in the CVT prefix at offset -6 and stored as
1414 // signless packed decimal.
1415 uint16_t Id = *(uint16_t *)&CVT[-6];
1416 // Convert number to integer.
1417 Id = decodePackedBCD<uint16_t>(Id, false);
1418 // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1419 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1420 // extension can only be used if bit CVTVEF is on.
1421 bool HaveVectorSupport = CVT[244] & 0x80;
1422 return getCPUNameFromS390Model(Id, HaveVectorSupport);
1424 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1425 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1426 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1427 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1428 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1429 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1430 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1431 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1432 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1433 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1435 StringRef sys::getHostCPUName() {
1436 uint32_t Family;
1437 size_t Length = sizeof(Family);
1438 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1440 switch (Family) {
1441 case CPUFAMILY_ARM_SWIFT:
1442 return "swift";
1443 case CPUFAMILY_ARM_CYCLONE:
1444 return "apple-a7";
1445 case CPUFAMILY_ARM_TYPHOON:
1446 return "apple-a8";
1447 case CPUFAMILY_ARM_TWISTER:
1448 return "apple-a9";
1449 case CPUFAMILY_ARM_HURRICANE:
1450 return "apple-a10";
1451 case CPUFAMILY_ARM_MONSOON_MISTRAL:
1452 return "apple-a11";
1453 case CPUFAMILY_ARM_VORTEX_TEMPEST:
1454 return "apple-a12";
1455 case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1456 return "apple-a13";
1457 case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1458 return "apple-m1";
1459 default:
1460 // Default to the newest CPU we know about.
1461 return "apple-m1";
1464 #elif defined(_AIX)
1465 StringRef sys::getHostCPUName() {
1466 switch (_system_configuration.implementation) {
1467 case POWER_4:
1468 if (_system_configuration.version == PV_4_3)
1469 return "970";
1470 return "pwr4";
1471 case POWER_5:
1472 if (_system_configuration.version == PV_5)
1473 return "pwr5";
1474 return "pwr5x";
1475 case POWER_6:
1476 if (_system_configuration.version == PV_6_Compat)
1477 return "pwr6";
1478 return "pwr6x";
1479 case POWER_7:
1480 return "pwr7";
1481 case POWER_8:
1482 return "pwr8";
1483 case POWER_9:
1484 return "pwr9";
1485 // TODO: simplify this once the macro is available in all OS levels.
1486 #ifdef POWER_10
1487 case POWER_10:
1488 #else
1489 case 0x40000:
1490 #endif
1491 return "pwr10";
1492 default:
1493 return "generic";
1496 #elif defined(__loongarch__)
1497 StringRef sys::getHostCPUName() {
1498 // Use processor id to detect cpu name.
1499 uint32_t processor_id;
1500 __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
1501 switch (processor_id & 0xff00) {
1502 case 0xc000: // Loongson 64bit, 4-issue
1503 return "la464";
1504 // TODO: Others.
1505 default:
1506 break;
1508 return "generic";
1510 #elif defined(__riscv)
1511 StringRef sys::getHostCPUName() {
1512 #if defined(__linux__)
1513 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1514 StringRef Content = P ? P->getBuffer() : "";
1515 return detail::getHostCPUNameForRISCV(Content);
1516 #else
1517 #if __riscv_xlen == 64
1518 return "generic-rv64";
1519 #elif __riscv_xlen == 32
1520 return "generic-rv32";
1521 #else
1522 #error "Unhandled value of __riscv_xlen"
1523 #endif
1524 #endif
1526 #elif defined(__sparc__)
1527 #if defined(__linux__)
1528 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1529 SmallVector<StringRef> Lines;
1530 ProcCpuinfoContent.split(Lines, "\n");
1532 // Look for cpu line to determine cpu name
1533 StringRef Cpu;
1534 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1535 if (Lines[I].startswith("cpu")) {
1536 Cpu = Lines[I].substr(5).ltrim("\t :");
1537 break;
1541 return StringSwitch<const char *>(Cpu)
1542 .StartsWith("SuperSparc", "supersparc")
1543 .StartsWith("HyperSparc", "hypersparc")
1544 .StartsWith("SpitFire", "ultrasparc")
1545 .StartsWith("BlackBird", "ultrasparc")
1546 .StartsWith("Sabre", " ultrasparc")
1547 .StartsWith("Hummingbird", "ultrasparc")
1548 .StartsWith("Cheetah", "ultrasparc3")
1549 .StartsWith("Jalapeno", "ultrasparc3")
1550 .StartsWith("Jaguar", "ultrasparc3")
1551 .StartsWith("Panther", "ultrasparc3")
1552 .StartsWith("Serrano", "ultrasparc3")
1553 .StartsWith("UltraSparc T1", "niagara")
1554 .StartsWith("UltraSparc T2", "niagara2")
1555 .StartsWith("UltraSparc T3", "niagara3")
1556 .StartsWith("UltraSparc T4", "niagara4")
1557 .StartsWith("UltraSparc T5", "niagara4")
1558 .StartsWith("LEON", "leon3")
1559 // niagara7/m8 not supported by LLVM yet.
1560 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1561 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1562 .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1563 .Default("generic");
1565 #endif
1567 StringRef sys::getHostCPUName() {
1568 #if defined(__linux__)
1569 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1570 StringRef Content = P ? P->getBuffer() : "";
1571 return detail::getHostCPUNameForSPARC(Content);
1572 #elif defined(__sun__) && defined(__svr4__)
1573 char *buf = NULL;
1574 kstat_ctl_t *kc;
1575 kstat_t *ksp;
1576 kstat_named_t *brand = NULL;
1578 kc = kstat_open();
1579 if (kc != NULL) {
1580 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1581 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1582 ksp->ks_type == KSTAT_TYPE_NAMED)
1583 brand =
1584 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1585 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1586 buf = KSTAT_NAMED_STR_PTR(brand);
1588 kstat_close(kc);
1590 return StringSwitch<const char *>(buf)
1591 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1592 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1593 .Case("TMS390Z55",
1594 "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1595 .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1596 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1597 .Case("RT623", "hypersparc") // Ross hyperSPARC
1598 .Case("RT625", "hypersparc")
1599 .Case("RT626", "hypersparc")
1600 .Case("UltraSPARC-I", "ultrasparc")
1601 .Case("UltraSPARC-II", "ultrasparc")
1602 .Case("UltraSPARC-IIe", "ultrasparc")
1603 .Case("UltraSPARC-IIi", "ultrasparc")
1604 .Case("SPARC64-III", "ultrasparc")
1605 .Case("SPARC64-IV", "ultrasparc")
1606 .Case("UltraSPARC-III", "ultrasparc3")
1607 .Case("UltraSPARC-III+", "ultrasparc3")
1608 .Case("UltraSPARC-IIIi", "ultrasparc3")
1609 .Case("UltraSPARC-IIIi+", "ultrasparc3")
1610 .Case("UltraSPARC-IV", "ultrasparc3")
1611 .Case("UltraSPARC-IV+", "ultrasparc3")
1612 .Case("SPARC64-V", "ultrasparc3")
1613 .Case("SPARC64-VI", "ultrasparc3")
1614 .Case("SPARC64-VII", "ultrasparc3")
1615 .Case("UltraSPARC-T1", "niagara")
1616 .Case("UltraSPARC-T2", "niagara2")
1617 .Case("UltraSPARC-T2", "niagara2")
1618 .Case("UltraSPARC-T2+", "niagara2")
1619 .Case("SPARC-T3", "niagara3")
1620 .Case("SPARC-T4", "niagara4")
1621 .Case("SPARC-T5", "niagara4")
1622 // niagara7/m8 not supported by LLVM yet.
1623 .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1624 .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1625 .Case("SPARC-M8", "niagara4" /* "m8" */)
1626 .Default("generic");
1627 #else
1628 return "generic";
1629 #endif
1631 #else
1632 StringRef sys::getHostCPUName() { return "generic"; }
1633 namespace llvm {
1634 namespace sys {
1635 namespace detail {
1636 namespace x86 {
1638 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1639 return VendorSignatures::UNKNOWN;
1642 } // namespace x86
1643 } // namespace detail
1644 } // namespace sys
1645 } // namespace llvm
1646 #endif
1648 #if defined(__i386__) || defined(_M_IX86) || \
1649 defined(__x86_64__) || defined(_M_X64)
1650 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1651 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1652 unsigned MaxLevel;
1654 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1655 return false;
1657 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1659 Features["cx8"] = (EDX >> 8) & 1;
1660 Features["cmov"] = (EDX >> 15) & 1;
1661 Features["mmx"] = (EDX >> 23) & 1;
1662 Features["fxsr"] = (EDX >> 24) & 1;
1663 Features["sse"] = (EDX >> 25) & 1;
1664 Features["sse2"] = (EDX >> 26) & 1;
1666 Features["sse3"] = (ECX >> 0) & 1;
1667 Features["pclmul"] = (ECX >> 1) & 1;
1668 Features["ssse3"] = (ECX >> 9) & 1;
1669 Features["cx16"] = (ECX >> 13) & 1;
1670 Features["sse4.1"] = (ECX >> 19) & 1;
1671 Features["sse4.2"] = (ECX >> 20) & 1;
1672 Features["crc32"] = Features["sse4.2"];
1673 Features["movbe"] = (ECX >> 22) & 1;
1674 Features["popcnt"] = (ECX >> 23) & 1;
1675 Features["aes"] = (ECX >> 25) & 1;
1676 Features["rdrnd"] = (ECX >> 30) & 1;
1678 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1679 // indicates that the AVX registers will be saved and restored on context
1680 // switch, then we have full AVX support.
1681 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1682 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1683 #if defined(__APPLE__)
1684 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1685 // save the AVX512 context if we use AVX512 instructions, even the bit is not
1686 // set right now.
1687 bool HasAVX512Save = true;
1688 #else
1689 // AVX512 requires additional context to be saved by the OS.
1690 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1691 #endif
1692 // AMX requires additional context to be saved by the OS.
1693 const unsigned AMXBits = (1 << 17) | (1 << 18);
1694 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1696 Features["avx"] = HasAVXSave;
1697 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave;
1698 // Only enable XSAVE if OS has enabled support for saving YMM state.
1699 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1700 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave;
1702 unsigned MaxExtLevel;
1703 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1705 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1706 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1707 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1);
1708 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
1709 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
1710 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
1711 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1712 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1);
1713 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1714 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
1715 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1717 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1);
1719 // Miscellaneous memory related features, detected by
1720 // using the 0x80000008 leaf of the CPUID instruction
1721 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1722 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1723 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
1724 Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1);
1725 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1727 bool HasLeaf7 =
1728 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1730 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
1731 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
1732 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
1733 // AVX2 is only supported if we have the OS save support from AVX.
1734 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave;
1735 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
1736 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
1737 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
1738 // AVX512 is only supported if the OS supports the context save for it.
1739 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1740 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1741 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
1742 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
1743 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1744 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1745 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
1746 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1747 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1748 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1749 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
1750 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1751 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1753 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
1754 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
1755 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
1756 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
1757 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
1758 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
1759 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
1760 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;
1761 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1762 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1763 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1764 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1765 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
1766 Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1767 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
1768 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
1769 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
1770 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);
1772 Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
1773 Features["avx512vp2intersect"] =
1774 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1775 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
1776 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1);
1777 // There are two CPUID leafs which information associated with the pconfig
1778 // instruction:
1779 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
1780 // bit of EDX), while the EAX=0x1b leaf returns information on the
1781 // availability of specific pconfig leafs.
1782 // The target feature here only refers to the the first of these two.
1783 // Users might need to check for the availability of specific pconfig
1784 // leaves using cpuid, since that information is ignored while
1785 // detecting features using the "-march=native" flag.
1786 // For more info, see X86 ISA docs.
1787 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1788 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1789 Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1790 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1791 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1792 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1793 // return all 0s for invalid subleaves so check the limit.
1794 bool HasLeaf7Subleaf1 =
1795 HasLeaf7 && EAX >= 1 &&
1796 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1797 Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
1798 Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
1799 Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
1800 Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
1801 Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1802 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1803 Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
1804 Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
1805 Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1806 Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
1807 Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
1808 Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
1809 Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
1810 Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
1811 Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
1812 Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
1813 Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
1815 bool HasLeafD = MaxLevel >= 0xd &&
1816 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1818 // Only enable XSAVE if OS has enabled support for saving YMM state.
1819 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1820 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1821 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1823 bool HasLeaf14 = MaxLevel >= 0x14 &&
1824 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1826 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1828 bool HasLeaf19 =
1829 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1830 Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1832 bool HasLeaf24 =
1833 MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
1834 Features["avx10.1-512"] =
1835 Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);
1837 return true;
1839 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1840 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1841 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1842 if (!P)
1843 return false;
1845 SmallVector<StringRef, 32> Lines;
1846 P->getBuffer().split(Lines, "\n");
1848 SmallVector<StringRef, 32> CPUFeatures;
1850 // Look for the CPU features.
1851 for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1852 if (Lines[I].startswith("Features")) {
1853 Lines[I].split(CPUFeatures, ' ');
1854 break;
1857 #if defined(__aarch64__)
1858 // Keep track of which crypto features we have seen
1859 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1860 uint32_t crypto = 0;
1861 #endif
1863 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1864 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1865 #if defined(__aarch64__)
1866 .Case("asimd", "neon")
1867 .Case("fp", "fp-armv8")
1868 .Case("crc32", "crc")
1869 .Case("atomics", "lse")
1870 .Case("sve", "sve")
1871 .Case("sve2", "sve2")
1872 #else
1873 .Case("half", "fp16")
1874 .Case("neon", "neon")
1875 .Case("vfpv3", "vfp3")
1876 .Case("vfpv3d16", "vfp3d16")
1877 .Case("vfpv4", "vfp4")
1878 .Case("idiva", "hwdiv-arm")
1879 .Case("idivt", "hwdiv")
1880 #endif
1881 .Default("");
1883 #if defined(__aarch64__)
1884 // We need to check crypto separately since we need all of the crypto
1885 // extensions to enable the subtarget feature
1886 if (CPUFeatures[I] == "aes")
1887 crypto |= CAP_AES;
1888 else if (CPUFeatures[I] == "pmull")
1889 crypto |= CAP_PMULL;
1890 else if (CPUFeatures[I] == "sha1")
1891 crypto |= CAP_SHA1;
1892 else if (CPUFeatures[I] == "sha2")
1893 crypto |= CAP_SHA2;
1894 #endif
1896 if (LLVMFeatureStr != "")
1897 Features[LLVMFeatureStr] = true;
1900 #if defined(__aarch64__)
1901 // If we have all crypto bits we can add the feature
1902 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1903 Features["crypto"] = true;
1904 #endif
1906 return true;
1908 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
1909 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1910 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1911 Features["neon"] = true;
1912 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1913 Features["crc"] = true;
1914 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1915 Features["crypto"] = true;
1917 return true;
1919 #elif defined(__linux__) && defined(__loongarch__)
1920 #include <sys/auxv.h>
1921 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1922 unsigned long hwcap = getauxval(AT_HWCAP);
1923 bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU
1924 uint32_t cpucfg2 = 0x2;
1925 __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2));
1927 Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP
1928 Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP
1930 Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX
1931 Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX
1932 Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ
1934 return true;
1936 #else
1937 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1938 #endif
1940 #if __APPLE__
1941 /// \returns the \p triple, but with the Host's arch spliced in.
1942 static Triple withHostArch(Triple T) {
1943 #if defined(__arm__)
1944 T.setArch(Triple::arm);
1945 T.setArchName("arm");
1946 #elif defined(__arm64e__)
1947 T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e);
1948 T.setArchName("arm64e");
1949 #elif defined(__aarch64__)
1950 T.setArch(Triple::aarch64);
1951 T.setArchName("arm64");
1952 #elif defined(__x86_64h__)
1953 T.setArch(Triple::x86_64);
1954 T.setArchName("x86_64h");
1955 #elif defined(__x86_64__)
1956 T.setArch(Triple::x86_64);
1957 T.setArchName("x86_64");
1958 #elif defined(__i386__)
1959 T.setArch(Triple::x86);
1960 T.setArchName("i386");
1961 #elif defined(__powerpc__)
1962 T.setArch(Triple::ppc);
1963 T.setArchName("powerpc");
1964 #else
1965 # error "Unimplemented host arch fixup"
1966 #endif
1967 return T;
1969 #endif
1971 std::string sys::getProcessTriple() {
1972 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1973 Triple PT(Triple::normalize(TargetTripleString));
1975 #if __APPLE__
1976 /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of
1977 /// the slices. This fixes that up.
1978 PT = withHostArch(PT);
1979 #endif
1981 if (sizeof(void *) == 8 && PT.isArch32Bit())
1982 PT = PT.get64BitArchVariant();
1983 if (sizeof(void *) == 4 && PT.isArch64Bit())
1984 PT = PT.get32BitArchVariant();
1986 return PT.str();
1989 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
1990 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
1991 std::string CPU = std::string(sys::getHostCPUName());
1992 if (CPU == "generic")
1993 CPU = "(unknown)";
1994 OS << " Default target: " << sys::getDefaultTargetTriple() << '\n'
1995 << " Host CPU: " << CPU << '\n';
1996 #endif