2 * Identify the characteristics of the host CPU, providing information
3 * about cache sizes and assembly optimisation hints. This module is
4 * provided primarily for assembly language programmers.
7 * Some of this information was extremely difficult to track down. Some of the
8 * documents below were found only in cached versions stored by search engines!
9 * This code relies on information found in:
12 * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
13 * Volume 2A: Instruction Set Reference, A-M" (2007).
15 * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
17 * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
18 * Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
20 * $(LI "AMD Geode(TM) GX Processors Data Book",
21 * Advanced Micro Devices, Publication ID 31505E, (2005).
23 * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
25 * $(LI "Application note 106: Software Customization for the 6x86 Family",
26 * Cyrix Corporation, Rev 1.5 (1998)
28 * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
29 * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
30 * National Semiconductor, (2002)
32 * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
34 * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
35 * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
36 * $(LI "What every programmer should know about memory",
37 * Ulrich Drepper, Red Hat, Inc., (2007).
39 * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
40 * $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
42 * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
47 * Bugs: Currently only works on x86 and Itanium CPUs.
48 * Many processors have bugs in their microcode for the CPUID instruction,
49 * so sometimes the cache information may be incorrect.
51 * Copyright: Copyright Don Clugston 2007 - 2009.
52 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
53 * Authors: Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
54 * Source: $(DRUNTIMESRC core/_cpuid.d)
59 version (GNU
) version = GNU_OR_LDC
;
60 version (LDC
) version = GNU_OR_LDC
;
66 // If optimizing for a particular processor, it is generally better
67 // to identify based on features rather than model. NOTE: Normally
68 // it's only worthwhile to optimise for the latest Intel and AMD CPU,
69 // with a backup for other CPUs.
70 // Pentium -- preferPentium1()
74 // PIII -- + mmx() + sse()
75 // PentiumM -- + mmx() + sse() + sse2()
76 // Pentium4 -- preferPentium4()
77 // PentiumD -- + isX86_64()
78 // Core2 -- default + isX86_64()
79 // AMD K5 -- preferPentium1()
81 // AMD K6-II -- + mmx() + 3dnow()
82 // AMD K7 -- preferAthlon()
84 // AMD K10 -- + isX86_64()
85 // Cyrix 6x86 -- preferPentium1()
88 // GDC support uses extended inline assembly:
89 // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html (general information and hints)
90 // https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html (binding variables to registers)
91 // https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
95 /// Cache size and behaviour
98 /// Size of the cache, in kilobytes, per CPU.
99 /// For L1 unified (data + code) caches, this size is half the physical size.
100 /// (we don't halve it for larger sizes, since normally
101 /// data size is much greater than code size for critical loops).
103 /// Number of ways of associativity, eg:
105 /// $(LI 1 = direct mapped)
106 /// $(LI 2 = 2-way set associative)
107 /// $(LI 3 = 3-way set associative)
108 /// $(LI ubyte.max = fully associative)
111 /// Number of bytes read into the cache when a cache miss occurs.
116 /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
117 // Note: When we deprecate it, we simply make it private.
118 __gshared CacheInfo
[5] datacache
;
122 /// The data caches. If there are fewer than 5 physical cache levels,
123 /// the remaining levels are set to size_t.max/1024 kilobytes (== entire memory space)
124 const(CacheInfo
)[5] dataCaches() { return _dataCaches
; }
126 /// Returns vendor string, for display purposes only.
127 /// Do NOT use this to determine features!
128 /// Note that some CPUs have programmable vendorIDs.
129 string
vendor() {return _vendor
;}
130 /// Returns processor string, for display purposes only
131 string
processor() {return _processor
;}
133 /// Does it have an x87 FPU on-chip?
134 bool x87onChip() {return _x87onChip
;}
135 /// Is MMX supported?
136 bool mmx() {return _mmx
;}
137 /// Is SSE supported?
138 bool sse() {return _sse
;}
139 /// Is SSE2 supported?
140 bool sse2() {return _sse2
;}
141 /// Is SSE3 supported?
142 bool sse3() {return _sse3
;}
143 /// Is SSSE3 supported?
144 bool ssse3() {return _ssse3
;}
145 /// Is SSE4.1 supported?
146 bool sse41() {return _sse41
;}
147 /// Is SSE4.2 supported?
148 bool sse42() {return _sse42
;}
149 /// Is SSE4a supported?
150 bool sse4a() {return _sse4a
;}
/// Is AES supported
bool aes()
{
    return _aes;
}
153 /// Is pclmulqdq supported
154 bool hasPclmulqdq() {return _hasPclmulqdq
;}
155 /// Is rdrand supported
156 bool hasRdrand() {return _hasRdrand
;}
/// Is AVX supported (CPU feature flag set and SSE/YMM state enabled
/// in XFEATURES_ENABLED_MASK)
bool avx()
{
    return _avx;
}
159 /// Is VEX-Encoded AES supported
160 bool vaes() {return _vaes
;}
161 /// Is vpclmulqdq supported
162 bool hasVpclmulqdq(){return _hasVpclmulqdq
; }
/// Is FMA supported (only reported when AVX is also available)
bool fma()
{
    return _fma;
}
165 /// Is FP16C supported
166 bool fp16c() {return _fp16c
;}
167 /// Is AVX2 supported
168 bool avx2() {return _avx2
;}
169 /// Is HLE (hardware lock elision) supported
170 bool hle() {return _hle
;}
171 /// Is RTM (restricted transactional memory) supported
172 bool rtm() {return _rtm
;}
173 /// Is AVX512F supported
174 bool avx512f() {return _avx512f
;}
175 /// Is rdseed supported
176 bool hasRdseed() {return _hasRdseed
;}
/// Is SHA supported
bool hasSha()
{
    return _hasSha;
}
179 /// Is AMD 3DNOW supported?
180 bool amd3dnow() {return _amd3dnow
;}
181 /// Is AMD 3DNOW Ext supported?
182 bool amd3dnowExt() {return _amd3dnowExt
;}
183 /// Are AMD extensions to MMX supported?
184 bool amdMmx() {return _amdMmx
;}
185 /// Is fxsave/fxrstor supported?
186 bool hasFxsr() {return _hasFxsr
;}
187 /// Is cmov supported?
188 bool hasCmov() {return _hasCmov
;}
189 /// Is rdtsc supported?
190 bool hasRdtsc() {return _hasRdtsc
;}
191 /// Is cmpxchg8b supported?
192 bool hasCmpxchg8b() {return _hasCmpxchg8b
;}
193 /// Is cmpxchg16b supported?
194 bool hasCmpxchg16b() {return _hasCmpxchg16b
;}
195 /// Is SYSENTER/SYSEXIT supported?
196 bool hasSysEnterSysExit() {return _hasSysEnterSysExit
;}
197 /// Is 3DNow prefetch supported?
198 bool has3dnowPrefetch() {return _has3dnowPrefetch
;}
199 /// Are LAHF and SAHF supported in 64-bit mode?
200 bool hasLahfSahf() {return _hasLahfSahf
;}
201 /// Is POPCNT supported?
202 bool hasPopcnt() {return _hasPopcnt
;}
203 /// Is LZCNT supported?
204 bool hasLzcnt() {return _hasLzcnt
;}
205 /// Is this an Intel64 or AMD 64?
206 bool isX86_64() {return _isX86_64
;}
208 /// Is this an IA64 (Itanium) processor?
209 bool isItanium() { return _isItanium
; }
211 /// Is hyperthreading supported?
212 bool hyperThreading() { return _hyperThreading
; }
213 /// Returns number of threads per CPU
214 uint threadsPerCPU() {return _threadsPerCPU
;}
215 /// Returns number of cores in CPU
216 uint coresPerCPU() {return _coresPerCPU
;}
218 /// Optimisation hints for assembly code.
220 /// For forward compatibility, the CPU is compared against different
221 /// microarchitectures. For 32-bit x86, comparisons are made against
222 /// the Intel PPro/PII/PIII/PM family.
224 /// The major 32-bit x86 microarchitecture 'dynasties' have been:
227 /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
228 /// $(LI AMD Athlon (K7, K8, K10). )
229 /// $(LI Intel NetBurst (Pentium 4, Pentium D). )
230 /// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
233 /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
234 /// Cyrix, Rise) were mostly in-order.
236 /// Some new processors do not fit into the existing categories:
239 /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
240 /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
243 /// Within each dynasty, the optimisation techniques are largely
244 /// identical (eg, use instruction pairing for group 4). Major
245 /// instruction set improvements occur within each dynasty.
247 /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
248 bool preferAthlon() { return _preferAthlon
; }
249 /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
250 bool preferPentium4() { return _preferPentium4
; }
251 /// Does this CPU perform better on Pentium I code than Pentium Pro code?
252 bool preferPentium1() { return _preferPentium1
; }
257 /* These exist as immutables so that the query property functions can
258 * be backwards compatible with code that called them with ().
259 * Also, immutables can only be set by the static this().
261 const(CacheInfo
)[5] _dataCaches
;
295 bool _hasSysEnterSysExit
;
296 bool _has3dnowPrefetch
;
302 bool _hyperThreading
;
306 bool _preferPentium4
;
307 bool _preferPentium1
;
311 // All these values are set only once, and never subsequently modified.
313 /// $(RED Warning: This field will be turned into a property in a future release.)
315 /// Processor type (vendor-dependent).
316 /// This should be visible ONLY for display purposes.
317 uint stepping
, model
, family
;
318 /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
319 uint numCacheLevels
= 1;
320 /// The number of cache levels in the CPU.
321 @property uint cacheLevels() { return numCacheLevels
; }
326 bool probablyIntel
; // true = _probably_ an Intel processor, might be faking
327 bool probablyAMD
; // true = _probably_ an AMD or Hygon processor
328 string processorName
;
329 char [12] vendorID
= 0;
330 char [48] processorNameBuffer
= 0;
331 uint features
= 0; // mmx, sse, sse2, hyperthreading, etc
332 uint miscfeatures
= 0; // sse3, etc.
333 uint extfeatures
= 0; // HLE, AVX2, RTM, etc.
334 uint amdfeatures
= 0; // 3DNow!, mmxext, etc
335 uint amdmiscfeatures
= 0; // sse4a, sse5, svm, etc
336 ulong xfeatures
= 0; // XFEATURES_ENABLED_MASK
341 CpuFeatures cpuFeatures
;
343 /* Hide from the optimizer where cf (a register) is coming from, so that
344 * cf doesn't get "optimized away". The idea is to reference
345 * the global data through cf so not so many fixups are inserted
346 * into the executable image.
348 CpuFeatures
* getCpuFeatures() @nogc nothrow
350 pragma(inline
, false);
354 // Note that this may indicate multi-core rather than hyperthreading.
355 @property bool hyperThreadingBit() { return (cpuFeatures
.features
&HTT_BIT
)!=0;}
357 // feature flags CPUID1_EDX
361 TIMESTAMP_BIT
= 1<<4, // rdtsc
362 MDSR_BIT
= 1<<5, // RDMSR/WRMSR
363 CMPXCHG8B_BIT
= 1<<8,
364 SYSENTERSYSEXIT_BIT
= 1<<11,
373 // feature flags misc CPUID1_ECX
377 PCLMULQDQ_BIT
= 1<<1, // from AVX
380 FMA_BIT
= 1<<12, // from AVX
381 CMPXCHG16B_BIT
= 1<<13,
385 AES_BIT
= 1<<25, // AES instructions from AVX
386 OSXSAVE_BIT
= 1<<27, // Used for AVX
391 // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
394 FSGSBASE_BIT
= 1 << 0,
402 INVPCID_BIT
= 1 << 10,
404 AVX512F_BIT
= 1 << 16,
405 AVX512DQ_BIT
= 1 << 17,
406 RDSEED_BIT
= 1 << 18,
408 AVX512IFMA_BIT
= 1 << 21,
409 CLFLUSHOPT_BIT
= 1 << 23,
411 AVX512PF_BIT
= 1 << 26,
412 AVX512ER_BIT
= 1 << 27,
413 AVX512CD_BIT
= 1 << 28,
415 AVX512BW_BIT
= 1 << 30,
416 AVX512VL_BIT
= 1 << 31,
418 // feature flags XFEATURES_ENABLED_MASK
425 // AMD feature flags CPUID80000001_EDX
429 // FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
431 PAGE1GB_BIT
= 1<<26, // support for 1GB pages
434 AMD_3DNOW_EXT_BIT
= 1<<30,
435 AMD_3DNOW_BIT
= 1<<31
437 // AMD misc feature flags CPUID80000001_ECX
443 AMD_3DNOW_PREFETCH_BIT
= 1<<8,
447 version (GNU_OR_LDC
) {
449 enum supportedX86
= true;
450 else version (X86_64
)
451 enum supportedX86
= true;
453 enum supportedX86
= false;
454 } else version (D_InlineAsm_X86
) {
455 enum supportedX86
= true;
456 } else version (D_InlineAsm_X86_64
) {
457 enum supportedX86
= true;
459 enum supportedX86
= false;
462 static if (supportedX86
) {
463 // Note that this code will also work for Itanium in x86 mode.
465 __gshared
uint max_cpuid
, max_extended_cpuid
;
467 // CPUID2: "cache and tlb information"
468 void getcacheinfoCPUID2()
470 // We are only interested in the data caches
471 void decipherCpuid2(ubyte x
) @nogc nothrow {
473 // Values from http://www.sandpile.org/ia32/cpuid.htm.
474 // Includes Itanium and non-Intel CPUs.
476 static immutable ubyte [63] ids
= [
477 0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
479 0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
480 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
481 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
483 0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,
485 0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
486 0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
488 static immutable uint [63] sizes
= [
489 8, 16, 16, 64, 16, 24, 8, 16, 32,
490 128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
491 256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
492 128, 192, 128, 256, 384, 512, 3072, 512, 128,
493 512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,
495 512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
496 2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
498 // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
499 static immutable ubyte [63] ways
= [
500 2, 4, 4, 8, 8, 6, 4, 4, 4,
501 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
502 8, 8, 8, 8, 4, 8, 16, 24,
503 4, 6, 2, 4, 6, 4, 12, 8, 8,
504 4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
505 4, 4, 4, 8, 8, 8, 12, 12, 12,
506 16, 16, 16, 24, 24, 24
508 enum { FIRSTDATA2
= 8, FIRSTDATA3
= 28+9 }
509 for (size_t i
=0; i
< ids
.length
; ++i
) {
511 int level
= i
< FIRSTDATA2 ?
0: i
<FIRSTDATA3 ?
1 : 2;
512 if (x
==0x49 && family
==0xF && model
==0x6) level
=2;
513 datacache
[level
].size
=sizes
[i
];
514 datacache
[level
].associativity
=ways
[i
];
515 if (level
== 3 || x
==0x2C || x
==0x0D ||
(x
>=0x48 && x
<=0x80)
516 || x
==0x86 || x
==0x87
517 ||
(x
>=0x66 && x
<=0x68) ||
(x
>=0x39 && x
<=0x3E)){
518 datacache
[level
].lineSize
= 64;
519 } else datacache
[level
].lineSize
= 32;
525 bool firstTime
= true;
526 // On a multi-core system, this could theoretically fail, but it's only used
527 // for old single-core CPUs.
530 version (GNU_OR_LDC
) asm pure nothrow @nogc {
531 "cpuid" : "=a" (a
[0]), "=b" (a
[1]), "=c" (a
[2]), "=d" (a
[3]) : "a" (2);
532 } else asm pure nothrow @nogc {
541 if (a
[0]==0x0000_7001 && a
[3]==0x80 && a
[1]==0 && a
[2]==0) {
542 // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
543 // These are NOT standard Intel values
544 // (TLB = 32 entry, 4 way associative, 4K pages)
545 // (L1 cache = 16K, 4way, linesize16)
547 datacache
[0].associativity
=4;
548 datacache
[0].lineSize
=16;
551 // lsb of a is how many times to loop.
552 numinfos
= a
[0] & 0xFF;
553 // and otherwise it should be ignored
557 for (int c
=0; c
<4;++c
) {
558 // high bit set == no info.
559 if (a
[c
] & 0x8000_0000) continue;
560 decipherCpuid2(cast(ubyte)(a
[c
] & 0xFF));
561 decipherCpuid2(cast(ubyte)((a
[c
]>>8) & 0xFF));
562 decipherCpuid2(cast(ubyte)((a
[c
]>>16) & 0xFF));
563 decipherCpuid2(cast(ubyte)((a
[c
]>>24) & 0xFF));
565 } while (--numinfos
);
568 // CPUID4: "Deterministic cache parameters" leaf
569 void getcacheinfoCPUID4()
573 uint a
, b
, number_of_sets
;
574 version (GNU_OR_LDC
) asm pure nothrow @nogc {
575 "cpuid" : "=a" (a
), "=b" (b
), "=c" (number_of_sets
) : "a" (4), "c" (cachenum
) : "edx";
576 } else asm pure nothrow @nogc {
582 mov number_of_sets
, ECX
;
585 if ((a
&0x1F)==0) break; // no more caches
586 immutable uint numthreads
= ((a
>>14) & 0xFFF) + 1;
587 immutable uint numcores
= ((a
>>26) & 0x3F) + 1;
588 if (numcores
> cpuFeatures
.maxCores
) cpuFeatures
.maxCores
= numcores
;
589 if ((a
&0x1F)!=1 && ((a
&0x1F)!=3)) continue; // we only want data & unified caches
// The cache level is taken from bits 7:5 of EAX and converted to a
// zero-based index; a reported level of 0 wraps to ubyte.max and is
// rejected by the bound check below.
immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
// Must be '>=': level == datacache.length would index one past the end
// of the fixed-size datacache array.
if (level >= datacache.length) continue; // ignore deep caches
594 datacache
[level
].associativity
= a
& 0x200 ?
ubyte.max
:cast(ubyte)((b
>>22)+1);
595 datacache
[level
].lineSize
= (b
& 0xFFF)+ 1; // system coherency line size
596 immutable uint line_partitions
= ((b
>> 12)& 0x3FF) + 1;
597 // Size = number of sets * associativity * cachelinesize * linepartitions
598 // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
599 immutable ulong sz
= (datacache
[level
].associativity
< ubyte.max
)? number_of_sets
*
600 datacache
[level
].associativity
: number_of_sets
;
601 datacache
[level
].size
= cast(size_t
)(
602 (sz
* datacache
[level
].lineSize
* line_partitions
) / (numthreads
*1024));
603 if (level
== 0 && (a
&0xF)==3) {
604 // Halve the size for unified L1 caches
605 datacache
[level
].size
/=2;
610 // CPUID8000_0005 & 6
611 void getAMDcacheinfo()
613 uint dummy
, c5
, c6
, d6
;
614 version (GNU_OR_LDC
) asm pure nothrow @nogc {
615 "cpuid" : "=a" (dummy
), "=c" (c5
) : "a" (0x8000_0005) : "ebx", "edx";
616 } else asm pure nothrow @nogc {
617 mov EAX
, 0x8000_0005; // L1 cache
619 // EAX has L1_TLB_4M.
621 // EDX has L1 instruction cache
625 datacache
[0].size
= ( (c5
>>24) & 0xFF);
626 datacache
[0].associativity
= cast(ubyte)( (c5
>> 16) & 0xFF);
627 datacache
[0].lineSize
= c5
& 0xFF;
629 if (max_extended_cpuid
>= 0x8000_0006) {
630 // AMD K6-III or K6-2+ or later.
632 if (max_extended_cpuid
>= 0x8000_0008) {
633 // read the number of physical cores (minus 1) from the 8 lowest ECX bits
634 version (GNU_OR_LDC
) asm pure nothrow @nogc {
635 "cpuid" : "=a" (dummy
), "=c" (numcores
) : "a" (0x8000_0008) : "ebx", "edx";
636 } else asm pure nothrow @nogc {
637 mov EAX
, 0x8000_0008;
641 numcores
= (numcores
& 0xFF) + 1;
642 if (numcores
>cpuFeatures
.maxCores
) cpuFeatures
.maxCores
= numcores
;
645 version (GNU_OR_LDC
) asm pure nothrow @nogc {
646 "cpuid" : "=a" (dummy
), "=c" (c6
), "=d" (d6
) : "a" (0x8000_0006) : "ebx";
647 } else asm pure nothrow @nogc {
648 mov EAX
, 0x8000_0006; // L2/L3 cache
650 mov c6
, ECX
; // L2 cache info
651 mov d6
, EDX
; // L3 cache info
654 static immutable ubyte [] assocmap
= [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
655 datacache
[1].size
= (c6
>>16) & 0xFFFF;
656 datacache
[1].associativity
= assocmap
[(c6
>>12)&0xF];
657 datacache
[1].lineSize
= c6
& 0xFF;
659 // The L3 cache value is TOTAL, not per core.
660 datacache
[2].size
= ((d6
>>18)*512)/numcores
; // could be up to 2 * this, -1.
661 datacache
[2].associativity
= assocmap
[(d6
>>12)&0xF];
662 datacache
[2].lineSize
= d6
& 0xFF;
666 // For Intel CoreI7 and later, use function 0x0B
667 // to determine number of processors.
672 // I'm not sure about this. The docs state that there
673 // are 2 hyperthreads per core if HT is factory enabled.
674 for (int level
= 0; level
< 2; level
++)
676 version (GNU_OR_LDC
) asm pure nothrow @nogc {
677 "cpuid" : "=a" (a
), "=b" (b
), "=c" (c
), "=d" (d
) : "a" (0x0B), "c" (level
);
678 } else asm pure nothrow @nogc {
690 threadsPerCore
= b
& 0xFFFF;
693 cpuFeatures
.maxThreads
= b
& 0xFFFF;
694 cpuFeatures
.maxCores
= cpuFeatures
.maxThreads
/ threadsPerCore
;
697 // Got "invalid domain" returned from cpuid
698 if (a
== 0 && b
== 0)
705 auto cf
= getCpuFeatures();
708 uint* venptr
= cast(uint*)cf
.vendorID
.ptr
;
711 asm pure nothrow @nogc {
712 "cpuid" : "=a" (max_cpuid
), "=b" (venptr
[0]), "=d" (venptr
[1]), "=c" (venptr
[2]) : "a" (0);
713 "cpuid" : "=a" (max_extended_cpuid
) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
719 version (D_InlineAsm_X86
)
721 asm pure nothrow @nogc {
731 else version (D_InlineAsm_X86_64
)
733 asm pure nothrow @nogc {
743 asm pure nothrow @nogc {
744 mov EAX
, 0x8000_0000;
749 max_extended_cpuid
= a2
;
753 cf
.probablyIntel
= cf
.vendorID
== "GenuineIntel";
754 cf
.probablyAMD
= (cf
.vendorID
== "AuthenticAMD" || cf
.vendorID
== "HygonGenuine");
755 uint apic
= 0; // brand index, apic id
756 version (GNU_OR_LDC
) asm pure nothrow @nogc {
757 "cpuid" : "=a" (a
), "=b" (apic
), "=c" (cf
.miscfeatures
), "=d" (cf
.features
) : "a" (1);
759 asm pure nothrow @nogc {
760 mov EAX
, 1; // model, stepping
771 immutable uint fbase
= (a
>> 8) & 0xF;
772 immutable uint mbase
= (a
>> 4) & 0xF;
// Displayed family = base family + extended family field (bits 27:20 of EAX).
// The extended field has to be masked before the addition: '+' binds tighter
// than '&', so without the inner parentheses the mask was applied to the sum,
// letting higher bits of EAX leak into the add and truncating the result.
family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((a >> 20) & 0xFF) : fbase;
774 model
= ((fbase
== 0xF) ||
(fbase
== 6 && cf
.probablyIntel
) ) ?
775 mbase
+ ((a
>> 12) & 0xF0) : mbase
;
779 version (GNU_OR_LDC
) asm pure nothrow @nogc {
780 "cpuid" : "=a" (a
), "=b" (cf
.extfeatures
), "=c" (c
) : "a" (7), "c" (0) : "edx";
783 asm pure nothrow @nogc {
784 mov EAX
, 7; // Structured extended feature leaf.
785 mov ECX
, 0; // Main leaf.
787 mov ext
, EBX
; // HLE, AVX2, RTM, etc.
789 cf
.extfeatures
= ext
;
793 if (cf
.miscfeatures
& OSXSAVE_BIT
)
795 version (GNU_OR_LDC
) asm pure nothrow @nogc {
796 /* Old assemblers do not recognize xgetbv, and there is no easy way
797 * to conditionally compile based on the assembler used, so use the
798 * raw .byte sequence instead. */
799 ".byte 0x0f, 0x01, 0xd0" : "=a" (a
), "=d" (d
) : "c" (0);
800 } else asm pure nothrow @nogc {
806 cf
.xfeatures
= cast(ulong)d
<< 32 | a
;
810 cf
.amdmiscfeatures
= 0;
811 if (max_extended_cpuid
>= 0x8000_0001) {
812 version (GNU_OR_LDC
) asm pure nothrow @nogc {
813 "cpuid" : "=a" (a
), "=c" (cf
.amdmiscfeatures
), "=d" (cf
.amdfeatures
) : "a" (0x8000_0001) : "ebx";
815 asm pure nothrow @nogc {
816 mov EAX
, 0x8000_0001;
821 cf
.amdmiscfeatures
= c
;
825 // Try to detect fraudulent vendorIDs
826 if (amd3dnow
) cf
.probablyIntel
= false;
828 if (!cf
.probablyIntel
&& max_extended_cpuid
>= 0x8000_0008) {
829 //http://support.amd.com/TechDocs/25481.pdf pg.36
831 if (hyperThreadingBit
) {
832 // determine max number of cores for AMD
833 version (GNU_OR_LDC
) asm pure nothrow @nogc {
834 "cpuid" : "=a" (a
), "=c" (c
) : "a" (0x8000_0008) : "ebx", "edx";
835 } else asm pure nothrow @nogc {
836 mov EAX
, 0x8000_0008;
840 cf
.maxCores
+= c
& 0xFF;
844 if (max_extended_cpuid
>= 0x8000_0004) {
845 uint* pnb
= cast(uint*)cf
.processorNameBuffer
.ptr
;
848 asm pure nothrow @nogc {
849 "cpuid" : "=a" (pnb
[0]), "=b" (pnb
[1]), "=c" (pnb
[ 2]), "=d" (pnb
[ 3]) : "a" (0x8000_0002);
850 "cpuid" : "=a" (pnb
[4]), "=b" (pnb
[5]), "=c" (pnb
[ 6]), "=d" (pnb
[ 7]) : "a" (0x8000_0003);
851 "cpuid" : "=a" (pnb
[8]), "=b" (pnb
[9]), "=c" (pnb
[10]), "=d" (pnb
[11]) : "a" (0x8000_0004);
854 else version (D_InlineAsm_X86
)
856 asm pure nothrow @nogc {
859 mov EAX
, 0x8000_0002;
865 mov EAX
, 0x8000_0003;
871 mov EAX
, 0x8000_0004;
880 else version (D_InlineAsm_X86_64
)
882 asm pure nothrow @nogc {
885 mov EAX
, 0x8000_0002;
891 mov EAX
, 0x8000_0003;
897 mov EAX
, 0x8000_0004;
// Intel P4 and PM pad at front with spaces.
// Other CPUs pad at end with nulls.
// Both scans are bounded by the buffer length so that a brand string
// consisting only of spaces, or only of nulls, cannot index outside
// processorNameBuffer; in that case the resulting name is empty.
int start = 0, end = 0;
immutable int len = cast(int) cf.processorNameBuffer.length;
while (start < len && cf.processorNameBuffer[start] == ' ') { ++start; }
while (end < len - start && cf.processorNameBuffer[len - end - 1] == 0) { ++end; }
cf.processorName = cast(string)(cf.processorNameBuffer[start .. len - end]);
913 cf
.processorName
= "Unknown CPU";
915 // Determine cache sizes
917 // Intel docs specify that they return 0 for 0x8000_0005.
918 // AMD docs do not specify the behaviour for 0004 and 0002.
919 // Centaur/VIA and most other manufacturers use the AMD method,
920 // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
921 // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
922 // for CPUID80000005. But Geode GX uses the AMD method
924 // Deal with Geode GX1 - make it same as MediaGX MMX.
925 if (max_extended_cpuid
==0x8000_0005 && max_cpuid
==2) {
926 max_extended_cpuid
= 0x8000_0004;
928 // Therefore, we try the AMD method unless it's an Intel chip.
929 // If we still have no info, try the Intel methods.
930 datacache
[0].size
= 0;
931 if (max_cpuid
<2 ||
!cf
.probablyIntel
) {
932 if (max_extended_cpuid
>= 0x8000_0005) {
934 } else if (cf
.probablyAMD
) {
935 // According to AMDProcRecognitionAppNote, this means CPU
936 // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
937 // Am5x86 has 16Kb 4-way unified data & code cache.
938 datacache
[0].size
= 8;
939 datacache
[0].associativity
= 4;
940 datacache
[0].lineSize
= 32;
943 // Values for Cyrix 6x86MX (family 6, model 0)
944 datacache
[0].size
= 64;
945 datacache
[0].associativity
= 4;
946 datacache
[0].lineSize
= 32;
949 if ((datacache
[0].size
== 0) && max_cpuid
>=4) {
950 getcacheinfoCPUID4();
952 if ((datacache
[0].size
== 0) && max_cpuid
>=2) {
953 getcacheinfoCPUID2();
955 if (datacache
[0].size
== 0) {
956 // Pentium, PMMX, late model 486, or an obscure CPU
957 if (mmx
) { // Pentium MMX. Also has 8kB code cache.
958 datacache
[0].size
= 16;
959 datacache
[0].associativity
= 4;
960 datacache
[0].lineSize
= 32;
961 } else { // Pentium 1 (which also has 8kB code cache)
963 // Cyrix 6x86: 16, 4way, 32 linesize
964 datacache
[0].size
= 8;
965 datacache
[0].associativity
= 2;
966 datacache
[0].lineSize
= 32;
969 if (cf
.probablyIntel
&& max_cpuid
>= 0x0B) {
970 // For Intel i7 and later, use function 0x0B to determine
971 // cores and hyperthreads.
974 if (hyperThreadingBit
) cf
.maxThreads
= (apic
>>>16) & 0xFF;
975 else cf
.maxThreads
= cf
.maxCores
;
977 if (cf
.probablyAMD
&& max_extended_cpuid
>= 0x8000_001E
) {
978 version (GNU_OR_LDC
) asm pure nothrow @nogc {
979 "cpuid" : "=a" (a
), "=b" (b
) : "a" (0x8000_001E
) : "ecx", "edx";
981 asm pure nothrow @nogc {
982 mov EAX
, 0x8000_001e
;
987 ubyte coresPerComputeUnit
= ((b
>> 8) & 3) + 1;
988 cf
.maxCores
= cf
.maxThreads
/ coresPerComputeUnit
;
993 // Return true if the cpuid instruction is supported.
994 // BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
1002 version (GNU_OR_LDC
)
1004 // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
1005 asm nothrow @nogc { "
1006 pushfl # Save EFLAGS
1007 pushfl # Store EFLAGS
1008 xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
1009 popfl # Load stored EFLAGS (with ID bit inverted)
1010 pushfl # Store EFLAGS again (ID bit may or may not be inverted)
1011 popl %%eax # eax = modified EFLAGS (ID bit may or may not be inverted)
1012 xorl (%%esp), %%eax # eax = whichever bits were changed
1013 popfl # Restore original EFLAGS
1017 else version (D_InlineAsm_X86
)
1023 xor EAX
, 0x0020_0000;
1031 return (flags
& 0x0020_0000) != 0;
1035 } else { // supported X86
1037 bool hasCPUID() { return false; }
1041 datacache
[0].size
= 8;
1042 datacache
[0].associativity
= 2;
1043 datacache
[0].lineSize
= 32;
1048 // TODO: Implement this function with OS support
1051 enum :int { PPC601, PPC603, PPC603E, PPC604,
1052 PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
1055 // asm { mfpvr; } returns the CPU version but unfortunately it can
1056 // only be used in kernel mode. So OS support is required.
1057 int cputype = PPC603;
1059 // 601 has a 8KB combined data & code L1 cache.
1060 uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
1061 ubyte ways[] = [8, 2, 4, 4, 4, 8, 8, 8, 8];
1062 uint L2size[]= [0, 0, 0, 0, 0, 0, 0, 256, 512];
1063 uint L3size[]= [0, 0, 0, 0, 0, 0, 0, 2048, 0];
1065 datacache[0].size = sizes[cputype];
1066 datacache[0].associativity = ways[cputype];
1067 datacache[0].lineSize = (cputype==PPCG5)? 128 :
1068 (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
1069 datacache[1].size = L2size[cputype];
1070 datacache[2].size = L3size[cputype];
1071 datacache[1].lineSize = datacache[0].lineSize;
1072 datacache[2].lineSize = datacache[0].lineSize;
1075 // TODO: Implement this function with OS support
1078 // UltraSparcIIi : L1 = 16, 2way. L2 = 512, 4 way.
1079 // UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192.
1080 // UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way
1081 // UltraSparcIV : L1 = 64, 4way. L2 = 16*1024.
1082 // UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024.
1083 // Sparc64V : L1 = 128, 2way. L2 = 4096 4way.
1087 pragma(crt_constructor
) void cpuid_initialization()
1089 auto cf
= getCpuFeatures();
1094 // it's a 386 or 486, or a Cyrix 6x86.
1095 //Probably still has an external cache.
1097 if (datacache
[0].size
==0) {
1098 // Guess same as Pentium 1.
1099 datacache
[0].size
= 8;
1100 datacache
[0].associativity
= 2;
1101 datacache
[0].lineSize
= 32;
1104 // And now fill up all the unused levels with full memory space.
1105 for (size_t i
=1; i
< datacache
.length
; ++i
) {
1106 if (datacache
[i
].size
==0) {
1107 // Set all remaining levels of cache equal to full address space.
1108 datacache
[i
].size
= size_t
.max
/1024;
1109 datacache
[i
].associativity
= 1;
1110 datacache
[i
].lineSize
= datacache
[i
-1].lineSize
;
1116 // Set the immortals
1118 _dataCaches
= datacache
;
1119 _vendor
= cast(string
)cf
.vendorID
;
1120 _processor
= cf
.processorName
;
1121 _x87onChip
= (cf
.features
&FPU_BIT
)!=0;
1122 _mmx
= (cf
.features
&MMX_BIT
)!=0;
1123 _sse
= (cf
.features
&SSE_BIT
)!=0;
1124 _sse2
= (cf
.features
&SSE2_BIT
)!=0;
1125 _sse3
= (cf
.miscfeatures
&SSE3_BIT
)!=0;
1126 _ssse3
= (cf
.miscfeatures
&SSSE3_BIT
)!=0;
1127 _sse41
= (cf
.miscfeatures
&SSE41_BIT
)!=0;
1128 _sse42
= (cf
.miscfeatures
&SSE42_BIT
)!=0;
1129 _sse4a
= (cf
.amdmiscfeatures
&SSE4A_BIT
)!=0;
1130 _aes
= (cf
.miscfeatures
&AES_BIT
)!=0;
1131 _hasPclmulqdq
= (cf
.miscfeatures
&PCLMULQDQ_BIT
)!=0;
1132 _hasRdrand
= (cf
.miscfeatures
&RDRAND_BIT
)!=0;
1134 enum avx_mask
= XF_SSE_BIT|XF_YMM_BIT
;
1135 _avx
= (cf
.xfeatures
& avx_mask
) == avx_mask
&& (cf
.miscfeatures
&AVX_BIT
)!=0;
1138 _hasVpclmulqdq
= avx
&& hasPclmulqdq
;
1139 _fma
= avx
&& (cf
.miscfeatures
&FMA_BIT
)!=0;
1140 _fp16c
= avx
&& (cf
.miscfeatures
&FP16C_BIT
)!=0;
1141 _avx2
= avx
&& (cf
.extfeatures
& AVX2_BIT
) != 0;
1142 _hle
= (cf
.extfeatures
& HLE_BIT
) != 0;
1143 _rtm
= (cf
.extfeatures
& RTM_BIT
) != 0;
1144 _avx512f
= (cf
.extfeatures
& AVX512F_BIT
) != 0;
1145 _hasRdseed
= (cf
.extfeatures
&RDSEED_BIT
)!=0;
1146 _hasSha
= (cf
.extfeatures
&SHA_BIT
)!=0;
1147 _amd3dnow
= (cf
.amdfeatures
&AMD_3DNOW_BIT
)!=0;
1148 _amd3dnowExt
= (cf
.amdfeatures
&AMD_3DNOW_EXT_BIT
)!=0;
1149 _amdMmx
= (cf
.amdfeatures
&AMD_MMX_BIT
)!=0;
1150 _hasFxsr
= (cf
.features
&FXSR_BIT
)!=0;
1151 _hasCmov
= (cf
.features
&CMOV_BIT
)!=0;
1152 _hasRdtsc
= (cf
.features
&TIMESTAMP_BIT
)!=0;
1153 _hasCmpxchg8b
= (cf
.features
&CMPXCHG8B_BIT
)!=0;
1154 _hasCmpxchg16b
= (cf
.miscfeatures
&CMPXCHG16B_BIT
)!=0;
1155 _hasSysEnterSysExit
=
1156 // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
1157 // (REF: www.geoffchappell.com).
1158 (cf
.probablyIntel
&& (family
< 6 ||
(family
==6 && (model
< 3 ||
(model
==3 && stepping
<3)))))
1160 : (cf
.features
& SYSENTERSYSEXIT_BIT
)!=0;
1161 _has3dnowPrefetch
= (cf
.amdmiscfeatures
&AMD_3DNOW_PREFETCH_BIT
)!=0;
1162 _hasLahfSahf
= (cf
.amdmiscfeatures
&LAHFSAHF_BIT
)!=0;
1163 _hasPopcnt
= (cf
.miscfeatures
&POPCNT_BIT
)!=0;
1164 _hasLzcnt
= (cf
.amdmiscfeatures
&LZCNT_BIT
)!=0;
1165 _isX86_64
= (cf
.amdfeatures
&AMD64_BIT
)!=0;
1166 _isItanium
= (cf
.features
&IA64_BIT
)!=0;
1167 _hyperThreading
= cf
.maxThreads
>cf
.maxCores
;
1168 _threadsPerCPU
= cf
.maxThreads
;
1169 _coresPerCPU
= cf
.maxCores
;
1170 _preferAthlon
= cf
.probablyAMD
&& family
>=6;
1171 _preferPentium4
= cf
.probablyIntel
&& family
== 0xF;
1172 _preferPentium1
= family
< 6 ||
(family
==6 && model
< 0xF && !cf
.probablyIntel
);