1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
28 * Copyright (c) 2010, Intel Corporation.
29 * All rights reserved.
32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
35 * Copyright 2016 Joyent, Inc.
38 * Various routines to handle identification
39 * and classification of x86 processors.
42 #include <sys/types.h>
43 #include <sys/archsystm.h>
44 #include <sys/x86_archext.h>
45 #include <sys/kmem.h>
46 #include <sys/systm.h>
47 #include <sys/cmn_err.h>
48 #include <sys/sunddi.h>
49 #include <sys/sunndi.h>
50 #include <sys/cpuvar.h>
51 #include <sys/processor.h>
52 #include <sys/sysmacros.h>
53 #include <sys/pg.h>
54 #include <sys/fp.h>
55 #include <sys/controlregs.h>
56 #include <sys/bitmap.h>
57 #include <sys/auxv_386.h>
58 #include <sys/memnode.h>
59 #include <sys/pci_cfgspace.h>
60 #include <sys/comm_page.h>
61 #include <sys/tsc.h>
63 #ifdef __xpv
64 #include <sys/hypervisor.h>
65 #else
66 #include <sys/ontrap.h>
67 #endif
70 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
71 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
72 * them accordingly. For most modern processors, feature detection occurs here
73 * in pass 1.
75 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
76 * for the boot CPU and does the basic analysis that the early kernel needs.
77 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
78 * CPU.
80 * Pass 1 includes:
82 * o Determining vendor/model/family/stepping and setting x86_type and
83 * x86_vendor accordingly.
84 * o Processing the feature flags returned by the cpuid instruction while
85 * applying any workarounds or tricks for the specific processor.
86 * o Mapping the feature flags into Solaris feature bits (X86_*).
87 * o Processing extended feature flags if supported by the processor,
88 * again while applying specific processor knowledge.
89 * o Determining the CMT characteristics of the system.
91 * Pass 1 is done on non-boot CPUs during their initialization and the results
92 * are used only as a meager attempt at ensuring that all processors within the
93 * system support the same features.
95 * Pass 2 of cpuid feature analysis happens just at the beginning
96 * of startup(). It just copies in and corrects the remainder
97 * of the cpuid data we depend on: standard cpuid functions that we didn't
98 * need for pass1 feature analysis, and extended cpuid functions beyond the
99 * simple feature processing done in pass1.
101 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
102 * particular kernel memory allocation has been made available. It creates a
103 * readable brand string based on the data collected in the first two passes.
105 * Pass 4 of cpuid analysis is invoked after post_startup() when all
106 * the support infrastructure for various hardware features has been
107 * initialized. It determines which processor features will be reported
108 * to userland via the aux vector.
110 * All passes are executed on all CPUs, but only the boot CPU determines what
111 * features the kernel will use.
113 * Much of the worst junk in this file is for the support of processors
114 * that didn't really implement the cpuid instruction properly.
116 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
117 * the pass numbers. Accordingly, changes to the pass code may require changes
118 * to the accessor code.
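 *
 * Illustrative call-order sketch (an editorial aid, not from the original
 * source; the exact call sites live in mlsetup(), startup() and
 * post_startup()):
 *
 *	cpuid_pass1(CPU, x86_featureset);	boot cpu, in mlsetup()
 *	cpuid_pass2(CPU);			in startup()
 *	cpuid_pass3(CPU);			once kmem is available
 *	cpuid_pass4(CPU);			after post_startup()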
121 uint_t x86_vendor = X86_VENDOR_IntelClone;
122 uint_t x86_type = X86_TYPE_OTHER;
123 uint_t x86_clflush_size = 0;
125 uint_t pentiumpro_bug4046376;
127 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
129 static char *x86_feature_names[NUM_X86_FEATURES] = {
130 "lgpg",
131 "tsc",
132 "msr",
133 "mtrr",
134 "pge",
135 "de",
136 "cmov",
137 "mmx",
138 "mca",
139 "pae",
140 "cv8",
141 "pat",
142 "sep",
143 "sse",
144 "sse2",
145 "htt",
146 "asysc",
147 "nx",
148 "sse3",
149 "cx16",
150 "cmp",
151 "tscp",
152 "mwait",
153 "sse4a",
154 "cpuid",
155 "ssse3",
156 "sse4_1",
157 "sse4_2",
158 "1gpg",
159 "clfsh",
160 "64",
161 "aes",
162 "pclmulqdq",
163 "xsave",
164 "avx",
165 "vmx",
166 "svm",
167 "topoext",
168 "f16c",
169 "rdrand",
170 "x2apic",
171 "avx2",
172 "bmi1",
173 "bmi2",
174 "fma",
175 "smep",
176 "smap",
177 "adx",
178 "rdseed"
181 boolean_t
182 is_x86_feature(void *featureset, uint_t feature)
184 ASSERT(feature < NUM_X86_FEATURES);
185 return (BT_TEST((ulong_t *)featureset, feature));
188 void
189 add_x86_feature(void *featureset, uint_t feature)
191 ASSERT(feature < NUM_X86_FEATURES);
192 BT_SET((ulong_t *)featureset, feature);
195 void
196 remove_x86_feature(void *featureset, uint_t feature)
198 ASSERT(feature < NUM_X86_FEATURES);
199 BT_CLEAR((ulong_t *)featureset, feature);
202 boolean_t
203 compare_x86_featureset(void *setA, void *setB)
206 * We assume that the unused bits of the bitmap are always zero.
208 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
209 return (B_TRUE);
210 } else {
211 return (B_FALSE);
215 void
216 print_x86_featureset(void *featureset)
218 uint_t i;
220 for (i = 0; i < NUM_X86_FEATURES; i++) {
221 if (is_x86_feature(featureset, i)) {
222 cmn_err(CE_CONT, "?x86_feature: %s\n",
223 x86_feature_names[i]);
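/*
 * Usage sketch (illustrative only, not part of the original file): callers
 * declare a bitmap sized with BT_SIZEOFMAP() and drive it through the
 * accessors above, e.g.:
 *
 *	uchar_t fset[BT_SIZEOFMAP(NUM_X86_FEATURES)] = { 0 };
 *
 *	add_x86_feature(fset, X86FSET_SSE2);
 *	if (is_x86_feature(fset, X86FSET_SSE2))
 *		cmn_err(CE_CONT, "?sse2 supported\n");
 *	remove_x86_feature(fset, X86FSET_SSE2);
 */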
228 static size_t xsave_state_size = 0;
229 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
230 boolean_t xsave_force_disable = B_FALSE;
231 extern int disable_smap;
234	 * This is set to the platform type we are running on.
236 static int platform_type = -1;
238 #if !defined(__xpv)
240 * Variable to patch if hypervisor platform detection needs to be
241 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
243 int enable_platform_detection = 1;
244 #endif
247 * monitor/mwait info.
249 * size_actual and buf_actual are the real address and size allocated to get
250	 * proper mwait_buf alignment. buf_actual and size_actual should be passed
251	 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
252	 * processor cache-line alignment, but this is not guaranteed in the future.
254 struct mwait_info {
255 size_t mon_min; /* min size to avoid missed wakeups */
256 size_t mon_max; /* size to avoid false wakeups */
257 size_t size_actual; /* size actually allocated */
258 void *buf_actual; /* memory actually allocated */
259 uint32_t support; /* processor support of monitor/mwait */
263 * xsave/xrestor info.
265 * This structure contains HW feature bits and size of the xsave save area.
266 * Note: the kernel will use the maximum size required for all hardware
267	 * features. It is not optimized for potential memory savings if features at
268 * the end of the save area are not enabled.
270 struct xsave_info {
271 uint32_t xsav_hw_features_low; /* Supported HW features */
272 uint32_t xsav_hw_features_high; /* Supported HW features */
273 size_t xsav_max_size; /* max size save area for HW features */
274 size_t ymm_size; /* AVX: size of ymm save area */
275 size_t ymm_offset; /* AVX: offset for ymm save area */
280 * These constants determine how many of the elements of the
281 * cpuid we cache in the cpuid_info data structure; the
282 * remaining elements are accessible via the cpuid instruction.
285 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */
286 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
289 * Some terminology needs to be explained:
290 * - Socket: Something that can be plugged into a motherboard.
291 * - Package: Same as socket
292	 *   - Chip: Same as socket. Note that AMD's documentation uses the term
293	 *     "chip" differently: there, a chip is the same as a processor node (below)
294 * - Processor node: Some AMD processors have more than one
295 * "subprocessor" embedded in a package. These subprocessors (nodes)
296 * are fully-functional processors themselves with cores, caches,
297 * memory controllers, PCI configuration spaces. They are connected
298 * inside the package with Hypertransport links. On single-node
299 * processors, processor node is equivalent to chip/socket/package.
300 * - Compute Unit: Some AMD processors pair cores in "compute units" that
301 * share the FPU and the I$ and L2 caches.
304 struct cpuid_info {
305 uint_t cpi_pass; /* last pass completed */
307 * standard function information
309 uint_t cpi_maxeax; /* fn 0: %eax */
310 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
311 uint_t cpi_vendor; /* enum of cpi_vendorstr */
313 uint_t cpi_family; /* fn 1: extended family */
314 uint_t cpi_model; /* fn 1: extended model */
315 uint_t cpi_step; /* fn 1: stepping */
316 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
317 /* AMD: package/socket # */
318 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
319 int cpi_clogid; /* fn 1: %ebx: thread # */
320 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
321 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
322 uint_t cpi_ncache; /* fn 2: number of elements */
323 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
324 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
325 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */
326 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */
327 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */
329 * extended function information
331 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
332 char cpi_brandstr[49]; /* fn 0x8000000[234] */
333 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
334 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
335 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
337 id_t cpi_coreid; /* same coreid => strands share core */
338 int cpi_pkgcoreid; /* core number within single package */
339 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
340 /* Intel: fn 4: %eax[31-26] */
342 * supported feature information
344 uint32_t cpi_support[6];
345 #define STD_EDX_FEATURES 0
346 #define AMD_EDX_FEATURES 1
347 #define TM_EDX_FEATURES 2
348 #define STD_ECX_FEATURES 3
349 #define AMD_ECX_FEATURES 4
350 #define STD_EBX_FEATURES 5
352 * Synthesized information, where known.
354 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
355 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
356 uint32_t cpi_socket; /* Chip package/socket type */
358 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
359 uint32_t cpi_apicid;
360 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
361 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
362 /* Intel: 1 */
363 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
364 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
366 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
370 static struct cpuid_info cpuid_info0;
373 * These bit fields are defined by the Intel Application Note AP-485
374 * "Intel Processor Identification and the CPUID Instruction"
376 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
377 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
378 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
379 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
380 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
381 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
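/*
 * Worked example (illustrative, added editorially): for fn 1 %eax = 0x000306c3,
 *	CPI_STEP	= bits  3:0  = 0x3
 *	CPI_MODEL	= bits  7:4  = 0xc
 *	CPI_FAMILY	= bits 11:8  = 0x6
 *	CPI_MODEL_XTD	= bits 19:16 = 0x3
 * so on Intel the effective model becomes 0xc + (0x3 << 4) = 0x3c.
 */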
383 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
384 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
385 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
386 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
387 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx)
389 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
390 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
391 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
392 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
394 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
395 #define CPI_XMAXEAX_MAX 0x80000100
396 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
397 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
400 * Function 4 (Deterministic Cache Parameters) macros
401 * Defined by Intel Application Note AP-485
403 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
404 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
405 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
406 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
407 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
408 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
409 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
411 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
412 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
413 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
415 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
417 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
421 * A couple of shorthand macros to identify "later" P6-family chips
422 * like the Pentium M and Core. First, the "older" P6-based stuff
423 * (loosely defined as "pre-Pentium-4"):
424 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
426 #define IS_LEGACY_P6(cpi) ( \
427 cpi->cpi_family == 6 && \
428 (cpi->cpi_model == 1 || \
429 cpi->cpi_model == 3 || \
430 cpi->cpi_model == 5 || \
431 cpi->cpi_model == 6 || \
432 cpi->cpi_model == 7 || \
433 cpi->cpi_model == 8 || \
434 cpi->cpi_model == 0xA || \
435 cpi->cpi_model == 0xB) \
438 /* A "new F6" is everything with family 6 that's not the above */
439 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
441 /* Extended family/model support */
442 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
443 cpi->cpi_family >= 0xf)
446 * Info for monitor/mwait idle loop.
448 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
449 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
450 * 2006.
451 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
452 * Documentation Updates" #33633, Rev 2.05, December 2006.
454 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
455	#define	MWAIT_EXTENSIONS	(0x00000002)	/* extensions supported */
456 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
457 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
458 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
459 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
460 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
461 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
463 * Number of sub-cstates for a given c-state.
465 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
466 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
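/*
 * Example (illustrative, added editorially): with fn 5 %edx = 0x00002220,
 *	MWAIT_NUM_SUBC_STATES(cpi, 4) = %edx[7:4]  = 2	(C1 sub-states)
 *	MWAIT_NUM_SUBC_STATES(cpi, 8) = %edx[11:8] = 2	(C2 sub-states)
 */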
469 * XSAVE leaf 0xD enumeration
471 #define CPUID_LEAFD_2_YMM_OFFSET 576
472 #define CPUID_LEAFD_2_YMM_SIZE 256
475	 * Functions we consume from cpuid_subr.c; don't publish these in a header
476 * file to try and keep people using the expected cpuid_* interfaces.
478 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
479 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
480 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
481 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
482 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
485	 * Apply various platform-dependent restrictions where the
486 * underlying platform restrictions mean the CPU can be marked
487 * as less capable than its cpuid instruction would imply.
489 #if defined(__xpv)
490 static void
491 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
493 switch (eax) {
494 case 1: {
495 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
496 0 : CPUID_INTC_EDX_MCA;
497 cp->cp_edx &=
498 ~(mcamask |
499 CPUID_INTC_EDX_PSE |
500 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
501 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
502 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
503 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
504 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
505 break;
508 case 0x80000001:
509 cp->cp_edx &=
510 ~(CPUID_AMD_EDX_PSE |
511 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
512 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
513 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
514 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
515 CPUID_AMD_EDX_TSCP);
516 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
517 break;
518 default:
519 break;
522 switch (vendor) {
523 case X86_VENDOR_Intel:
524 switch (eax) {
525 case 4:
527 * Zero out the (ncores-per-chip - 1) field
529			cp->cp_eax &= 0x03ffffff;
530 break;
531 default:
532 break;
534 break;
535 case X86_VENDOR_AMD:
536 switch (eax) {
538 case 0x80000001:
539 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
540 break;
542 case 0x80000008:
544 * Zero out the (ncores-per-chip - 1) field
546 cp->cp_ecx &= 0xffffff00;
547 break;
548 default:
549 break;
551 break;
552 default:
553 break;
556 #else
557 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
558 #endif
561 * Some undocumented ways of patching the results of the cpuid
562 * instruction to permit running Solaris 10 on future cpus that
563 * we don't currently support. Could be set to non-zero values
564 * via settings in eeprom.
567 uint32_t cpuid_feature_ecx_include;
568 uint32_t cpuid_feature_ecx_exclude;
569 uint32_t cpuid_feature_edx_include;
570 uint32_t cpuid_feature_edx_exclude;
573 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
575 void
576 cpuid_alloc_space(cpu_t *cpu)
579 * By convention, cpu0 is the boot cpu, which is set up
580 * before memory allocation is available. All other cpus get
581 * their cpuid_info struct allocated here.
583 ASSERT(cpu->cpu_id != 0);
584 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
585 cpu->cpu_m.mcpu_cpi =
586 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
589 void
590 cpuid_free_space(cpu_t *cpu)
592 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
593 int i;
595 ASSERT(cpi != NULL);
596 ASSERT(cpi != &cpuid_info0);
599 * Free up any function 4 related dynamic storage
601 for (i = 1; i < cpi->cpi_std_4_size; i++)
602 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
603 if (cpi->cpi_std_4_size > 0)
604 kmem_free(cpi->cpi_std_4,
605 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
607 kmem_free(cpi, sizeof (*cpi));
608 cpu->cpu_m.mcpu_cpi = NULL;
611 #if !defined(__xpv)
613 * Determine the type of the underlying platform. This is used to customize
614 * initialization of various subsystems (e.g. TSC). determine_platform() must
615 * only ever be called once to prevent two processors from seeing different
616 * values of platform_type. Must be called before cpuid_pass1(), the earliest
617 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
619 void
620 determine_platform(void)
622 struct cpuid_regs cp;
623 uint32_t base;
624 uint32_t regs[4];
625 char *hvstr = (char *)regs;
627 ASSERT(platform_type == -1);
629 platform_type = HW_NATIVE;
631 if (!enable_platform_detection)
632 return;
635	 * If the Hypervisor CPUID bit is set, try to determine the
636	 * hypervisor vendor signature, and set the platform type accordingly.
638 * References:
639 * http://lkml.org/lkml/2008/10/1/246
640 * http://kb.vmware.com/kb/1009458
642 cp.cp_eax = 0x1;
643 (void) __cpuid_insn(&cp);
644 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
645 cp.cp_eax = 0x40000000;
646 (void) __cpuid_insn(&cp);
647 regs[0] = cp.cp_ebx;
648 regs[1] = cp.cp_ecx;
649 regs[2] = cp.cp_edx;
650 regs[3] = 0;
651 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
652 platform_type = HW_XEN_HVM;
653 return;
655 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
656 platform_type = HW_VMWARE;
657 return;
659 if (strcmp(hvstr, HVSIG_KVM) == 0) {
660 platform_type = HW_KVM;
661 return;
663 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
664 platform_type = HW_MICROSOFT;
665 } else {
667 * Check older VMware hardware versions. VMware hypervisor is
668 * detected by performing an IN operation to VMware hypervisor
669		 * port and checking that the value returned in %ebx is the
670		 * VMware hypervisor magic value.
672 * References: http://kb.vmware.com/kb/1009458
674 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
675 if (regs[1] == VMWARE_HVMAGIC) {
676 platform_type = HW_VMWARE;
677 return;
682	 * Check for the Xen hypervisor. In a fully virtualized domain,
683 * Xen's pseudo-cpuid function returns a string representing the
684 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
685 * supported cpuid function. We need at least a (base + 2) leaf value
686 * to do what we want to do. Try different base values, since the
687 * hypervisor might use a different one depending on whether Hyper-V
688 * emulation is switched on by default or not.
690 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
691 cp.cp_eax = base;
692 (void) __cpuid_insn(&cp);
693 regs[0] = cp.cp_ebx;
694 regs[1] = cp.cp_ecx;
695 regs[2] = cp.cp_edx;
696 regs[3] = 0;
697 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
698 cp.cp_eax >= (base + 2)) {
699 platform_type &= ~HW_NATIVE;
700 platform_type |= HW_XEN_HVM;
701 return;
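/*
 * Illustrative note (added editorially): the 12-byte vendor signature is
 * just the concatenation of %ebx, %ecx and %edx from the hypervisor leaf,
 * so a KVM guest that stores the three registers into regs[] as above sees
 * hvstr compare equal to HVSIG_KVM ("KVMKVMKVM").
 */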
707 get_hwenv(void)
709 ASSERT(platform_type != -1);
710 return (platform_type);
714 is_controldom(void)
716 return (0);
719 #else
722 get_hwenv(void)
724 return (HW_XEN_PV);
728 is_controldom(void)
730 return (DOMAIN_IS_INITDOMAIN(xen_info));
733 #endif /* __xpv */
735 static void
736 cpuid_intel_getids(cpu_t *cpu, void *feature)
738 uint_t i;
739 uint_t chipid_shift = 0;
740 uint_t coreid_shift = 0;
741 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
743 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
744 chipid_shift++;
746 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
747 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
749 if (is_x86_feature(feature, X86FSET_CMP)) {
751 * Multi-core (and possibly multi-threaded)
752 * processors.
754 uint_t ncpu_per_core;
755 if (cpi->cpi_ncore_per_chip == 1)
756 ncpu_per_core = cpi->cpi_ncpu_per_chip;
757 else if (cpi->cpi_ncore_per_chip > 1)
758 ncpu_per_core = cpi->cpi_ncpu_per_chip /
759 cpi->cpi_ncore_per_chip;
761		 * 8-bit APIC IDs on dual-core Pentiums
762 * look like this:
764 * +-----------------------+------+------+
765 * | Physical Package ID | MC | HT |
766 * +-----------------------+------+------+
767 * <------- chipid -------->
768 * <------- coreid --------------->
769 * <--- clogid -->
770 * <------>
771 * pkgcoreid
773		 * Where the number of bits necessary to
774		 * represent the MC and HT fields together equals
775		 * the minimum number of bits necessary to
776 * store the value of cpi->cpi_ncpu_per_chip.
777 * Of those bits, the MC part uses the number
778 * of bits necessary to store the value of
779 * cpi->cpi_ncore_per_chip.
781 for (i = 1; i < ncpu_per_core; i <<= 1)
782 coreid_shift++;
783 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
784 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
785 } else if (is_x86_feature(feature, X86FSET_HTT)) {
787 * Single-core multi-threaded processors.
789 cpi->cpi_coreid = cpi->cpi_chipid;
790 cpi->cpi_pkgcoreid = 0;
792 cpi->cpi_procnodeid = cpi->cpi_chipid;
793 cpi->cpi_compunitid = cpi->cpi_coreid;
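/*
 * Worked example (illustrative, added editorially): a 2-core,
 * 2-thread-per-core package has cpi_ncpu_per_chip = 4, hence
 * chipid_shift = 2, and ncpu_per_core = 2, hence coreid_shift = 1.
 * For cpi_apicid = 0x5:
 *	cpi_chipid    = 0x5 >> 2  = 1
 *	cpi_clogid    = 0x5 & 0x3 = 1
 *	cpi_coreid    = 0x5 >> 1  = 2
 *	cpi_pkgcoreid = 1 >> 1    = 0
 */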
796 static void
797 cpuid_amd_getids(cpu_t *cpu)
799 int i, first_half, coreidsz;
800 uint32_t nb_caps_reg;
801 uint_t node2_1;
802 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
803 struct cpuid_regs *cp;
806 * AMD CMP chips currently have a single thread per core.
808 * Since no two cpus share a core we must assign a distinct coreid
809 * per cpu, and we do this by using the cpu_id. This scheme does not,
810 * however, guarantee that sibling cores of a chip will have sequential
811 * coreids starting at a multiple of the number of cores per chip -
812 * that is usually the case, but if the ACPI MADT table is presented
813 * in a different order then we need to perform a few more gymnastics
814 * for the pkgcoreid.
816 * All processors in the system have the same number of enabled
817 * cores. Cores within a processor are always numbered sequentially
818 * from 0 regardless of how many or which are disabled, and there
819	 * is no way for the operating system to discover the real core id when some
820 * are disabled.
822 * In family 0x15, the cores come in pairs called compute units. They
823 * share I$ and L2 caches and the FPU. Enumeration of this feature is
824 * simplified by the new topology extensions CPUID leaf, indicated by
825 * the X86 feature X86FSET_TOPOEXT.
828 cpi->cpi_coreid = cpu->cpu_id;
829 cpi->cpi_compunitid = cpu->cpu_id;
831 if (cpi->cpi_xmaxeax >= 0x80000008) {
833 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
836		 * In AMD parlance, a chip is really a node, while Solaris
837		 * sees a chip as equivalent to a socket/package.
839 cpi->cpi_ncore_per_chip =
840 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
841 if (coreidsz == 0) {
842 /* Use legacy method */
843 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
844 coreidsz++;
845 if (coreidsz == 0)
846 coreidsz = 1;
848 } else {
849 /* Assume single-core part */
850 cpi->cpi_ncore_per_chip = 1;
851 coreidsz = 1;
854 cpi->cpi_clogid = cpi->cpi_pkgcoreid =
855 cpi->cpi_apicid & ((1<<coreidsz) - 1);
856 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
858 /* Get node ID, compute unit ID */
859 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
860 cpi->cpi_xmaxeax >= 0x8000001e) {
861 cp = &cpi->cpi_extd[0x1e];
862 cp->cp_eax = 0x8000001e;
863 (void) __cpuid_insn(cp);
865 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
866 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
867 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
868 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
869 + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
870 * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
871 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
872 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
873 } else if (cpi->cpi_family == 0x10) {
875 * See if we are a multi-node processor.
876 * All processors in the system have the same number of nodes
878 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
879 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
880 /* Single-node */
881 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
882 coreidsz);
883 } else {
886 * Multi-node revision D (2 nodes per package
887 * are supported)
889 cpi->cpi_procnodes_per_pkg = 2;
891 first_half = (cpi->cpi_pkgcoreid <=
892 (cpi->cpi_ncore_per_chip/2 - 1));
894 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
895 /* We are BSP */
896 cpi->cpi_procnodeid = (first_half ? 0 : 1);
897 } else {
899 /* We are AP */
900 /* NodeId[2:1] bits to use for reading F3xe8 */
901 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
903 nb_caps_reg =
904 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
907 * Check IntNodeNum bit (31:30, but bit 31 is
908 * always 0 on dual-node processors)
910 if (BITX(nb_caps_reg, 30, 30) == 0)
911 cpi->cpi_procnodeid = node2_1 +
912 !first_half;
913 else
914 cpi->cpi_procnodeid = node2_1 +
915 first_half;
918 } else {
919 cpi->cpi_procnodeid = 0;
922 cpi->cpi_chipid =
923 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
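/*
 * Worked example (illustrative, added editorially): on a family 0x15 part
 * with cpi_ncore_per_chip = 8, cpi_cores_per_compunit = 2 and
 * cpi_procnodes_per_pkg = 2, a cpu on procnodeid 2 whose leaf 0x8000001e
 * %ebx[7:0] reads 1 ends up with
 *	cpi_compunitid = 1 + (8 / 2) * (2 / 2) = 5
 *	cpi_chipid     = 2 / 2 = 1
 */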
927	 * Set up the XFeature_Enabled_Mask register. Required by the xsave feature.
929 void
930 setup_xfem(void)
932 uint64_t flags = XFEATURE_LEGACY_FP;
934 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
936 if (is_x86_feature(x86_featureset, X86FSET_SSE))
937 flags |= XFEATURE_SSE;
939 if (is_x86_feature(x86_featureset, X86FSET_AVX))
940 flags |= XFEATURE_AVX;
942 set_xcr(XFEATURE_ENABLED_MASK, flags);
944 xsave_bv_all = flags;
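/*
 * Worked example (illustrative, added editorially, assuming the XFEATURE_*
 * bits carry their architectural XCR0 values 0x1, 0x2 and 0x4): on an
 * AVX-capable cpu the mask written to XCR0 here is
 *	XFEATURE_LEGACY_FP | XFEATURE_SSE | XFEATURE_AVX = 0x7
 * i.e. x87, XMM and YMM state saving are all enabled.
 */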
947 void
948 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
950 uint32_t mask_ecx, mask_edx;
951 struct cpuid_info *cpi;
952 struct cpuid_regs *cp;
953 int xcpuid;
954 #if !defined(__xpv)
955 extern int idle_cpu_prefer_mwait;
956 #endif
959 * Space statically allocated for BSP, ensure pointer is set
961 if (cpu->cpu_id == 0) {
962 if (cpu->cpu_m.mcpu_cpi == NULL)
963 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
966 add_x86_feature(featureset, X86FSET_CPUID);
968 cpi = cpu->cpu_m.mcpu_cpi;
969 ASSERT(cpi != NULL);
970 cp = &cpi->cpi_std[0];
971 cp->cp_eax = 0;
972 cpi->cpi_maxeax = __cpuid_insn(cp);
974 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
975 *iptr++ = cp->cp_ebx;
976 *iptr++ = cp->cp_edx;
977 *iptr++ = cp->cp_ecx;
978 *(char *)&cpi->cpi_vendorstr[12] = '\0';
981 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
982 x86_vendor = cpi->cpi_vendor; /* for compatibility */
985 * Limit the range in case of weird hardware
987 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
988 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
989 if (cpi->cpi_maxeax < 1)
990 goto pass1_done;
992 cp = &cpi->cpi_std[1];
993 cp->cp_eax = 1;
994 (void) __cpuid_insn(cp);
997 * Extract identifying constants for easy access.
999 cpi->cpi_model = CPI_MODEL(cpi);
1000 cpi->cpi_family = CPI_FAMILY(cpi);
1002 if (cpi->cpi_family == 0xf)
1003 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
1006 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
1007 * Intel, and presumably everyone else, uses model == 0xf, as
1008 * one would expect (max value means possible overflow). Sigh.
1011 switch (cpi->cpi_vendor) {
1012 case X86_VENDOR_Intel:
1013 if (IS_EXTENDED_MODEL_INTEL(cpi))
1014 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1015 break;
1016 case X86_VENDOR_AMD:
1017 if (CPI_FAMILY(cpi) == 0xf)
1018 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1019 break;
1020 default:
1021 if (cpi->cpi_model == 0xf)
1022 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1023 break;
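	/*
	 * Worked example (illustrative, added editorially): an AMD fn 1
	 * %eax of 0x00600f20 has base family 0xf, so
	 * cpi_family = 0xf + 0x6 = 0x15 and, because base family is 0xf,
	 * cpi_model = 0x2 + (0x0 << 4) = 0x2.
	 */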
1026 cpi->cpi_step = CPI_STEP(cpi);
1027 cpi->cpi_brandid = CPI_BRANDID(cpi);
1030 * *default* assumptions:
1031 * - believe %edx feature word
1032 * - ignore %ecx feature word
1033 * - 32-bit virtual and physical addressing
1035 mask_edx = 0xffffffff;
1036 mask_ecx = 0;
1038 cpi->cpi_pabits = cpi->cpi_vabits = 32;
1040 switch (cpi->cpi_vendor) {
1041 case X86_VENDOR_Intel:
1042 if (cpi->cpi_family == 5)
1043 x86_type = X86_TYPE_P5;
1044 else if (IS_LEGACY_P6(cpi)) {
1045 x86_type = X86_TYPE_P6;
1046 pentiumpro_bug4046376 = 1;
1048 * Clear the SEP bit when it was set erroneously
1050 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
1051 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
1052 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
1053 x86_type = X86_TYPE_P4;
1055 * We don't currently depend on any of the %ecx
1056 * features until Prescott, so we'll only check
1057 * this from P4 onwards. We might want to revisit
1058 * that idea later.
1060 mask_ecx = 0xffffffff;
1061 } else if (cpi->cpi_family > 0xf)
1062 mask_ecx = 0xffffffff;
1064 * We don't support MONITOR/MWAIT if leaf 5 is not available
1065 * to obtain the monitor linesize.
1067 if (cpi->cpi_maxeax < 5)
1068 mask_ecx &= ~CPUID_INTC_ECX_MON;
1069 break;
1070 case X86_VENDOR_IntelClone:
1071 default:
1072 break;
1073 case X86_VENDOR_AMD:
1074 #if defined(OPTERON_ERRATUM_108)
1075 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1076 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1077 cpi->cpi_model = 0xc;
1078 } else
1079 #endif
1080 if (cpi->cpi_family == 5) {
1082 * AMD K5 and K6
1084 * These CPUs have an incomplete implementation
1085 * of MCA/MCE which we mask away.
1087 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1090 * Model 0 uses the wrong (APIC) bit
1091 * to indicate PGE. Fix it here.
1093 if (cpi->cpi_model == 0) {
1094 if (cp->cp_edx & 0x200) {
1095 cp->cp_edx &= ~0x200;
1096 cp->cp_edx |= CPUID_INTC_EDX_PGE;
1101 * Early models had problems w/ MMX; disable.
1103 if (cpi->cpi_model < 6)
1104 mask_edx &= ~CPUID_INTC_EDX_MMX;
1108 * For newer families, SSE3 and CX16, at least, are valid;
1109 * enable all
1111 if (cpi->cpi_family >= 0xf)
1112 mask_ecx = 0xffffffff;
1114 * We don't support MONITOR/MWAIT if leaf 5 is not available
1115 * to obtain the monitor linesize.
1117 if (cpi->cpi_maxeax < 5)
1118 mask_ecx &= ~CPUID_INTC_ECX_MON;
1120 #if !defined(__xpv)
1122 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1123 * processors. AMD does not intend MWAIT to be used in the cpu
1124 * idle loop on current and future processors. 10h and future
1125 * AMD processors use more power in MWAIT than HLT.
1126 * Pre-family-10h Opterons do not have the MWAIT instruction.
1128 idle_cpu_prefer_mwait = 0;
1129 #endif
1131 break;
1132 case X86_VENDOR_TM:
1134 * workaround the NT workaround in CMS 4.1
1136 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1137 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1138 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1139 break;
1140 case X86_VENDOR_Centaur:
1142 * workaround the NT workarounds again
1144 if (cpi->cpi_family == 6)
1145 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1146 break;
1147 case X86_VENDOR_Cyrix:
1149 * We rely heavily on the probing in locore
1150 * to actually figure out what parts, if any,
1151 * of the Cyrix cpuid instruction to believe.
1153 switch (x86_type) {
1154 case X86_TYPE_CYRIX_486:
1155 mask_edx = 0;
1156 break;
1157 case X86_TYPE_CYRIX_6x86:
1158 mask_edx = 0;
1159 break;
1160 case X86_TYPE_CYRIX_6x86L:
1161 mask_edx =
1162 CPUID_INTC_EDX_DE |
1163 CPUID_INTC_EDX_CX8;
1164 break;
1165 case X86_TYPE_CYRIX_6x86MX:
1166 mask_edx =
1167 CPUID_INTC_EDX_DE |
1168 CPUID_INTC_EDX_MSR |
1169 CPUID_INTC_EDX_CX8 |
1170 CPUID_INTC_EDX_PGE |
1171 CPUID_INTC_EDX_CMOV |
1172 CPUID_INTC_EDX_MMX;
1173 break;
1174 case X86_TYPE_CYRIX_GXm:
1175 mask_edx =
1176 CPUID_INTC_EDX_MSR |
1177 CPUID_INTC_EDX_CX8 |
1178 CPUID_INTC_EDX_CMOV |
1179 CPUID_INTC_EDX_MMX;
1180 break;
1181 case X86_TYPE_CYRIX_MediaGX:
1182 break;
1183 case X86_TYPE_CYRIX_MII:
1184 case X86_TYPE_VIA_CYRIX_III:
1185 mask_edx =
1186 CPUID_INTC_EDX_DE |
1187 CPUID_INTC_EDX_TSC |
1188 CPUID_INTC_EDX_MSR |
1189 CPUID_INTC_EDX_CX8 |
1190 CPUID_INTC_EDX_PGE |
1191 CPUID_INTC_EDX_CMOV |
1192 CPUID_INTC_EDX_MMX;
1193 break;
1194 default:
1195 break;
1197 break;
1200 #if defined(__xpv)
1202 * Do not support MONITOR/MWAIT under a hypervisor
1204 mask_ecx &= ~CPUID_INTC_ECX_MON;
1206 * Do not support XSAVE under a hypervisor for now
1208 xsave_force_disable = B_TRUE;
1210 #endif /* __xpv */
1212 if (xsave_force_disable) {
1213 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1214 mask_ecx &= ~CPUID_INTC_ECX_AVX;
1215 mask_ecx &= ~CPUID_INTC_ECX_F16C;
1216 mask_ecx &= ~CPUID_INTC_ECX_FMA;
1220 * Now we've figured out the masks that determine
1221 * which bits we choose to believe, apply the masks
1222 * to the feature words, then map the kernel's view
1223 * of these feature words into its feature word.
1225 cp->cp_edx &= mask_edx;
1226 cp->cp_ecx &= mask_ecx;
1229 * apply any platform restrictions (we don't call this
1230 * immediately after __cpuid_insn here, because we need the
1231 * workarounds applied above first)
1233 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1236 * In addition to ecx and edx, Intel is storing a bunch of instruction
1237 * set extensions in leaf 7's ebx.
1239 if (cpi->cpi_vendor == X86_VENDOR_Intel && cpi->cpi_maxeax >= 7) {
1240 struct cpuid_regs *ecp;
1241 ecp = &cpi->cpi_std[7];
1242 ecp->cp_eax = 7;
1243 ecp->cp_ecx = 0;
1244 (void) __cpuid_insn(ecp);
1246 * If XSAVE has been disabled, just ignore all of the AVX
1247 * dependent flags here.
1249 if (xsave_force_disable) {
1250 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
1251 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
1252 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
1255 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
1256 add_x86_feature(featureset, X86FSET_SMEP);
1259 * We check disable_smap here in addition to in startup_smap()
1260 * to ensure CPUs that aren't the boot CPU don't accidentally
1261 * include it in the feature set and thus generate a mismatched
1262 * x86 feature set across CPUs. Note that at this time we only
1263 * enable SMAP for the 64-bit kernel.
1265 #if defined(__amd64)
1266 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
1267 disable_smap == 0)
1268 add_x86_feature(featureset, X86FSET_SMAP);
1269 #endif
1270 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
1271 add_x86_feature(featureset, X86FSET_RDSEED);
1273 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
1274 add_x86_feature(featureset, X86FSET_ADX);
1278 * fold in overrides from the "eeprom" mechanism
1280 cp->cp_edx |= cpuid_feature_edx_include;
1281 cp->cp_edx &= ~cpuid_feature_edx_exclude;
1283 cp->cp_ecx |= cpuid_feature_ecx_include;
1284 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1286 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1287 add_x86_feature(featureset, X86FSET_LARGEPAGE);
1289 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1290 add_x86_feature(featureset, X86FSET_TSC);
1292 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1293 add_x86_feature(featureset, X86FSET_MSR);
1295 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1296 add_x86_feature(featureset, X86FSET_MTRR);
1298 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1299 add_x86_feature(featureset, X86FSET_PGE);
1301 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1302 add_x86_feature(featureset, X86FSET_CMOV);
1304 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1305 add_x86_feature(featureset, X86FSET_MMX);
1307 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1308 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1309 add_x86_feature(featureset, X86FSET_MCA);
1311 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1312 add_x86_feature(featureset, X86FSET_PAE);
1314 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1315 add_x86_feature(featureset, X86FSET_CX8);
1317 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1318 add_x86_feature(featureset, X86FSET_CX16);
1320 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1321 add_x86_feature(featureset, X86FSET_PAT);
1323 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1324 add_x86_feature(featureset, X86FSET_SEP);
1326 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1328 * In our implementation, fxsave/fxrstor
1329 * are prerequisites before we'll even
1330 * try and do SSE things.
1332 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1333 add_x86_feature(featureset, X86FSET_SSE);
1335 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1336 add_x86_feature(featureset, X86FSET_SSE2);
1338 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1339 add_x86_feature(featureset, X86FSET_SSE3);
1341 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1342 add_x86_feature(featureset, X86FSET_SSSE3);
1344 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1345 add_x86_feature(featureset, X86FSET_SSE4_1);
1347 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1348 add_x86_feature(featureset, X86FSET_SSE4_2);
1350 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1351 add_x86_feature(featureset, X86FSET_AES);
1353 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1354 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1357 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1358 add_x86_feature(featureset, X86FSET_XSAVE);
1360 /* We only test AVX when there is XSAVE */
1361 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1362 add_x86_feature(featureset,
1363 X86FSET_AVX);
1366 * Intel says we can't check these without also
1367 * checking AVX.
1369 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
1370 add_x86_feature(featureset,
1371 X86FSET_F16C);
1373 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
1374 add_x86_feature(featureset,
1375 X86FSET_FMA);
1377 if (cpi->cpi_std[7].cp_ebx &
1378 CPUID_INTC_EBX_7_0_BMI1)
1379 add_x86_feature(featureset,
1380 X86FSET_BMI1);
1382 if (cpi->cpi_std[7].cp_ebx &
1383 CPUID_INTC_EBX_7_0_BMI2)
1384 add_x86_feature(featureset,
1385 X86FSET_BMI2);
1387 if (cpi->cpi_std[7].cp_ebx &
1388 CPUID_INTC_EBX_7_0_AVX2)
1389 add_x86_feature(featureset,
1390 X86FSET_AVX2);
1394 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
1395 add_x86_feature(featureset, X86FSET_X2APIC);
1397 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1398 add_x86_feature(featureset, X86FSET_DE);
1400 #if !defined(__xpv)
1401 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1404		 * We require the CLFLUSH instruction for the erratum
1405		 * workaround to use MONITOR/MWAIT.
1407 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1408 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1409 add_x86_feature(featureset, X86FSET_MWAIT);
1410 } else {
1411 extern int idle_cpu_assert_cflush_monitor;
1414 * All processors we are aware of which have
1415 * MONITOR/MWAIT also have CLFLUSH.
1417 if (idle_cpu_assert_cflush_monitor) {
1418 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1419 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1423 #endif /* __xpv */
1425 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1426 add_x86_feature(featureset, X86FSET_VMX);
1429 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
1430 add_x86_feature(featureset, X86FSET_RDRAND);
1433	 * Only need it the first time; the rest of the cpus will follow suit.
1434	 * We only capture this for the boot cpu.
1436 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1437 add_x86_feature(featureset, X86FSET_CLFSH);
1438 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
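	/*
	 * Example (illustrative, added editorially): %ebx[15:8] counts the
	 * CLFLUSH line size in 8-byte chunks, so a typical value of 8
	 * yields x86_clflush_size = 8 * 8 = 64 bytes.
	 */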
1440 if (is_x86_feature(featureset, X86FSET_PAE))
1441 cpi->cpi_pabits = 36;
1444 * Hyperthreading configuration is slightly tricky on Intel
1445 * and pure clones, and even trickier on AMD.
1447 * (AMD chose to set the HTT bit on their CMP processors,
1448 * even though they're not actually hyperthreaded. Thus it
1449 * takes a bit more work to figure out what's really going
1450 * on ... see the handling of the CMP_LGCY bit below)
1452 if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1453 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1454 if (cpi->cpi_ncpu_per_chip > 1)
1455 add_x86_feature(featureset, X86FSET_HTT);
1456 } else {
1457 cpi->cpi_ncpu_per_chip = 1;
1461 * Work on the "extended" feature information, doing
1462 * some basic initialization for cpuid_pass2()
1464 xcpuid = 0;
1465 switch (cpi->cpi_vendor) {
1466 case X86_VENDOR_Intel:
1468 * On KVM we know we will have proper support for extended
1469 * cpuid.
1471 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
1472 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
1473 (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
1474 xcpuid++;
1475 break;
1476 case X86_VENDOR_AMD:
1477 if (cpi->cpi_family > 5 ||
1478 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1479 xcpuid++;
1480 break;
1481 case X86_VENDOR_Cyrix:
1483 * Only these Cyrix CPUs are -known- to support
1484 * extended cpuid operations.
1486 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1487 x86_type == X86_TYPE_CYRIX_GXm)
1488 xcpuid++;
1489 break;
1490 case X86_VENDOR_Centaur:
1491 case X86_VENDOR_TM:
1492 default:
1493 xcpuid++;
1494 break;
1497 if (xcpuid) {
1498 cp = &cpi->cpi_extd[0];
1499 cp->cp_eax = 0x80000000;
1500 cpi->cpi_xmaxeax = __cpuid_insn(cp);
1503 if (cpi->cpi_xmaxeax & 0x80000000) {
1505 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1506 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1508 switch (cpi->cpi_vendor) {
1509 case X86_VENDOR_Intel:
1510 case X86_VENDOR_AMD:
1511 if (cpi->cpi_xmaxeax < 0x80000001)
1512 break;
1513 cp = &cpi->cpi_extd[1];
1514 cp->cp_eax = 0x80000001;
1515 (void) __cpuid_insn(cp);
1517 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1518 cpi->cpi_family == 5 &&
1519 cpi->cpi_model == 6 &&
1520 cpi->cpi_step == 6) {
1522 * K6 model 6 uses bit 10 to indicate SYSC
1523 * Later models use bit 11. Fix it here.
1525 if (cp->cp_edx & 0x400) {
1526 cp->cp_edx &= ~0x400;
1527 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1531 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1534 * Compute the additions to the kernel's feature word.
1536 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1537 add_x86_feature(featureset, X86FSET_NX);
1541			 * Regardless of whether we boot 64-bit,
1542 * we should have a way to identify whether
1543 * the CPU is capable of running 64-bit.
1545 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1546 add_x86_feature(featureset, X86FSET_64);
1549 #if defined(__amd64)
1550 /* 1 GB large page - enable only for 64 bit kernel */
1551 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1552 add_x86_feature(featureset, X86FSET_1GPG);
1554 #endif
1556 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1557 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1558 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1559 add_x86_feature(featureset, X86FSET_SSE4A);
1563 * If both the HTT and CMP_LGCY bits are set,
1564 * then we're not actually HyperThreaded. Read
1565 * "AMD CPUID Specification" for more details.
1567 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1568 is_x86_feature(featureset, X86FSET_HTT) &&
1569 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1570 remove_x86_feature(featureset, X86FSET_HTT);
1571 add_x86_feature(featureset, X86FSET_CMP);
1573 #if defined(__amd64)
1575 * It's really tricky to support syscall/sysret in
1576 * the i386 kernel; we rely on sysenter/sysexit
1577 * instead. In the amd64 kernel, things are -way-
1578 * better.
1580 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1581 add_x86_feature(featureset, X86FSET_ASYSC);
1585 * While we're thinking about system calls, note
1586 * that AMD processors don't support sysenter
1587 * in long mode at all, so don't try to program them.
1589 if (x86_vendor == X86_VENDOR_AMD) {
1590 remove_x86_feature(featureset, X86FSET_SEP);
1592 #endif
1593 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1594 add_x86_feature(featureset, X86FSET_TSCP);
1597 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1598 add_x86_feature(featureset, X86FSET_SVM);
1601 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
1602 add_x86_feature(featureset, X86FSET_TOPOEXT);
1604 break;
1605 default:
1606 break;
1610 * Get CPUID data about processor cores and hyperthreads.
1612 switch (cpi->cpi_vendor) {
1613 case X86_VENDOR_Intel:
1614 if (cpi->cpi_maxeax >= 4) {
1615 cp = &cpi->cpi_std[4];
1616 cp->cp_eax = 4;
1617 cp->cp_ecx = 0;
1618 (void) __cpuid_insn(cp);
1619 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1621 /*FALLTHROUGH*/
1622 case X86_VENDOR_AMD:
1623 if (cpi->cpi_xmaxeax < 0x80000008)
1624 break;
1625 cp = &cpi->cpi_extd[8];
1626 cp->cp_eax = 0x80000008;
1627 (void) __cpuid_insn(cp);
1628 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1631 * Virtual and physical address limits from
1632 * cpuid override previously guessed values.
1634 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1635 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1636 break;
1637 default:
1638 break;
1642 * Derive the number of cores per chip
1644 switch (cpi->cpi_vendor) {
1645 case X86_VENDOR_Intel:
1646 if (cpi->cpi_maxeax < 4) {
1647 cpi->cpi_ncore_per_chip = 1;
1648 break;
1649 } else {
1650 cpi->cpi_ncore_per_chip =
1651 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1653 break;
1654 case X86_VENDOR_AMD:
1655 if (cpi->cpi_xmaxeax < 0x80000008) {
1656 cpi->cpi_ncore_per_chip = 1;
1657 break;
1658 } else {
1660			 * On family 0xf, cpuid fn 0x80000008 %ecx[7:0] "NC" is
1661			 * 1 less than the number of physical cores on
1662 * the chip. In family 0x10 this value can
1663 * be affected by "downcoring" - it reflects
1664 * 1 less than the number of cores actually
1665 * enabled on this node.
1667 cpi->cpi_ncore_per_chip =
1668 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1670 break;
1671 default:
1672 cpi->cpi_ncore_per_chip = 1;
1673 break;
1677 * Get CPUID data about TSC Invariance in Deep C-State.
1679 switch (cpi->cpi_vendor) {
1680 case X86_VENDOR_Intel:
1681 if (cpi->cpi_maxeax >= 7) {
1682 cp = &cpi->cpi_extd[7];
1683 cp->cp_eax = 0x80000007;
1684 cp->cp_ecx = 0;
1685 (void) __cpuid_insn(cp);
1687 break;
1688 default:
1689 break;
1691 } else {
1692 cpi->cpi_ncore_per_chip = 1;
1696 * If more than one core, then this processor is CMP.
1698 if (cpi->cpi_ncore_per_chip > 1) {
1699 add_x86_feature(featureset, X86FSET_CMP);
1703 * If the number of cores is the same as the number
1704 * of CPUs, then we cannot have HyperThreading.
1706 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1707 remove_x86_feature(featureset, X86FSET_HTT);
1710 cpi->cpi_apicid = CPI_APIC_ID(cpi);
1711 cpi->cpi_procnodes_per_pkg = 1;
1712 cpi->cpi_cores_per_compunit = 1;
1713 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1714 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1716 * Single-core single-threaded processors.
1718 cpi->cpi_chipid = -1;
1719 cpi->cpi_clogid = 0;
1720 cpi->cpi_coreid = cpu->cpu_id;
1721 cpi->cpi_pkgcoreid = 0;
1722 if (cpi->cpi_vendor == X86_VENDOR_AMD)
1723 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1724 else
1725 cpi->cpi_procnodeid = cpi->cpi_chipid;
1726 } else if (cpi->cpi_ncpu_per_chip > 1) {
1727 if (cpi->cpi_vendor == X86_VENDOR_Intel)
1728 cpuid_intel_getids(cpu, featureset);
1729 else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1730 cpuid_amd_getids(cpu);
1731 else {
1733 * All other processors are currently
1734 * assumed to have single cores.
1736 cpi->cpi_coreid = cpi->cpi_chipid;
1737 cpi->cpi_pkgcoreid = 0;
1738 cpi->cpi_procnodeid = cpi->cpi_chipid;
1739 cpi->cpi_compunitid = cpi->cpi_chipid;
1744 * Synthesize chip "revision" and socket type
1746 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1747 cpi->cpi_model, cpi->cpi_step);
1748 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1749 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1750 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1751 cpi->cpi_model, cpi->cpi_step);
1753 pass1_done:
1754 cpi->cpi_pass = 1;
1758 * Make copies of the cpuid table entries we depend on, in
1759 * part for ease of parsing now, in part so that we have only
1760 * one place to correct any of it, in part for ease of
1761 * later export to userland, and in part so we can look at
1762 * this stuff in a crash dump.
1765 /*ARGSUSED*/
1766 void
1767 cpuid_pass2(cpu_t *cpu)
1769 uint_t n, nmax;
1770 int i;
1771 struct cpuid_regs *cp;
1772 uint8_t *dp;
1773 uint32_t *iptr;
1774 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1776 ASSERT(cpi->cpi_pass == 1);
1778 if (cpi->cpi_maxeax < 1)
1779 goto pass2_done;
1781 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1782 nmax = NMAX_CPI_STD;
1784 * (We already handled n == 0 and n == 1 in pass 1)
1786 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1787 cp->cp_eax = n;
1790 * CPUID function 4 expects %ecx to be initialized
1791 * with an index which indicates which cache to return
1792 * information about. The OS is expected to call function 4
1793 * with %ecx set to 0, 1, 2, ... until it returns with
1794 * EAX[4:0] set to 0, which indicates there are no more
1795 * caches.
1797 * Here, populate cpi_std[4] with the information returned by
1798 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1799 * when dynamic memory allocation becomes available.
1801 * Note: we need to explicitly initialize %ecx here, since
1802 * function 4 may have been previously invoked.
1804 * The same is all true for CPUID function 7.
1806 if (n == 4 || n == 7)
1807 cp->cp_ecx = 0;
1809 (void) __cpuid_insn(cp);
1810 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1811 switch (n) {
1812 case 2:
1814 * "the lower 8 bits of the %eax register
1815 * contain a value that identifies the number
1816 * of times the cpuid [instruction] has to be
1817 * executed to obtain a complete image of the
1818 * processor's caching systems."
1820 * How *do* they make this stuff up?
1822 cpi->cpi_ncache = sizeof (*cp) *
1823 BITX(cp->cp_eax, 7, 0);
1824 if (cpi->cpi_ncache == 0)
1825 break;
1826 cpi->cpi_ncache--; /* skip count byte */
1829 * Well, for now, rather than attempt to implement
1830 * this slightly dubious algorithm, we just look
1831 * at the first 15 ..
1833 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1834 cpi->cpi_ncache = sizeof (*cp) - 1;
1836 dp = cpi->cpi_cacheinfo;
1837 if (BITX(cp->cp_eax, 31, 31) == 0) {
1838 uint8_t *p = (void *)&cp->cp_eax;
1839 for (i = 1; i < 4; i++)
1840 if (p[i] != 0)
1841 *dp++ = p[i];
1843 if (BITX(cp->cp_ebx, 31, 31) == 0) {
1844 uint8_t *p = (void *)&cp->cp_ebx;
1845 for (i = 0; i < 4; i++)
1846 if (p[i] != 0)
1847 *dp++ = p[i];
1849 if (BITX(cp->cp_ecx, 31, 31) == 0) {
1850 uint8_t *p = (void *)&cp->cp_ecx;
1851 for (i = 0; i < 4; i++)
1852 if (p[i] != 0)
1853 *dp++ = p[i];
1855 if (BITX(cp->cp_edx, 31, 31) == 0) {
1856 uint8_t *p = (void *)&cp->cp_edx;
1857 for (i = 0; i < 4; i++)
1858 if (p[i] != 0)
1859 *dp++ = p[i];
1861 break;
1863 case 3: /* Processor serial number, if PSN supported */
1864 break;
1866 case 4: /* Deterministic cache parameters */
1867 break;
1869 case 5: /* Monitor/Mwait parameters */
1871 size_t mwait_size;
1874 * check cpi_mwait.support which was set in cpuid_pass1
1876 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1877 break;
1880			 * Protect ourselves from an insane mwait line size.
1881 * Workaround for incomplete hardware emulator(s).
1883 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1884 if (mwait_size < sizeof (uint32_t) ||
1885 !ISP2(mwait_size)) {
1886 #if DEBUG
1887 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1888 "size %ld", cpu->cpu_id, (long)mwait_size);
1889 #endif
1890 break;
1893 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1894 cpi->cpi_mwait.mon_max = mwait_size;
1895 if (MWAIT_EXTENSION(cpi)) {
1896 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1897 if (MWAIT_INT_ENABLE(cpi))
1898 cpi->cpi_mwait.support |=
1899 MWAIT_ECX_INT_ENABLE;
1901 break;
1903 default:
1904 break;
1908 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1909 struct cpuid_regs regs;
1911 cp = &regs;
1912 cp->cp_eax = 0xB;
1913 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1915 (void) __cpuid_insn(cp);
1918 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1919 * indicates that the extended topology enumeration leaf is
1920 * available.
1922 if (cp->cp_ebx) {
1923 uint32_t x2apic_id;
1924 uint_t coreid_shift = 0;
1925 uint_t ncpu_per_core = 1;
1926 uint_t chipid_shift = 0;
1927 uint_t ncpu_per_chip = 1;
1928 uint_t i;
1929 uint_t level;
1931 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1932 cp->cp_eax = 0xB;
1933 cp->cp_ecx = i;
1935 (void) __cpuid_insn(cp);
1936 level = CPI_CPU_LEVEL_TYPE(cp);
1938 if (level == 1) {
1939 x2apic_id = cp->cp_edx;
1940 coreid_shift = BITX(cp->cp_eax, 4, 0);
1941 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1942 } else if (level == 2) {
1943 x2apic_id = cp->cp_edx;
1944 chipid_shift = BITX(cp->cp_eax, 4, 0);
1945 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1949 cpi->cpi_apicid = x2apic_id;
1950 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1951 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1952 ncpu_per_core;
1953 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1954 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1955 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1956 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
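			/*
			 * Worked example (illustrative, added editorially):
			 * if the SMT level (1) reports %eax[4:0] = 1 and the
			 * core level (2) reports %eax[4:0] = 4, then for
			 * x2apic_id = 0x15:
			 *	cpi_chipid    = 0x15 >> 4  = 0x1
			 *	cpi_clogid    = 0x15 & 0xf = 0x5
			 *	cpi_coreid    = 0x15 >> 1  = 0xa
			 *	cpi_pkgcoreid = 0x5 >> 1   = 0x2
			 */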
1959 /* Make cp NULL so that we don't stumble on others */
1960 cp = NULL;
1964 * XSAVE enumeration
1966 if (cpi->cpi_maxeax >= 0xD) {
1967 struct cpuid_regs regs;
1968 boolean_t cpuid_d_valid = B_TRUE;
1970 cp = &regs;
1971 cp->cp_eax = 0xD;
1972 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1974 (void) __cpuid_insn(cp);
1977 * Sanity checks for debug
1979 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1980 (cp->cp_eax & XFEATURE_SSE) == 0) {
1981 cpuid_d_valid = B_FALSE;
1984 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1985 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1986 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1989 * If the hw supports AVX, get the size and offset in the save
1990 * area for the ymm state.
1992 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1993 cp->cp_eax = 0xD;
1994 cp->cp_ecx = 2;
1995 cp->cp_edx = cp->cp_ebx = 0;
1997 (void) __cpuid_insn(cp);
1999 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
2000 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
2001 cpuid_d_valid = B_FALSE;
2004 cpi->cpi_xsave.ymm_size = cp->cp_eax;
2005 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
2008 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
2009 xsave_state_size = 0;
2010 } else if (cpuid_d_valid) {
2011 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
2012 } else {
2013 /* Broken CPUID 0xD, probably in HVM */
2014 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
2015 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
2016 ", ymm_size = %d, ymm_offset = %d\n",
2017 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
2018 cpi->cpi_xsave.xsav_hw_features_high,
2019 (int)cpi->cpi_xsave.xsav_max_size,
2020 (int)cpi->cpi_xsave.ymm_size,
2021 (int)cpi->cpi_xsave.ymm_offset);
2023 if (xsave_state_size != 0) {
2025 * This must be a non-boot CPU. We cannot
2026 * continue, because the boot CPU has already
2027 * enabled XSAVE.
2029 ASSERT(cpu->cpu_id != 0);
2030 cmn_err(CE_PANIC, "cpu%d: we have already "
2031 "enabled XSAVE on boot cpu, cannot "
2032 "continue.", cpu->cpu_id);
2033 } else {
2035 * If we reached here on the boot CPU, it's also
2036 * almost certain that we'll reach here on the
2037 * non-boot CPUs. On the boot CPU we should disable
2038 * the feature; on a non-boot CPU we need to confirm
2039 * that it has already been disabled.
2041 if (cpu->cpu_id == 0) {
2042 remove_x86_feature(x86_featureset,
2043 X86FSET_XSAVE);
2044 remove_x86_feature(x86_featureset,
2045 X86FSET_AVX);
2046 remove_x86_feature(x86_featureset,
2047 X86FSET_F16C);
2048 remove_x86_feature(x86_featureset,
2049 X86FSET_BMI1);
2050 remove_x86_feature(x86_featureset,
2051 X86FSET_BMI2);
2052 remove_x86_feature(x86_featureset,
2053 X86FSET_FMA);
2054 remove_x86_feature(x86_featureset,
2055 X86FSET_AVX2);
2056 CPI_FEATURES_ECX(cpi) &=
2057 ~CPUID_INTC_ECX_XSAVE;
2058 CPI_FEATURES_ECX(cpi) &=
2059 ~CPUID_INTC_ECX_AVX;
2060 CPI_FEATURES_ECX(cpi) &=
2061 ~CPUID_INTC_ECX_F16C;
2062 CPI_FEATURES_ECX(cpi) &=
2063 ~CPUID_INTC_ECX_FMA;
2064 CPI_FEATURES_7_0_EBX(cpi) &=
2065 ~CPUID_INTC_EBX_7_0_BMI1;
2066 CPI_FEATURES_7_0_EBX(cpi) &=
2067 ~CPUID_INTC_EBX_7_0_BMI2;
2068 CPI_FEATURES_7_0_EBX(cpi) &=
2069 ~CPUID_INTC_EBX_7_0_AVX2;
2070 xsave_force_disable = B_TRUE;
2071 } else {
2072 VERIFY(is_x86_feature(x86_featureset,
2073 X86FSET_XSAVE) == B_FALSE);
2080 if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
2081 goto pass2_done;
2083 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
2084 nmax = NMAX_CPI_EXTD;
2086 * Copy the extended properties, fixing them as we go.
2087 * (We already handled n == 0 and n == 1 in pass 1)
2089 iptr = (void *)cpi->cpi_brandstr;
2090 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
2091 cp->cp_eax = 0x80000000 + n;
2092 (void) __cpuid_insn(cp);
2093 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
2094 switch (n) {
2095 case 2:
2096 case 3:
2097 case 4:
2099 * Extract the brand string
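 * (Functions 0x80000002..0x80000004 each return 16 bytes of
 * the 48-byte brand string in %eax..%edx, in order.)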
2101 *iptr++ = cp->cp_eax;
2102 *iptr++ = cp->cp_ebx;
2103 *iptr++ = cp->cp_ecx;
2104 *iptr++ = cp->cp_edx;
2105 break;
2106 case 5:
2107 switch (cpi->cpi_vendor) {
2108 case X86_VENDOR_AMD:
2110 * The Athlon and Duron were the first
2111 * parts to report the sizes of the
2112 * TLB for large pages. Before then,
2113 * we don't trust the data.
2115 if (cpi->cpi_family < 6 ||
2116 (cpi->cpi_family == 6 &&
2117 cpi->cpi_model < 1))
2118 cp->cp_eax = 0;
2119 break;
2120 default:
2121 break;
2123 break;
2124 case 6:
2125 switch (cpi->cpi_vendor) {
2126 case X86_VENDOR_AMD:
2128 * The Athlon and Duron were the first
2129 * AMD parts with L2 TLB's.
2130 * Before then, don't trust the data.
2132 if (cpi->cpi_family < 6 ||
2133 (cpi->cpi_family == 6 &&
2134 cpi->cpi_model < 1))
2135 cp->cp_eax = cp->cp_ebx = 0;
2137 * AMD Duron rev A0 reports L2
2138 * cache size incorrectly as 1K
2139 * when it is really 64K
2141 if (cpi->cpi_family == 6 &&
2142 cpi->cpi_model == 3 &&
2143 cpi->cpi_step == 0) {
2144 cp->cp_ecx &= 0xffff;
2145 cp->cp_ecx |= 0x400000;
2147 break;
2148 case X86_VENDOR_Cyrix: /* VIA C3 */
2150 * VIA C3 processors are a bit messed
2151 * up w.r.t. encoding cache sizes in %ecx
2153 if (cpi->cpi_family != 6)
2154 break;
2156 * models 7 and 8 were incorrectly encoded
2158 * xxx is model 8 really broken?
2160 if (cpi->cpi_model == 7 ||
2161 cpi->cpi_model == 8)
2162 cp->cp_ecx =
2163 BITX(cp->cp_ecx, 31, 24) << 16 |
2164 BITX(cp->cp_ecx, 23, 16) << 12 |
2165 BITX(cp->cp_ecx, 15, 8) << 8 |
2166 BITX(cp->cp_ecx, 7, 0);
2168 * model 9 stepping 1 has wrong associativity
2170 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
2171 cp->cp_ecx |= 8 << 12;
2172 break;
2173 case X86_VENDOR_Intel:
2175 * Extended L2 Cache features function.
2176 * First appeared on Prescott.
2178 default:
2179 break;
2181 break;
2182 default:
2183 break;
2187 pass2_done:
2188 cpi->cpi_pass = 2;
2191 static const char *
2192 intel_cpubrand(const struct cpuid_info *cpi)
2194 int i;
2196 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2197 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2198 return ("i486");
2200 switch (cpi->cpi_family) {
2201 case 5:
2202 return ("Intel Pentium(r)");
2203 case 6:
2204 switch (cpi->cpi_model) {
2205 uint_t celeron, xeon;
2206 const struct cpuid_regs *cp;
2207 case 0:
2208 case 1:
2209 case 2:
2210 return ("Intel Pentium(r) Pro");
2211 case 3:
2212 case 4:
2213 return ("Intel Pentium(r) II");
2214 case 6:
2215 return ("Intel Celeron(r)");
2216 case 5:
2217 case 7:
2218 celeron = xeon = 0;
2219 cp = &cpi->cpi_std[2]; /* cache info */
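 * Scan the function-2 cache descriptors (one byte each, packed
 * into %eax..%edx; byte 0 of %eax is the iteration count, so
 * the %eax scan starts at i == 1). Descriptor 0x40 means "no
 * L2 cache", the Celeron tell; 0x44/0x45 are the 1MB/2MB L2
 * caches (see the descriptor table below) that mark a Xeon.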
2221 for (i = 1; i < 4; i++) {
2222 uint_t tmp;
2224 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2225 if (tmp == 0x40)
2226 celeron++;
2227 if (tmp >= 0x44 && tmp <= 0x45)
2228 xeon++;
2231 for (i = 0; i < 2; i++) {
2232 uint_t tmp;
2234 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2235 if (tmp == 0x40)
2236 celeron++;
2237 else if (tmp >= 0x44 && tmp <= 0x45)
2238 xeon++;
2241 for (i = 0; i < 4; i++) {
2242 uint_t tmp;
2244 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2245 if (tmp == 0x40)
2246 celeron++;
2247 else if (tmp >= 0x44 && tmp <= 0x45)
2248 xeon++;
2251 for (i = 0; i < 4; i++) {
2252 uint_t tmp;
2254 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2255 if (tmp == 0x40)
2256 celeron++;
2257 else if (tmp >= 0x44 && tmp <= 0x45)
2258 xeon++;
2261 if (celeron)
2262 return ("Intel Celeron(r)");
2263 if (xeon)
2264 return (cpi->cpi_model == 5 ?
2265 "Intel Pentium(r) II Xeon(tm)" :
2266 "Intel Pentium(r) III Xeon(tm)");
2267 return (cpi->cpi_model == 5 ?
2268 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2269 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2270 default:
2271 break;
2273 default:
2274 break;
2277 /* BrandID is present if the field is nonzero */
2278 if (cpi->cpi_brandid != 0) {
2279 static const struct {
2280 uint_t bt_bid;
2281 const char *bt_str;
2282 } brand_tbl[] = {
2283 { 0x1, "Intel(r) Celeron(r)" },
2284 { 0x2, "Intel(r) Pentium(r) III" },
2285 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
2286 { 0x4, "Intel(r) Pentium(r) III" },
2287 { 0x6, "Mobile Intel(r) Pentium(r) III" },
2288 { 0x7, "Mobile Intel(r) Celeron(r)" },
2289 { 0x8, "Intel(r) Pentium(r) 4" },
2290 { 0x9, "Intel(r) Pentium(r) 4" },
2291 { 0xa, "Intel(r) Celeron(r)" },
2292 { 0xb, "Intel(r) Xeon(tm)" },
2293 { 0xc, "Intel(r) Xeon(tm) MP" },
2294 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
2295 { 0xf, "Mobile Intel(r) Celeron(r)" },
2296 { 0x11, "Mobile Genuine Intel(r)" },
2297 { 0x12, "Intel(r) Celeron(r) M" },
2298 { 0x13, "Mobile Intel(r) Celeron(r)" },
2299 { 0x14, "Intel(r) Celeron(r)" },
2300 { 0x15, "Mobile Genuine Intel(r)" },
2301 { 0x16, "Intel(r) Pentium(r) M" },
2302 { 0x17, "Mobile Intel(r) Celeron(r)" }
2304 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2305 uint_t sgn;
2307 sgn = (cpi->cpi_family << 8) |
2308 (cpi->cpi_model << 4) | cpi->cpi_step;
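 * e.g. family 0xf, model 1, stepping 3 packs to sgn == 0xf13,
 * which is what the early Pentium 4 special cases below compare
 * against.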
2310 for (i = 0; i < btblmax; i++)
2311 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2312 break;
2313 if (i < btblmax) {
2314 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2315 return ("Intel(r) Celeron(r)");
2316 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2317 return ("Intel(r) Xeon(tm) MP");
2318 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2319 return ("Intel(r) Xeon(tm)");
2320 return (brand_tbl[i].bt_str);
2324 return (NULL);
2327 static const char *
2328 amd_cpubrand(const struct cpuid_info *cpi)
2330 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2331 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2332 return ("i486 compatible");
2334 switch (cpi->cpi_family) {
2335 case 5:
2336 switch (cpi->cpi_model) {
2337 case 0:
2338 case 1:
2339 case 2:
2340 case 3:
2341 case 4:
2342 case 5:
2343 return ("AMD-K5(r)");
2344 case 6:
2345 case 7:
2346 return ("AMD-K6(r)");
2347 case 8:
2348 return ("AMD-K6(r)-2");
2349 case 9:
2350 return ("AMD-K6(r)-III");
2351 default:
2352 return ("AMD (family 5)");
2354 case 6:
2355 switch (cpi->cpi_model) {
2356 case 1:
2357 return ("AMD-K7(tm)");
2358 case 0:
2359 case 2:
2360 case 4:
2361 return ("AMD Athlon(tm)");
2362 case 3:
2363 case 7:
2364 return ("AMD Duron(tm)");
2365 case 6:
2366 case 8:
2367 case 10:
2369 * Use the L2 cache size to distinguish
2371 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2372 "AMD Athlon(tm)" : "AMD Duron(tm)");
2373 default:
2374 return ("AMD (family 6)");
2376 default:
2377 break;
2380 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2381 cpi->cpi_brandid != 0) {
2382 switch (BITX(cpi->cpi_brandid, 7, 5)) {
2383 case 3:
2384 return ("AMD Opteron(tm) UP 1xx");
2385 case 4:
2386 return ("AMD Opteron(tm) DP 2xx");
2387 case 5:
2388 return ("AMD Opteron(tm) MP 8xx");
2389 default:
2390 return ("AMD Opteron(tm)");
2394 return (NULL);
2397 static const char *
2398 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2400 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2401 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2402 type == X86_TYPE_CYRIX_486)
2403 return ("i486 compatible");
2405 switch (type) {
2406 case X86_TYPE_CYRIX_6x86:
2407 return ("Cyrix 6x86");
2408 case X86_TYPE_CYRIX_6x86L:
2409 return ("Cyrix 6x86L");
2410 case X86_TYPE_CYRIX_6x86MX:
2411 return ("Cyrix 6x86MX");
2412 case X86_TYPE_CYRIX_GXm:
2413 return ("Cyrix GXm");
2414 case X86_TYPE_CYRIX_MediaGX:
2415 return ("Cyrix MediaGX");
2416 case X86_TYPE_CYRIX_MII:
2417 return ("Cyrix M2");
2418 case X86_TYPE_VIA_CYRIX_III:
2419 return ("VIA Cyrix M3");
2420 default:
2422 * Have another wild guess ..
2424 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2425 return ("Cyrix 5x86");
2426 else if (cpi->cpi_family == 5) {
2427 switch (cpi->cpi_model) {
2428 case 2:
2429 return ("Cyrix 6x86"); /* Cyrix M1 */
2430 case 4:
2431 return ("Cyrix MediaGX");
2432 default:
2433 break;
2435 } else if (cpi->cpi_family == 6) {
2436 switch (cpi->cpi_model) {
2437 case 0:
2438 return ("Cyrix 6x86MX"); /* Cyrix M2? */
2439 case 5:
2440 case 6:
2441 case 7:
2442 case 8:
2443 case 9:
2444 return ("VIA C3");
2445 default:
2446 break;
2449 break;
2451 return (NULL);
2455 * This only gets called in the case that the CPU extended-
2456 * feature brand string functions (0x80000002, 0x80000003,
2457 * 0x80000004) aren't available, or return null bytes for some reason.
2459 static void
2460 fabricate_brandstr(struct cpuid_info *cpi)
2462 const char *brand = NULL;
2464 switch (cpi->cpi_vendor) {
2465 case X86_VENDOR_Intel:
2466 brand = intel_cpubrand(cpi);
2467 break;
2468 case X86_VENDOR_AMD:
2469 brand = amd_cpubrand(cpi);
2470 break;
2471 case X86_VENDOR_Cyrix:
2472 brand = cyrix_cpubrand(cpi, x86_type);
2473 break;
2474 case X86_VENDOR_NexGen:
2475 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2476 brand = "NexGen Nx586";
2477 break;
2478 case X86_VENDOR_Centaur:
2479 if (cpi->cpi_family == 5)
2480 switch (cpi->cpi_model) {
2481 case 4:
2482 brand = "Centaur C6";
2483 break;
2484 case 8:
2485 brand = "Centaur C2";
2486 break;
2487 case 9:
2488 brand = "Centaur C3";
2489 break;
2490 default:
2491 break;
2493 break;
2494 case X86_VENDOR_Rise:
2495 if (cpi->cpi_family == 5 &&
2496 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2497 brand = "Rise mP6";
2498 break;
2499 case X86_VENDOR_SiS:
2500 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2501 brand = "SiS 55x";
2502 break;
2503 case X86_VENDOR_TM:
2504 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2505 brand = "Transmeta Crusoe TM3x00 or TM5x00";
2506 break;
2507 case X86_VENDOR_NSC:
2508 case X86_VENDOR_UMC:
2509 default:
2510 break;
2512 if (brand) {
2513 (void) strcpy((char *)cpi->cpi_brandstr, brand);
2514 return;
2518 * If all else fails ...
2520 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2521 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2522 cpi->cpi_model, cpi->cpi_step);
2526 * This routine is called just after kernel memory allocation
2527 * becomes available on cpu0, and as part of mp_startup() on
2528 * the other cpus.
2530 * Fixup the brand string, and collect any information from cpuid
2531 * that requires dynamically allocated storage to represent.
2533 /*ARGSUSED*/
2534 void
2535 cpuid_pass3(cpu_t *cpu)
2537 int i, max, shft, level, size;
2538 struct cpuid_regs regs;
2539 struct cpuid_regs *cp;
2540 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2542 ASSERT(cpi->cpi_pass == 2);
2545 * Function 4: Deterministic cache parameters
2547 * Take this opportunity to detect the number of threads
2548 * sharing the last level cache, and construct a corresponding
2549 * cache id. The respective cpuid_info members are initialized
2550 * to the default case of "no last level cache sharing".
2552 cpi->cpi_ncpu_shr_last_cache = 1;
2553 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2555 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2558 * Find the # of elements (size) returned by fn 4, and along
2559 * the way detect last level cache sharing details.
2561 bzero(&regs, sizeof (regs));
2562 cp = &regs;
2563 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2564 cp->cp_eax = 4;
2565 cp->cp_ecx = i;
2567 (void) __cpuid_insn(cp);
2569 if (CPI_CACHE_TYPE(cp) == 0)
2570 break;
2571 level = CPI_CACHE_LVL(cp);
2572 if (level > max) {
2573 max = level;
2574 cpi->cpi_ncpu_shr_last_cache =
2575 CPI_NTHR_SHR_CACHE(cp) + 1;
2578 cpi->cpi_std_4_size = size = i;
2581 * Allocate the cpi_std_4 array. The first element
2582 * references the regs for fn 4, %ecx == 0, which
2583 * cpuid_pass2() stashed in cpi->cpi_std[4].
2585 if (size > 0) {
2586 cpi->cpi_std_4 =
2587 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2588 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2591 * Allocate storage to hold the additional regs
2592 * for function 4, %ecx == 1 .. cpi_std_4_size.
2594 * The regs for fn 4, %ecx == 0 has already
2595 * been allocated as indicated above.
2597 for (i = 1; i < size; i++) {
2598 cp = cpi->cpi_std_4[i] =
2599 kmem_zalloc(sizeof (regs), KM_SLEEP);
2600 cp->cp_eax = 4;
2601 cp->cp_ecx = i;
2603 (void) __cpuid_insn(cp);
2607 * Determine the number of bits needed to represent
2608 * the number of CPUs sharing the last level cache.
2610 * Shift off that number of bits from the APIC id to
2611 * derive the cache id.
2613 shft = 0;
2614 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2615 shft++;
2616 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
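 * e.g. if 4 cpus share the last level cache, shft computes to 2,
 * so apic ids 0 through 3 all map to cache id 0.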
2620 * Now fixup the brand string
2622 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2623 fabricate_brandstr(cpi);
2624 } else {
2627 * If we successfully extracted a brand string from the cpuid
2628 * instruction, clean it up by removing leading spaces and
2629 * similar junk.
2631 if (cpi->cpi_brandstr[0]) {
2632 size_t maxlen = sizeof (cpi->cpi_brandstr);
2633 char *src, *dst;
2635 dst = src = (char *)cpi->cpi_brandstr;
2636 src[maxlen - 1] = '\0';
2638 * strip leading spaces
2640 while (*src == ' ')
2641 src++;
2643 * Remove any 'Genuine' or "Authentic" prefixes
2645 if (strncmp(src, "Genuine ", 8) == 0)
2646 src += 8;
2647 if (strncmp(src, "Authentic ", 10) == 0)
2648 src += 10;
2651 * Now do an in-place copy.
2652 * Map (R) to (r) and (TM) to (tm).
2653 * The era of teletypes is long gone, and there's
2654 * -really- no need to shout.
2656 while (*src != '\0') {
2657 if (src[0] == '(') {
2658 if (strncmp(src + 1, "R)", 2) == 0) {
2659 (void) strncpy(dst, "(r)", 3);
2660 src += 3;
2661 dst += 3;
2662 continue;
2664 if (strncmp(src + 1, "TM)", 3) == 0) {
2665 (void) strncpy(dst, "(tm)", 4);
2666 src += 4;
2667 dst += 4;
2668 continue;
2671 *dst++ = *src++;
2673 *dst = '\0';
2676 * Finally, remove any trailing spaces
2678 while (--dst > cpi->cpi_brandstr)
2679 if (*dst == ' ')
2680 *dst = '\0';
2681 else
2682 break;
2683 } else
2684 fabricate_brandstr(cpi);
2686 cpi->cpi_pass = 3;
2690 * This routine is called out of bind_hwcap() much later in the life
2691 * of the kernel (post_startup()). The job of this routine is to resolve
2692 * the hardware feature support and kernel support for those features into
2693 * what we're actually going to tell applications via the aux vector.
2695 void
2696 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
2698 struct cpuid_info *cpi;
2699 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
2701 if (cpu == NULL)
2702 cpu = CPU;
2703 cpi = cpu->cpu_m.mcpu_cpi;
2705 ASSERT(cpi->cpi_pass == 3);
2707 if (cpi->cpi_maxeax >= 1) {
2708 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2709 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2710 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
2712 *edx = CPI_FEATURES_EDX(cpi);
2713 *ecx = CPI_FEATURES_ECX(cpi);
2714 *ebx = CPI_FEATURES_7_0_EBX(cpi);
2717 * [these require explicit kernel support]
2719 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2720 *edx &= ~CPUID_INTC_EDX_SEP;
2722 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2723 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2724 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2725 *edx &= ~CPUID_INTC_EDX_SSE2;
2727 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2728 *edx &= ~CPUID_INTC_EDX_HTT;
2730 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2731 *ecx &= ~CPUID_INTC_ECX_SSE3;
2733 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2734 *ecx &= ~CPUID_INTC_ECX_SSSE3;
2735 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2736 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
2737 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2738 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
2739 if (!is_x86_feature(x86_featureset, X86FSET_AES))
2740 *ecx &= ~CPUID_INTC_ECX_AES;
2741 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2742 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2743 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2744 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
2745 CPUID_INTC_ECX_OSXSAVE);
2746 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2747 *ecx &= ~CPUID_INTC_ECX_AVX;
2748 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
2749 *ecx &= ~CPUID_INTC_ECX_F16C;
2750 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
2751 *ecx &= ~CPUID_INTC_ECX_FMA;
2752 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
2753 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2754 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
2755 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2756 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
2757 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2758 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
2759 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
2760 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
2761 *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
2764 * [no explicit support required beyond x87 fp context]
2766 if (!fpu_exists)
2767 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2770 * Now map the supported feature vector to things that we
2771 * think userland will care about.
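 * (These ultimately surface as the AT_SUN_HWCAP/AT_SUN_HWCAP2
 * aux vector entries, queryable from userland via getisax(3C).)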
2773 if (*edx & CPUID_INTC_EDX_SEP)
2774 hwcap_flags |= AV_386_SEP;
2775 if (*edx & CPUID_INTC_EDX_SSE)
2776 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2777 if (*edx & CPUID_INTC_EDX_SSE2)
2778 hwcap_flags |= AV_386_SSE2;
2779 if (*ecx & CPUID_INTC_ECX_SSE3)
2780 hwcap_flags |= AV_386_SSE3;
2781 if (*ecx & CPUID_INTC_ECX_SSSE3)
2782 hwcap_flags |= AV_386_SSSE3;
2783 if (*ecx & CPUID_INTC_ECX_SSE4_1)
2784 hwcap_flags |= AV_386_SSE4_1;
2785 if (*ecx & CPUID_INTC_ECX_SSE4_2)
2786 hwcap_flags |= AV_386_SSE4_2;
2787 if (*ecx & CPUID_INTC_ECX_MOVBE)
2788 hwcap_flags |= AV_386_MOVBE;
2789 if (*ecx & CPUID_INTC_ECX_AES)
2790 hwcap_flags |= AV_386_AES;
2791 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2792 hwcap_flags |= AV_386_PCLMULQDQ;
2793 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2794 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
2795 hwcap_flags |= AV_386_XSAVE;
2797 if (*ecx & CPUID_INTC_ECX_AVX) {
2798 hwcap_flags |= AV_386_AVX;
2799 if (*ecx & CPUID_INTC_ECX_F16C)
2800 hwcap_flags_2 |= AV_386_2_F16C;
2801 if (*ecx & CPUID_INTC_ECX_FMA)
2802 hwcap_flags_2 |= AV_386_2_FMA;
2803 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
2804 hwcap_flags_2 |= AV_386_2_BMI1;
2805 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
2806 hwcap_flags_2 |= AV_386_2_BMI2;
2807 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
2808 hwcap_flags_2 |= AV_386_2_AVX2;
2811 if (*ecx & CPUID_INTC_ECX_VMX)
2812 hwcap_flags |= AV_386_VMX;
2813 if (*ecx & CPUID_INTC_ECX_POPCNT)
2814 hwcap_flags |= AV_386_POPCNT;
2815 if (*edx & CPUID_INTC_EDX_FPU)
2816 hwcap_flags |= AV_386_FPU;
2817 if (*edx & CPUID_INTC_EDX_MMX)
2818 hwcap_flags |= AV_386_MMX;
2820 if (*edx & CPUID_INTC_EDX_TSC)
2821 hwcap_flags |= AV_386_TSC;
2822 if (*edx & CPUID_INTC_EDX_CX8)
2823 hwcap_flags |= AV_386_CX8;
2824 if (*edx & CPUID_INTC_EDX_CMOV)
2825 hwcap_flags |= AV_386_CMOV;
2826 if (*ecx & CPUID_INTC_ECX_CX16)
2827 hwcap_flags |= AV_386_CX16;
2829 if (*ecx & CPUID_INTC_ECX_RDRAND)
2830 hwcap_flags_2 |= AV_386_2_RDRAND;
2831 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
2832 hwcap_flags_2 |= AV_386_2_ADX;
2833 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
2834 hwcap_flags_2 |= AV_386_2_RDSEED;
2838 if (cpi->cpi_xmaxeax < 0x80000001)
2839 goto pass4_done;
2841 switch (cpi->cpi_vendor) {
2842 struct cpuid_regs cp;
2843 uint32_t *edx, *ecx;
2845 case X86_VENDOR_Intel:
2847 * Seems like Intel duplicated what was necessary
2848 * here to make the initial crop of 64-bit OSes work.
2849 * Hopefully, those are the only "extended" bits
2850 * they'll add.
2852 /*FALLTHROUGH*/
2854 case X86_VENDOR_AMD:
2855 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2856 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2858 *edx = CPI_FEATURES_XTD_EDX(cpi);
2859 *ecx = CPI_FEATURES_XTD_ECX(cpi);
2862 * [these features require explicit kernel support]
2864 switch (cpi->cpi_vendor) {
2865 case X86_VENDOR_Intel:
2866 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2867 *edx &= ~CPUID_AMD_EDX_TSCP;
2868 break;
2870 case X86_VENDOR_AMD:
2871 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2872 *edx &= ~CPUID_AMD_EDX_TSCP;
2873 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2874 *ecx &= ~CPUID_AMD_ECX_SSE4A;
2875 break;
2877 default:
2878 break;
2882 * [no explicit support required beyond
2883 * x87 fp context and exception handlers]
2885 if (!fpu_exists)
2886 *edx &= ~(CPUID_AMD_EDX_MMXamd |
2887 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2889 if (!is_x86_feature(x86_featureset, X86FSET_NX))
2890 *edx &= ~CPUID_AMD_EDX_NX;
2891 #if !defined(__amd64)
2892 *edx &= ~CPUID_AMD_EDX_LM;
2893 #endif
2895 * Now map the supported feature vector to
2896 * things that we think userland will care about.
2898 #if defined(__amd64)
2899 if (*edx & CPUID_AMD_EDX_SYSC)
2900 hwcap_flags |= AV_386_AMD_SYSC;
2901 #endif
2902 if (*edx & CPUID_AMD_EDX_MMXamd)
2903 hwcap_flags |= AV_386_AMD_MMX;
2904 if (*edx & CPUID_AMD_EDX_3DNow)
2905 hwcap_flags |= AV_386_AMD_3DNow;
2906 if (*edx & CPUID_AMD_EDX_3DNowx)
2907 hwcap_flags |= AV_386_AMD_3DNowx;
2908 if (*ecx & CPUID_AMD_ECX_SVM)
2909 hwcap_flags |= AV_386_AMD_SVM;
2911 switch (cpi->cpi_vendor) {
2912 case X86_VENDOR_AMD:
2913 if (*edx & CPUID_AMD_EDX_TSCP)
2914 hwcap_flags |= AV_386_TSCP;
2915 if (*ecx & CPUID_AMD_ECX_AHF64)
2916 hwcap_flags |= AV_386_AHF;
2917 if (*ecx & CPUID_AMD_ECX_SSE4A)
2918 hwcap_flags |= AV_386_AMD_SSE4A;
2919 if (*ecx & CPUID_AMD_ECX_LZCNT)
2920 hwcap_flags |= AV_386_AMD_LZCNT;
2921 break;
2923 case X86_VENDOR_Intel:
2924 if (*edx & CPUID_AMD_EDX_TSCP)
2925 hwcap_flags |= AV_386_TSCP;
2927 * Aarrgh.
2928 * Intel uses a different bit in the same word.
2930 if (*ecx & CPUID_INTC_ECX_AHF64)
2931 hwcap_flags |= AV_386_AHF;
2932 break;
2934 default:
2935 break;
2937 break;
2939 case X86_VENDOR_TM:
2940 cp.cp_eax = 0x80860001;
2941 (void) __cpuid_insn(&cp);
2942 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2943 break;
2945 default:
2946 break;
2949 pass4_done:
2950 cpi->cpi_pass = 4;
2951 if (hwcap_out != NULL) {
2952 hwcap_out[0] = hwcap_flags;
2953 hwcap_out[1] = hwcap_flags_2;
2959 * Simulate the cpuid instruction using the data we previously
2960 * captured about this CPU. We try our best to return the truth
2961 * about the hardware, independently of kernel support.
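 *
 * A minimal (hypothetical) use looks like:
 *
 *	struct cpuid_regs regs = { 0 };
 *	regs.cp_eax = 1;	(leaf 1: family/model/stepping)
 *	(void) cpuid_insn(NULL, &regs);
 *
 * Cached leaves are answered from the cpi data; anything else is
 * passed through to the hardware.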
2963 uint32_t
2964 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2966 struct cpuid_info *cpi;
2967 struct cpuid_regs *xcp;
2969 if (cpu == NULL)
2970 cpu = CPU;
2971 cpi = cpu->cpu_m.mcpu_cpi;
2973 ASSERT(cpuid_checkpass(cpu, 3));
2976 * CPUID data is cached in two separate places: cpi_std for standard
2977 * CPUID functions, and cpi_extd for extended CPUID functions.
2979 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2980 xcp = &cpi->cpi_std[cp->cp_eax];
2981 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2982 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2983 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2984 else
2986 * The caller is asking for data from an input parameter which
2987 * the kernel has not cached. In this case we go fetch from
2988 * the hardware and return the data directly to the user.
2990 return (__cpuid_insn(cp));
2992 cp->cp_eax = xcp->cp_eax;
2993 cp->cp_ebx = xcp->cp_ebx;
2994 cp->cp_ecx = xcp->cp_ecx;
2995 cp->cp_edx = xcp->cp_edx;
2996 return (cp->cp_eax);
3000 cpuid_checkpass(cpu_t *cpu, int pass)
3002 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
3003 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
3007 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
3009 ASSERT(cpuid_checkpass(cpu, 3));
3011 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
3015 cpuid_is_cmt(cpu_t *cpu)
3017 if (cpu == NULL)
3018 cpu = CPU;
3020 ASSERT(cpuid_checkpass(cpu, 1));
3022 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
3026 * AMD and Intel both implement the 64-bit variant of the syscall
3027 * instruction (syscallq), so if there's -any- support for syscall,
3028 * cpuid currently says "yes, we support this".
3030 * However, Intel decided to -not- implement the 32-bit variant of the
3031 * syscall instruction, so we provide a predicate to allow our caller
3032 * to test that subtlety here.
3034 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
3035 * even in the case where the hardware would in fact support it.
3037 /*ARGSUSED*/
3039 cpuid_syscall32_insn(cpu_t *cpu)
3041 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
3043 #if !defined(__xpv)
3044 if (cpu == NULL)
3045 cpu = CPU;
3047 /*CSTYLED*/
3049 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3051 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3052 cpi->cpi_xmaxeax >= 0x80000001 &&
3053 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
3054 return (1);
3056 #endif
3057 return (0);
3061 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
3063 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3065 static const char fmt[] =
3066 "x86 (%s %X family %d model %d step %d clock %d MHz)";
3067 static const char fmt_ht[] =
3068 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
3070 ASSERT(cpuid_checkpass(cpu, 1));
3072 if (cpuid_is_cmt(cpu))
3073 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
3074 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
3075 cpi->cpi_family, cpi->cpi_model,
3076 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
3077 return (snprintf(s, n, fmt,
3078 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
3079 cpi->cpi_family, cpi->cpi_model,
3080 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
3083 const char *
3084 cpuid_getvendorstr(cpu_t *cpu)
3086 ASSERT(cpuid_checkpass(cpu, 1));
3087 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
3090 uint_t
3091 cpuid_getvendor(cpu_t *cpu)
3093 ASSERT(cpuid_checkpass(cpu, 1));
3094 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
3097 uint_t
3098 cpuid_getfamily(cpu_t *cpu)
3100 ASSERT(cpuid_checkpass(cpu, 1));
3101 return (cpu->cpu_m.mcpu_cpi->cpi_family);
3104 uint_t
3105 cpuid_getmodel(cpu_t *cpu)
3107 ASSERT(cpuid_checkpass(cpu, 1));
3108 return (cpu->cpu_m.mcpu_cpi->cpi_model);
3111 uint_t
3112 cpuid_get_ncpu_per_chip(cpu_t *cpu)
3114 ASSERT(cpuid_checkpass(cpu, 1));
3115 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
3118 uint_t
3119 cpuid_get_ncore_per_chip(cpu_t *cpu)
3121 ASSERT(cpuid_checkpass(cpu, 1));
3122 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
3125 uint_t
3126 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
3128 ASSERT(cpuid_checkpass(cpu, 2));
3129 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
3132 id_t
3133 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
3135 ASSERT(cpuid_checkpass(cpu, 2));
3136 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
3139 uint_t
3140 cpuid_getstep(cpu_t *cpu)
3142 ASSERT(cpuid_checkpass(cpu, 1));
3143 return (cpu->cpu_m.mcpu_cpi->cpi_step);
3146 uint_t
3147 cpuid_getsig(struct cpu *cpu)
3149 ASSERT(cpuid_checkpass(cpu, 1));
3150 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
3153 uint32_t
3154 cpuid_getchiprev(struct cpu *cpu)
3156 ASSERT(cpuid_checkpass(cpu, 1));
3157 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
3160 const char *
3161 cpuid_getchiprevstr(struct cpu *cpu)
3163 ASSERT(cpuid_checkpass(cpu, 1));
3164 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
3167 uint32_t
3168 cpuid_getsockettype(struct cpu *cpu)
3170 ASSERT(cpuid_checkpass(cpu, 1));
3171 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
3174 const char *
3175 cpuid_getsocketstr(cpu_t *cpu)
3177 static const char *socketstr = NULL;
3178 struct cpuid_info *cpi;
3180 ASSERT(cpuid_checkpass(cpu, 1));
3181 cpi = cpu->cpu_m.mcpu_cpi;
3183 /* Assume that socket types are the same across the system */
3184 if (socketstr == NULL)
3185 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
3186 cpi->cpi_model, cpi->cpi_step);
3189 return (socketstr);
3193 cpuid_get_chipid(cpu_t *cpu)
3195 ASSERT(cpuid_checkpass(cpu, 1));
3197 if (cpuid_is_cmt(cpu))
3198 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
3199 return (cpu->cpu_id);
3202 id_t
3203 cpuid_get_coreid(cpu_t *cpu)
3205 ASSERT(cpuid_checkpass(cpu, 1));
3206 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
3210 cpuid_get_pkgcoreid(cpu_t *cpu)
3212 ASSERT(cpuid_checkpass(cpu, 1));
3213 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
3217 cpuid_get_clogid(cpu_t *cpu)
3219 ASSERT(cpuid_checkpass(cpu, 1));
3220 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
3224 cpuid_get_cacheid(cpu_t *cpu)
3226 ASSERT(cpuid_checkpass(cpu, 1));
3227 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
3230 uint_t
3231 cpuid_get_procnodeid(cpu_t *cpu)
3233 ASSERT(cpuid_checkpass(cpu, 1));
3234 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
3237 uint_t
3238 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3240 ASSERT(cpuid_checkpass(cpu, 1));
3241 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3244 uint_t
3245 cpuid_get_compunitid(cpu_t *cpu)
3247 ASSERT(cpuid_checkpass(cpu, 1));
3248 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
3251 uint_t
3252 cpuid_get_cores_per_compunit(cpu_t *cpu)
3254 ASSERT(cpuid_checkpass(cpu, 1));
3255 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
3258 /*ARGSUSED*/
3260 cpuid_have_cr8access(cpu_t *cpu)
3262 #if defined(__amd64)
3263 return (1);
3264 #else
3265 struct cpuid_info *cpi;
3267 ASSERT(cpu != NULL);
3268 cpi = cpu->cpu_m.mcpu_cpi;
3269 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3270 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3271 return (1);
3272 return (0);
3273 #endif
3276 uint32_t
3277 cpuid_get_apicid(cpu_t *cpu)
3279 ASSERT(cpuid_checkpass(cpu, 1));
3280 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3281 return (UINT32_MAX);
3282 } else {
3283 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3287 void
3288 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3290 struct cpuid_info *cpi;
3292 if (cpu == NULL)
3293 cpu = CPU;
3294 cpi = cpu->cpu_m.mcpu_cpi;
3296 ASSERT(cpuid_checkpass(cpu, 1));
3298 if (pabits)
3299 *pabits = cpi->cpi_pabits;
3300 if (vabits)
3301 *vabits = cpi->cpi_vabits;
3305 * Returns the number of data TLB entries for a corresponding
3306 * pagesize. If it can't be computed, or isn't known, the
3307 * routine returns zero. If you ask about an architecturally
3308 * impossible pagesize, the routine will panic (so that the
3309 * hat implementor knows that things are inconsistent.)
3311 uint_t
3312 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3314 struct cpuid_info *cpi;
3315 uint_t dtlb_nent = 0;
3317 if (cpu == NULL)
3318 cpu = CPU;
3319 cpi = cpu->cpu_m.mcpu_cpi;
3321 ASSERT(cpuid_checkpass(cpu, 1));
3324 * Check the L2 TLB info
3326 if (cpi->cpi_xmaxeax >= 0x80000006) {
3327 struct cpuid_regs *cp = &cpi->cpi_extd[6];
3329 switch (pagesize) {
3331 case 4 * 1024:
3333 * All zero in the top 16 bits of the register
3334 * indicates a unified TLB. Size is in low 16 bits.
3336 if ((cp->cp_ebx & 0xffff0000) == 0)
3337 dtlb_nent = cp->cp_ebx & 0x0000ffff;
3338 else
3339 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3340 break;
3342 case 2 * 1024 * 1024:
3343 if ((cp->cp_eax & 0xffff0000) == 0)
3344 dtlb_nent = cp->cp_eax & 0x0000ffff;
3345 else
3346 dtlb_nent = BITX(cp->cp_eax, 27, 16);
3347 break;
3349 default:
3350 panic("unknown L2 pagesize");
3351 /*NOTREACHED*/
3355 if (dtlb_nent != 0)
3356 return (dtlb_nent);
3359 * No L2 TLB support for this size, try L1.
3361 if (cpi->cpi_xmaxeax >= 0x80000005) {
3362 struct cpuid_regs *cp = &cpi->cpi_extd[5];
3364 switch (pagesize) {
3365 case 4 * 1024:
3366 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3367 break;
3368 case 2 * 1024 * 1024:
3369 dtlb_nent = BITX(cp->cp_eax, 23, 16);
3370 break;
3371 default:
3372 panic("unknown L1 d-TLB pagesize");
3373 /*NOTREACHED*/
3377 return (dtlb_nent);
3381 * Return 0 if the erratum is not present or not applicable, positive
3382 * if it is, and negative if the status of the erratum is unknown.
3384 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3385 * Processors" #25759, Rev 3.57, August 2005
3388 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3390 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3391 uint_t eax;
3394 * Bail out if this CPU isn't an AMD CPU, or if it's
3395 * a legacy (32-bit) AMD CPU.
3397 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3398 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3399 cpi->cpi_family == 6)
3401 return (0);
3403 eax = cpi->cpi_std[1].cp_eax;
3405 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
3406 #define SH_B3(eax) (eax == 0xf51)
3407 #define B(eax) (SH_B0(eax) || SH_B3(eax))
3409 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
3411 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3412 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3413 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
3414 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3416 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3417 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
3418 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
3419 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3421 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3422 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
3423 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
3424 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
3425 #define BH_E4(eax) (eax == 0x20fb1)
3426 #define SH_E5(eax) (eax == 0x20f42)
3427 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
3428 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
3429 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3430 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3431 DH_E6(eax) || JH_E6(eax))
3433 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3434 #define DR_B0(eax) (eax == 0x100f20)
3435 #define DR_B1(eax) (eax == 0x100f21)
3436 #define DR_BA(eax) (eax == 0x100f2a)
3437 #define DR_B2(eax) (eax == 0x100f22)
3438 #define DR_B3(eax) (eax == 0x100f23)
3439 #define RB_C0(eax) (eax == 0x100f40)
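 * The values above are processor signatures from cpuid function 1
 * %eax: stepping in bits 3:0, model in 7:4, family in 11:8, plus
 * extended model/family above that; e.g. 0xf48 decodes to family
 * 0xf, model 4, stepping 8, one of the SH_C0 revisions.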
3441 switch (erratum) {
3442 case 1:
3443 return (cpi->cpi_family < 0x10);
3444 case 51: /* what does the asterisk mean? */
3445 return (B(eax) || SH_C0(eax) || CG(eax));
3446 case 52:
3447 return (B(eax));
3448 case 57:
3449 return (cpi->cpi_family <= 0x11);
3450 case 58:
3451 return (B(eax));
3452 case 60:
3453 return (cpi->cpi_family <= 0x11);
3454 case 61:
3455 case 62:
3456 case 63:
3457 case 64:
3458 case 65:
3459 case 66:
3460 case 68:
3461 case 69:
3462 case 70:
3463 case 71:
3464 return (B(eax));
3465 case 72:
3466 return (SH_B0(eax));
3467 case 74:
3468 return (B(eax));
3469 case 75:
3470 return (cpi->cpi_family < 0x10);
3471 case 76:
3472 return (B(eax));
3473 case 77:
3474 return (cpi->cpi_family <= 0x11);
3475 case 78:
3476 return (B(eax) || SH_C0(eax));
3477 case 79:
3478 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3479 case 80:
3480 case 81:
3481 case 82:
3482 return (B(eax));
3483 case 83:
3484 return (B(eax) || SH_C0(eax) || CG(eax));
3485 case 85:
3486 return (cpi->cpi_family < 0x10);
3487 case 86:
3488 return (SH_C0(eax) || CG(eax));
3489 case 88:
3490 #if !defined(__amd64)
3491 return (0);
3492 #else
3493 return (B(eax) || SH_C0(eax));
3494 #endif
3495 case 89:
3496 return (cpi->cpi_family < 0x10);
3497 case 90:
3498 return (B(eax) || SH_C0(eax) || CG(eax));
3499 case 91:
3500 case 92:
3501 return (B(eax) || SH_C0(eax));
3502 case 93:
3503 return (SH_C0(eax));
3504 case 94:
3505 return (B(eax) || SH_C0(eax) || CG(eax));
3506 case 95:
3507 #if !defined(__amd64)
3508 return (0);
3509 #else
3510 return (B(eax) || SH_C0(eax));
3511 #endif
3512 case 96:
3513 return (B(eax) || SH_C0(eax) || CG(eax));
3514 case 97:
3515 case 98:
3516 return (SH_C0(eax) || CG(eax));
3517 case 99:
3518 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3519 case 100:
3520 return (B(eax) || SH_C0(eax));
3521 case 101:
3522 case 103:
3523 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3524 case 104:
3525 return (SH_C0(eax) || CG(eax) || D0(eax));
3526 case 105:
3527 case 106:
3528 case 107:
3529 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3530 case 108:
3531 return (DH_CG(eax));
3532 case 109:
3533 return (SH_C0(eax) || CG(eax) || D0(eax));
3534 case 110:
3535 return (D0(eax) || EX(eax));
3536 case 111:
3537 return (CG(eax));
3538 case 112:
3539 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3540 case 113:
3541 return (eax == 0x20fc0);
3542 case 114:
3543 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3544 case 115:
3545 return (SH_E0(eax) || JH_E1(eax));
3546 case 116:
3547 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3548 case 117:
3549 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3550 case 118:
3551 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3552 JH_E6(eax));
3553 case 121:
3554 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3555 case 122:
3556 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3557 case 123:
3558 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3559 case 131:
3560 return (cpi->cpi_family < 0x10);
3561 case 6336786:
3563 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3564 * if this is a K8 family or newer processor
3566 if (CPI_FAMILY(cpi) == 0xf) {
3567 struct cpuid_regs regs;
3568 regs.cp_eax = 0x80000007;
3569 (void) __cpuid_insn(&regs);
3570 return (!(regs.cp_edx & 0x100));
3572 return (0);
3573 case 6323525:
3574 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3575 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3577 case 6671130:
3579 * Check for processors (pre-Shanghai) that do not provide
3580 * optimal management of 1GB ptes in their tlb.
3582 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3584 case 298:
3585 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3586 DR_B2(eax) || RB_C0(eax));
3588 case 721:
3589 #if defined(__amd64)
3590 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
3591 #else
3592 return (0);
3593 #endif
3595 default:
3596 return (-1);
3602 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3603 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3606 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3608 struct cpuid_info *cpi;
3609 uint_t osvwid;
3610 static int osvwfeature = -1;
3611 uint64_t osvwlength;
3614 cpi = cpu->cpu_m.mcpu_cpi;
3616 /* confirm OSVW supported */
3617 if (osvwfeature == -1) {
3618 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3619 } else {
3620 /* assert that osvw feature setting is consistent on all cpus */
3621 ASSERT(osvwfeature ==
3622 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3624 if (!osvwfeature)
3625 return (-1);
3627 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
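 * OSVW status bits are packed 64 to a (64-bit) MSR starting at
 * MSR_AMD_OSVW_STATUS; erratum id N is bit N % 64 of MSR
 * MSR_AMD_OSVW_STATUS + N / 64, which is how the rdmsr below
 * indexes it.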
3629 switch (erratum) {
3630 case 298: /* osvwid is 0 */
3631 osvwid = 0;
3632 if (osvwlength <= (uint64_t)osvwid) {
3633 /* osvwid 0 is unknown */
3634 return (-1);
3638 * Check the OSVW STATUS MSR to determine the state
3639 * of the erratum where:
3640 * 0 - fixed by HW
3641 * 1 - BIOS has applied the workaround when BIOS
3642 * workaround is available. (Or for other errata,
3643 * OS workaround is required.)
3644 * For a value of 1, caller will confirm that the
3645 * erratum 298 workaround has indeed been applied by BIOS.
3647 * A 1 may also be set on cpus that have the HW fix
3648 * in a mixed-cpu system. Regarding erratum 298:
3649 * In a multiprocessor platform, the workaround above
3650 * should be applied to all processors regardless of
3651 * silicon revision when an affected processor is
3652 * present.
3655 return (rdmsr(MSR_AMD_OSVW_STATUS +
3656 (osvwid / OSVW_ID_CNT_PER_MSR)) &
3657 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3659 default:
3660 return (-1);
3664 static const char assoc_str[] = "associativity";
3665 static const char line_str[] = "line-size";
3666 static const char size_str[] = "size";
3668 static void
3669 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3670 uint32_t val)
3672 char buf[128];
3675 * ndi_prop_update_int() is used because it is desirable for
3676 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3678 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3679 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
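 * e.g. label "l2-cache" with type "size" yields the devinfo
 * property "l2-cache-size".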
3683 * Intel-style cache/tlb description
3685 * Standard cpuid level 2 gives a randomly ordered
3686 * selection of tags that index into a table that describes
3687 * cache and tlb properties.
3690 static const char l1_icache_str[] = "l1-icache";
3691 static const char l1_dcache_str[] = "l1-dcache";
3692 static const char l2_cache_str[] = "l2-cache";
3693 static const char l3_cache_str[] = "l3-cache";
3694 static const char itlb4k_str[] = "itlb-4K";
3695 static const char dtlb4k_str[] = "dtlb-4K";
3696 static const char itlb2M_str[] = "itlb-2M";
3697 static const char itlb4M_str[] = "itlb-4M";
3698 static const char dtlb4M_str[] = "dtlb-4M";
3699 static const char dtlb24_str[] = "dtlb0-2M-4M";
3700 static const char itlb424_str[] = "itlb-4K-2M-4M";
3701 static const char itlb24_str[] = "itlb-2M-4M";
3702 static const char dtlb44_str[] = "dtlb-4K-4M";
3703 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3704 static const char sl2_cache_str[] = "sectored-l2-cache";
3705 static const char itrace_str[] = "itrace-cache";
3706 static const char sl3_cache_str[] = "sectored-l3-cache";
3707 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3709 static const struct cachetab {
3710 uint8_t ct_code;
3711 uint8_t ct_assoc;
3712 uint16_t ct_line_size;
3713 size_t ct_size;
3714 const char *ct_label;
3715 } intel_ctab[] = {
3717 * maintain descending order!
3719 * Codes ignored - Reason
3720 * ----------------------
3721 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3722 * f0H/f1H - Currently we do not interpret prefetch size by design
3724 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3725 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3726 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3727 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3728 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3729 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3730 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3731 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3732 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3733 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3734 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3735 { 0xd0, 4, 64, 512*1024, l3_cache_str},
3736 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3737 { 0xc0, 4, 0, 8, dtlb44_str },
3738 { 0xba, 4, 0, 64, dtlb4k_str },
3739 { 0xb4, 4, 0, 256, dtlb4k_str },
3740 { 0xb3, 4, 0, 128, dtlb4k_str },
3741 { 0xb2, 4, 0, 64, itlb4k_str },
3742 { 0xb0, 4, 0, 128, itlb4k_str },
3743 { 0x87, 8, 64, 1024*1024, l2_cache_str},
3744 { 0x86, 4, 64, 512*1024, l2_cache_str},
3745 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3746 { 0x84, 8, 32, 1024*1024, l2_cache_str},
3747 { 0x83, 8, 32, 512*1024, l2_cache_str},
3748 { 0x82, 8, 32, 256*1024, l2_cache_str},
3749 { 0x80, 8, 64, 512*1024, l2_cache_str},
3750 { 0x7f, 2, 64, 512*1024, l2_cache_str},
3751 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3752 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3753 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
3754 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
3755 { 0x79, 8, 64, 128*1024, sl2_cache_str},
3756 { 0x78, 8, 64, 1024*1024, l2_cache_str},
3757 { 0x73, 8, 0, 64*1024, itrace_str},
3758 { 0x72, 8, 0, 32*1024, itrace_str},
3759 { 0x71, 8, 0, 16*1024, itrace_str},
3760 { 0x70, 8, 0, 12*1024, itrace_str},
3761 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
3762 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
3763 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
3764 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
3765 { 0x5d, 0, 0, 256, dtlb44_str},
3766 { 0x5c, 0, 0, 128, dtlb44_str},
3767 { 0x5b, 0, 0, 64, dtlb44_str},
3768 { 0x5a, 4, 0, 32, dtlb24_str},
3769 { 0x59, 0, 0, 16, dtlb4k_str},
3770 { 0x57, 4, 0, 16, dtlb4k_str},
3771 { 0x56, 4, 0, 16, dtlb4M_str},
3772 { 0x55, 0, 0, 7, itlb24_str},
3773 { 0x52, 0, 0, 256, itlb424_str},
3774 { 0x51, 0, 0, 128, itlb424_str},
3775 { 0x50, 0, 0, 64, itlb424_str},
3776 { 0x4f, 0, 0, 32, itlb4k_str},
3777 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3778 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3779 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3780 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3781 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3782 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3783 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3784 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3785 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3786 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3787 { 0x44, 4, 32, 1024*1024, l2_cache_str},
3788 { 0x43, 4, 32, 512*1024, l2_cache_str},
3789 { 0x42, 4, 32, 256*1024, l2_cache_str},
3790 { 0x41, 4, 32, 128*1024, l2_cache_str},
3791 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
3792 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
3793 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
3794 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
3795 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
3796 { 0x39, 4, 64, 128*1024, sl2_cache_str},
3797 { 0x30, 8, 64, 32*1024, l1_icache_str},
3798 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
3799 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
3800 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
3801 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
3802 { 0x22, 4, 64, 512*1024, sl3_cache_str},
3803 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
3804 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
3805 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
3806 { 0x0b, 4, 0, 4, itlb4M_str},
3807 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
3808 { 0x08, 4, 32, 16*1024, l1_icache_str},
3809 { 0x06, 4, 32, 8*1024, l1_icache_str},
3810 { 0x05, 4, 0, 32, dtlb4M_str},
3811 { 0x04, 4, 0, 8, dtlb4M_str},
3812 { 0x03, 4, 0, 64, dtlb4k_str},
3813 { 0x02, 4, 0, 2, itlb4M_str},
3814 { 0x01, 4, 0, 32, itlb4k_str},
3815 { 0 }
3818 static const struct cachetab cyrix_ctab[] = {
3819 { 0x70, 4, 0, 32, "tlb-4K" },
3820 { 0x80, 4, 16, 16*1024, "l1-cache" },
3821 { 0 }
3825 * Search a cache table for a matching entry
3827 static const struct cachetab *
3828 find_cacheent(const struct cachetab *ct, uint_t code)
3830 if (code != 0) {
3831 for (; ct->ct_code != 0; ct++)
3832 if (ct->ct_code <= code)
3833 break;
3834 if (ct->ct_code == code)
3835 return (ct);
3837 return (NULL);
3841 * Populate cachetab entry with L2 or L3 cache-information using
3842 * cpuid function 4. This function is called from intel_walk_cacheinfo()
3843 * when descriptor 0x49 is encountered. It returns 0 if no such cache
3844 * information is found.
3846 static int
3847 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3849 uint32_t level, i;
3850 int ret = 0;
3852 for (i = 0; i < cpi->cpi_std_4_size; i++) {
3853 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3855 if (level == 2 || level == 3) {
3856 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3857 ct->ct_line_size =
3858 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3859 ct->ct_size = ct->ct_assoc *
3860 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3861 ct->ct_line_size *
3862 (cpi->cpi_std_4[i]->cp_ecx + 1);
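 * i.e. size = ways * partitions * line size * sets; e.g.
 * 8 ways * 1 partition * 64-byte lines * 8192 sets == 4MB.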
3864 if (level == 2) {
3865 ct->ct_label = l2_cache_str;
3866 } else if (level == 3) {
3867 ct->ct_label = l3_cache_str;
3869 ret = 1;
3873 return (ret);
3877 * Walk the cacheinfo descriptor, applying 'func' to every valid element
3878 * The walk is terminated if the walker returns non-zero.
3880 static void
3881 intel_walk_cacheinfo(struct cpuid_info *cpi,
3882 void *arg, int (*func)(void *, const struct cachetab *))
3884 const struct cachetab *ct;
3885 struct cachetab des_49_ct, des_b1_ct;
3886 uint8_t *dp;
3887 int i;
3889 if ((dp = cpi->cpi_cacheinfo) == NULL)
3890 return;
3891 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3893 * For overloaded descriptor 0x49 we use cpuid function 4
3894 * if supported by the current processor, to create
3895 * cache information.
3896 * For overloaded descriptor 0xb1 we use X86_PAE flag
3897 * to disambiguate the cache information.
3899 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3900 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3901 ct = &des_49_ct;
3902 } else if (*dp == 0xb1) {
3903 des_b1_ct.ct_code = 0xb1;
3904 des_b1_ct.ct_assoc = 4;
3905 des_b1_ct.ct_line_size = 0;
3906 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3907 des_b1_ct.ct_size = 8;
3908 des_b1_ct.ct_label = itlb2M_str;
3909 } else {
3910 des_b1_ct.ct_size = 4;
3911 des_b1_ct.ct_label = itlb4M_str;
3913 ct = &des_b1_ct;
3914 } else {
3915 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3916 continue;
3920 if (func(arg, ct) != 0) {
3921 break;
3927 * (Like the Intel one, except for Cyrix CPUs)
3929 static void
3930 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3931 void *arg, int (*func)(void *, const struct cachetab *))
3933 const struct cachetab *ct;
3934 uint8_t *dp;
3935 int i;
3937 if ((dp = cpi->cpi_cacheinfo) == NULL)
3938 return;
3939 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3941 * Search the Cyrix-specific descriptor table first ..
3943 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3944 if (func(arg, ct) != 0)
3945 break;
3946 continue;
3949 * .. else fall back to the Intel one
3951 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3952 if (func(arg, ct) != 0)
3953 break;
3954 continue;
3960 * A cacheinfo walker that adds associativity, line-size, and size properties
3961 * to the devinfo node it is passed as an argument.
3963 static int
3964 add_cacheent_props(void *arg, const struct cachetab *ct)
3966 dev_info_t *devi = arg;
3968 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3969 if (ct->ct_line_size != 0)
3970 add_cache_prop(devi, ct->ct_label, line_str,
3971 ct->ct_line_size);
3972 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3973 return (0);
3977 static const char fully_assoc[] = "fully-associative?";
3980 * AMD style cache/tlb description
3982 * Extended functions 5 and 6 directly describe properties of
3983 * tlbs and various cache levels.
3985 static void
3986 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3988 switch (assoc) {
3989 case 0: /* reserved; ignore */
3990 break;
3991 default:
3992 add_cache_prop(devi, label, assoc_str, assoc);
3993 break;
3994 case 0xff:
3995 add_cache_prop(devi, label, fully_assoc, 1);
3996 break;
4000 static void
4001 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
4003 if (size == 0)
4004 return;
4005 add_cache_prop(devi, label, size_str, size);
4006 add_amd_assoc(devi, label, assoc);
4009 static void
4010 add_amd_cache(dev_info_t *devi, const char *label,
4011 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
4013 if (size == 0 || line_size == 0)
4014 return;
4015 add_amd_assoc(devi, label, assoc);
4017 * Most AMD parts have a sectored cache. Multiple cache lines are
4018 * associated with each tag. A sector consists of all cache lines
4019 * associated with a tag. For example, the AMD K6-III has a sector
4020 * size of 2 cache lines per tag.
4022 if (lines_per_tag != 0)
4023 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
4024 add_cache_prop(devi, label, line_str, line_size);
4025 add_cache_prop(devi, label, size_str, size * 1024);
4028 static void
4029 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
4031 switch (assoc) {
4032 case 0: /* off */
4033 break;
4034 case 1:
4035 case 2:
4036 case 4:
4037 add_cache_prop(devi, label, assoc_str, assoc);
4038 break;
4039 case 6:
4040 add_cache_prop(devi, label, assoc_str, 8);
4041 break;
4042 case 8:
4043 add_cache_prop(devi, label, assoc_str, 16);
4044 break;
4045 case 0xf:
4046 add_cache_prop(devi, label, fully_assoc, 1);
4047 break;
4048 default: /* reserved; ignore */
4049 break;
4053 static void
4054 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
4056 if (size == 0 || assoc == 0)
4057 return;
4058 add_amd_l2_assoc(devi, label, assoc);
4059 add_cache_prop(devi, label, size_str, size);
4062 static void
4063 add_amd_l2_cache(dev_info_t *devi, const char *label,
4064 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
4066 if (size == 0 || assoc == 0 || line_size == 0)
4067 return;
4068 add_amd_l2_assoc(devi, label, assoc);
4069 if (lines_per_tag != 0)
4070 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
4071 add_cache_prop(devi, label, line_str, line_size);
4072 add_cache_prop(devi, label, size_str, size * 1024);
4075 static void
4076 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
4078 struct cpuid_regs *cp;
4080 if (cpi->cpi_xmaxeax < 0x80000005)
4081 return;
4082 cp = &cpi->cpi_extd[5];
4085 * 4M/2M L1 TLB configuration
4087 * We report the size for 2M pages because AMD uses two
4088 * TLB entries for one 4M page.
4090 add_amd_tlb(devi, "dtlb-2M",
4091 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
4092 add_amd_tlb(devi, "itlb-2M",
4093 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
4096 * 4K L1 TLB configuration
4099 switch (cpi->cpi_vendor) {
4100 uint_t nentries;
4101 case X86_VENDOR_TM:
4102 if (cpi->cpi_family >= 5) {
4104 * Crusoe processors have 256 TLB entries, but
4105 * cpuid data format constrains them to only
4106 * reporting 255 of them.
4108 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
4109 nentries = 256;
4111 * Crusoe processors also have a unified TLB
4113 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
4114 nentries);
4115 break;
4117 /*FALLTHROUGH*/
4118 default:
4119 add_amd_tlb(devi, itlb4k_str,
4120 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
4121 add_amd_tlb(devi, dtlb4k_str,
4122 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
4123 break;
4127 * data L1 cache configuration
4130 add_amd_cache(devi, l1_dcache_str,
4131 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
4132 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
4135 * code L1 cache configuration
4138 add_amd_cache(devi, l1_icache_str,
4139 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
4140 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
4142 if (cpi->cpi_xmaxeax < 0x80000006)
4143 return;
4144 cp = &cpi->cpi_extd[6];
4146 /* Check for a unified L2 TLB for large pages */
4148 if (BITX(cp->cp_eax, 31, 16) == 0)
4149 add_amd_l2_tlb(devi, "l2-tlb-2M",
4150 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4151 else {
4152 add_amd_l2_tlb(devi, "l2-dtlb-2M",
4153 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
4154 add_amd_l2_tlb(devi, "l2-itlb-2M",
4155 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4158 /* Check for a unified L2 TLB for 4K pages */
4160 if (BITX(cp->cp_ebx, 31, 16) == 0) {
4161 add_amd_l2_tlb(devi, "l2-tlb-4K",
4162 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
4163 } else {
4164 add_amd_l2_tlb(devi, "l2-dtlb-4K",
4165 BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
4166 add_amd_l2_tlb(devi, "l2-itlb-4K",
4167 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
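 * (Function 0x80000006 reports the 4K L2 TLB in %ebx; %eax,
 * used for the 2M/4M fields above, describes large pages.)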
4170 add_amd_l2_cache(devi, l2_cache_str,
4171 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
4172 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
4176 * There are two basic ways that the x86 world describes its cache
4177 * and tlb architecture - Intel's way and AMD's way.
4179 * Return which flavor of cache architecture we should use
4181 static int
4182 x86_which_cacheinfo(struct cpuid_info *cpi)
4183 {
4184 switch (cpi->cpi_vendor) {
4185 case X86_VENDOR_Intel:
4186 if (cpi->cpi_maxeax >= 2)
4187 return (X86_VENDOR_Intel);
4188 break;
4189 case X86_VENDOR_AMD:
4190 /*
4191 * The K5 model 1 was the first part from AMD that reported
4192 * cache sizes via extended cpuid functions.
4193 */
4194 if (cpi->cpi_family > 5 ||
4195 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
4196 return (X86_VENDOR_AMD);
4197 break;
4198 case X86_VENDOR_TM:
4199 if (cpi->cpi_family >= 5)
4200 return (X86_VENDOR_AMD);
4201 /*FALLTHROUGH*/
4202 default:
4203 /*
4204 * If they have extended CPU data for 0x80000005
4205 * then we assume they have AMD-format cache
4206 * information.
4207 *
4208 * If not, and the vendor happens to be Cyrix,
4209 * then try our Cyrix-specific handler.
4210 *
4211 * If we're not Cyrix, then assume we're using Intel's
4212 * table-driven format instead.
4213 */
4214 if (cpi->cpi_xmaxeax >= 0x80000005)
4215 return (X86_VENDOR_AMD);
4216 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
4217 return (X86_VENDOR_Cyrix);
4218 else if (cpi->cpi_maxeax >= 2)
4219 return (X86_VENDOR_Intel);
4220 break;
4221 }
4222 return (-1);
4223 }
4225 void
4226 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
4227 struct cpuid_info *cpi)
4228 {
4229 dev_info_t *cpu_devi;
4230 int create;
4232 cpu_devi = (dev_info_t *)dip;
4234 /* device_type */
4235 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4236 "device_type", "cpu");
4238 /* reg */
4239 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4240 "reg", cpu_id);
4242 /* cpu-mhz, and clock-frequency */
4243 if (cpu_freq > 0) {
4244 long long mul;
4246 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4247 "cpu-mhz", cpu_freq);
4248 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
4249 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4250 "clock-frequency", (int)mul);
4251 }
4253 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
4254 return;
4255 }
4257 /* vendor-id */
4258 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4259 "vendor-id", cpi->cpi_vendorstr);
4261 if (cpi->cpi_maxeax == 0) {
4262 return;
4263 }
4265 /*
4266 * family, model, and step
4267 */
4268 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4269 "family", CPI_FAMILY(cpi));
4270 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4271 "cpu-model", CPI_MODEL(cpi));
4272 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4273 "stepping-id", CPI_STEP(cpi));
4275 /* type */
4276 switch (cpi->cpi_vendor) {
4277 case X86_VENDOR_Intel:
4278 create = 1;
4279 break;
4280 default:
4281 create = 0;
4282 break;
4283 }
4284 if (create)
4285 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4286 "type", CPI_TYPE(cpi));
4288 /* ext-family */
4289 switch (cpi->cpi_vendor) {
4290 case X86_VENDOR_Intel:
4291 case X86_VENDOR_AMD:
4292 create = cpi->cpi_family >= 0xf;
4293 break;
4294 default:
4295 create = 0;
4296 break;
4297 }
4298 if (create)
4299 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4300 "ext-family", CPI_FAMILY_XTD(cpi));
4302 /* ext-model */
4303 switch (cpi->cpi_vendor) {
4304 case X86_VENDOR_Intel:
4305 create = IS_EXTENDED_MODEL_INTEL(cpi);
4306 break;
4307 case X86_VENDOR_AMD:
4308 create = CPI_FAMILY(cpi) == 0xf;
4309 break;
4310 default:
4311 create = 0;
4312 break;
4313 }
4314 if (create)
4315 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4316 "ext-model", CPI_MODEL_XTD(cpi));
4318 /* generation */
4319 switch (cpi->cpi_vendor) {
4320 case X86_VENDOR_AMD:
4321 /*
4322 * AMD K5 model 1 was the first part to support this
4323 */
4324 create = cpi->cpi_xmaxeax >= 0x80000001;
4325 break;
4326 default:
4327 create = 0;
4328 break;
4329 }
4330 if (create)
4331 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4332 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4334 /* brand-id */
4335 switch (cpi->cpi_vendor) {
4336 case X86_VENDOR_Intel:
4337 /*
4338 * brand id first appeared on Pentium III Xeon model 8,
4339 * and Celeron model 8 processors and Opteron
4340 */
4341 create = cpi->cpi_family > 6 ||
4342 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4343 break;
4344 case X86_VENDOR_AMD:
4345 create = cpi->cpi_family >= 0xf;
4346 break;
4347 default:
4348 create = 0;
4349 break;
4350 }
4351 if (create && cpi->cpi_brandid != 0) {
4352 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4353 "brand-id", cpi->cpi_brandid);
4354 }
4356 /* chunks, and apic-id */
4357 switch (cpi->cpi_vendor) {
4358 /*
4359 * first available on Pentium IV and Opteron (K8)
4360 */
4361 case X86_VENDOR_Intel:
4362 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4363 break;
4364 case X86_VENDOR_AMD:
4365 create = cpi->cpi_family >= 0xf;
4366 break;
4367 default:
4368 create = 0;
4369 break;
4370 }
4371 if (create) {
4372 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4373 "chunks", CPI_CHUNKS(cpi));
4374 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4375 "apic-id", cpi->cpi_apicid);
4376 if (cpi->cpi_chipid >= 0) {
4377 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4378 "chip#", cpi->cpi_chipid);
4379 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4380 "clog#", cpi->cpi_clogid);
4381 }
4382 }
4384 /* cpuid-features */
4385 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4386 "cpuid-features", CPI_FEATURES_EDX(cpi));
4389 /* cpuid-features-ecx */
4390 switch (cpi->cpi_vendor) {
4391 case X86_VENDOR_Intel:
4392 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4393 break;
4394 case X86_VENDOR_AMD:
4395 create = cpi->cpi_family >= 0xf;
4396 break;
4397 default:
4398 create = 0;
4399 break;
4400 }
4401 if (create)
4402 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4403 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4405 /* ext-cpuid-features */
4406 switch (cpi->cpi_vendor) {
4407 case X86_VENDOR_Intel:
4408 case X86_VENDOR_AMD:
4409 case X86_VENDOR_Cyrix:
4410 case X86_VENDOR_TM:
4411 case X86_VENDOR_Centaur:
4412 create = cpi->cpi_xmaxeax >= 0x80000001;
4413 break;
4414 default:
4415 create = 0;
4416 break;
4417 }
4418 if (create) {
4419 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4420 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4421 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4422 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4423 }
4425 /*
4426 * Brand String first appeared in Intel Pentium IV, AMD K5
4427 * model 1, and Cyrix GXm. On earlier models we try and
4428 * simulate something similar .. so this string should always
4429 * say -something- about the processor, however lame.
4430 */
4431 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4432 "brand-string", cpi->cpi_brandstr);
4434 /*
4435 * Finally, cache and tlb information
4436 */
4437 switch (x86_which_cacheinfo(cpi)) {
4438 case X86_VENDOR_Intel:
4439 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4440 break;
4441 case X86_VENDOR_Cyrix:
4442 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4443 break;
4444 case X86_VENDOR_AMD:
4445 amd_cache_info(cpi, cpu_devi);
4446 break;
4447 default:
4448 break;
4449 }
4450 }
4452 struct l2info {
4453 int *l2i_csz;
4454 int *l2i_lsz;
4455 int *l2i_assoc;
4456 int l2i_ret;
4457 };
4459 /*
4460 * A cacheinfo walker that fetches the size, line-size and associativity
4461 * of the L2 cache.
4462 */
4463 static int
4464 intel_l2cinfo(void *arg, const struct cachetab *ct)
4465 {
4466 struct l2info *l2i = arg;
4467 int *ip;
4469 if (ct->ct_label != l2_cache_str &&
4470 ct->ct_label != sl2_cache_str)
4471 return (0); /* not an L2 -- keep walking */
4473 if ((ip = l2i->l2i_csz) != NULL)
4474 *ip = ct->ct_size;
4475 if ((ip = l2i->l2i_lsz) != NULL)
4476 *ip = ct->ct_line_size;
4477 if ((ip = l2i->l2i_assoc) != NULL)
4478 *ip = ct->ct_assoc;
4479 l2i->l2i_ret = ct->ct_size;
4480 return (1); /* was an L2 -- terminate walk */
4481 }
4483 /*
4484 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4485 *
4486 * Unlike the associativity for the L1 cache and tlb where the 8 bit
4487 * value is the associativity, the associativity for the L2 cache and
4488 * tlb is encoded in the following table. The 4 bit L2 value serves as
4489 * an index into the amd_afd[] array to determine the associativity.
4490 * -1 is undefined. 0 is fully associative.
4491 */
4493 static int amd_afd[] =
4494 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
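/*
 * Worked example against the table above (the field value itself is
 * hypothetical): an encoded L2 associativity field of 0x6 decodes as
 * amd_afd[6] == 8, i.e. 8-way set associative, while 0xf decodes as
 * amd_afd[15] == 0, i.e. fully associative.
 */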
4496 static void
4497 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4498 {
4499 struct cpuid_regs *cp;
4500 uint_t size, assoc;
4501 int i;
4502 int *ip;
4504 if (cpi->cpi_xmaxeax < 0x80000006)
4505 return;
4506 cp = &cpi->cpi_extd[6];
4508 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4509 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4510 uint_t cachesz = size * 1024;
4511 assoc = amd_afd[i];
4513 ASSERT(assoc != -1);
4515 if ((ip = l2i->l2i_csz) != NULL)
4516 *ip = cachesz;
4517 if ((ip = l2i->l2i_lsz) != NULL)
4518 *ip = BITX(cp->cp_ecx, 7, 0);
4519 if ((ip = l2i->l2i_assoc) != NULL)
4520 *ip = assoc;
4521 l2i->l2i_ret = cachesz;
4522 }
4523 }
4525 int
4526 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4527 {
4528 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4529 struct l2info __l2info, *l2i = &__l2info;
4531 l2i->l2i_csz = csz;
4532 l2i->l2i_lsz = lsz;
4533 l2i->l2i_assoc = assoc;
4534 l2i->l2i_ret = -1;
4536 switch (x86_which_cacheinfo(cpi)) {
4537 case X86_VENDOR_Intel:
4538 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4539 break;
4540 case X86_VENDOR_Cyrix:
4541 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4542 break;
4543 case X86_VENDOR_AMD:
4544 amd_l2cacheinfo(cpi, l2i);
4545 break;
4546 default:
4547 break;
4548 }
4549 return (l2i->l2i_ret);
4550 }
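/*
 * Illustrative usage (hypothetical caller, not from this file); any of
 * the three out-pointers may be passed as NULL if that datum is not
 * wanted, and a return of -1 means no L2 cache was identified:
 *
 *	int csz, lsz, assoc;
 *	if (getl2cacheinfo(CPU, &csz, &lsz, &assoc) > 0) {
 *		csz, lsz and assoc now hold the L2 size, line size
 *		and associativity reported by the walker
 *	}
 */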
4552 #if !defined(__xpv)
4554 uint32_t *
4555 cpuid_mwait_alloc(cpu_t *cpu)
4556 {
4557 uint32_t *ret;
4558 size_t mwait_size;
4560 ASSERT(cpuid_checkpass(CPU, 2));
4562 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4563 if (mwait_size == 0)
4564 return (NULL);
4566 /*
4567 * kmem_alloc() returns cache line size aligned data for mwait_size
4568 * allocations. mwait_size is currently cache line sized. Neither
4569 * of these implementation details are guaranteed to be true in the
4570 * future.
4571 *
4572 * First try allocating mwait_size as kmem_alloc() currently returns
4573 * correctly aligned memory. If kmem_alloc() does not return
4574 * mwait_size aligned memory, then use mwait_size ROUNDUP.
4575 *
4576 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4577 * decide to free this memory.
4578 */
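/*
 * Alignment-check sketch (hypothetical addresses, not from the source):
 * P2ROUNDUP(x, align) rounds x up to the next multiple of align. With
 * mwait_size == 0x40, P2ROUNDUP(0x1240, 0x40) == 0x1240 (already
 * aligned, so the buffer is kept), while P2ROUNDUP(0x1248, 0x40) ==
 * 0x1280 (misaligned, so the doubled allocation below is used instead).
 */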
4579 ret = kmem_zalloc(mwait_size, KM_SLEEP);
4580 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4581 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4582 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4583 *ret = MWAIT_RUNNING;
4584 return (ret);
4585 } else {
4586 kmem_free(ret, mwait_size);
4587 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4588 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4589 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4590 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4591 *ret = MWAIT_RUNNING;
4592 return (ret);
4593 }
4594 }
4596 void
4597 cpuid_mwait_free(cpu_t *cpu)
4598 {
4599 if (cpu->cpu_m.mcpu_cpi == NULL) {
4600 return;
4601 }
4603 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4604 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4605 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4606 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4607 }
4609 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4610 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4611 }
4613 void
4614 patch_tsc_read(int flag)
4615 {
4616 size_t cnt;
4618 switch (flag) {
4619 case TSC_NONE:
4620 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4621 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4622 break;
4623 case TSC_RDTSC_MFENCE:
4624 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4625 (void) memcpy((void *)tsc_read,
4626 (void *)&_tsc_mfence_start, cnt);
4627 break;
4628 case TSC_RDTSC_LFENCE:
4629 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4630 (void) memcpy((void *)tsc_read,
4631 (void *)&_tsc_lfence_start, cnt);
4632 break;
4633 case TSC_TSCP:
4634 cnt = &_tscp_end - &_tscp_start;
4635 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4636 break;
4637 default:
4638 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
4639 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
4640 break;
4641 }
4642 tsc_type = flag;
4643 }
4645 int
4646 cpuid_deep_cstates_supported(void)
4647 {
4648 struct cpuid_info *cpi;
4649 struct cpuid_regs regs;
4651 ASSERT(cpuid_checkpass(CPU, 1));
4653 cpi = CPU->cpu_m.mcpu_cpi;
4655 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4656 return (0);
4658 switch (cpi->cpi_vendor) {
4659 case X86_VENDOR_Intel:
4660 if (cpi->cpi_xmaxeax < 0x80000007)
4661 return (0);
4663 /*
4664 * Does the TSC run at a constant rate in all ACPI C-states?
4665 */
4666 regs.cp_eax = 0x80000007;
4667 (void) __cpuid_insn(&regs);
4668 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4670 default:
4671 return (0);
4672 }
4673 }
4675 #endif /* !__xpv */
4677 void
4678 post_startup_cpu_fixups(void)
4679 {
4680 #ifndef __xpv
4681 /*
4682 * Some AMD processors support C1E state. Entering this state will
4683 * cause the local APIC timer to stop, which we can't deal with at
4684 * this time.
4685 */
4686 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4687 on_trap_data_t otd;
4688 uint64_t reg;
4690 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4691 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4692 /* Disable C1E state if it is enabled by BIOS */
4693 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4694 AMD_ACTONCMPHALT_MASK) {
4695 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4696 AMD_ACTONCMPHALT_SHIFT);
4697 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4698 }
4699 }
4700 no_trap();
4701 }
4702 #endif /* !__xpv */
4703 }
4705 /*
4706 * Setup necessary registers to enable XSAVE feature on this processor.
4707 * This function needs to be called early enough, so that no xsave/xrstor
4708 * ops will execute on the processor before the MSRs are properly set up.
4709 *
4710 * Current implementation has the following assumptions:
4711 * - cpuid_pass1() is done, so that X86 features are known.
4712 * - fpu_probe() is done, so that fp_save_mech is chosen.
4713 */
4714 void
4715 xsave_setup_msr(cpu_t *cpu)
4716 {
4717 ASSERT(fp_save_mech == FP_XSAVE);
4718 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4720 /* Enable OSXSAVE in CR4. */
4721 setcr4(getcr4() | CR4_OSXSAVE);
4722 /*
4723 * Update the SW copy of ECX, so that /dev/cpu/self/cpuid will report
4724 * the correct value.
4725 */
4726 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4727 setup_xfem();
4728 }
4730 /*
4731 * Starting with the Westmere processor the local
4732 * APIC timer will continue running in all C-states,
4733 * including the deepest C-states.
4734 */
4735 int
4736 cpuid_arat_supported(void)
4737 {
4738 struct cpuid_info *cpi;
4739 struct cpuid_regs regs;
4741 ASSERT(cpuid_checkpass(CPU, 1));
4742 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4744 cpi = CPU->cpu_m.mcpu_cpi;
4746 switch (cpi->cpi_vendor) {
4747 case X86_VENDOR_Intel:
4748 /*
4749 * Always-running Local APIC Timer is
4750 * indicated by CPUID.6.EAX[2].
4751 */
4752 if (cpi->cpi_maxeax >= 6) {
4753 regs.cp_eax = 6;
4754 (void) cpuid_insn(NULL, &regs);
4755 return (regs.cp_eax & CPUID_CSTATE_ARAT);
4756 } else {
4757 return (0);
4758 }
4759 default:
4760 return (0);
4761 }
4762 }
4764 /*
4765 * Check support for Intel ENERGY_PERF_BIAS feature
4766 */
4767 int
4768 cpuid_iepb_supported(struct cpu *cp)
4769 {
4770 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4771 struct cpuid_regs regs;
4773 ASSERT(cpuid_checkpass(cp, 1));
4775 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4776 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4777 return (0);
4778 }
4780 /*
4781 * Intel ENERGY_PERF_BIAS MSR is indicated by
4782 * capability bit CPUID.6.ECX.3
4783 */
4784 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4785 return (0);
4787 regs.cp_eax = 0x6;
4788 (void) cpuid_insn(NULL, &regs);
4789 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4790 }
4792 /*
4793 * Check support for TSC deadline timer.
4794 *
4795 * The TSC deadline timer provides a superior software programming
4796 * model over the local APIC timer that eliminates "time drifts".
4797 * Instead of specifying a relative time, software specifies an
4798 * absolute time as the target at which the processor should
4799 * generate a timer event.
4800 */
4801 int
4802 cpuid_deadline_tsc_supported(void)
4803 {
4804 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4805 struct cpuid_regs regs;
4807 ASSERT(cpuid_checkpass(CPU, 1));
4808 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4810 switch (cpi->cpi_vendor) {
4811 case X86_VENDOR_Intel:
4812 if (cpi->cpi_maxeax >= 1) {
4813 regs.cp_eax = 1;
4814 (void) cpuid_insn(NULL, &regs);
4815 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4816 } else {
4817 return (0);
4818 }
4819 default:
4820 return (0);
4821 }
4822 }
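/*
 * Programming-model sketch (hypothetical MSR write, not code from this
 * file): with the deadline timer, software arms an absolute target
 * rather than a relative count, e.g.
 *
 *	wrmsr(0x6e0, tsc_now + delta);	(0x6e0 is IA32_TSC_DEADLINE)
 *
 * so the event fires when the TSC reaches the target, instead of the
 * interval being measured from whenever the APIC initial count happened
 * to be written.
 */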
4824 #if defined(__amd64) && !defined(__xpv)
4825 /*
4826 * Patch in versions of bcopy for high performance Intel Nhm (Nehalem)
4827 * processors and later...
4828 */
4829 void
4830 patch_memops(uint_t vendor)
4831 {
4832 size_t cnt, i;
4833 caddr_t to, from;
4835 if ((vendor == X86_VENDOR_Intel) &&
4836 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4837 cnt = &bcopy_patch_end - &bcopy_patch_start;
4838 to = &bcopy_ck_size;
4839 from = &bcopy_patch_start;
4840 for (i = 0; i < cnt; i++) {
4841 *to++ = *from++;
4842 }
4843 }
4844 }
4845 #endif /* __amd64 && !__xpv */
4847 /*
4848 * This function finds the number of bits to represent the number of cores per
4849 * chip and the number of strands per core for the Intel platforms.
4850 * It reuses the x2APIC cpuid code from cpuid_pass2().
4851 */
4852 void
4853 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4854 {
4855 struct cpuid_regs regs;
4856 struct cpuid_regs *cp = &regs;
4858 if (vendor != X86_VENDOR_Intel) {
4859 return;
4860 }
4862 /* if the cpuid level is 0xB, extended topo is available. */
4863 cp->cp_eax = 0;
4864 if (__cpuid_insn(cp) >= 0xB) {
4866 cp->cp_eax = 0xB;
4867 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4868 (void) __cpuid_insn(cp);
4870 /*
4871 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4872 * indicates that the extended topology enumeration leaf is
4873 * available.
4874 */
4875 if (cp->cp_ebx) {
4876 uint_t coreid_shift = 0;
4877 uint_t chipid_shift = 0;
4878 uint_t i;
4879 uint_t level;
4881 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4882 cp->cp_eax = 0xB;
4883 cp->cp_ecx = i;
4885 (void) __cpuid_insn(cp);
4886 level = CPI_CPU_LEVEL_TYPE(cp);
4888 if (level == 1) {
4889 /*
4890 * Thread level processor topology.
4891 * Number of bits to shift the APIC ID right
4892 * to get the coreid.
4893 */
4894 coreid_shift = BITX(cp->cp_eax, 4, 0);
4895 } else if (level == 2) {
4896 /*
4897 * Core level processor topology.
4898 * Number of bits to shift the APIC ID right
4899 * to get the chipid.
4900 */
4901 chipid_shift = BITX(cp->cp_eax, 4, 0);
4902 }
4903 }
4905 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4906 *strand_nbits = coreid_shift;
4907 *core_nbits = chipid_shift - coreid_shift;
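/*
 * Worked example (hypothetical shifts, illustrative only): with
 * coreid_shift == 1 and chipid_shift == 3, each core holds 2^1 strands
 * (*strand_nbits = 1) and each chip holds 2^(3 - 1) == 4 cores
 * (*core_nbits = 2); APIC ID bit 0 then selects the strand, and bits
 * 2:1 select the core within the chip.
 */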