4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
27 * "Generic AMD" model-specific support. If no more-specific support can
28 * be found, or such a module declines to initialize, then for AuthenticAMD
29 * cpus this module can have a crack at providing some AMD model-specific
30 * support that at least goes beyond common MCA architectural features
31 * if not down to the nitty-gritty level for a particular model. We
32 * are layered on top of a cpu module, likely cpu.generic, so there is no
33 * need for us to perform common architecturally-accessible functions.
36 #include <sys/types.h>
37 #include <sys/cmn_err.h>
38 #include <sys/modctl.h>
39 #include <sys/cpu_module.h>
40 #include <sys/mca_x86.h>
41 #include <sys/pci_cfgspace.h>
42 #include <sys/x86_archext.h>
43 #include <sys/mc_amd.h>
44 #include <sys/fm/protocol.h>
45 #include <sys/fm/cpu/GENAMD.h>
46 #include <sys/fm/smb/fmsmb.h>
47 #include <sys/fm/util.h>
48 #include <sys/nvpair.h>
49 #include <sys/controlregs.h>
51 #include <sys/sunddi.h>
52 #include <sys/sysmacros.h>
53 #include <sys/cpu_module_ms_impl.h>
/* x86 generic topo support; defined outside this file. */
extern int x86gentopo_legacy;

/*
 * When non-zero, authamd_init() declines to provide model-specific
 * support.
 */
int authamd_ms_support_disable = 0;
/*
 * Unions of chip-revision bits: family 0xf revisions B through E,
 * family 0xf revisions F and G, and family 0x10 revisions A and B.
 */
#define	AUTHAMD_F_REVS_BCDE		\
	(X86_CHIPREV_AMD_F_REV_B |	\
	X86_CHIPREV_AMD_F_REV_C0 |	\
	X86_CHIPREV_AMD_F_REV_CG |	\
	X86_CHIPREV_AMD_F_REV_D |	\
	X86_CHIPREV_AMD_F_REV_E)

#define	AUTHAMD_F_REVS_FG		\
	(X86_CHIPREV_AMD_F_REV_F |	\
	X86_CHIPREV_AMD_F_REV_G)

#define	AUTHAMD_10_REVS_AB		\
	(X86_CHIPREV_AMD_10_REV_A |	\
	X86_CHIPREV_AMD_10_REV_B)
73 * Bitmasks of support for various features. Try to enable features
74 * via inclusion in one of these bitmasks and check that at the
75 * feature implementation - that way new family support may often
76 * simply need to update these bitmasks.
/*
 * Models that include an on-chip NorthBridge.
 */
#define	AUTHAMD_NBONCHIP(rev)					\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_F_REV_B) ||	\
	X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions for which we can recognise main memory ECC errors.
 */
#define	AUTHAMD_MEMECC_RECOGNISED(rev)				\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_F_REV_B) ||	\
	X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that have an Online Spare Control Register.
 */
#define	AUTHAMD_HAS_ONLINESPARECTL(rev)				\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_F_REV_F) ||	\
	X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions for which we will perform NB MCA Config changes.
 */
#define	AUTHAMD_DO_NBMCACFG(rev)				\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_F_REV_B) ||	\
	X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that have chip cache scrubbers.
 */
#define	AUTHAMD_HAS_CHIPSCRUB(rev)				\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_F_REV_B) ||	\
	X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that have a NB misc register or registers -
 * evaluates to 0 if no support, otherwise the number of MC4_MISCj.
 */
#define	AUTHAMD_NBMISC_NUM(rev)					\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_F_REV_F) ? 1 : \
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_A) ? 3 : 0))

/*
 * Families/revisions for which we wish not to machine check for GART
 * table walk errors - bit 10 of NB CTL.
 */
#define	AUTHAMD_NOGARTTBLWLK_MC(rev)				\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_F_REV_B) ||	\
	X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that are potentially L3 capable.
 */
#define	AUTHAMD_L3CAPABLE(rev)					\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that support x8 ChipKill ECC.
 */
#define	AUTHAMD_SUPPORTS_X8ECC(rev)				\
	(X86_CHIPREV_ATLEAST((rev), X86_CHIPREV_AMD_10_REV_D0))
/*
 * On revisions selected by AUTHAMD_MEMECC_RECOGNISED a bank status is
 * treated as a main memory ECC error when it is:
 *
 *	- reported by the NB bank
 *	- a compound bus/interconnect error (external to chip)
 *	- at level LL of LG
 *	- of II type MEM (which could still be a master/target abort)
 *	- flagged with CECC or UECC
 *
 * The extended error code (first nibble of the model-specific error code
 * on AMD) is deliberately not examined because it changed from family 0xf
 * to family 0x10 (ext code 0 is now reserved on family 0x10).  CECC/UECC
 * is used instead to separate these errors from master/target aborts.
 *
 * The detector must be the NorthBridge bank: although IC/DC can report
 * some main memory errors, they do not capture an address at sufficient
 * resolution to be useful, and the NB will report most errors.
 */
#define	AUTHAMD_IS_MEMECCERR(bank, status)				\
	((bank) == AMD_MCA_BANK_NB &&					\
	MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status)) &&	\
	MCAX86_ERRCODE_LL(MCAX86_ERRCODE(status)) ==			\
	MCAX86_ERRCODE_LL_LG &&						\
	MCAX86_ERRCODE_II(MCAX86_ERRCODE(status)) ==			\
	MCAX86_ERRCODE_II_MEM &&					\
	((status) & (AMD_BANK_STAT_CECC | AMD_BANK_STAT_UECC)))
170 static authamd_error_disp_t authamd_memce_disp
= {
171 FM_EREPORT_CPU_GENAMD
,
172 FM_EREPORT_CPU_GENAMD_MEM_CE
,
173 FM_EREPORT_GENAMD_PAYLOAD_FLAGS_MEM_CE
176 static authamd_error_disp_t authamd_memue_disp
= {
177 FM_EREPORT_CPU_GENAMD
,
178 FM_EREPORT_CPU_GENAMD_MEM_UE
,
179 FM_EREPORT_GENAMD_PAYLOAD_FLAGS_MEM_UE
182 static authamd_error_disp_t authamd_ckmemce_disp
= {
183 FM_EREPORT_CPU_GENAMD
,
184 FM_EREPORT_CPU_GENAMD_CKMEM_CE
,
185 FM_EREPORT_GENAMD_PAYLOAD_FLAGS_CKMEM_CE
188 static authamd_error_disp_t authamd_ckmemue_disp
= {
189 FM_EREPORT_CPU_GENAMD
,
190 FM_EREPORT_CPU_GENAMD_CKMEM_UE
,
191 FM_EREPORT_GENAMD_PAYLOAD_FLAGS_CKMEM_UE
/*
 * A bank status is treated as a GART walk error when it is:
 *
 *	- reported by the NB bank
 *	- a compound TLB error
 *	- at level LL of LG with TT of GEN
 *	- flagged UC
 *	- possibly flagged PCC as well (if source CPU); PCC is not tested
 */
#define	AUTHAMD_IS_GARTERR(bank, status)				\
	((bank) == AMD_MCA_BANK_NB &&					\
	MCAX86_ERRCODE_ISTLB(MCAX86_ERRCODE(status)) &&			\
	MCAX86_ERRCODE_LL(MCAX86_ERRCODE(status)) ==			\
	MCAX86_ERRCODE_LL_LG &&						\
	MCAX86_ERRCODE_TT(MCAX86_ERRCODE(status)) ==			\
	MCAX86_ERRCODE_TT_GEN &&					\
	((status) & MSR_MC_STATUS_UC))
210 static authamd_error_disp_t authamd_gart_disp
= {
211 FM_EREPORT_CPU_GENAMD
, /* use generic subclass */
212 FM_EREPORT_CPU_GENADM_GARTTBLWLK
, /* use generic leafclass */
213 0 /* no additional payload */
217 static struct authamd_nodeshared
*authamd_shared
[AUTHAMD_MAX_NODES
];
220 authamd_chip_once(authamd_data_t
*authamd
, enum authamd_cfgonce_bitnum what
)
222 return (atomic_set_long_excl(&authamd
->amd_shared
->ans_cfgonce
,
223 what
) == 0 ? B_TRUE
: B_FALSE
);
227 authamd_pcicfg_write(uint_t procnodeid
, uint_t func
, uint_t reg
, uint32_t val
)
229 ASSERT(procnodeid
+ 24 <= 31);
230 ASSERT((func
& 7) == func
);
231 ASSERT((reg
& 3) == 0 && reg
< 4096);
233 cmi_pci_putl(0, procnodeid
+ 24, func
, reg
, 0, val
);
237 authamd_pcicfg_read(uint_t procnodeid
, uint_t func
, uint_t reg
)
239 ASSERT(procnodeid
+ 24 <= 31);
240 ASSERT((func
& 7) == func
);
241 ASSERT((reg
& 3) == 0 && reg
< 4096);
243 return (cmi_pci_getl(0, procnodeid
+ 24, func
, reg
, 0, 0));
247 authamd_bankstatus_prewrite(cmi_hdl_t hdl
, authamd_data_t
*authamd
)
251 if (cmi_hdl_rdmsr(hdl
, MSR_AMD_HWCR
, &hwcr
) != CMI_SUCCESS
)
254 authamd
->amd_hwcr
= hwcr
;
256 if (!(hwcr
& AMD_HWCR_MCI_STATUS_WREN
)) {
257 hwcr
|= AMD_HWCR_MCI_STATUS_WREN
;
258 (void) cmi_hdl_wrmsr(hdl
, MSR_AMD_HWCR
, hwcr
);
263 authamd_bankstatus_postwrite(cmi_hdl_t hdl
, authamd_data_t
*authamd
)
265 uint64_t hwcr
= authamd
->amd_hwcr
;
267 if (!(hwcr
& AMD_HWCR_MCI_STATUS_WREN
)) {
268 hwcr
&= ~AMD_HWCR_MCI_STATUS_WREN
;
269 (void) cmi_hdl_wrmsr(hdl
, MSR_AMD_HWCR
, hwcr
);
274 * Read EccCnt repeatedly for all possible channel/chip-select combos:
276 * - read sparectl register
277 * - if EccErrCntWrEn is set, clear that bit in the just-read value
278 * and write it back to sparectl; this *may* clobber the EccCnt
279 * for the channel/chip-select combination currently selected, so
280 * we leave this bit clear if we had to clear it
281 * - cycle through all channel/chip-select combinations writing each
282 * combination to sparectl before reading the register back for
283 * EccCnt for that combination; since EccErrCntWrEn is clear
284 * the writes to select what count to read will not themselves
288 authamd_read_ecccnt(authamd_data_t
*authamd
, struct authamd_logout
*msl
)
290 union mcreg_sparectl sparectl
;
291 uint_t procnodeid
= authamd
->amd_shared
->ans_procnodeid
;
292 uint_t family
= authamd
->amd_shared
->ans_family
;
293 uint32_t rev
= authamd
->amd_shared
->ans_rev
;
297 * Check for feature support; this macro will test down to the
298 * family revision number, whereafter we'll switch on family
299 * assuming that future revisions will use the same register
302 if (!AUTHAMD_HAS_ONLINESPARECTL(rev
)) {
303 bzero(&msl
->aal_eccerrcnt
, sizeof (msl
->aal_eccerrcnt
));
307 MCREG_VAL32(&sparectl
) =
308 authamd_pcicfg_read(procnodeid
, MC_FUNC_MISCCTL
,
309 MC_CTL_REG_SPARECTL
);
312 case AUTHAMD_FAMILY_F
:
313 MCREG_FIELD_F_revFG(&sparectl
, EccErrCntWrEn
) = 0;
316 case AUTHAMD_FAMILY_10
:
317 MCREG_FIELD_10_revAB(&sparectl
, EccErrCntWrEn
) = 0;
321 for (chan
= 0; chan
< AUTHAMD_DRAM_NCHANNEL
; chan
++) {
323 case AUTHAMD_FAMILY_F
:
324 MCREG_FIELD_F_revFG(&sparectl
, EccErrCntDramChan
) =
328 case AUTHAMD_FAMILY_10
:
329 MCREG_FIELD_10_revAB(&sparectl
, EccErrCntDramChan
) =
334 for (cs
= 0; cs
< AUTHAMD_DRAM_NCS
; cs
++) {
336 case AUTHAMD_FAMILY_F
:
337 MCREG_FIELD_F_revFG(&sparectl
,
338 EccErrCntDramCs
) = cs
;
341 case AUTHAMD_FAMILY_10
:
342 MCREG_FIELD_10_revAB(&sparectl
,
343 EccErrCntDramCs
) = cs
;
347 authamd_pcicfg_write(procnodeid
, MC_FUNC_MISCCTL
,
348 MC_CTL_REG_SPARECTL
, MCREG_VAL32(&sparectl
));
350 MCREG_VAL32(&sparectl
) = authamd_pcicfg_read(procnodeid
,
351 MC_FUNC_MISCCTL
, MC_CTL_REG_SPARECTL
);
354 case AUTHAMD_FAMILY_F
:
355 msl
->aal_eccerrcnt
[chan
][cs
] =
356 MCREG_FIELD_F_revFG(&sparectl
, EccErrCnt
);
358 case AUTHAMD_FAMILY_10
:
359 msl
->aal_eccerrcnt
[chan
][cs
] =
360 MCREG_FIELD_10_revAB(&sparectl
, EccErrCnt
);
370 * Clear EccCnt for all possible channel/chip-select combos:
372 * - set EccErrCntWrEn in sparectl, if necessary
373 * - write 0 to EccCnt for all channel/chip-select combinations
374 * - clear EccErrCntWrEn
376 * If requested also disable the interrupts taken on counter overflow
380 authamd_clear_ecccnt(authamd_data_t
*authamd
, boolean_t clrint
)
382 union mcreg_sparectl sparectl
;
383 uint_t procnodeid
= authamd
->amd_shared
->ans_procnodeid
;
384 uint_t family
= authamd
->amd_shared
->ans_family
;
385 uint32_t rev
= authamd
->amd_shared
->ans_rev
;
388 if (!AUTHAMD_HAS_ONLINESPARECTL(rev
))
391 MCREG_VAL32(&sparectl
) =
392 authamd_pcicfg_read(procnodeid
, MC_FUNC_MISCCTL
,
393 MC_CTL_REG_SPARECTL
);
396 case AUTHAMD_FAMILY_F
:
397 MCREG_FIELD_F_revFG(&sparectl
, EccErrCntWrEn
) = 1;
399 MCREG_FIELD_F_revFG(&sparectl
, EccErrInt
) = 0;
400 MCREG_FIELD_F_revFG(&sparectl
, SwapDoneInt
) = 0;
404 case AUTHAMD_FAMILY_10
:
405 MCREG_FIELD_10_revAB(&sparectl
, EccErrCntWrEn
) = 1;
407 MCREG_FIELD_10_revAB(&sparectl
, EccErrInt
) = 0;
408 MCREG_FIELD_10_revAB(&sparectl
, SwapDoneInt
) = 0;
413 authamd_pcicfg_write(procnodeid
, MC_FUNC_MISCCTL
,
414 MC_CTL_REG_SPARECTL
, MCREG_VAL32(&sparectl
));
416 for (chan
= 0; chan
< AUTHAMD_DRAM_NCHANNEL
; chan
++) {
418 case AUTHAMD_FAMILY_F
:
419 MCREG_FIELD_F_revFG(&sparectl
, EccErrCntDramChan
) =
423 case AUTHAMD_FAMILY_10
:
424 MCREG_FIELD_10_revAB(&sparectl
, EccErrCntDramChan
) =
429 for (cs
= 0; cs
< AUTHAMD_DRAM_NCS
; cs
++) {
431 case AUTHAMD_FAMILY_F
:
432 MCREG_FIELD_F_revFG(&sparectl
,
433 EccErrCntDramCs
) = cs
;
434 MCREG_FIELD_F_revFG(&sparectl
,
438 case AUTHAMD_FAMILY_10
:
439 MCREG_FIELD_10_revAB(&sparectl
,
440 EccErrCntDramCs
) = cs
;
441 MCREG_FIELD_10_revAB(&sparectl
,
446 authamd_pcicfg_write(procnodeid
, MC_FUNC_MISCCTL
,
447 MC_CTL_REG_SPARECTL
, MCREG_VAL32(&sparectl
));
459 authamd_supported(cmi_hdl_t hdl
)
461 uint_t family
= cmi_hdl_family(hdl
);
464 case AUTHAMD_FAMILY_6
:
465 case AUTHAMD_FAMILY_F
:
466 case AUTHAMD_FAMILY_10
:
474 * cms_init entry point.
476 * This module provides broad model-specific support for AMD families
477 * 0x6, 0xf and 0x10. Future families will have to be evaluated once their
478 * documentation is available.
481 authamd_init(cmi_hdl_t hdl
, void **datap
)
483 uint_t chipid
= cmi_hdl_chipid(hdl
);
484 uint_t procnodeid
= cmi_hdl_procnodeid(hdl
);
485 struct authamd_nodeshared
*sp
, *osp
;
486 uint_t family
= cmi_hdl_family(hdl
);
487 uint32_t rev
= cmi_hdl_chiprev(hdl
);
488 authamd_data_t
*authamd
;
491 if (authamd_ms_support_disable
||
492 !authamd_supported(hdl
))
495 if (!is_x86_feature(x86_featureset
, X86FSET_MCA
))
498 if (cmi_hdl_rdmsr(hdl
, IA32_MSR_MCG_CAP
, &cap
) != CMI_SUCCESS
)
501 if (!(cap
& MCG_CAP_CTL_P
))
504 authamd
= *datap
= kmem_zalloc(sizeof (authamd_data_t
), KM_SLEEP
);
505 cmi_hdl_hold(hdl
); /* release in fini */
506 authamd
->amd_hdl
= hdl
;
508 if ((sp
= authamd_shared
[procnodeid
]) == NULL
) {
509 sp
= kmem_zalloc(sizeof (struct authamd_nodeshared
), KM_SLEEP
);
510 sp
->ans_chipid
= chipid
;
511 sp
->ans_procnodeid
= procnodeid
;
512 sp
->ans_family
= family
;
516 osp
= atomic_cas_ptr(&authamd_shared
[procnodeid
], NULL
, sp
);
518 kmem_free(sp
, sizeof (struct authamd_nodeshared
));
522 authamd
->amd_shared
= sp
;
528 * cms_logout_size entry point.
532 authamd_logout_size(cmi_hdl_t hdl
)
534 return (sizeof (struct authamd_logout
));
538 * cms_mcgctl_val entry point
540 * Instead of setting all bits to 1 we can set just those for the
541 * error detector banks known to exist.
545 authamd_mcgctl_val(cmi_hdl_t hdl
, int nbanks
, uint64_t proposed
)
547 return (nbanks
< 64 ? (1ULL << nbanks
) - 1 : proposed
);
551 * cms_bankctl_skipinit entry point
553 * On K6 we do not initialize MC0_CTL since, reportedly, this bank (for DC)
554 * may produce spurious machine checks.
556 * Only allow a single core to setup the NorthBridge MCi_CTL register.
560 authamd_bankctl_skipinit(cmi_hdl_t hdl
, int bank
)
562 authamd_data_t
*authamd
= cms_hdl_getcmsdata(hdl
);
563 uint32_t rev
= authamd
->amd_shared
->ans_rev
;
565 if (authamd
->amd_shared
->ans_family
== AUTHAMD_FAMILY_6
)
566 return (bank
== 0 ? B_TRUE
: B_FALSE
);
568 if (AUTHAMD_NBONCHIP(rev
) && bank
== AMD_MCA_BANK_NB
) {
569 return (authamd_chip_once(authamd
, AUTHAMD_CFGONCE_NBMCA
) ==
570 B_TRUE
? B_FALSE
: B_TRUE
);
577 * cms_bankctl_val entry point
580 authamd_bankctl_val(cmi_hdl_t hdl
, int bank
, uint64_t proposed
)
582 authamd_data_t
*authamd
= cms_hdl_getcmsdata(hdl
);
583 uint32_t rev
= authamd
->amd_shared
->ans_rev
;
584 uint64_t val
= proposed
;
587 * The Intel MCA says we can write all 1's to enable #MC for
588 * all errors, and AMD docs say much the same. But, depending
589 * perhaps on other config registers, taking machine checks
590 * for some errors such as GART TLB errors and master/target
591 * aborts may be bad - they set UC and sometime also PCC, but
592 * we should not always panic for these error types.
594 * Our cms_error_action entry point can suppress such panics,
595 * however we can also use the cms_bankctl_val entry point to
596 * veto enabling of some of the known villains in the first place.
598 if (bank
== AMD_MCA_BANK_NB
&& AUTHAMD_NOGARTTBLWLK_MC(rev
))
599 val
&= ~AMD_NB_EN_GARTTBLWK
;
605 * Bits to add to NB MCA config (after watchdog config).
607 uint32_t authamd_nb_mcacfg_add
= AMD_NB_CFG_ADD_CMN
;
610 * Bits to remove from NB MCA config (after watchdog config)
612 uint32_t authamd_nb_mcacfg_remove
= AMD_NB_CFG_REMOVE_CMN
;
615 * NB Watchdog policy, and rate we use if enabling.
618 AUTHAMD_NB_WDOG_LEAVEALONE
,
619 AUTHAMD_NB_WDOG_DISABLE
,
620 AUTHAMD_NB_WDOG_ENABLE_IF_DISABLED
,
621 AUTHAMD_NB_WDOG_ENABLE_FORCE_RATE
622 } authamd_nb_watchdog_policy
= AUTHAMD_NB_WDOG_ENABLE_IF_DISABLED
;
624 uint32_t authamd_nb_mcacfg_wdog
= AMD_NB_CFG_WDOGTMRCNTSEL_4095
|
625 AMD_NB_CFG_WDOGTMRBASESEL_1MS
;
628 * Per-core cache scrubbing policy and rates.
631 AUTHAMD_SCRUB_BIOSDEFAULT
, /* leave as BIOS configured */
632 AUTHAMD_SCRUB_FIXED
, /* assign our chosen rate */
633 AUTHAMD_SCRUB_MAX
/* use higher of ours and BIOS rate */
634 } authamd_scrub_policy
= AUTHAMD_SCRUB_MAX
;
636 uint32_t authamd_scrub_rate_dcache
= 0xf; /* 64K per 0.67 seconds */
637 uint32_t authamd_scrub_rate_l2cache
= 0xe; /* 1MB per 5.3 seconds */
638 uint32_t authamd_scrub_rate_l3cache
= 0xd; /* 1MB per 2.7 seconds */
641 authamd_scrubrate(uint32_t osrate
, uint32_t biosrate
, const char *varnm
)
645 if (osrate
> AMD_NB_SCRUBCTL_RATE_MAX
) {
646 cmn_err(CE_WARN
, "%s is too large, resetting to 0x%x\n",
647 varnm
, AMD_NB_SCRUBCTL_RATE_MAX
);
648 osrate
= AMD_NB_SCRUBCTL_RATE_MAX
;
651 switch (authamd_scrub_policy
) {
652 case AUTHAMD_SCRUB_FIXED
:
657 cmn_err(CE_WARN
, "Unknown authamd_scrub_policy %d - "
658 "using default policy of AUTHAMD_SCRUB_MAX",
659 authamd_scrub_policy
);
662 case AUTHAMD_SCRUB_MAX
:
663 if (osrate
!= 0 && biosrate
!= 0)
664 rate
= MIN(osrate
, biosrate
); /* small is fast */
666 rate
= osrate
? osrate
: biosrate
;
673 * cms_mca_init entry point.
677 authamd_mca_init(cmi_hdl_t hdl
, int nbanks
)
679 authamd_data_t
*authamd
= cms_hdl_getcmsdata(hdl
);
680 uint32_t rev
= authamd
->amd_shared
->ans_rev
;
681 uint_t procnodeid
= authamd
->amd_shared
->ans_procnodeid
;
684 * On chips with a NB online spare control register take control
685 * and clear ECC counts.
687 if (AUTHAMD_HAS_ONLINESPARECTL(rev
) &&
688 authamd_chip_once(authamd
, AUTHAMD_CFGONCE_ONLNSPRCFG
)) {
689 authamd_clear_ecccnt(authamd
, B_TRUE
);
693 * And since we are claiming the telemetry stop the BIOS receiving
694 * an SMI on NB threshold overflow.
696 if (AUTHAMD_NBMISC_NUM(rev
) &&
697 authamd_chip_once(authamd
, AUTHAMD_CFGONCE_NBTHRESH
)) {
698 union mcmsr_nbmisc nbm
;
701 authamd_bankstatus_prewrite(hdl
, authamd
);
703 for (i
= 0; i
< AUTHAMD_NBMISC_NUM(rev
); i
++) {
704 if (cmi_hdl_rdmsr(hdl
, MC_MSR_NB_MISC(i
),
705 (uint64_t *)&nbm
) != CMI_SUCCESS
)
708 if (X86_CHIPREV_ATLEAST(rev
, X86_CHIPREV_AMD_F_REV_F
) &&
709 MCMSR_FIELD_F_revFG(&nbm
, mcmisc_Valid
) &&
710 MCMSR_FIELD_F_revFG(&nbm
, mcmisc_CntP
)) {
711 MCMSR_FIELD_F_revFG(&nbm
, mcmisc_IntType
) = 0;
712 } else if (X86_CHIPREV_ATLEAST(rev
,
713 X86_CHIPREV_AMD_10_REV_A
) &&
714 MCMSR_FIELD_10_revAB(&nbm
, mcmisc_Valid
) &&
715 MCMSR_FIELD_10_revAB(&nbm
, mcmisc_CntP
)) {
716 MCMSR_FIELD_10_revAB(&nbm
, mcmisc_IntType
) = 0;
719 (void) cmi_hdl_wrmsr(hdl
, MC_MSR_NB_MISC(i
),
723 authamd_bankstatus_postwrite(hdl
, authamd
);
727 * NB MCA Configuration Register.
729 if (AUTHAMD_DO_NBMCACFG(rev
) &&
730 authamd_chip_once(authamd
, AUTHAMD_CFGONCE_NBMCACFG
)) {
731 uint32_t val
= authamd_pcicfg_read(procnodeid
, MC_FUNC_MISCCTL
,
734 switch (authamd_nb_watchdog_policy
) {
735 case AUTHAMD_NB_WDOG_LEAVEALONE
:
738 case AUTHAMD_NB_WDOG_DISABLE
:
739 val
&= ~(AMD_NB_CFG_WDOGTMRBASESEL_MASK
|
740 AMD_NB_CFG_WDOGTMRCNTSEL_MASK
);
741 val
|= AMD_NB_CFG_WDOGTMRDIS
;
745 cmn_err(CE_NOTE
, "authamd_nb_watchdog_policy=%d "
746 "unrecognised, using default policy",
747 authamd_nb_watchdog_policy
);
750 case AUTHAMD_NB_WDOG_ENABLE_IF_DISABLED
:
751 if (!(val
& AMD_NB_CFG_WDOGTMRDIS
))
752 break; /* if enabled leave rate intact */
755 case AUTHAMD_NB_WDOG_ENABLE_FORCE_RATE
:
756 val
&= ~(AMD_NB_CFG_WDOGTMRBASESEL_MASK
|
757 AMD_NB_CFG_WDOGTMRCNTSEL_MASK
|
758 AMD_NB_CFG_WDOGTMRDIS
);
759 val
|= authamd_nb_mcacfg_wdog
;
764 * Bit 0 of the NB MCA Config register is reserved on family
767 if (X86_CHIPREV_ATLEAST(rev
, X86_CHIPREV_AMD_10_REV_A
))
768 authamd_nb_mcacfg_add
&= ~AMD_NB_CFG_CPUECCERREN
;
770 val
&= ~authamd_nb_mcacfg_remove
;
771 val
|= authamd_nb_mcacfg_add
;
773 authamd_pcicfg_write(procnodeid
, MC_FUNC_MISCCTL
,
774 MC_CTL_REG_NBCFG
, val
);
778 * Cache scrubbing. We can't enable DRAM scrubbing since
779 * we don't know the DRAM base for this node.
781 if (AUTHAMD_HAS_CHIPSCRUB(rev
) &&
782 authamd_scrub_policy
!= AUTHAMD_SCRUB_BIOSDEFAULT
&&
783 authamd_chip_once(authamd
, AUTHAMD_CFGONCE_CACHESCRUB
)) {
784 uint32_t val
= authamd_pcicfg_read(procnodeid
, MC_FUNC_MISCCTL
,
785 MC_CTL_REG_SCRUBCTL
);
788 if (AUTHAMD_L3CAPABLE(rev
)) {
789 l3cap
= (authamd_pcicfg_read(procnodeid
,
790 MC_FUNC_MISCCTL
, MC_CTL_REG_NBCAP
) &
791 MC_NBCAP_L3CAPABLE
) != 0;
794 authamd_scrub_rate_dcache
=
795 authamd_scrubrate(authamd_scrub_rate_dcache
,
796 (val
& AMD_NB_SCRUBCTL_DC_MASK
) >> AMD_NB_SCRUBCTL_DC_SHIFT
,
797 "authamd_scrub_rate_dcache");
799 authamd_scrub_rate_l2cache
=
800 authamd_scrubrate(authamd_scrub_rate_l2cache
,
801 (val
& AMD_NB_SCRUBCTL_L2_MASK
) >> AMD_NB_SCRUBCTL_L2_SHIFT
,
802 "authamd_scrub_rate_l2cache");
804 authamd_scrub_rate_l3cache
= l3cap
?
805 authamd_scrubrate(authamd_scrub_rate_l3cache
,
806 (val
& AMD_NB_SCRUBCTL_L3_MASK
) >> AMD_NB_SCRUBCTL_L3_SHIFT
,
807 "authamd_scrub_rate_l3cache") : 0;
809 val
= AMD_NB_MKSCRUBCTL(authamd_scrub_rate_l3cache
,
810 authamd_scrub_rate_dcache
, authamd_scrub_rate_l2cache
,
811 val
& AMD_NB_SCRUBCTL_DRAM_MASK
);
813 authamd_pcicfg_write(procnodeid
, MC_FUNC_MISCCTL
,
814 MC_CTL_REG_SCRUBCTL
, val
);
818 * ECC symbol size. Defaults to 4.
819 * Set to 8 on systems that support x8 ECC and have it enabled.
821 if (authamd_chip_once(authamd
, AUTHAMD_CFGONCE_ECCSYMSZ
)) {
822 authamd
->amd_shared
->ans_eccsymsz
= "C4";
823 if (AUTHAMD_SUPPORTS_X8ECC(rev
) &&
824 (authamd_pcicfg_read(procnodeid
, MC_FUNC_MISCCTL
,
825 MC_CTL_REG_EXTNBCFG
) & MC_EXTNBCFG_ECCSYMSZ
))
826 authamd
->amd_shared
->ans_eccsymsz
= "C8";
831 * cms_poll_ownermask entry point.
834 authamd_poll_ownermask(cmi_hdl_t hdl
, hrtime_t pintvl
)
836 authamd_data_t
*authamd
= cms_hdl_getcmsdata(hdl
);
837 struct authamd_nodeshared
*ansp
= authamd
->amd_shared
;
838 hrtime_t now
= gethrtime_waitfree();
839 hrtime_t last
= ansp
->ans_poll_timestamp
;
842 if (now
- last
> 2 * pintvl
|| last
== 0) {
843 ansp
->ans_pollowner
= hdl
;
845 } else if (ansp
->ans_pollowner
== hdl
) {
850 ansp
->ans_poll_timestamp
= now
;
852 return (dopoll
? -1ULL : ~(1 << AMD_MCA_BANK_NB
));
857 * cms_bank_logout entry point.
861 authamd_bank_logout(cmi_hdl_t hdl
, int bank
, uint64_t status
,
862 uint64_t addr
, uint64_t misc
, void *mslogout
)
864 authamd_data_t
*authamd
= cms_hdl_getcmsdata(hdl
);
865 struct authamd_logout
*msl
= mslogout
;
866 uint32_t rev
= authamd
->amd_shared
->ans_rev
;
872 * For main memory ECC errors on revisions with an Online Spare
873 * Control Register grab the ECC counts by channel and chip-select
874 * and reset them to 0.
876 if (AUTHAMD_MEMECC_RECOGNISED(rev
) &&
877 AUTHAMD_IS_MEMECCERR(bank
, status
) &&
878 AUTHAMD_HAS_ONLINESPARECTL(rev
)) {
879 if (authamd_read_ecccnt(authamd
, msl
))
880 authamd_clear_ecccnt(authamd
, B_FALSE
);
885 * cms_error_action entry point
888 int authamd_forgive_uc
= 0; /* For test/debug only */
889 int authamd_forgive_pcc
= 0; /* For test/debug only */
890 int authamd_fake_poison
= 0; /* For test/debug only */
894 authamd_error_action(cmi_hdl_t hdl
, int ismc
, int bank
,
895 uint64_t status
, uint64_t addr
, uint64_t misc
, void *mslogout
)
897 authamd_error_disp_t
*disp
;
900 if (authamd_forgive_uc
)
901 rv
|= CMS_ERRSCOPE_CLEARED_UC
;
903 if (authamd_forgive_pcc
)
904 rv
|= CMS_ERRSCOPE_CURCONTEXT_OK
;
906 if (authamd_fake_poison
&& status
& MSR_MC_STATUS_UC
)
907 rv
|= CMS_ERRSCOPE_POISONED
;
912 disp
= authamd_disp_match(hdl
, ismc
, bank
, status
, addr
, misc
,
915 if (disp
== &authamd_gart_disp
) {
917 * GART walk errors set UC and possibly PCC (if source CPU)
918 * but should not be regarded as terminal.
920 return (CMS_ERRSCOPE_IGNORE_ERR
);
924 * May also want to consider master abort and target abort. These
925 * also set UC and PCC (if src CPU) but the requester gets -1
926 * and I believe the IO stuff in Solaris will handle that.
933 * cms_disp_match entry point
937 authamd_disp_match(cmi_hdl_t hdl
, int ismc
, int bank
, uint64_t status
,
938 uint64_t addr
, uint64_t misc
, void *mslogout
)
940 authamd_data_t
*authamd
= cms_hdl_getcmsdata(hdl
);
941 /* uint16_t errcode = MCAX86_ERRCODE(status); */
942 uint16_t exterrcode
= AMD_EXT_ERRCODE(status
);
943 uint32_t rev
= authamd
->amd_shared
->ans_rev
;
946 * Recognise main memory ECC errors
948 if (AUTHAMD_MEMECC_RECOGNISED(rev
) &&
949 AUTHAMD_IS_MEMECCERR(bank
, status
)) {
950 if (status
& AMD_BANK_STAT_CECC
) {
951 return (exterrcode
== 0 ? &authamd_memce_disp
:
952 &authamd_ckmemce_disp
);
953 } else if (status
& AMD_BANK_STAT_UECC
) {
954 return (exterrcode
== 0 ? &authamd_memue_disp
:
955 &authamd_ckmemue_disp
);
960 * Recognise GART walk errors
962 if (AUTHAMD_NOGARTTBLWLK_MC(rev
) && AUTHAMD_IS_GARTERR(bank
, status
))
963 return (&authamd_gart_disp
);
969 * cms_ereport_class entry point
973 authamd_ereport_class(cmi_hdl_t hdl
, cms_cookie_t mscookie
,
974 const char **cpuclsp
, const char **leafclsp
)
976 const authamd_error_disp_t
*aed
= mscookie
;
981 if (aed
->aad_subclass
!= NULL
)
982 *cpuclsp
= aed
->aad_subclass
;
983 if (aed
->aad_leafclass
!= NULL
)
984 *leafclsp
= aed
->aad_leafclass
;
989 authamd_ereport_add_resource(cmi_hdl_t hdl
, authamd_data_t
*authamd
,
990 nvlist_t
*ereport
, nv_alloc_t
*nva
, void *mslogout
)
992 nvlist_t
*elems
[AUTHAMD_DRAM_NCHANNEL
* AUTHAMD_DRAM_NCS
];
993 uint8_t counts
[AUTHAMD_DRAM_NCHANNEL
* AUTHAMD_DRAM_NCS
];
994 authamd_logout_t
*msl
;
998 nvlist_t
*board_list
= NULL
;
1000 if ((msl
= mslogout
) == NULL
)
1003 /* Assume all processors have the same number of nodes */
1004 mc
= authamd
->amd_shared
->ans_procnodeid
%
1005 cpuid_get_procnodes_per_pkg(CPU
);
1007 for (chan
= 0; chan
< AUTHAMD_DRAM_NCHANNEL
; chan
++) {
1008 for (cs
= 0; cs
< AUTHAMD_DRAM_NCS
; cs
++) {
1009 if (msl
->aal_eccerrcnt
[chan
][cs
] == 0)
1012 if ((nvl
= fm_nvlist_create(nva
)) == NULL
)
1015 elems
[nelems
] = nvl
;
1016 counts
[nelems
++] = msl
->aal_eccerrcnt
[chan
][cs
];
1018 if (!x86gentopo_legacy
) {
1019 board_list
= cmi_hdl_smb_bboard(hdl
);
1020 if (board_list
== NULL
)
1022 fm_fmri_hc_create(nvl
, FM_HC_SCHEME_VERSION
,
1023 NULL
, NULL
, board_list
, 4,
1024 "chip", cmi_hdl_smb_chipid(hdl
),
1025 "memory-controller", 0,
1026 "dram-channel", chan
,
1029 fm_fmri_hc_set(nvl
, FM_HC_SCHEME_VERSION
,
1032 "chip", authamd
->amd_shared
->ans_chipid
,
1033 "memory-controller", mc
,
1034 "dram-channel", chan
,
1043 fm_payload_set(ereport
, FM_EREPORT_GENAMD_PAYLOAD_NAME_RESOURCE
,
1044 DATA_TYPE_NVLIST_ARRAY
, nelems
, elems
,
1047 fm_payload_set(ereport
, FM_EREPORT_GENAMD_PAYLOAD_NAME_RESOURCECNT
,
1048 DATA_TYPE_UINT8_ARRAY
, nelems
, &counts
[0],
1051 for (i
= 0; i
< nelems
; i
++)
1052 fm_nvlist_destroy(elems
[i
], nva
? FM_NVA_RETAIN
: FM_NVA_FREE
);
1056 * cms_ereport_add_logout entry point
1060 authamd_ereport_add_logout(cmi_hdl_t hdl
, nvlist_t
*ereport
, nv_alloc_t
*nva
,
1061 int bank
, uint64_t status
, uint64_t addr
, uint64_t misc
,
1062 void *mslogout
, cms_cookie_t mscookie
)
1064 authamd_data_t
*authamd
= cms_hdl_getcmsdata(hdl
);
1065 const authamd_error_disp_t
*aed
= mscookie
;
1071 members
= aed
->aad_ereport_members
;
1073 if (members
& FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYND
) {
1074 fm_payload_set(ereport
, FM_EREPORT_GENAMD_PAYLOAD_NAME_SYND
,
1075 DATA_TYPE_UINT16
, (uint16_t)AMD_BANK_SYND(status
),
1078 if (members
& FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYNDTYPE
) {
1079 fm_payload_set(ereport
,
1080 FM_EREPORT_GENAMD_PAYLOAD_NAME_SYNDTYPE
,
1081 DATA_TYPE_STRING
, "E",
1086 if (members
& FM_EREPORT_GENAMD_PAYLOAD_FLAG_CKSYND
) {
1087 fm_payload_set(ereport
, FM_EREPORT_GENAMD_PAYLOAD_NAME_CKSYND
,
1088 DATA_TYPE_UINT16
, (uint16_t)AMD_NB_STAT_CKSYND(status
),
1091 if (members
& FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYNDTYPE
) {
1092 fm_payload_set(ereport
,
1093 FM_EREPORT_GENAMD_PAYLOAD_NAME_SYNDTYPE
,
1094 DATA_TYPE_STRING
, authamd
->amd_shared
->ans_eccsymsz
,
1099 if (members
& FM_EREPORT_GENAMD_PAYLOAD_FLAG_RESOURCE
&&
1100 status
& MSR_MC_STATUS_ADDRV
) {
1101 authamd_ereport_add_resource(hdl
, authamd
, ereport
, nva
,
1107 * cms_msrinject entry point
1110 authamd_msrinject(cmi_hdl_t hdl
, uint_t msr
, uint64_t val
)
1112 authamd_data_t
*authamd
= cms_hdl_getcmsdata(hdl
);
1113 cms_errno_t rv
= CMSERR_BADMSRWRITE
;
1115 authamd_bankstatus_prewrite(hdl
, authamd
);
1116 if (cmi_hdl_wrmsr(hdl
, msr
, val
) == CMI_SUCCESS
)
1118 authamd_bankstatus_postwrite(hdl
, authamd
);
1123 cms_api_ver_t _cms_api_version
= CMS_API_VERSION_2
;
1125 const cms_ops_t _cms_ops
= {
1126 authamd_init
, /* cms_init */
1127 NULL
, /* cms_post_startup */
1128 NULL
, /* cms_post_mpstartup */
1129 authamd_logout_size
, /* cms_logout_size */
1130 authamd_mcgctl_val
, /* cms_mcgctl_val */
1131 authamd_bankctl_skipinit
, /* cms_bankctl_skipinit */
1132 authamd_bankctl_val
, /* cms_bankctl_val */
1133 NULL
, /* cms_bankstatus_skipinit */
1134 NULL
, /* cms_bankstatus_val */
1135 authamd_mca_init
, /* cms_mca_init */
1136 authamd_poll_ownermask
, /* cms_poll_ownermask */
1137 authamd_bank_logout
, /* cms_bank_logout */
1138 authamd_error_action
, /* cms_error_action */
1139 authamd_disp_match
, /* cms_disp_match */
1140 authamd_ereport_class
, /* cms_ereport_class */
1141 NULL
, /* cms_ereport_detector */
1142 NULL
, /* cms_ereport_includestack */
1143 authamd_ereport_add_logout
, /* cms_ereport_add_logout */
1144 authamd_msrinject
, /* cms_msrinject */
1145 NULL
, /* cms_fini */
1148 static struct modlcpu modlcpu
= {
1150 "Generic AMD model-specific MCA"
1153 static struct modlinkage modlinkage
= {
1162 return (mod_install(&modlinkage
));
1166 _info(struct modinfo
*modinfop
)
1168 return (mod_info(&modlinkage
, modinfop
));
1174 return (mod_remove(&modlinkage
));