Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / i86pc / cpu / authenticamd / authamd_main.c
blobb073734aaa97434fa664b4fc87c29116bb80b828
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * "Generic AMD" model-specific support.  If no more-specific support can
 * be found, or such a module declines to initialize, then for AuthenticAMD
 * cpus this module can have a crack at providing some AMD model-specific
 * support that at least goes beyond common MCA architectural features
 * if not down to the nitty-gritty level for a particular model.  We
 * are layered on top of a cpu module, likely cpu.generic, so there is no
 * need for us to perform common architecturally-accessible functions.
 */
36 #include <sys/types.h>
37 #include <sys/cmn_err.h>
38 #include <sys/modctl.h>
39 #include <sys/cpu_module.h>
40 #include <sys/mca_x86.h>
41 #include <sys/pci_cfgspace.h>
42 #include <sys/x86_archext.h>
43 #include <sys/mc_amd.h>
44 #include <sys/fm/protocol.h>
45 #include <sys/fm/cpu/GENAMD.h>
46 #include <sys/fm/smb/fmsmb.h>
47 #include <sys/fm/util.h>
48 #include <sys/nvpair.h>
49 #include <sys/controlregs.h>
50 #include <sys/pghw.h>
51 #include <sys/sunddi.h>
52 #include <sys/sysmacros.h>
53 #include <sys/cpu_module_ms_impl.h>
55 #include "authamd.h"
/* x86 generic topo support; only declared here, defined elsewhere. */
extern int x86gentopo_legacy;

/*
 * When nonzero, authamd_init() returns ENOTSUP and this module provides
 * no model-specific support.
 */
int authamd_ms_support_disable = 0;
/*
 * Convenience masks that OR together groups of X86_CHIPREV_AMD_*
 * revision constants.
 */
#define	AUTHAMD_F_REVS_BCDE \
	(X86_CHIPREV_AMD_F_REV_B | X86_CHIPREV_AMD_F_REV_C0 | \
	X86_CHIPREV_AMD_F_REV_CG | X86_CHIPREV_AMD_F_REV_D | \
	X86_CHIPREV_AMD_F_REV_E)

#define	AUTHAMD_F_REVS_FG \
	(X86_CHIPREV_AMD_F_REV_F | X86_CHIPREV_AMD_F_REV_G)

#define	AUTHAMD_10_REVS_AB \
	(X86_CHIPREV_AMD_10_REV_A | X86_CHIPREV_AMD_10_REV_B)
/*
 * Bitmasks of support for various features.  Try to enable features
 * via inclusion in one of these bitmasks and check that at the
 * feature implementation - that way new family support may often
 * simply need to update these bitmasks.
 *
 * Each macro takes a chip revision value (as returned by
 * cmi_hdl_chiprev()) and tests it with X86_CHIPREV_ATLEAST().
 */

/*
 * Models that include an on-chip NorthBridge.
 */
#define	AUTHAMD_NBONCHIP(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_B) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions for which we can recognise main memory ECC errors.
 */
#define	AUTHAMD_MEMECC_RECOGNISED(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_B) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that have an Online Spare Control Register
 */
#define	AUTHAMD_HAS_ONLINESPARECTL(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_F) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions for which we will perform NB MCA Config changes
 */
#define	AUTHAMD_DO_NBMCACFG(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_B) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that have chip cache scrubbers.
 */
#define	AUTHAMD_HAS_CHIPSCRUB(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_B) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that have a NB misc register or registers -
 * evaluates to 0 if no support, otherwise the number of MC4_MISCj.
 */
#define	AUTHAMD_NBMISC_NUM(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_F) ? 1 : \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A) ? 3 : 0))

/*
 * Families/revision for which we wish not to machine check for GART
 * table walk errors - bit 10 of NB CTL.
 */
#define	AUTHAMD_NOGARTTBLWLK_MC(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_B) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that are potentially L3 capable
 */
#define	AUTHAMD_L3CAPABLE(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that support x8 ChipKill ECC
 */
#define	AUTHAMD_SUPPORTS_X8ECC(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_D0))
/*
 * We recognise main memory ECC errors for AUTHAMD_MEMECC_RECOGNISED
 * revisions as:
 *
 *	- being reported by the NB
 *	- being a compound bus/interconnect error (external to chip)
 *	- having LL of LG
 *	- having II of MEM (but could still be a master/target abort)
 *	- having CECC or UECC set
 *
 * We do not check the extended error code (first nibble of the
 * model-specific error code on AMD) since this has changed from
 * family 0xf to family 0x10 (ext code 0 now reserved on family 0x10).
 * Instead we use CECC/UECC to separate off the master/target
 * abort cases.
 *
 * We insist that the detector be the NorthBridge bank;  although
 * IC/DC can report some main memory errors, they do not capture
 * an address at sufficient resolution to be useful and the NB will
 * report most errors.
 *
 * Note that `status' is expanded several times, so callers should pass
 * an expression without side effects.
 */
#define	AUTHAMD_IS_MEMECCERR(bank, status) \
	((bank) == AMD_MCA_BANK_NB && \
	MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status)) && \
	MCAX86_ERRCODE_LL(MCAX86_ERRCODE(status)) == MCAX86_ERRCODE_LL_LG && \
	MCAX86_ERRCODE_II(MCAX86_ERRCODE(status)) == MCAX86_ERRCODE_II_MEM && \
	((status) & (AMD_BANK_STAT_CECC | AMD_BANK_STAT_UECC)))
170 static authamd_error_disp_t authamd_memce_disp = {
171 FM_EREPORT_CPU_GENAMD,
172 FM_EREPORT_CPU_GENAMD_MEM_CE,
173 FM_EREPORT_GENAMD_PAYLOAD_FLAGS_MEM_CE
176 static authamd_error_disp_t authamd_memue_disp = {
177 FM_EREPORT_CPU_GENAMD,
178 FM_EREPORT_CPU_GENAMD_MEM_UE,
179 FM_EREPORT_GENAMD_PAYLOAD_FLAGS_MEM_UE
182 static authamd_error_disp_t authamd_ckmemce_disp = {
183 FM_EREPORT_CPU_GENAMD,
184 FM_EREPORT_CPU_GENAMD_CKMEM_CE,
185 FM_EREPORT_GENAMD_PAYLOAD_FLAGS_CKMEM_CE
188 static authamd_error_disp_t authamd_ckmemue_disp = {
189 FM_EREPORT_CPU_GENAMD,
190 FM_EREPORT_CPU_GENAMD_CKMEM_UE,
191 FM_EREPORT_GENAMD_PAYLOAD_FLAGS_CKMEM_UE
/*
 * We recognise GART walk errors as:
 *
 *	- being reported by the NB
 *	- being a compound TLB error
 *	- having LL of LG and TT of GEN
 *	- having UC set
 *	- possibly having PCC set (if source CPU)
 *
 * `status' is expanded several times, so callers should pass an
 * expression without side effects.
 */
#define	AUTHAMD_IS_GARTERR(bank, status) \
	((bank) == AMD_MCA_BANK_NB && \
	MCAX86_ERRCODE_ISTLB(MCAX86_ERRCODE(status)) && \
	MCAX86_ERRCODE_LL(MCAX86_ERRCODE(status)) == MCAX86_ERRCODE_LL_LG && \
	MCAX86_ERRCODE_TT(MCAX86_ERRCODE(status)) == MCAX86_ERRCODE_TT_GEN && \
	((status) & MSR_MC_STATUS_UC))
210 static authamd_error_disp_t authamd_gart_disp = {
211 FM_EREPORT_CPU_GENAMD, /* use generic subclass */
212 FM_EREPORT_CPU_GENADM_GARTTBLWLK, /* use generic leafclass */
213 0 /* no additional payload */
217 static struct authamd_nodeshared *authamd_shared[AUTHAMD_MAX_NODES];
219 static int
220 authamd_chip_once(authamd_data_t *authamd, enum authamd_cfgonce_bitnum what)
222 return (atomic_set_long_excl(&authamd->amd_shared->ans_cfgonce,
223 what) == 0 ? B_TRUE : B_FALSE);
226 static void
227 authamd_pcicfg_write(uint_t procnodeid, uint_t func, uint_t reg, uint32_t val)
229 ASSERT(procnodeid + 24 <= 31);
230 ASSERT((func & 7) == func);
231 ASSERT((reg & 3) == 0 && reg < 4096);
233 cmi_pci_putl(0, procnodeid + 24, func, reg, 0, val);
236 static uint32_t
237 authamd_pcicfg_read(uint_t procnodeid, uint_t func, uint_t reg)
239 ASSERT(procnodeid + 24 <= 31);
240 ASSERT((func & 7) == func);
241 ASSERT((reg & 3) == 0 && reg < 4096);
243 return (cmi_pci_getl(0, procnodeid + 24, func, reg, 0, 0));
246 void
247 authamd_bankstatus_prewrite(cmi_hdl_t hdl, authamd_data_t *authamd)
249 uint64_t hwcr;
251 if (cmi_hdl_rdmsr(hdl, MSR_AMD_HWCR, &hwcr) != CMI_SUCCESS)
252 return;
254 authamd->amd_hwcr = hwcr;
256 if (!(hwcr & AMD_HWCR_MCI_STATUS_WREN)) {
257 hwcr |= AMD_HWCR_MCI_STATUS_WREN;
258 (void) cmi_hdl_wrmsr(hdl, MSR_AMD_HWCR, hwcr);
262 void
263 authamd_bankstatus_postwrite(cmi_hdl_t hdl, authamd_data_t *authamd)
265 uint64_t hwcr = authamd->amd_hwcr;
267 if (!(hwcr & AMD_HWCR_MCI_STATUS_WREN)) {
268 hwcr &= ~AMD_HWCR_MCI_STATUS_WREN;
269 (void) cmi_hdl_wrmsr(hdl, MSR_AMD_HWCR, hwcr);
274 * Read EccCnt repeatedly for all possible channel/chip-select combos:
276 * - read sparectl register
277 * - if EccErrCntWrEn is set, clear that bit in the just-read value
278 * and write it back to sparectl; this *may* clobber the EccCnt
279 * for the channel/chip-select combination currently selected, so
280 * we leave this bit clear if we had to clear it
281 * - cycle through all channel/chip-select combinations writing each
282 * combination to sparectl before reading the register back for
283 * EccCnt for that combination; since EccErrCntWrEn is clear
284 * the writes to select what count to read will not themselves
285 * zero any counts
287 static int
288 authamd_read_ecccnt(authamd_data_t *authamd, struct authamd_logout *msl)
290 union mcreg_sparectl sparectl;
291 uint_t procnodeid = authamd->amd_shared->ans_procnodeid;
292 uint_t family = authamd->amd_shared->ans_family;
293 uint32_t rev = authamd->amd_shared->ans_rev;
294 int chan, cs;
297 * Check for feature support; this macro will test down to the
298 * family revision number, whereafter we'll switch on family
299 * assuming that future revisions will use the same register
300 * format.
302 if (!AUTHAMD_HAS_ONLINESPARECTL(rev)) {
303 bzero(&msl->aal_eccerrcnt, sizeof (msl->aal_eccerrcnt));
304 return (0);
307 MCREG_VAL32(&sparectl) =
308 authamd_pcicfg_read(procnodeid, MC_FUNC_MISCCTL,
309 MC_CTL_REG_SPARECTL);
311 switch (family) {
312 case AUTHAMD_FAMILY_F:
313 MCREG_FIELD_F_revFG(&sparectl, EccErrCntWrEn) = 0;
314 break;
316 case AUTHAMD_FAMILY_10:
317 MCREG_FIELD_10_revAB(&sparectl, EccErrCntWrEn) = 0;
318 break;
321 for (chan = 0; chan < AUTHAMD_DRAM_NCHANNEL; chan++) {
322 switch (family) {
323 case AUTHAMD_FAMILY_F:
324 MCREG_FIELD_F_revFG(&sparectl, EccErrCntDramChan) =
325 chan;
326 break;
328 case AUTHAMD_FAMILY_10:
329 MCREG_FIELD_10_revAB(&sparectl, EccErrCntDramChan) =
330 chan;
331 break;
334 for (cs = 0; cs < AUTHAMD_DRAM_NCS; cs++) {
335 switch (family) {
336 case AUTHAMD_FAMILY_F:
337 MCREG_FIELD_F_revFG(&sparectl,
338 EccErrCntDramCs) = cs;
339 break;
341 case AUTHAMD_FAMILY_10:
342 MCREG_FIELD_10_revAB(&sparectl,
343 EccErrCntDramCs) = cs;
344 break;
347 authamd_pcicfg_write(procnodeid, MC_FUNC_MISCCTL,
348 MC_CTL_REG_SPARECTL, MCREG_VAL32(&sparectl));
350 MCREG_VAL32(&sparectl) = authamd_pcicfg_read(procnodeid,
351 MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL);
353 switch (family) {
354 case AUTHAMD_FAMILY_F:
355 msl->aal_eccerrcnt[chan][cs] =
356 MCREG_FIELD_F_revFG(&sparectl, EccErrCnt);
357 break;
358 case AUTHAMD_FAMILY_10:
359 msl->aal_eccerrcnt[chan][cs] =
360 MCREG_FIELD_10_revAB(&sparectl, EccErrCnt);
361 break;
366 return (1);
370 * Clear EccCnt for all possible channel/chip-select combos:
372 * - set EccErrCntWrEn in sparectl, if necessary
373 * - write 0 to EccCnt for all channel/chip-select combinations
374 * - clear EccErrCntWrEn
376 * If requested also disable the interrupts taken on counter overflow
377 * and on swap done.
379 static void
380 authamd_clear_ecccnt(authamd_data_t *authamd, boolean_t clrint)
382 union mcreg_sparectl sparectl;
383 uint_t procnodeid = authamd->amd_shared->ans_procnodeid;
384 uint_t family = authamd->amd_shared->ans_family;
385 uint32_t rev = authamd->amd_shared->ans_rev;
386 int chan, cs;
388 if (!AUTHAMD_HAS_ONLINESPARECTL(rev))
389 return;
391 MCREG_VAL32(&sparectl) =
392 authamd_pcicfg_read(procnodeid, MC_FUNC_MISCCTL,
393 MC_CTL_REG_SPARECTL);
395 switch (family) {
396 case AUTHAMD_FAMILY_F:
397 MCREG_FIELD_F_revFG(&sparectl, EccErrCntWrEn) = 1;
398 if (clrint) {
399 MCREG_FIELD_F_revFG(&sparectl, EccErrInt) = 0;
400 MCREG_FIELD_F_revFG(&sparectl, SwapDoneInt) = 0;
402 break;
404 case AUTHAMD_FAMILY_10:
405 MCREG_FIELD_10_revAB(&sparectl, EccErrCntWrEn) = 1;
406 if (clrint) {
407 MCREG_FIELD_10_revAB(&sparectl, EccErrInt) = 0;
408 MCREG_FIELD_10_revAB(&sparectl, SwapDoneInt) = 0;
410 break;
413 authamd_pcicfg_write(procnodeid, MC_FUNC_MISCCTL,
414 MC_CTL_REG_SPARECTL, MCREG_VAL32(&sparectl));
416 for (chan = 0; chan < AUTHAMD_DRAM_NCHANNEL; chan++) {
417 switch (family) {
418 case AUTHAMD_FAMILY_F:
419 MCREG_FIELD_F_revFG(&sparectl, EccErrCntDramChan) =
420 chan;
421 break;
423 case AUTHAMD_FAMILY_10:
424 MCREG_FIELD_10_revAB(&sparectl, EccErrCntDramChan) =
425 chan;
426 break;
429 for (cs = 0; cs < AUTHAMD_DRAM_NCS; cs++) {
430 switch (family) {
431 case AUTHAMD_FAMILY_F:
432 MCREG_FIELD_F_revFG(&sparectl,
433 EccErrCntDramCs) = cs;
434 MCREG_FIELD_F_revFG(&sparectl,
435 EccErrCnt) = 0;
436 break;
438 case AUTHAMD_FAMILY_10:
439 MCREG_FIELD_10_revAB(&sparectl,
440 EccErrCntDramCs) = cs;
441 MCREG_FIELD_10_revAB(&sparectl,
442 EccErrCnt) = 0;
443 break;
446 authamd_pcicfg_write(procnodeid, MC_FUNC_MISCCTL,
447 MC_CTL_REG_SPARECTL, MCREG_VAL32(&sparectl));
454 * Return
455 * 1: supported
456 * 0: unsupported
458 static int
459 authamd_supported(cmi_hdl_t hdl)
461 uint_t family = cmi_hdl_family(hdl);
463 switch (family) {
464 case AUTHAMD_FAMILY_6:
465 case AUTHAMD_FAMILY_F:
466 case AUTHAMD_FAMILY_10:
467 return (1);
468 default:
469 return (0);
474 * cms_init entry point.
476 * This module provides broad model-specific support for AMD families
477 * 0x6, 0xf and 0x10. Future families will have to be evaluated once their
478 * documentation is available.
481 authamd_init(cmi_hdl_t hdl, void **datap)
483 uint_t chipid = cmi_hdl_chipid(hdl);
484 uint_t procnodeid = cmi_hdl_procnodeid(hdl);
485 struct authamd_nodeshared *sp, *osp;
486 uint_t family = cmi_hdl_family(hdl);
487 uint32_t rev = cmi_hdl_chiprev(hdl);
488 authamd_data_t *authamd;
489 uint64_t cap;
491 if (authamd_ms_support_disable ||
492 !authamd_supported(hdl))
493 return (ENOTSUP);
495 if (!is_x86_feature(x86_featureset, X86FSET_MCA))
496 return (ENOTSUP);
498 if (cmi_hdl_rdmsr(hdl, IA32_MSR_MCG_CAP, &cap) != CMI_SUCCESS)
499 return (ENOTSUP);
501 if (!(cap & MCG_CAP_CTL_P))
502 return (ENOTSUP);
504 authamd = *datap = kmem_zalloc(sizeof (authamd_data_t), KM_SLEEP);
505 cmi_hdl_hold(hdl); /* release in fini */
506 authamd->amd_hdl = hdl;
508 if ((sp = authamd_shared[procnodeid]) == NULL) {
509 sp = kmem_zalloc(sizeof (struct authamd_nodeshared), KM_SLEEP);
510 sp->ans_chipid = chipid;
511 sp->ans_procnodeid = procnodeid;
512 sp->ans_family = family;
513 sp->ans_rev = rev;
514 membar_producer();
516 osp = atomic_cas_ptr(&authamd_shared[procnodeid], NULL, sp);
517 if (osp != NULL) {
518 kmem_free(sp, sizeof (struct authamd_nodeshared));
519 sp = osp;
522 authamd->amd_shared = sp;
524 return (0);
528 * cms_logout_size entry point.
530 /*ARGSUSED*/
531 size_t
532 authamd_logout_size(cmi_hdl_t hdl)
534 return (sizeof (struct authamd_logout));
538 * cms_mcgctl_val entry point
540 * Instead of setting all bits to 1 we can set just those for the
541 * error detector banks known to exist.
543 /*ARGSUSED*/
544 uint64_t
545 authamd_mcgctl_val(cmi_hdl_t hdl, int nbanks, uint64_t proposed)
547 return (nbanks < 64 ? (1ULL << nbanks) - 1 : proposed);
551 * cms_bankctl_skipinit entry point
553 * On K6 we do not initialize MC0_CTL since, reportedly, this bank (for DC)
554 * may produce spurious machine checks.
556 * Only allow a single core to setup the NorthBridge MCi_CTL register.
558 /*ARGSUSED*/
559 boolean_t
560 authamd_bankctl_skipinit(cmi_hdl_t hdl, int bank)
562 authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
563 uint32_t rev = authamd->amd_shared->ans_rev;
565 if (authamd->amd_shared->ans_family == AUTHAMD_FAMILY_6)
566 return (bank == 0 ? B_TRUE : B_FALSE);
568 if (AUTHAMD_NBONCHIP(rev) && bank == AMD_MCA_BANK_NB) {
569 return (authamd_chip_once(authamd, AUTHAMD_CFGONCE_NBMCA) ==
570 B_TRUE ? B_FALSE : B_TRUE);
573 return (B_FALSE);
577 * cms_bankctl_val entry point
579 uint64_t
580 authamd_bankctl_val(cmi_hdl_t hdl, int bank, uint64_t proposed)
582 authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
583 uint32_t rev = authamd->amd_shared->ans_rev;
584 uint64_t val = proposed;
587 * The Intel MCA says we can write all 1's to enable #MC for
588 * all errors, and AMD docs say much the same. But, depending
589 * perhaps on other config registers, taking machine checks
590 * for some errors such as GART TLB errors and master/target
591 * aborts may be bad - they set UC and sometime also PCC, but
592 * we should not always panic for these error types.
594 * Our cms_error_action entry point can suppress such panics,
595 * however we can also use the cms_bankctl_val entry point to
596 * veto enabling of some of the known villains in the first place.
598 if (bank == AMD_MCA_BANK_NB && AUTHAMD_NOGARTTBLWLK_MC(rev))
599 val &= ~AMD_NB_EN_GARTTBLWK;
601 return (val);
605 * Bits to add to NB MCA config (after watchdog config).
607 uint32_t authamd_nb_mcacfg_add = AMD_NB_CFG_ADD_CMN;
610 * Bits to remove from NB MCA config (after watchdog config)
612 uint32_t authamd_nb_mcacfg_remove = AMD_NB_CFG_REMOVE_CMN;
615 * NB Watchdog policy, and rate we use if enabling.
617 enum {
618 AUTHAMD_NB_WDOG_LEAVEALONE,
619 AUTHAMD_NB_WDOG_DISABLE,
620 AUTHAMD_NB_WDOG_ENABLE_IF_DISABLED,
621 AUTHAMD_NB_WDOG_ENABLE_FORCE_RATE
622 } authamd_nb_watchdog_policy = AUTHAMD_NB_WDOG_ENABLE_IF_DISABLED;
624 uint32_t authamd_nb_mcacfg_wdog = AMD_NB_CFG_WDOGTMRCNTSEL_4095 |
625 AMD_NB_CFG_WDOGTMRBASESEL_1MS;
/*
 * Per-core cache scrubbing policy and rates.
 */
enum {
	AUTHAMD_SCRUB_BIOSDEFAULT,	/* leave as BIOS configured */
	AUTHAMD_SCRUB_FIXED,		/* assign our chosen rate */
	AUTHAMD_SCRUB_MAX		/* use higher of ours and BIOS rate */
} authamd_scrub_policy = AUTHAMD_SCRUB_MAX;

uint32_t authamd_scrub_rate_dcache = 0xf;	/* 64K per 0.67 seconds */
uint32_t authamd_scrub_rate_l2cache = 0xe;	/* 1MB per 5.3 seconds */
uint32_t authamd_scrub_rate_l3cache = 0xd;	/* 1MB per 2.7 seconds */
640 static uint32_t
641 authamd_scrubrate(uint32_t osrate, uint32_t biosrate, const char *varnm)
643 uint32_t rate;
645 if (osrate > AMD_NB_SCRUBCTL_RATE_MAX) {
646 cmn_err(CE_WARN, "%s is too large, resetting to 0x%x\n",
647 varnm, AMD_NB_SCRUBCTL_RATE_MAX);
648 osrate = AMD_NB_SCRUBCTL_RATE_MAX;
651 switch (authamd_scrub_policy) {
652 case AUTHAMD_SCRUB_FIXED:
653 rate = osrate;
654 break;
656 default:
657 cmn_err(CE_WARN, "Unknown authamd_scrub_policy %d - "
658 "using default policy of AUTHAMD_SCRUB_MAX",
659 authamd_scrub_policy);
660 /*FALLTHRU*/
662 case AUTHAMD_SCRUB_MAX:
663 if (osrate != 0 && biosrate != 0)
664 rate = MIN(osrate, biosrate); /* small is fast */
665 else
666 rate = osrate ? osrate : biosrate;
669 return (rate);
673 * cms_mca_init entry point.
675 /*ARGSUSED*/
676 void
677 authamd_mca_init(cmi_hdl_t hdl, int nbanks)
679 authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
680 uint32_t rev = authamd->amd_shared->ans_rev;
681 uint_t procnodeid = authamd->amd_shared->ans_procnodeid;
684 * On chips with a NB online spare control register take control
685 * and clear ECC counts.
687 if (AUTHAMD_HAS_ONLINESPARECTL(rev) &&
688 authamd_chip_once(authamd, AUTHAMD_CFGONCE_ONLNSPRCFG)) {
689 authamd_clear_ecccnt(authamd, B_TRUE);
693 * And since we are claiming the telemetry stop the BIOS receiving
694 * an SMI on NB threshold overflow.
696 if (AUTHAMD_NBMISC_NUM(rev) &&
697 authamd_chip_once(authamd, AUTHAMD_CFGONCE_NBTHRESH)) {
698 union mcmsr_nbmisc nbm;
699 int i;
701 authamd_bankstatus_prewrite(hdl, authamd);
703 for (i = 0; i < AUTHAMD_NBMISC_NUM(rev); i++) {
704 if (cmi_hdl_rdmsr(hdl, MC_MSR_NB_MISC(i),
705 (uint64_t *)&nbm) != CMI_SUCCESS)
706 continue;
708 if (X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_F) &&
709 MCMSR_FIELD_F_revFG(&nbm, mcmisc_Valid) &&
710 MCMSR_FIELD_F_revFG(&nbm, mcmisc_CntP)) {
711 MCMSR_FIELD_F_revFG(&nbm, mcmisc_IntType) = 0;
712 } else if (X86_CHIPREV_ATLEAST(rev,
713 X86_CHIPREV_AMD_10_REV_A) &&
714 MCMSR_FIELD_10_revAB(&nbm, mcmisc_Valid) &&
715 MCMSR_FIELD_10_revAB(&nbm, mcmisc_CntP)) {
716 MCMSR_FIELD_10_revAB(&nbm, mcmisc_IntType) = 0;
719 (void) cmi_hdl_wrmsr(hdl, MC_MSR_NB_MISC(i),
720 MCMSR_VAL(&nbm));
723 authamd_bankstatus_postwrite(hdl, authamd);
727 * NB MCA Configuration Register.
729 if (AUTHAMD_DO_NBMCACFG(rev) &&
730 authamd_chip_once(authamd, AUTHAMD_CFGONCE_NBMCACFG)) {
731 uint32_t val = authamd_pcicfg_read(procnodeid, MC_FUNC_MISCCTL,
732 MC_CTL_REG_NBCFG);
734 switch (authamd_nb_watchdog_policy) {
735 case AUTHAMD_NB_WDOG_LEAVEALONE:
736 break;
738 case AUTHAMD_NB_WDOG_DISABLE:
739 val &= ~(AMD_NB_CFG_WDOGTMRBASESEL_MASK |
740 AMD_NB_CFG_WDOGTMRCNTSEL_MASK);
741 val |= AMD_NB_CFG_WDOGTMRDIS;
742 break;
744 default:
745 cmn_err(CE_NOTE, "authamd_nb_watchdog_policy=%d "
746 "unrecognised, using default policy",
747 authamd_nb_watchdog_policy);
748 /*FALLTHRU*/
750 case AUTHAMD_NB_WDOG_ENABLE_IF_DISABLED:
751 if (!(val & AMD_NB_CFG_WDOGTMRDIS))
752 break; /* if enabled leave rate intact */
753 /*FALLTHRU*/
755 case AUTHAMD_NB_WDOG_ENABLE_FORCE_RATE:
756 val &= ~(AMD_NB_CFG_WDOGTMRBASESEL_MASK |
757 AMD_NB_CFG_WDOGTMRCNTSEL_MASK |
758 AMD_NB_CFG_WDOGTMRDIS);
759 val |= authamd_nb_mcacfg_wdog;
760 break;
764 * Bit 0 of the NB MCA Config register is reserved on family
765 * 0x10.
767 if (X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))
768 authamd_nb_mcacfg_add &= ~AMD_NB_CFG_CPUECCERREN;
770 val &= ~authamd_nb_mcacfg_remove;
771 val |= authamd_nb_mcacfg_add;
773 authamd_pcicfg_write(procnodeid, MC_FUNC_MISCCTL,
774 MC_CTL_REG_NBCFG, val);
778 * Cache scrubbing. We can't enable DRAM scrubbing since
779 * we don't know the DRAM base for this node.
781 if (AUTHAMD_HAS_CHIPSCRUB(rev) &&
782 authamd_scrub_policy != AUTHAMD_SCRUB_BIOSDEFAULT &&
783 authamd_chip_once(authamd, AUTHAMD_CFGONCE_CACHESCRUB)) {
784 uint32_t val = authamd_pcicfg_read(procnodeid, MC_FUNC_MISCCTL,
785 MC_CTL_REG_SCRUBCTL);
786 int l3cap = 0;
788 if (AUTHAMD_L3CAPABLE(rev)) {
789 l3cap = (authamd_pcicfg_read(procnodeid,
790 MC_FUNC_MISCCTL, MC_CTL_REG_NBCAP) &
791 MC_NBCAP_L3CAPABLE) != 0;
794 authamd_scrub_rate_dcache =
795 authamd_scrubrate(authamd_scrub_rate_dcache,
796 (val & AMD_NB_SCRUBCTL_DC_MASK) >> AMD_NB_SCRUBCTL_DC_SHIFT,
797 "authamd_scrub_rate_dcache");
799 authamd_scrub_rate_l2cache =
800 authamd_scrubrate(authamd_scrub_rate_l2cache,
801 (val & AMD_NB_SCRUBCTL_L2_MASK) >> AMD_NB_SCRUBCTL_L2_SHIFT,
802 "authamd_scrub_rate_l2cache");
804 authamd_scrub_rate_l3cache = l3cap ?
805 authamd_scrubrate(authamd_scrub_rate_l3cache,
806 (val & AMD_NB_SCRUBCTL_L3_MASK) >> AMD_NB_SCRUBCTL_L3_SHIFT,
807 "authamd_scrub_rate_l3cache") : 0;
809 val = AMD_NB_MKSCRUBCTL(authamd_scrub_rate_l3cache,
810 authamd_scrub_rate_dcache, authamd_scrub_rate_l2cache,
811 val & AMD_NB_SCRUBCTL_DRAM_MASK);
813 authamd_pcicfg_write(procnodeid, MC_FUNC_MISCCTL,
814 MC_CTL_REG_SCRUBCTL, val);
818 * ECC symbol size. Defaults to 4.
819 * Set to 8 on systems that support x8 ECC and have it enabled.
821 if (authamd_chip_once(authamd, AUTHAMD_CFGONCE_ECCSYMSZ)) {
822 authamd->amd_shared->ans_eccsymsz = "C4";
823 if (AUTHAMD_SUPPORTS_X8ECC(rev) &&
824 (authamd_pcicfg_read(procnodeid, MC_FUNC_MISCCTL,
825 MC_CTL_REG_EXTNBCFG) & MC_EXTNBCFG_ECCSYMSZ))
826 authamd->amd_shared->ans_eccsymsz = "C8";
831 * cms_poll_ownermask entry point.
833 uint64_t
834 authamd_poll_ownermask(cmi_hdl_t hdl, hrtime_t pintvl)
836 authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
837 struct authamd_nodeshared *ansp = authamd->amd_shared;
838 hrtime_t now = gethrtime_waitfree();
839 hrtime_t last = ansp->ans_poll_timestamp;
840 int dopoll = 0;
842 if (now - last > 2 * pintvl || last == 0) {
843 ansp->ans_pollowner = hdl;
844 dopoll = 1;
845 } else if (ansp->ans_pollowner == hdl) {
846 dopoll = 1;
849 if (dopoll)
850 ansp->ans_poll_timestamp = now;
852 return (dopoll ? -1ULL : ~(1 << AMD_MCA_BANK_NB));
857 * cms_bank_logout entry point.
859 /*ARGSUSED*/
860 void
861 authamd_bank_logout(cmi_hdl_t hdl, int bank, uint64_t status,
862 uint64_t addr, uint64_t misc, void *mslogout)
864 authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
865 struct authamd_logout *msl = mslogout;
866 uint32_t rev = authamd->amd_shared->ans_rev;
868 if (msl == NULL)
869 return;
872 * For main memory ECC errors on revisions with an Online Spare
873 * Control Register grab the ECC counts by channel and chip-select
874 * and reset them to 0.
876 if (AUTHAMD_MEMECC_RECOGNISED(rev) &&
877 AUTHAMD_IS_MEMECCERR(bank, status) &&
878 AUTHAMD_HAS_ONLINESPARECTL(rev)) {
879 if (authamd_read_ecccnt(authamd, msl))
880 authamd_clear_ecccnt(authamd, B_FALSE);
885 * cms_error_action entry point
888 int authamd_forgive_uc = 0; /* For test/debug only */
889 int authamd_forgive_pcc = 0; /* For test/debug only */
890 int authamd_fake_poison = 0; /* For test/debug only */
892 /*ARGSUSED*/
893 uint32_t
894 authamd_error_action(cmi_hdl_t hdl, int ismc, int bank,
895 uint64_t status, uint64_t addr, uint64_t misc, void *mslogout)
897 authamd_error_disp_t *disp;
898 uint32_t rv = 0;
900 if (authamd_forgive_uc)
901 rv |= CMS_ERRSCOPE_CLEARED_UC;
903 if (authamd_forgive_pcc)
904 rv |= CMS_ERRSCOPE_CURCONTEXT_OK;
906 if (authamd_fake_poison && status & MSR_MC_STATUS_UC)
907 rv |= CMS_ERRSCOPE_POISONED;
909 if (rv)
910 return (rv);
912 disp = authamd_disp_match(hdl, ismc, bank, status, addr, misc,
913 mslogout);
915 if (disp == &authamd_gart_disp) {
917 * GART walk errors set UC and possibly PCC (if source CPU)
918 * but should not be regarded as terminal.
920 return (CMS_ERRSCOPE_IGNORE_ERR);
924 * May also want to consider master abort and target abort. These
925 * also set UC and PCC (if src CPU) but the requester gets -1
926 * and I believe the IO stuff in Solaris will handle that.
929 return (rv);
933 * cms_disp_match entry point
935 /*ARGSUSED*/
936 cms_cookie_t
937 authamd_disp_match(cmi_hdl_t hdl, int ismc, int bank, uint64_t status,
938 uint64_t addr, uint64_t misc, void *mslogout)
940 authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
941 /* uint16_t errcode = MCAX86_ERRCODE(status); */
942 uint16_t exterrcode = AMD_EXT_ERRCODE(status);
943 uint32_t rev = authamd->amd_shared->ans_rev;
946 * Recognise main memory ECC errors
948 if (AUTHAMD_MEMECC_RECOGNISED(rev) &&
949 AUTHAMD_IS_MEMECCERR(bank, status)) {
950 if (status & AMD_BANK_STAT_CECC) {
951 return (exterrcode == 0 ? &authamd_memce_disp :
952 &authamd_ckmemce_disp);
953 } else if (status & AMD_BANK_STAT_UECC) {
954 return (exterrcode == 0 ? &authamd_memue_disp :
955 &authamd_ckmemue_disp);
960 * Recognise GART walk errors
962 if (AUTHAMD_NOGARTTBLWLK_MC(rev) && AUTHAMD_IS_GARTERR(bank, status))
963 return (&authamd_gart_disp);
965 return (NULL);
969 * cms_ereport_class entry point
971 /*ARGSUSED*/
972 void
973 authamd_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie,
974 const char **cpuclsp, const char **leafclsp)
976 const authamd_error_disp_t *aed = mscookie;
978 if (aed == NULL)
979 return;
981 if (aed->aad_subclass != NULL)
982 *cpuclsp = aed->aad_subclass;
983 if (aed->aad_leafclass != NULL)
984 *leafclsp = aed->aad_leafclass;
987 /*ARGSUSED*/
988 static void
989 authamd_ereport_add_resource(cmi_hdl_t hdl, authamd_data_t *authamd,
990 nvlist_t *ereport, nv_alloc_t *nva, void *mslogout)
992 nvlist_t *elems[AUTHAMD_DRAM_NCHANNEL * AUTHAMD_DRAM_NCS];
993 uint8_t counts[AUTHAMD_DRAM_NCHANNEL * AUTHAMD_DRAM_NCS];
994 authamd_logout_t *msl;
995 nvlist_t *nvl;
996 int nelems = 0;
997 int i, chan, cs, mc;
998 nvlist_t *board_list = NULL;
1000 if ((msl = mslogout) == NULL)
1001 return;
1003 /* Assume all processors have the same number of nodes */
1004 mc = authamd->amd_shared->ans_procnodeid %
1005 cpuid_get_procnodes_per_pkg(CPU);
1007 for (chan = 0; chan < AUTHAMD_DRAM_NCHANNEL; chan++) {
1008 for (cs = 0; cs < AUTHAMD_DRAM_NCS; cs++) {
1009 if (msl->aal_eccerrcnt[chan][cs] == 0)
1010 continue;
1012 if ((nvl = fm_nvlist_create(nva)) == NULL)
1013 continue;
1015 elems[nelems] = nvl;
1016 counts[nelems++] = msl->aal_eccerrcnt[chan][cs];
1018 if (!x86gentopo_legacy) {
1019 board_list = cmi_hdl_smb_bboard(hdl);
1020 if (board_list == NULL)
1021 continue;
1022 fm_fmri_hc_create(nvl, FM_HC_SCHEME_VERSION,
1023 NULL, NULL, board_list, 4,
1024 "chip", cmi_hdl_smb_chipid(hdl),
1025 "memory-controller", 0,
1026 "dram-channel", chan,
1027 "chip-select", cs);
1028 } else {
1029 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION,
1030 NULL, NULL, 5,
1031 "motherboard", 0,
1032 "chip", authamd->amd_shared->ans_chipid,
1033 "memory-controller", mc,
1034 "dram-channel", chan,
1035 "chip-select", cs);
1040 if (nelems == 0)
1041 return;
1043 fm_payload_set(ereport, FM_EREPORT_GENAMD_PAYLOAD_NAME_RESOURCE,
1044 DATA_TYPE_NVLIST_ARRAY, nelems, elems,
1045 NULL);
1047 fm_payload_set(ereport, FM_EREPORT_GENAMD_PAYLOAD_NAME_RESOURCECNT,
1048 DATA_TYPE_UINT8_ARRAY, nelems, &counts[0],
1049 NULL);
1051 for (i = 0; i < nelems; i++)
1052 fm_nvlist_destroy(elems[i], nva ? FM_NVA_RETAIN : FM_NVA_FREE);
1056 * cms_ereport_add_logout entry point
1058 /*ARGSUSED*/
1059 void
1060 authamd_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport, nv_alloc_t *nva,
1061 int bank, uint64_t status, uint64_t addr, uint64_t misc,
1062 void *mslogout, cms_cookie_t mscookie)
1064 authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
1065 const authamd_error_disp_t *aed = mscookie;
1066 uint64_t members;
1068 if (aed == NULL)
1069 return;
1071 members = aed->aad_ereport_members;
1073 if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYND) {
1074 fm_payload_set(ereport, FM_EREPORT_GENAMD_PAYLOAD_NAME_SYND,
1075 DATA_TYPE_UINT16, (uint16_t)AMD_BANK_SYND(status),
1076 NULL);
1078 if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYNDTYPE) {
1079 fm_payload_set(ereport,
1080 FM_EREPORT_GENAMD_PAYLOAD_NAME_SYNDTYPE,
1081 DATA_TYPE_STRING, "E",
1082 NULL);
1086 if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_CKSYND) {
1087 fm_payload_set(ereport, FM_EREPORT_GENAMD_PAYLOAD_NAME_CKSYND,
1088 DATA_TYPE_UINT16, (uint16_t)AMD_NB_STAT_CKSYND(status),
1089 NULL);
1091 if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYNDTYPE) {
1092 fm_payload_set(ereport,
1093 FM_EREPORT_GENAMD_PAYLOAD_NAME_SYNDTYPE,
1094 DATA_TYPE_STRING, authamd->amd_shared->ans_eccsymsz,
1095 NULL);
1099 if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_RESOURCE &&
1100 status & MSR_MC_STATUS_ADDRV) {
1101 authamd_ereport_add_resource(hdl, authamd, ereport, nva,
1102 mslogout);
1107 * cms_msrinject entry point
1109 cms_errno_t
1110 authamd_msrinject(cmi_hdl_t hdl, uint_t msr, uint64_t val)
1112 authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
1113 cms_errno_t rv = CMSERR_BADMSRWRITE;
1115 authamd_bankstatus_prewrite(hdl, authamd);
1116 if (cmi_hdl_wrmsr(hdl, msr, val) == CMI_SUCCESS)
1117 rv = CMS_SUCCESS;
1118 authamd_bankstatus_postwrite(hdl, authamd);
1120 return (rv);
1123 cms_api_ver_t _cms_api_version = CMS_API_VERSION_2;
1125 const cms_ops_t _cms_ops = {
1126 authamd_init, /* cms_init */
1127 NULL, /* cms_post_startup */
1128 NULL, /* cms_post_mpstartup */
1129 authamd_logout_size, /* cms_logout_size */
1130 authamd_mcgctl_val, /* cms_mcgctl_val */
1131 authamd_bankctl_skipinit, /* cms_bankctl_skipinit */
1132 authamd_bankctl_val, /* cms_bankctl_val */
1133 NULL, /* cms_bankstatus_skipinit */
1134 NULL, /* cms_bankstatus_val */
1135 authamd_mca_init, /* cms_mca_init */
1136 authamd_poll_ownermask, /* cms_poll_ownermask */
1137 authamd_bank_logout, /* cms_bank_logout */
1138 authamd_error_action, /* cms_error_action */
1139 authamd_disp_match, /* cms_disp_match */
1140 authamd_ereport_class, /* cms_ereport_class */
1141 NULL, /* cms_ereport_detector */
1142 NULL, /* cms_ereport_includestack */
1143 authamd_ereport_add_logout, /* cms_ereport_add_logout */
1144 authamd_msrinject, /* cms_msrinject */
1145 NULL, /* cms_fini */
1148 static struct modlcpu modlcpu = {
1149 &mod_cpuops,
1150 "Generic AMD model-specific MCA"
1153 static struct modlinkage modlinkage = {
1154 MODREV_1,
1155 (void *)&modlcpu,
1156 NULL
1160 _init(void)
1162 return (mod_install(&modlinkage));
1166 _info(struct modinfo *modinfop)
1168 return (mod_info(&modlinkage, modinfop));
1172 _fini(void)
1174 return (mod_remove(&modlinkage));