4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Intel model-specific support. Right now all this consists of is
28 * to modify the ereport subclass to produce different ereport classes
29 * so that we can have different diagnosis rules and corresponding faults.
32 #include <sys/types.h>
33 #include <sys/cmn_err.h>
34 #include <sys/modctl.h>
35 #include <sys/mca_x86.h>
36 #include <sys/cpu_module_ms_impl.h>
37 #include <sys/mc_intel.h>
38 #include <sys/pci_cfgspace.h>
39 #include <sys/fm/protocol.h>
40 #include <sys/fm/util.h>
41 #include <sys/fm/smb/fmsmb.h>
/*
 * Defined elsewhere.  Tested below to choose between the SMBIOS-based
 * ("gentopo") helpers and the legacy FMRI construction paths.
 */
43 extern int x86gentopo_legacy
;
/* Tested at the top of gintel_init(). */
45 int gintel_ms_support_disable
= 0;
/*
 * Error-scope bits used by gintel_error_action(); gintel_init() may OR
 * CMS_ERRSCOPE_POISONED into this value.
 */
46 int gintel_error_action_return
= 0;
/* When nonzero, gintel_init() does not add CMS_ERRSCOPE_POISONED. */
47 int gintel_ms_unconstrained
= 0;
/* Base used by SOCKET_BUS(): socket N maps to bus (max_bus_number - N). */
50 int max_bus_number
= 0xff;
/*
 * Storage for correctable-ECC counter snapshots, indexed as
 * err_counter_array[chip][slot][counter].  err_counter_index[chip] holds
 * the slot written most recently by gintel_ereport_add_logout().
 */
52 #define ERR_COUNTER_INDEX 2
53 #define MAX_CPU_NODES 2
54 #define N_MC_COR_ECC_CNT 6
55 uint32_t err_counter_array
[MAX_CPU_NODES
][ERR_COUNTER_INDEX
][N_MC_COR_ECC_CNT
];
56 uint8_t err_counter_index
[MAX_CPU_NODES
];
/* Bus number for a socket: counts down from max_bus_number. */
58 #define MAX_BUS_NUMBER max_bus_number
59 #define SOCKET_BUS(cpu) (MAX_BUS_NUMBER - (cpu))
/*
 * Read through pci_getl_func from the Nehalem-EP memory controller
 * device/function on the bus belonging to socket `chipid'.
 * NOTE(review): the tail of this macro (the argument derived from `reg')
 * is not visible in this excerpt - confirm against the full file.
 */
61 #define MC_COR_ECC_CNT(chipid, reg) (*pci_getl_func)(SOCKET_BUS(chipid), \
62 NEHALEM_EP_MEMORY_CONTROLLER_DEV, NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \
/*
 * Bits of the model-specific error code (MCAX86_MSERRCODE(status)) that
 * gintel_disp_match() tests for memory-controller errors.
 */
65 #define MSCOD_MEM_ECC_READ 0x1
66 #define MSCOD_MEM_ECC_SCRUB 0x2
67 #define MSCOD_MEM_WR_PARITY 0x4
68 #define MSCOD_MEM_REDUNDANT_MEM 0x8
69 #define MSCOD_MEM_SPARE_MEM 0x10
70 #define MSCOD_MEM_ILLEGAL_ADDR 0x20
71 #define MSCOD_MEM_BAD_ID 0x40
72 #define MSCOD_MEM_ADDR_PARITY 0x80
73 #define MSCOD_MEM_BYTE_PARITY 0x100
/*
 * Cookie class flags.  The detector and logout routines test these with
 * a bitwise AND against the cookie returned by gintel_disp_match().
 */
75 #define GINTEL_ERROR_MEM 0x1000
76 #define GINTEL_ERROR_QUICKPATH 0x2000
77 #define GINTEL_ERROR_UNKNOWN 0x4000
/* Memory cookies: GINTEL_ERROR_MEM combined with a sub-code. */
79 #define GINTEL_ERR_SPARE_MEM (GINTEL_ERROR_MEM | 1)
80 #define GINTEL_ERR_MEM_UE (GINTEL_ERROR_MEM | 2)
81 #define GINTEL_ERR_MEM_CE (GINTEL_ERROR_MEM | 3)
82 #define GINTEL_ERR_MEM_PARITY (GINTEL_ERROR_MEM | 4)
83 #define GINTEL_ERR_MEM_ADDR_PARITY (GINTEL_ERROR_MEM | 5)
84 #define GINTEL_ERR_MEM_REDUNDANT (GINTEL_ERROR_MEM | 6)
85 #define GINTEL_ERR_MEM_BAD_ADDR (GINTEL_ERROR_MEM | 7)
86 #define GINTEL_ERR_MEM_BAD_ID (GINTEL_ERROR_MEM | 8)
87 #define GINTEL_ERR_MEM_UNKNOWN (GINTEL_ERROR_MEM | 0xfff)
/*
 * Masks and shifts applied to the `misc' value in
 * gintel_ereport_add_logout(): channel in bits 19:18, dimm in bits 17:16,
 * syndrome in the upper 32 bits.
 */
89 #define MSR_MC_MISC_MEM_CHANNEL_MASK 0x00000000000c0000ULL
90 #define MSR_MC_MISC_MEM_CHANNEL_SHIFT 18
91 #define MSR_MC_MISC_MEM_DIMM_MASK 0x0000000000030000ULL
92 #define MSR_MC_MISC_MEM_DIMM_SHIFT 16
93 #define MSR_MC_MISC_MEM_SYNDROME_MASK 0xffffffff00000000ULL
94 #define MSR_MC_MISC_MEM_SYNDROME_SHIFT 32
/* Values returned by gintel_cpu_generation(). */
96 #define CPU_GENERATION_DONT_CARE 0
97 #define CPU_GENERATION_NEHALEM_EP 1
/* Values compared against cmi_hdl_family() and cmi_hdl_model(). */
99 #define INTEL_CPU_6_ID 0x6
100 #define INTEL_NEHALEM_CPU_FAMILY_ID 0x6
101 #define INTEL_NEHALEM_CPU_MODEL_ID 0x1A
/* PCI device and function numbers used by MC_COR_ECC_CNT(). */
103 #define NEHALEM_EP_MEMORY_CONTROLLER_DEV 0x3
104 #define NEHALEM_EP_MEMORY_CONTROLLER_FUNC 0x2
/*
 * Module initialisation; installed as the cms_init slot of _cms_ops.
 *
 * Visible steps: test gintel_ms_support_disable, test for the X86FSET_MCA
 * feature, read a 32-bit value with pci_getl_func(0, 0, 0, 0x0) and
 * switch on it.  Inside the switch, CMS_ERRSCOPE_POISONED is ORed into
 * gintel_error_action_return unless gintel_ms_unconstrained is set.
 *
 * NOTE(review): the return type, the declaration of nb_chipset, the
 * bodies of the two leading `if' tests, every case label and the return
 * statements are not visible in this excerpt.  `hdl' and `datap' are not
 * referenced by any visible line.
 */
108 gintel_init(cmi_hdl_t hdl
, void **datap
)
112 if (gintel_ms_support_disable
)
115 if (!is_x86_feature(x86_featureset
, X86FSET_MCA
))
/* Arguments follow the same order as in MC_COR_ECC_CNT(): bus, dev, func. */
118 nb_chipset
= (*pci_getl_func
)(0, 0, 0, 0x0);
119 switch (nb_chipset
) {
128 if (!gintel_ms_unconstrained
)
129 gintel_error_action_return
|= CMS_ERRSCOPE_POISONED
;
/*
 * Installed as the cms_error_action slot of _cms_ops.  Chooses the
 * error-scope flags (rc) for one bank status value:
 *
 *  - ismc == 0, bank == 0, family INTEL_CPU_6_ID, model below
 *    INTEL_NEHALEM_CPU_MODEL_ID, a bus/interconnect error code and a zero
 *    model-specific error code:
 *    CMS_ERRSCOPE_CURCONTEXT_OK | CMS_ERRSCOPE_CLEARED_UC;
 *  - otherwise, MSR_MC_STATUS_PCC clear: gintel_error_action_return;
 *  - last assignment: gintel_error_action_return with
 *    CMS_ERRSCOPE_POISONED masked off.
 *
 * `addr', `misc' and `mslogout' are not referenced by any visible line.
 *
 * NOTE(review): the return type, the declaration of rc, the line that
 * introduces the last assignment (presumably an `else') and the return
 * statement are not visible in this excerpt.
 */
166 gintel_error_action(cmi_hdl_t hdl
, int ismc
, int bank
,
167 uint64_t status
, uint64_t addr
, uint64_t misc
, void *mslogout
)
171 if (ismc
== 0 && bank
== 0 &&
172 cmi_hdl_family(hdl
) == INTEL_CPU_6_ID
&&
173 cmi_hdl_model(hdl
) < INTEL_NEHALEM_CPU_MODEL_ID
&&
174 MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status
)) &&
175 MCAX86_MSERRCODE(status
) == 0) {
176 rc
= CMS_ERRSCOPE_CURCONTEXT_OK
| CMS_ERRSCOPE_CLEARED_UC
;
177 } else if ((status
& MSR_MC_STATUS_PCC
) == 0) {
178 rc
= gintel_error_action_return
;
180 rc
= gintel_error_action_return
& ~CMS_ERRSCOPE_POISONED
;
/*
 * Installed as the cms_disp_match slot of _cms_ops.  Classifies a bank
 * status value into one of the GINTEL_* cookies; the cookie (rt) starts
 * out as NULL.
 *
 * Memory-controller error codes are decoded from the model-specific error
 * code, testing bits in this order: spare memory, ECC read/scrub (UE
 * when MSR_MC_STATUS_UC is set; the CE assignment follows it), write/byte
 * parity, address parity, redundant memory, illegal address, bad id, and
 * finally GINTEL_ERR_MEM_UNKNOWN.
 *
 * Otherwise, when `quickpath' is nonzero and the error code is a
 * bus/interconnect one, the cookie is GINTEL_ERROR_QUICKPATH.  Otherwise,
 * for the same bank-0 / family-6 / pre-Nehalem / zero model-specific-code
 * condition used by gintel_error_action(), it is GINTEL_ERROR_UNKNOWN.
 *
 * `addr', `misc' and `mslogout' are not referenced by any visible line.
 *
 * NOTE(review): the return type, the definition of `quickpath', the lines
 * introducing the CE and MEM_UNKNOWN assignments (presumably `else'), and
 * the return statement are not visible in this excerpt.
 */
187 gintel_disp_match(cmi_hdl_t hdl
, int ismc
, int bank
, uint64_t status
,
188 uint64_t addr
, uint64_t misc
, void *mslogout
)
190 cms_cookie_t rt
= (cms_cookie_t
)NULL
;
191 uint16_t mcacode
= MCAX86_ERRCODE(status
);
192 uint16_t mscode
= MCAX86_MSERRCODE(status
);
194 if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode
)) {
196 * memory controller errors
198 if (mscode
& MSCOD_MEM_SPARE_MEM
) {
199 rt
= (cms_cookie_t
)GINTEL_ERR_SPARE_MEM
;
200 } else if (mscode
& (MSCOD_MEM_ECC_READ
|
201 MSCOD_MEM_ECC_SCRUB
)) {
202 if (status
& MSR_MC_STATUS_UC
)
203 rt
= (cms_cookie_t
)GINTEL_ERR_MEM_UE
;
205 rt
= (cms_cookie_t
)GINTEL_ERR_MEM_CE
;
206 } else if (mscode
& (MSCOD_MEM_WR_PARITY
|
207 MSCOD_MEM_BYTE_PARITY
)) {
208 rt
= (cms_cookie_t
)GINTEL_ERR_MEM_PARITY
;
209 } else if (mscode
& MSCOD_MEM_ADDR_PARITY
) {
210 rt
= (cms_cookie_t
)GINTEL_ERR_MEM_ADDR_PARITY
;
211 } else if (mscode
& MSCOD_MEM_REDUNDANT_MEM
) {
212 rt
= (cms_cookie_t
)GINTEL_ERR_MEM_REDUNDANT
;
213 } else if (mscode
& MSCOD_MEM_ILLEGAL_ADDR
) {
214 rt
= (cms_cookie_t
)GINTEL_ERR_MEM_BAD_ADDR
;
215 } else if (mscode
& MSCOD_MEM_BAD_ID
) {
216 rt
= (cms_cookie_t
)GINTEL_ERR_MEM_BAD_ID
;
218 rt
= (cms_cookie_t
)GINTEL_ERR_MEM_UNKNOWN
;
220 } else if (quickpath
&&
221 MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status
))) {
222 rt
= (cms_cookie_t
)GINTEL_ERROR_QUICKPATH
;
223 } else if (ismc
== 0 && bank
== 0 &&
224 cmi_hdl_family(hdl
) == INTEL_CPU_6_ID
&&
225 cmi_hdl_model(hdl
) < INTEL_NEHALEM_CPU_MODEL_ID
&&
226 MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status
)) &&
227 MCAX86_MSERRCODE(status
) == 0) {
228 rt
= (cms_cookie_t
)GINTEL_ERROR_UNKNOWN
;
/*
 * Installed as the cms_ereport_class slot of _cms_ops.  Stores
 * FM_EREPORT_CPU_INTEL through `cpuclsp' and, for each GINTEL_* cookie
 * handled below, stores the matching leaf class string through
 * `leafclsp'.  `hdl' is not referenced by any visible line.
 *
 * NOTE(review): the return type, the statements that end each case
 * (presumably `break'), any default case and the return are not visible
 * in this excerpt.
 */
235 gintel_ereport_class(cmi_hdl_t hdl
, cms_cookie_t mscookie
,
236 const char **cpuclsp
, const char **leafclsp
)
238 *cpuclsp
= FM_EREPORT_CPU_INTEL
;
239 switch ((uintptr_t)mscookie
) {
240 case GINTEL_ERROR_QUICKPATH
:
241 *leafclsp
= "quickpath.interconnect";
243 case GINTEL_ERR_SPARE_MEM
:
244 *leafclsp
= "quickpath.mem_spare";
246 case GINTEL_ERR_MEM_UE
:
247 *leafclsp
= "quickpath.mem_ue";
249 case GINTEL_ERR_MEM_CE
:
250 *leafclsp
= "quickpath.mem_ce";
252 case GINTEL_ERR_MEM_PARITY
:
253 *leafclsp
= "quickpath.mem_parity";
255 case GINTEL_ERR_MEM_ADDR_PARITY
:
256 *leafclsp
= "quickpath.mem_addr_parity";
258 case GINTEL_ERR_MEM_REDUNDANT
:
259 *leafclsp
= "quickpath.mem_redundant";
261 case GINTEL_ERR_MEM_BAD_ADDR
:
262 *leafclsp
= "quickpath.mem_bad_addr";
264 case GINTEL_ERR_MEM_BAD_ID
:
265 *leafclsp
= "quickpath.mem_bad_id";
267 case GINTEL_ERR_MEM_UNKNOWN
:
268 *leafclsp
= "quickpath.mem_unknown";
270 case GINTEL_ERROR_UNKNOWN
:
271 *leafclsp
= "unknown";
/*
 * Builds the detector FMRI for the non-legacy topology path; called from
 * gintel_ereport_detector() when x86gentopo_legacy is zero.
 *
 * The board list comes from cmi_hdl_smb_bboard(hdl) and the chip id from
 * cmi_hdl_smb_chipid(hdl).  Cookies carrying GINTEL_ERROR_QUICKPATH get a
 * single "chip" pair appended to the board list; the other call appends
 * "chip" and "memory-controller" 0.
 *
 * NOTE(review): the return type, the remaining parameter(s) (`nva' is
 * used below and passed by the caller), the bodies of the NULL checks,
 * the line introducing the second fm_fmri_hc_create() call (presumably
 * `else') and the return are not visible in this excerpt.
 */
277 gintel_gentopo_ereport_detector(cmi_hdl_t hdl
, cms_cookie_t mscookie
,
280 nvlist_t
*nvl
= (nvlist_t
*)NULL
;
281 nvlist_t
*board_list
= (nvlist_t
*)NULL
;
284 board_list
= cmi_hdl_smb_bboard(hdl
);
286 if (board_list
== NULL
)
289 if ((nvl
= fm_nvlist_create(nva
)) == NULL
)
292 if ((uintptr_t)mscookie
& GINTEL_ERROR_QUICKPATH
) {
293 fm_fmri_hc_create(nvl
, FM_HC_SCHEME_VERSION
,
294 NULL
, NULL
, board_list
, 1,
295 "chip", cmi_hdl_smb_chipid(hdl
));
297 fm_fmri_hc_create(nvl
, FM_HC_SCHEME_VERSION
,
298 NULL
, NULL
, board_list
, 2,
299 "chip", cmi_hdl_smb_chipid(hdl
),
300 "memory-controller", 0);
/*
 * Installed as the cms_ereport_detector slot of _cms_ops.  When
 * x86gentopo_legacy is zero the work is delegated to
 * gintel_gentopo_ereport_detector().  The remaining visible code creates
 * an nvlist and fills it with fm_fmri_hc_set(): cookies carrying
 * GINTEL_ERROR_QUICKPATH or GINTEL_ERROR_UNKNOWN use a pair count of 2
 * ending in "chip"; the other call uses a count of 3 ending in "chip",
 * "memory-controller" 0.  The chip id comes from cmi_hdl_chipid(hdl).
 * `bankno' is not referenced by any visible line.
 *
 * NOTE(review): the return type, the remaining parameter(s) (`nva' is
 * used below), the first name/value pair of each fm_fmri_hc_set() call,
 * the line introducing the second call (presumably `else') and the
 * return statements are not visible in this excerpt.
 */
308 gintel_ereport_detector(cmi_hdl_t hdl
, int bankno
, cms_cookie_t mscookie
,
311 nvlist_t
*nvl
= (nvlist_t
*)NULL
;
313 if (!x86gentopo_legacy
) {
314 nvl
= gintel_gentopo_ereport_detector(hdl
, mscookie
, nva
);
319 if ((nvl
= fm_nvlist_create(nva
)) == NULL
)
321 if (((uintptr_t)mscookie
& GINTEL_ERROR_QUICKPATH
) ||
322 ((uintptr_t)mscookie
& GINTEL_ERROR_UNKNOWN
)) {
323 fm_fmri_hc_set(nvl
, FM_HC_SCHEME_VERSION
, NULL
, NULL
, 2,
325 "chip", cmi_hdl_chipid(hdl
));
327 fm_fmri_hc_set(nvl
, FM_HC_SCHEME_VERSION
, NULL
, NULL
, 3,
329 "chip", cmi_hdl_chipid(hdl
),
330 "memory-controller", 0);
/*
 * Builds the resource FMRI for the non-legacy topology path.
 *
 * Two nvlists are created: nvl (the result; the original comment says it
 * is freed by the caller) and snvl, which receives a
 * FM_FMRI_HC_SPECIFIC_OFFSET uint64 member and is destroyed before the
 * end of the visible code.  If snvl cannot be created, nvl is destroyed.
 *
 * How deep the FMRI goes depends on the first unum field found equal to
 * -1, tested in this order:
 *   unum_chan == -1  -> chip / memory-controller
 *   unum_cs   == -1  -> ... / dram-channel
 *   unum_rank == -1  -> ... / dram-channel / dimm
 *   last call        -> ... / dimm / rank
 * The chip id comes from cmi_hdl_smb_chipid(hdl).
 *
 * NOTE(review): the return type, the `unump' parameter declaration, the
 * bodies of the NULL checks, the value passed to nvlist_add_uint64(), one
 * argument line of each fm_fmri_hc_create() call, the line introducing
 * the last call (presumably `else') and the return are not visible in
 * this excerpt.
 */
337 gintel_gentopo_ereport_create_resource_elem(cmi_hdl_t hdl
, nv_alloc_t
*nva
,
340 nvlist_t
*nvl
, *snvl
;
341 nvlist_t
*board_list
= NULL
;
343 board_list
= cmi_hdl_smb_bboard(hdl
);
344 if (board_list
== NULL
) {
348 if ((nvl
= fm_nvlist_create(nva
)) == NULL
) /* freed by caller */
351 if ((snvl
= fm_nvlist_create(nva
)) == NULL
) {
/* Destroy flag depends on whether an allocator was supplied. */
352 fm_nvlist_destroy(nvl
, nva
? FM_NVA_RETAIN
: FM_NVA_FREE
);
356 (void) nvlist_add_uint64(snvl
, FM_FMRI_HC_SPECIFIC_OFFSET
,
359 if (unump
->unum_chan
== -1) {
360 fm_fmri_hc_create(nvl
, FM_HC_SCHEME_VERSION
, NULL
, snvl
,
362 "chip", cmi_hdl_smb_chipid(hdl
),
363 "memory-controller", unump
->unum_mc
);
364 } else if (unump
->unum_cs
== -1) {
365 fm_fmri_hc_create(nvl
, FM_HC_SCHEME_VERSION
, NULL
, snvl
,
367 "chip", cmi_hdl_smb_chipid(hdl
),
368 "memory-controller", unump
->unum_mc
,
369 "dram-channel", unump
->unum_chan
);
370 } else if (unump
->unum_rank
== -1) {
371 fm_fmri_hc_create(nvl
, FM_HC_SCHEME_VERSION
, NULL
, snvl
,
373 "chip", cmi_hdl_smb_chipid(hdl
),
374 "memory-controller", unump
->unum_mc
,
375 "dram-channel", unump
->unum_chan
,
376 "dimm", unump
->unum_cs
);
378 fm_fmri_hc_create(nvl
, FM_HC_SCHEME_VERSION
, NULL
, snvl
,
380 "chip", cmi_hdl_smb_chipid(hdl
),
381 "memory-controller", unump
->unum_mc
,
382 "dram-channel", unump
->unum_chan
,
383 "dimm", unump
->unum_cs
,
384 "rank", unump
->unum_rank
);
387 fm_nvlist_destroy(snvl
, nva
? FM_NVA_RETAIN
: FM_NVA_FREE
);
/*
 * Builds the resource FMRI for the legacy topology path.
 *
 * Two nvlists are created: nvl (the result; the original comment says it
 * is freed by the caller) and snvl, which receives a
 * FM_FMRI_HC_SPECIFIC_OFFSET uint64 member and is destroyed before the
 * end of the visible code.  If snvl cannot be created, nvl is destroyed.
 *
 * The FMRI always starts with motherboard / chip / memory-controller and
 * is extended according to the first unum field found equal to -1:
 *   unum_chan == -1  -> 3 pairs
 *   unum_cs   == -1  -> 4 pairs, adds dram-channel
 *   unum_rank == -1  -> 5 pairs, adds dimm
 *   last call        -> 6 pairs, adds rank
 *
 * NOTE(review): the return type, the bodies of the NULL checks, the value
 * passed to nvlist_add_uint64(), the line introducing the last
 * fm_fmri_hc_set() call (presumably `else') and the return are not
 * visible in this excerpt.
 */
393 gintel_ereport_create_resource_elem(nv_alloc_t
*nva
, mc_unum_t
*unump
)
395 nvlist_t
*nvl
, *snvl
;
397 if ((nvl
= fm_nvlist_create(nva
)) == NULL
) /* freed by caller */
400 if ((snvl
= fm_nvlist_create(nva
)) == NULL
) {
/* Destroy flag depends on whether an allocator was supplied. */
401 fm_nvlist_destroy(nvl
, nva
? FM_NVA_RETAIN
: FM_NVA_FREE
);
405 (void) nvlist_add_uint64(snvl
, FM_FMRI_HC_SPECIFIC_OFFSET
,
408 if (unump
->unum_chan
== -1) {
409 fm_fmri_hc_set(nvl
, FM_HC_SCHEME_VERSION
, NULL
, snvl
, 3,
410 "motherboard", unump
->unum_board
,
411 "chip", unump
->unum_chip
,
412 "memory-controller", unump
->unum_mc
);
413 } else if (unump
->unum_cs
== -1) {
414 fm_fmri_hc_set(nvl
, FM_HC_SCHEME_VERSION
, NULL
, snvl
, 4,
415 "motherboard", unump
->unum_board
,
416 "chip", unump
->unum_chip
,
417 "memory-controller", unump
->unum_mc
,
418 "dram-channel", unump
->unum_chan
);
419 } else if (unump
->unum_rank
== -1) {
420 fm_fmri_hc_set(nvl
, FM_HC_SCHEME_VERSION
, NULL
, snvl
, 5,
421 "motherboard", unump
->unum_board
,
422 "chip", unump
->unum_chip
,
423 "memory-controller", unump
->unum_mc
,
424 "dram-channel", unump
->unum_chan
,
425 "dimm", unump
->unum_cs
);
427 fm_fmri_hc_set(nvl
, FM_HC_SCHEME_VERSION
, NULL
, snvl
, 6,
428 "motherboard", unump
->unum_board
,
429 "chip", unump
->unum_chip
,
430 "memory-controller", unump
->unum_mc
,
431 "dram-channel", unump
->unum_chan
,
432 "dimm", unump
->unum_cs
,
433 "rank", unump
->unum_rank
);
436 fm_nvlist_destroy(snvl
, nva
? FM_NVA_RETAIN
: FM_NVA_FREE
);
/*
 * Fills this_err_counter_array[0 .. N_MC_COR_ECC_CNT - 1] with the values
 * read by MC_COR_ECC_CNT(chipid, index), one per index.  The caller must
 * supply room for N_MC_COR_ECC_CNT entries.
 *
 * NOTE(review): the return type and the declaration of `index' are not
 * visible in this excerpt.
 */
442 nehalem_ep_ereport_add_memory_error_counter(uint_t chipid
,
443 uint32_t *this_err_counter_array
)
447 for (index
= 0; index
< N_MC_COR_ECC_CNT
; index
++)
448 this_err_counter_array
[index
] = MC_COR_ECC_CNT(chipid
, index
);
/*
 * Returns CPU_GENERATION_NEHALEM_EP when the handle reports family
 * INTEL_NEHALEM_CPU_FAMILY_ID and model INTEL_NEHALEM_CPU_MODEL_ID
 * exactly; every other family/model yields CPU_GENERATION_DONT_CARE.
 *
 * NOTE(review): the return type line is not visible in this excerpt.
 */
452 gintel_cpu_generation(cmi_hdl_t hdl
)
454 int cpu_generation
= CPU_GENERATION_DONT_CARE
;
456 if ((cmi_hdl_family(hdl
) == INTEL_NEHALEM_CPU_FAMILY_ID
) &&
457 (cmi_hdl_model(hdl
) == INTEL_NEHALEM_CPU_MODEL_ID
))
458 cpu_generation
= CPU_GENERATION_NEHALEM_EP
;
460 return (cpu_generation
);
/*
 * Installed as the cms_ereport_add_logout slot of _cms_ops.  Adds
 * model-specific payload members to `ereport'.  All visible work sits
 * under the test for GINTEL_ERROR_MEM in the cookie.
 *
 * Visible steps for memory cookies:
 *  - seed a unum with the chip id (cmi_hdl_chipid), the channel taken
 *    from MCAX86_ERRCODE_CCCC(status), and an offset of -1ULL;
 *  - if MSR_MC_STATUS_MISCV is set, extract the channel, dimm and
 *    syndrome fields from `misc' and add the syndrome as
 *    FM_EREPORT_PAYLOAD_ECC_SYND;
 *  - if MSR_MC_STATUS_ADDRV is set, add the physical address, call
 *    cmi_mc_patounum() (its return value is ignored) and, when the
 *    resulting offset is not -1ULL and has OFFSET_ROW_BANK_COL set, add
 *    the bank, CAS and RAS members;
 *  - build the resource FMRI with the gentopo or legacy helper depending
 *    on x86gentopo_legacy, attach it as a one-element nvlist array and
 *    destroy the local copy;
 *  - on CPU_GENERATION_NEHALEM_EP with chipid < MAX_CPU_NODES, advance
 *    err_counter_index[chipid] to the other slot, read the counters into
 *    it, and add both the new and the previous snapshot to the ereport.
 *
 * `banknum' and `mslogout' are not referenced by any visible line.
 *
 * NOTE(review): the return type, several declarations (unum, synd,
 * resource, chipid), the assignment targets of the channel/dimm
 * extractions and of this_index, the data-type arguments of the
 * bank/CAS/RAS members, the helper call arguments and the terminators of
 * the counter fm_payload_set() calls are not visible in this excerpt.
 * The fm_payload_set() calls are terminated with 0 in one place and NULL
 * in the others - confirm which the API expects.
 */
465 gintel_ereport_add_logout(cmi_hdl_t hdl
, nvlist_t
*ereport
,
466 nv_alloc_t
*nva
, int banknum
, uint64_t status
, uint64_t addr
,
467 uint64_t misc
, void *mslogout
, cms_cookie_t mscookie
)
472 int chan
= MCAX86_ERRCODE_CCCC(status
);
473 uint8_t last_index
, this_index
;
479 if ((uintptr_t)mscookie
& GINTEL_ERROR_MEM
) {
481 unum
.unum_chip
= cmi_hdl_chipid(hdl
);
483 unum
.unum_chan
= chan
;
486 unum
.unum_offset
= -1ULL;
487 if (status
& MSR_MC_STATUS_MISCV
) {
489 (misc
& MSR_MC_MISC_MEM_CHANNEL_MASK
) >>
490 MSR_MC_MISC_MEM_CHANNEL_SHIFT
;
492 (misc
& MSR_MC_MISC_MEM_DIMM_MASK
) >>
493 MSR_MC_MISC_MEM_DIMM_SHIFT
;
494 synd
= (misc
& MSR_MC_MISC_MEM_SYNDROME_MASK
) >>
495 MSR_MC_MISC_MEM_SYNDROME_SHIFT
;
496 fm_payload_set(ereport
, FM_EREPORT_PAYLOAD_ECC_SYND
,
497 DATA_TYPE_UINT32
, synd
, 0);
499 if (status
& MSR_MC_STATUS_ADDRV
) {
500 fm_payload_set(ereport
, FM_FMRI_MEM_PHYSADDR
,
501 DATA_TYPE_UINT64
, addr
, NULL
);
502 (void) cmi_mc_patounum(addr
, 0, 0, synd
, 0, &unum
);
/* Offset still at its -1ULL seed means no offset was filled in. */
503 if (unum
.unum_offset
!= -1ULL &&
504 (unum
.unum_offset
& OFFSET_ROW_BANK_COL
) != 0) {
505 fm_payload_set(ereport
,
506 FM_EREPORT_PAYLOAD_NAME_BANK
,
508 TCODE_OFFSET_BANK(unum
.unum_offset
), NULL
);
509 fm_payload_set(ereport
,
510 FM_EREPORT_PAYLOAD_NAME_CAS
,
512 TCODE_OFFSET_CAS(unum
.unum_offset
), NULL
);
513 fm_payload_set(ereport
,
514 FM_EREPORT_PAYLOAD_NAME_RAS
,
516 TCODE_OFFSET_RAS(unum
.unum_offset
), NULL
);
520 if (!x86gentopo_legacy
) {
521 resource
= gintel_gentopo_ereport_create_resource_elem(
524 resource
= gintel_ereport_create_resource_elem(nva
,
528 fm_payload_set(ereport
, FM_EREPORT_PAYLOAD_NAME_RESOURCE
,
529 DATA_TYPE_NVLIST_ARRAY
, 1, &resource
, NULL
);
530 fm_nvlist_destroy(resource
, nva
? FM_NVA_RETAIN
:FM_NVA_FREE
);
532 if (gintel_cpu_generation(hdl
) == CPU_GENERATION_NEHALEM_EP
) {
534 chipid
= unum
.unum_chip
;
/* Bounds check before indexing the MAX_CPU_NODES-sized arrays. */
535 if (chipid
< MAX_CPU_NODES
) {
536 last_index
= err_counter_index
[chipid
];
538 (last_index
+ 1) % ERR_COUNTER_INDEX
;
539 err_counter_index
[chipid
] = this_index
;
540 nehalem_ep_ereport_add_memory_error_counter(
542 err_counter_array
[chipid
][this_index
]);
543 fm_payload_set(ereport
,
544 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS
,
545 DATA_TYPE_UINT32_ARRAY
, N_MC_COR_ECC_CNT
,
546 err_counter_array
[chipid
][this_index
],
548 fm_payload_set(ereport
,
549 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST
,
550 DATA_TYPE_UINT32_ARRAY
, N_MC_COR_ECC_CNT
,
551 err_counter_array
[chipid
][last_index
],
/*
 * Installed as the cms_bankctl_skipinit slot of _cms_ops.  The visible
 * part of the condition matches family INTEL_NEHALEM_CPU_FAMILY_ID with a
 * model below INTEL_NEHALEM_CPU_MODEL_ID.
 *
 * NOTE(review): the return type, the first line of the condition
 * (presumably the test on `banknum', given the comment below) and the
 * return statements are not visible in this excerpt.
 */
559 gintel_bankctl_skipinit(cmi_hdl_t hdl
, int banknum
)
562 * On Intel family 6 before QuickPath we must not enable machine check
563 * from bank 0 detectors. bank 0 is reserved for the platform
567 cmi_hdl_family(hdl
) == INTEL_NEHALEM_CPU_FAMILY_ID
&&
568 cmi_hdl_model(hdl
) < INTEL_NEHALEM_CPU_MODEL_ID
)
/* Version of the cms interface that the _cms_ops vector below is built for. */
574 cms_api_ver_t _cms_api_version
= CMS_API_VERSION_2
;
/*
 * Operations vector for this module.  Each entry is labelled with the
 * slot it fills; slots set to NULL have no implementation in this file.
 * The entries are positional, so their order must not be changed.
 *
 * NOTE(review): the closing of the initializer is not visible in this
 * excerpt.
 */
576 const cms_ops_t _cms_ops
= {
577 gintel_init
, /* cms_init */
578 NULL
, /* cms_post_startup */
579 NULL
, /* cms_post_mpstartup */
580 NULL
, /* cms_logout_size */
581 NULL
, /* cms_mcgctl_val */
582 gintel_bankctl_skipinit
, /* cms_bankctl_skipinit */
583 NULL
, /* cms_bankctl_val */
584 NULL
, /* cms_bankstatus_skipinit */
585 NULL
, /* cms_bankstatus_val */
586 NULL
, /* cms_mca_init */
587 NULL
, /* cms_poll_ownermask */
588 NULL
, /* cms_bank_logout */
589 gintel_error_action
, /* cms_error_action */
590 gintel_disp_match
, /* cms_disp_match */
591 gintel_ereport_class
, /* cms_ereport_class */
592 gintel_ereport_detector
, /* cms_ereport_detector */
593 NULL
, /* cms_ereport_includestack */
594 gintel_ereport_add_logout
, /* cms_ereport_add_logout */
595 NULL
, /* cms_msrinject */
/*
 * Loadable-module linkage.  The visible fragments show the modlcpu and
 * modlinkage structures and three routines that forward to
 * mod_install(), mod_info() and mod_remove() with &modlinkage.
 *
 * NOTE(review): most of both initializers and the headers of the first
 * and last routines (presumably _init and _fini) are not visible in this
 * excerpt.
 */
599 static struct modlcpu modlcpu
= {
601 "Generic Intel model-specific MCA"
604 static struct modlinkage modlinkage
= {
613 return (mod_install(&modlinkage
));
617 _info(struct modinfo
*modinfop
)
619 return (mod_info(&modlinkage
, modinfop
));
625 return (mod_remove(&modlinkage
));