// SPDX-License-Identifier: GPL-2.0-only
/*
 *  (c) 2005-2016 Advanced Micro Devices, Inc.
 *
 *  Written by Jacob Shin - AMD, Inc.
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/string.h>

#include <asm/amd_nb.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#include "internal.h"
#define NR_BLOCKS		5
#define THRESHOLD_MAX		0xFFF
#define INT_TYPE_APIC		0x00020000
#define MASK_VALID_HI		0x80000000
#define MASK_CNTP_HI		0x40000000
#define MASK_LOCKED_HI		0x20000000
#define MASK_LVTOFF_HI		0x00F00000
#define MASK_COUNT_EN_HI	0x00080000
#define MASK_INT_TYPE_HI	0x00060000
#define MASK_OVERFLOW_HI	0x00010000
#define MASK_ERR_COUNT_HI	0x00000FFF
#define MASK_BLKPTR_LO		0xFF000000
#define MCG_XBLK_ADDR		0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR		0xC0000410
#define MASK_DEF_LVTOFF		0x000000F0
#define MASK_DEF_INT_TYPE	0x00000006
#define DEF_LVT_OFF		0x2
#define DEF_INT_TYPE_APIC	0x2

/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF	0xF000
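/*
 * Illustrative example (derived from the mask above, value hypothetical): if
 * the low half of MSR 0xC0000410 reads 0x2052, then
 * (0x2052 & SMCA_THR_LVT_OFF) >> 12 == 0x2, i.e. the BIOS programmed
 * threshold LVT offset 2.
 */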
static bool thresholding_irq_en;

static const char * const th_names[] = {
	"load_store",
	"insn_fetch",
	"combined_unit",
	"decode_unit",
	"northbridge",
	"execution_unit",
};

static const char * const smca_umc_block_names[] = {
	"dram_ecc",
	"misc_umc"
};
#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
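/*
 * Worked example (not part of the original sources): HWID_MCATYPE(0xB0, 0x10)
 * packs the hardware ID into bits [31:16] and the MCA type into the low 16
 * bits, giving 0x00B00010. This is the same layout that smca_configure()
 * extracts from MCA_IPID when matching against smca_hwid_mcatypes[].
 */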
struct smca_hwid {
	unsigned int bank_type;	/* Use with smca_bank_types for easy indexing. */
	u32 hwid_mcatype;	/* (hwid,mcatype) tuple */
};

struct smca_bank {
	const struct smca_hwid *hwid;
	u32 id;			/* Value of MCA_IPID[InstanceId]. */
	u8 sysfs_id;		/* Value used for sysfs name. */
};
static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
static const char * const smca_names[] = {
	[SMCA_LS ... SMCA_LS_V2]	= "load_store",
	[SMCA_IF]			= "insn_fetch",
	[SMCA_L2_CACHE]			= "l2_cache",
	[SMCA_DE]			= "decode_unit",
	[SMCA_RESERVED]			= "reserved",
	[SMCA_EX]			= "execution_unit",
	[SMCA_FP]			= "floating_point",
	[SMCA_L3_CACHE]			= "l3_cache",
	[SMCA_CS ... SMCA_CS_V2]	= "coherent_slave",
	[SMCA_PIE]			= "pie",

	/* UMC v2 is separate because both of them can exist in a single system. */
	[SMCA_UMC]			= "umc",
	[SMCA_UMC_V2]			= "umc_v2",
	[SMCA_MA_LLC]			= "ma_llc",
	[SMCA_PB]			= "param_block",
	[SMCA_PSP ... SMCA_PSP_V2]	= "psp",
	[SMCA_SMU ... SMCA_SMU_V2]	= "smu",
	[SMCA_MP5]			= "mp5",
	[SMCA_MPDMA]			= "mpdma",
	[SMCA_NBIO]			= "nbio",
	[SMCA_PCIE ... SMCA_PCIE_V2]	= "pcie",
	[SMCA_XGMI_PCS]			= "xgmi_pcs",
	[SMCA_NBIF]			= "nbif",
	[SMCA_SHUB]			= "shub",
	[SMCA_SATA]			= "sata",
	[SMCA_USB]			= "usb",
	[SMCA_USR_DP]			= "usr_dp",
	[SMCA_USR_CP]			= "usr_cp",
	[SMCA_GMI_PCS]			= "gmi_pcs",
	[SMCA_XGMI_PHY]			= "xgmi_phy",
	[SMCA_WAFL_PHY]			= "wafl_phy",
	[SMCA_GMI_PHY]			= "gmi_phy",
};
static const char *smca_get_name(enum smca_bank_types t)
{
	if (t >= N_SMCA_BANK_TYPES)
		return NULL;

	return smca_names[t];
}

enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
{
	struct smca_bank *b;

	if (bank >= MAX_NR_BANKS)
		return N_SMCA_BANK_TYPES;

	b = &per_cpu(smca_banks, cpu)[bank];
	if (!b->hwid)
		return N_SMCA_BANK_TYPES;

	return b->hwid->bank_type;
}
EXPORT_SYMBOL_GPL(smca_get_bank_type);
static const struct smca_hwid smca_hwid_mcatypes[] = {
	/* { bank_type, hwid_mcatype } */

	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0)	},

	/* ZN Core (HWID=0xB0) MCA types */
	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0)	},
	{ SMCA_LS_V2,	 HWID_MCATYPE(0xB0, 0x10)	},
	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1)	},
	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2)	},
	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3)	},
	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5)	},
	{ SMCA_FP,	 HWID_MCATYPE(0xB0, 0x6)	},
	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7)	},

	/* Data Fabric MCA types */
	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0)	},
	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1)	},
	{ SMCA_CS_V2,	 HWID_MCATYPE(0x2E, 0x2)	},
	{ SMCA_MA_LLC,	 HWID_MCATYPE(0x2E, 0x4)	},

	/* Unified Memory Controller MCA type */
	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0)	},
	{ SMCA_UMC_V2,	 HWID_MCATYPE(0x96, 0x1)	},

	/* Parameter Block MCA type */
	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0)	},

	/* Platform Security Processor MCA type */
	{ SMCA_PSP,	 HWID_MCATYPE(0xFF, 0x0)	},
	{ SMCA_PSP_V2,	 HWID_MCATYPE(0xFF, 0x1)	},

	/* System Management Unit MCA type */
	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0)	},
	{ SMCA_SMU_V2,	 HWID_MCATYPE(0x01, 0x1)	},

	/* Microprocessor 5 Unit MCA type */
	{ SMCA_MP5,	 HWID_MCATYPE(0x01, 0x2)	},

	{ SMCA_MPDMA,	 HWID_MCATYPE(0x01, 0x3)	},

	/* Northbridge IO Unit MCA type */
	{ SMCA_NBIO,	 HWID_MCATYPE(0x18, 0x0)	},

	/* PCI Express Unit MCA type */
	{ SMCA_PCIE,	 HWID_MCATYPE(0x46, 0x0)	},
	{ SMCA_PCIE_V2,	 HWID_MCATYPE(0x46, 0x1)	},

	{ SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0)	},
	{ SMCA_NBIF,	 HWID_MCATYPE(0x6C, 0x0)	},
	{ SMCA_SHUB,	 HWID_MCATYPE(0x80, 0x0)	},
	{ SMCA_SATA,	 HWID_MCATYPE(0xA8, 0x0)	},
	{ SMCA_USB,	 HWID_MCATYPE(0xAA, 0x0)	},
	{ SMCA_USR_DP,	 HWID_MCATYPE(0x170, 0x0)	},
	{ SMCA_USR_CP,	 HWID_MCATYPE(0x180, 0x0)	},
	{ SMCA_GMI_PCS,	 HWID_MCATYPE(0x241, 0x0)	},
	{ SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0)	},
	{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0)	},
	{ SMCA_GMI_PHY,	 HWID_MCATYPE(0x269, 0x0)	},
};
/*
 * In SMCA enabled processors, we can have multiple banks for a given IP type.
 * So to define a unique name for each bank, we use a temp c-string to append
 * the MCA_IPID[InstanceId] to type's name in get_name().
 *
 * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
 * is greater than 8 plus 1 (for underscore) plus length of longest type name.
 */
#define MAX_MCATYPE_NAME_LEN	30
static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);

/*
 * A list of the banks enabled on each logical CPU. Controls which respective
 * descriptors to initialize later in mce_threshold_create_device().
 */
static DEFINE_PER_CPU(u64, bank_map);

/* Map of banks that have more than MCA_MISC0 available. */
static DEFINE_PER_CPU(u64, smca_misc_banks_map);
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}

void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
{
	u32 low, high;

	/*
	 * For SMCA enabled processors, BLKPTR field of the first MISC register
	 * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
	 */
	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
		return;

	if (!(low & MCI_CONFIG_MCAX))
		return;

	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
		return;

	if (low & MASK_BLKPTR_LO)
		per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
}
static void smca_configure(unsigned int bank, unsigned int cpu)
{
	u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
	const struct smca_hwid *s_hwid;
	unsigned int i, hwid_mcatype;
	u32 high, low;
	u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);

	/* Set appropriate bits in MCA_CONFIG */
	if (!rdmsr_safe(smca_config, &low, &high)) {
		/*
		 * OS is required to set the MCAX bit to acknowledge that it is
		 * now using the new MSR ranges and new registers under each
		 * bank. It also means that the OS will configure deferred
		 * errors in the new MCx_CONFIG register. If the bit is not set,
		 * uncorrectable errors will cause a system panic.
		 *
		 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
		 */
		high |= BIT(0);

		/*
		 * SMCA sets the Deferred Error Interrupt type per bank.
		 *
		 * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
		 * if the DeferredIntType bit field is available.
		 *
		 * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
		 * high portion of the MSR). OS should set this to 0x1 to enable
		 * APIC based interrupt. First, check that no interrupt has been
		 * set.
		 */
		if ((low & BIT(5)) && !((high >> 5) & 0x3))
			high |= BIT(5);

		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));

		wrmsr(smca_config, low, high);
	}

	smca_set_misc_banks_map(bank, cpu);

	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
		return;
	}

	hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
				    (high & MCI_IPID_MCATYPE) >> 16);

	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
		s_hwid = &smca_hwid_mcatypes[i];

		if (hwid_mcatype == s_hwid->hwid_mcatype) {
			this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
			this_cpu_ptr(smca_banks)[bank].id = low;
			this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++;
			break;
		}
	}
}
struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
};
static inline bool is_shared_bank(int bank)
{
	/*
	 * Scalable MCA provides for only one core to have access to the MSRs of
	 * a shared bank.
	 */
	if (mce_flags.smca)
		return false;

	/* Bank 4 is for northbridge reporting and is thus shared */
	return (bank == 4);
}
static const char *bank4_names(const struct threshold_block *b)
{
	switch (b->address) {
	/* MSR4_MISC0 */
	case 0x00000413:
		return "dram";

	case 0xc0000408:
		return "ht_links";

	case 0xc0000409:
		return "l3_cache";

	default:
		WARN(1, "Funny MSR: 0x%08x\n", b->address);
		return "";
	}
}
static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}
static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	if (apic != msr) {
		/*
		 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
		 * the BIOS provides the value. The original field where LVT offset
		 * was set is reserved. Return early here:
		 */
		if (mce_flags.smca)
			return 0;

		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	return 1;
}
/* Reprogram MCx_MISC MSR behind this threshold bank. */
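/*
 * Note on the counter encoding (illustrative, derived from the code below):
 * the 12-bit error counter counts up and the threshold event fires once it
 * passes THRESHOLD_MAX, so a reset preloads it with
 * THRESHOLD_MAX - threshold_limit. E.g. with threshold_limit == 10 the
 * counter starts at 0xFF5 and the interrupt fires after 10 more errors.
 */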
static void threshold_restart_bank(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	/* sysfs write might race against an offline operation */
	if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
		return;

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

 done:
	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}
static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b			= b,
		.set_lvt_off		= 1,
		.lvt_off		= offset,
	};

	b->threshold_limit		= THRESHOLD_MAX;
	threshold_restart_bank(&tr);
}
static int setup_APIC_mce_threshold(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
	u32 low = 0, high = 0;
	int def_offset = -1, def_new;

	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
		return;

	def_new = (low & MASK_DEF_LVTOFF) >> 4;
	if (!(low & MASK_DEF_LVTOFF)) {
		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
		def_new = DEF_LVT_OFF;
		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
	}

	def_offset = setup_APIC_deferred_error(def_offset, def_new);
	if ((def_offset == def_new) &&
	    (deferred_error_int_vector != amd_deferred_error_interrupt))
		deferred_error_int_vector = amd_deferred_error_interrupt;

	if (!mce_flags.smca)
		low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;

	wrmsr(MSR_CU_DEF_ERR, low, high);
}
static u32 smca_get_block_address(unsigned int bank, unsigned int block,
				  unsigned int cpu)
{
	if (!block)
		return MSR_AMD64_SMCA_MCx_MISC(bank);

	if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
		return 0;

	return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
}
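/*
 * Sketch of the legacy (pre-SMCA) MISC block layout handled below: block 0 is
 * MCx_MISC itself; if its BLKPTR field (bits [31:24]) is non-zero, the extra
 * MISC registers start at MCG_XBLK_ADDR plus BLKPTR scaled by 8, which is
 * what the ">> 21" (i.e. ">> 24" then "<< 3") computes; further blocks follow
 * at consecutive MSR addresses.
 */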
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
			     unsigned int bank, unsigned int block,
			     unsigned int cpu)
{
	u32 addr = 0, offset = 0;

	if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
		return addr;

	if (mce_flags.smca)
		return smca_get_block_address(bank, block, cpu);

	/* Fall back to method we used for older processors: */
	switch (block) {
	case 0:
		addr = mca_msr_reg(bank, MCA_MISC);
		break;
	case 1:
		offset = ((low & MASK_BLKPTR_LO) >> 21);
		if (offset)
			addr = MCG_XBLK_ADDR + offset;
		break;
	default:
		addr = ++current_addr;
	}
	return addr;
}
static int
prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
			int offset, u32 misc_high)
{
	unsigned int cpu = smp_processor_id();
	u32 smca_low, smca_high;
	struct threshold_block b;
	int new;

	if (!block)
		per_cpu(bank_map, cpu) |= BIT_ULL(bank);

	memset(&b, 0, sizeof(b));
	b.cpu			= cpu;
	b.bank			= bank;
	b.block			= block;
	b.address		= addr;
	b.interrupt_capable	= lvt_interrupt_supported(bank, misc_high);

	if (!b.interrupt_capable)
		goto done;

	b.interrupt_enable = 1;

	if (!mce_flags.smca) {
		new = (misc_high & MASK_LVTOFF_HI) >> 20;
		goto set_offset;
	}

	/* Gather LVT offset for thresholding: */
	if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
		goto out;

	new = (smca_low & SMCA_THR_LVT_OFF) >> 12;

set_offset:
	offset = setup_APIC_mce_threshold(offset, new);
	if (offset == new)
		thresholding_irq_en = true;

done:
	mce_threshold_block_init(&b, offset);

out:
	return offset;
}
bool amd_filter_mce(struct mce *m)
{
	enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* See Family 17h Models 10h-2Fh Erratum #1114. */
	if (c->x86 == 0x17 &&
	    c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
	    bank_type == SMCA_IF && XEC(m->status, 0x3f) == 10)
		return true;

	/* NB GART TLB error reporting is disabled by default. */
	if (c->x86 < 0x17) {
		if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5)
			return true;
	}

	return false;
}
/*
 * Turn off thresholding banks for the following conditions:
 * - MC4_MISC thresholding is not supported on Family 0x15.
 * - Prevent possible spurious interrupts from the IF bank on Family 0x17
 *   Models 0x10-0x2F due to Erratum #1114.
 */
static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
{
	int i, num_msrs;
	u64 hwcr;
	bool need_toggle;
	u32 msrs[NR_BLOCKS];

	if (c->x86 == 0x15 && bank == 4) {
		msrs[0] = 0x00000413; /* MC4_MISC0 */
		msrs[1] = 0xc0000408; /* MC4_MISC1 */
		num_msrs = 2;
	} else if (c->x86 == 0x17 &&
		   (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {

		if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF)
			return;

		msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
		num_msrs = 1;
	} else {
		return;
	}

	rdmsrl(MSR_K7_HWCR, hwcr);

	/* McStatusWrEn has to be set */
	need_toggle = !(hwcr & BIT(18));
	if (need_toggle)
		wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));

	/* Clear CntP bit safely */
	for (i = 0; i < num_msrs; i++)
		msr_clear_bit(msrs[i], 62);

	/* restore old settings */
	if (need_toggle)
		wrmsrl(MSR_K7_HWCR, hwcr);
}
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	unsigned int bank, block, cpu = smp_processor_id();
	u32 low = 0, high = 0, address = 0;
	int offset = -1;

	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
		if (mce_flags.smca)
			smca_configure(bank, cpu);

		disable_err_thresholding(c, bank);

		for (block = 0; block < NR_BLOCKS; ++block) {
			address = get_block_address(address, low, high, bank, block, cpu);
			if (!address)
				break;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			offset = prepare_threshold_block(bank, block, address, offset, high);
		}
	}

	if (mce_flags.succor)
		deferred_error_interrupt_enable(c);
}
/*
 * DRAM ECC errors are reported in the Northbridge (bank 4) with
 * Extended Error Code 8.
 */
static bool legacy_mce_is_memory_error(struct mce *m)
{
	return m->bank == 4 && XEC(m->status, 0x1f) == 8;
}

/*
 * DRAM ECC errors are reported in Unified Memory Controllers with
 * Extended Error Code 0.
 */
static bool smca_mce_is_memory_error(struct mce *m)
{
	enum smca_bank_types bank_type;

	if (XEC(m->status, 0x3f))
		return false;

	bank_type = smca_get_bank_type(m->extcpu, m->bank);

	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
}

bool amd_mce_is_memory_error(struct mce *m)
{
	if (mce_flags.smca)
		return smca_mce_is_memory_error(m);

	return legacy_mce_is_memory_error(m);
}
/*
 * AMD systems do not have an explicit indicator that the value in MCA_ADDR is
 * a system physical address. Therefore, individual cases need to be detected.
 * Future cases and checks will be added as needed.
 *
 * 1) General case
 *	a) Assume address is not usable.
 * 2) Poison errors
 *	a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy
 *	   northbridge (bank 4).
 *	b) Refers to poison consumption in the core. Does not include "no action",
 *	   "action optional", or "deferred" error severities.
 *	c) Will include a usable address so that immediate action can be taken.
 * 3) Northbridge DRAM ECC errors
 *	a) Reported in legacy bank 4 with extended error code (XEC) 8.
 *	b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore,
 *	   this bit should not be checked.
 *
 * NOTE: SMCA UMC memory errors fall into case #1.
 */
bool amd_mce_usable_address(struct mce *m)
{
	/* Check special northbridge case 3) first. */
	if (!mce_flags.smca) {
		if (legacy_mce_is_memory_error(m))
			return true;
		else if (m->bank == 4)
			return false;
	}

	/* Check poison bit for all other bank types. */
	if (m->status & MCI_STATUS_POISON)
		return true;

	/* Assume address is not usable for all others. */
	return false;
}
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
	struct mce_hw_err err;
	struct mce *m = &err.m;

	mce_prep_record(&err);

	m->status = status;
	m->misc   = misc;
	m->bank   = bank;

	if (m->status & MCI_STATUS_ADDRV) {
		m->addr = addr;

		smca_extract_err_addr(m);
	}

	if (mce_flags.smca) {
		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);

		if (m->status & MCI_STATUS_SYNDV) {
			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
			rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
			rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
		}
	}

	mce_log(&err);
}
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
	inc_irq_stat(irq_deferred_error_count);
	deferred_error_int_vector();
	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
	apic_eoi();
}
/*
 * Returns true if the logged error is deferred. False, otherwise.
 */
static bool
_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
{
	u64 status, addr = 0;

	rdmsrl(msr_stat, status);
	if (!(status & MCI_STATUS_VAL))
		return false;

	if (status & MCI_STATUS_ADDRV)
		rdmsrl(msr_addr, addr);

	__log_error(bank, status, addr, misc);

	wrmsrl(msr_stat, 0);

	return status & MCI_STATUS_DEFERRED;
}
static bool _log_error_deferred(unsigned int bank, u32 misc)
{
	if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
			     mca_msr_reg(bank, MCA_ADDR), misc))
		return false;

	/*
	 * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
	 * Return true here to avoid accessing these registers.
	 */
	if (!mce_flags.smca)
		return true;

	/* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
	wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);

	return true;
}
/*
 * We have three scenarios for checking for Deferred errors:
 *
 * 1) Non-SMCA systems check MCA_STATUS and log error if found.
 * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
 *    clear MCA_DESTAT.
 * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
 *    log it.
 */
static void log_error_deferred(unsigned int bank)
{
	if (_log_error_deferred(bank, 0))
		return;

	/*
	 * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
	 * for a valid error.
	 */
	_log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
			MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
}
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
	unsigned int bank;

	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
		log_error_deferred(bank);
}
static void log_error_thresholding(unsigned int bank, u64 misc)
{
	_log_error_deferred(bank, misc);
}

static void log_and_reset_block(struct threshold_block *block)
{
	struct thresh_restart tr;
	u32 low = 0, high = 0;

	if (!block)
		return;

	if (rdmsr_safe(block->address, &low, &high))
		return;

	if (!(high & MASK_OVERFLOW_HI))
		return;

	/* Log the MCE which caused the threshold event. */
	log_error_thresholding(block->bank, ((u64)high << 32) | low);

	/* Reset threshold block after logging error. */
	memset(&tr, 0, sizeof(tr));
	tr.b = block;

	threshold_restart_bank(&tr);
}
/*
 * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
 * goes off when error_count reaches threshold_limit.
 */
static void amd_threshold_interrupt(void)
{
	struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
	struct threshold_bank **bp = this_cpu_read(threshold_banks);
	unsigned int bank, cpu = smp_processor_id();

	/*
	 * Validate that the threshold bank has been initialized already. The
	 * handler is installed at boot time, but on a hotplug event the
	 * interrupt might fire before the data has been initialized.
	 */
	if (!bp)
		return;

	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
		if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
			continue;

		first_block = bp[bank]->blocks;
		if (!first_block)
			continue;

		/*
		 * The first block is also the head of the list. Check it first
		 * before iterating over the rest.
		 */
		log_and_reset_block(first_block);
		list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
			log_and_reset_block(block);
	}
}
struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)						\
static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)
static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b = b;

	if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
		return -ENODEV;

	return size;
}
static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
		return -ENODEV;

	return size;
}
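/*
 * Worked example for the readout below (illustrative values): with
 * threshold_limit == 10, threshold_restart_bank() preloaded the counter with
 * 0xFFF - 10 = 0xFF5. After three errors the hardware field reads 0xFF8, so
 * (hi & THRESHOLD_MAX) - (THRESHOLD_MAX - threshold_limit) == 0xFF8 - 0xFF5
 * == 3 errors since the last reset.
 */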
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	u32 lo, hi;

	/* CPU might be offline by now */
	if (rdmsr_on_cpu(b->cpu, b->address, &lo, &hi))
		return -ENODEV;

	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
				     (THRESHOLD_MAX - b->threshold_limit)));
}
static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};
#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr	= {.name = __stringify(val), .mode = 0644 },		\
	.show	= show_## val,						\
	.store	= store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};
ATTRIBUTE_GROUPS(default);
#define to_block(k)	container_of(k, struct threshold_block, kobj)
#define to_attr(a)	container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}
static const struct sysfs_ops threshold_ops = {
	.show			= show,
	.store			= store,
};

static void threshold_block_release(struct kobject *kobj);

static const struct kobj_type threshold_ktype = {
	.sysfs_ops		= &threshold_ops,
	.default_groups		= default_groups,
	.release		= threshold_block_release,
};
static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b)
{
	enum smca_bank_types bank_type;

	if (!mce_flags.smca) {
		if (b && bank == 4)
			return bank4_names(b);

		return th_names[bank];
	}

	bank_type = smca_get_bank_type(cpu, bank);
	if (bank_type >= N_SMCA_BANK_TYPES)
		return NULL;

	if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
		if (b->block < ARRAY_SIZE(smca_umc_block_names))
			return smca_umc_block_names[b->block];
		return NULL;
	}

	if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
		return smca_get_name(bank_type);

	snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
		 "%s_%u", smca_get_name(bank_type),
		 per_cpu(smca_banks, cpu)[bank].sysfs_id);
	return buf_mcatype;
}
static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
				     unsigned int bank, unsigned int block,
				     u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= this_cpu_read(mce_num_banks)) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe(address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI)  ||
	     (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block		= block;
	b->bank			= bank;
	b->cpu			= cpu;
	b->address		= address;
	b->interrupt_enable	= 0;
	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
	b->threshold_limit	= THRESHOLD_MAX;

	if (b->interrupt_capable) {
		default_attrs[2] = &interrupt_enable.attr;
		b->interrupt_enable = 1;
	} else {
		default_attrs[2] = NULL;
	}

	INIT_LIST_HEAD(&b->miscj);

	/* This is safe as @tb is not visible yet */
	if (tb->blocks)
		list_add(&b->miscj, &tb->blocks->miscj);
	else
		tb->blocks = b;

	err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
	if (err)
		goto out_free;
recurse:
	address = get_block_address(address, low, high, bank, ++block, cpu);
	if (!address)
		return 0;

	err = allocate_threshold_blocks(cpu, tb, bank, block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return 0;

out_free:
	if (b) {
		list_del(&b->miscj);
		kobject_put(&b->kobj);
	}
	return err;
}
static int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	list_for_each_entry_safe(pos, tmp, head, miscj) {

		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}
static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
				 unsigned int bank)
{
	struct device *dev = this_cpu_read(mce_device);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	const char *name = get_name(cpu, bank, NULL);
	int err = 0;

	if (!dev)
		return -ENODEV;

	if (is_shared_bank(bank)) {
		nb = node_to_amd_nb(topology_amd_node_id(cpu));

		/* threshold descriptor already initialized on this node? */
		if (nb && nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			bp[bank] = b;
			refcount_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	/* Associate the bank with the per-CPU MCE device */
	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	if (is_shared_bank(bank)) {
		b->shared = 1;
		refcount_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		if (nb) {
			WARN_ON(nb->bank4);
			nb->bank4 = b;
		}
	}

	err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
	if (err)
		goto out_kobj;

	bp[bank] = b;
	return 0;

out_kobj:
	kobject_put(b->kobj);
out_free:
	kfree(b);
out:
	return err;
}
static void threshold_block_release(struct kobject *kobj)
{
	kfree(to_block(kobj));
}

static void deallocate_threshold_blocks(struct threshold_bank *bank)
{
	struct threshold_block *pos, *tmp;

	list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) {
		list_del(&pos->miscj);
		kobject_put(&pos->kobj);
	}

	kobject_put(&bank->blocks->kobj);
}
static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_put(b->kobj);

	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_put(b->kobj);
}
static void threshold_remove_bank(struct threshold_bank *bank)
{
	struct amd_northbridge *nb;

	if (!bank->blocks)
		goto out_free;

	if (!bank->shared)
		goto out_dealloc;

	if (!refcount_dec_and_test(&bank->cpus)) {
		__threshold_remove_blocks(bank);
		return;
	} else {
		/*
		 * The last CPU on this node using the shared bank is going
		 * away, remove that bank now.
		 */
		nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id()));
		nb->bank4 = NULL;
	}

out_dealloc:
	deallocate_threshold_blocks(bank);

out_free:
	kobject_put(bank->kobj);
	kfree(bank);
}
static void __threshold_remove_device(struct threshold_bank **bp)
{
	unsigned int bank, numbanks = this_cpu_read(mce_num_banks);

	for (bank = 0; bank < numbanks; bank++) {
		if (!bp[bank])
			continue;

		threshold_remove_bank(bp[bank]);
		bp[bank] = NULL;
	}
	kfree(bp);
}
int mce_threshold_remove_device(unsigned int cpu)
{
	struct threshold_bank **bp = this_cpu_read(threshold_banks);

	if (!bp)
		return 0;

	/*
	 * Clear the pointer before cleaning up, so that the interrupt won't
	 * touch anything of this.
	 */
	this_cpu_write(threshold_banks, NULL);

	__threshold_remove_device(bp);
	return 0;
}
/**
 * mce_threshold_create_device - Create the per-CPU MCE threshold device
 * @cpu:	The plugged in CPU
 *
 * Create directories and files for all valid threshold banks.
 *
 * This is invoked from the CPU hotplug callback which was installed in
 * mcheck_init_device(). The invocation happens in context of the hotplug
 * thread running on @cpu.  The callback is invoked on all CPUs which are
 * online when the callback is installed or during a real hotplug event.
 */
int mce_threshold_create_device(unsigned int cpu)
{
	unsigned int numbanks, bank;
	struct threshold_bank **bp;
	int err;

	if (!mce_flags.amd_threshold)
		return 0;

	bp = this_cpu_read(threshold_banks);
	if (bp)
		return 0;

	numbanks = this_cpu_read(mce_num_banks);
	bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	for (bank = 0; bank < numbanks; ++bank) {
		if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
			continue;
		err = threshold_create_bank(bp, cpu, bank);
		if (err) {
			__threshold_remove_device(bp);
			return err;
		}
	}
	this_cpu_write(threshold_banks, bp);

	if (thresholding_irq_en)
		mce_threshold_vector = amd_threshold_interrupt;

	return 0;
}