// SPDX-License-Identifier: GPL-2.0-only
/*
 * MCE grading rules.
 * Copyright 2008, 2009 Intel Corporation.
 */
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>

#include <asm/mce.h>
#include <asm/cpu_device_id.h>
#include <asm/traps.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>

#include "internal.h"
/*
 * Grade an mce by severity. In general the most severe ones are processed
 * first. Since there are quite a lot of combinations, test the bits in a
 * table-driven way. The rules are simply processed in order; the first
 * match wins.
 *
 * Note this is only used for machine check exceptions, the corrected
 * errors use much simpler rules. The exceptions still check for the corrected
 * errors, but only to leave them alone for the CMCI handler (except for
 * panic situations, of course).
 */
enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
static struct severity {
        u64 mask;
        u64 result;
        unsigned char sev;
        unsigned short mcgmask;
        unsigned short mcgres;
        unsigned char ser;
        unsigned char context;
        unsigned char excp;
        unsigned char covered;
        unsigned int cpu_vfm;
        unsigned char cpu_minstepping;
        unsigned char bank_lo, bank_hi;
        char *msg;
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
#define VFM_STEPPING(m, s) .cpu_vfm = m, .cpu_minstepping = s
#define KERNEL          .context = IN_KERNEL
#define USER            .context = IN_USER
#define KERNEL_RECOV    .context = IN_KERNEL_RECOV
#define SER             .ser = SER_REQUIRED
#define NOSER           .ser = NO_SER
#define EXCP            .excp = EXCP_CONTEXT
#define NOEXCP          .excp = NO_EXCP
#define BITCLR(x)       .mask = x, .result = 0
#define BITSET(x)       .mask = x, .result = x
#define MCGMASK(x, y)   .mcgmask = x, .mcgres = y
#define MASK(x, y)      .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
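
/*
 * Worked example (editor's illustration, not from the original source):
 * a table entry written as
 *
 *      MCESEV(
 *              PANIC, "Processor context corrupt",
 *              BITSET(MCI_STATUS_PCC)
 *              ),
 *
 * expands to
 *
 *      { .sev = MCE_PANIC_SEVERITY, .msg = "Processor context corrupt",
 *        .mask = MCI_STATUS_PCC, .result = MCI_STATUS_PCC },
 *
 * i.e. the rule matches any MCi_STATUS value whose PCC bit is set,
 * regardless of the other bits.
 */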
        MCESEV(
                NO, "Invalid",
                BITCLR(MCI_STATUS_VAL)
                ),
        MCESEV(
                NO, "Not enabled",
                EXCP, BITCLR(MCI_STATUS_EN)
                ),
        MCESEV(
                PANIC, "Processor context corrupt",
                BITSET(MCI_STATUS_PCC)
                ),
        /* When MCIP is not set something is very confused */
        MCESEV(
                PANIC, "MCIP not set in MCA handler",
                EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
                ),
        /* Neither restart nor error IP -- no chance to recover -> PANIC */
        MCESEV(
                PANIC, "Neither restart nor error IP",
                EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
                ),
        MCESEV(
                PANIC, "In kernel and no restart IP",
                EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
                ),
        MCESEV(
                PANIC, "In kernel and no restart IP",
                EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
                ),
        MCESEV(
                KEEP, "Corrected error",
                NOSER, BITCLR(MCI_STATUS_UC)
                ),
        /*
         * known AO MCACODs reported via MCE or CMC:
         *
         * SRAO could be signaled either via a machine check exception or
         * CMCI with the corresponding bit S 1 or 0. So we don't need to
         * check bit S for SRAO.
         */
        MCESEV(
                AO, "Action optional: memory scrubbing error",
                SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
                ),
        MCESEV(
                AO, "Action optional: last level cache writeback error",
                SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
                ),
        /*
         * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
         * to report uncorrected errors using CMCI with a special signature:
         * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
         * in one of the memory controller banks.
         * Set severity to "AO" for same action as normal patrol scrub error.
         */
        MCESEV(
                AO, "Uncorrected Patrol Scrub Error",
                SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
                VFM_STEPPING(INTEL_SKYLAKE_X, 4), BANK_RANGE(13, 18)
        ),
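        /*
         * Decoding note (editor's addition): MCACOD occupies MCi_STATUS
         * bits 15:0 and MSCOD bits 31:16. The mask 0xffffeff0 compares all
         * MSCOD bits and every MCACOD bit except bit 12 and bits 3:0 (the
         * "X" don't-care positions above); the result 0x001000c0 requires
         * MSCOD=0x0010 and the fixed MCACOD pattern 1100 0000. MCI_ADDR
         * (ADDRV|MISCV) must be set, and MCI_STATUS_UC is in the mask but
         * not the result, enforcing the UC=0 part of the signature.
         */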
        /* ignore OVER for UCNA */
        MCESEV(
                UCNA, "Uncorrected no action required",
                SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
                ),
        MCESEV(
                PANIC, "Illegal combination (UCNA with AR=1)",
                SER,
                MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
                ),
        MCESEV(
                KEEP, "Non signaled machine check",
                SER, BITCLR(MCI_STATUS_S)
                ),
        MCESEV(
                PANIC, "Action required with lost events",
                SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
                ),
        /* known AR MCACODs: */
#ifdef CONFIG_MEMORY_FAILURE
        MCESEV(
                KEEP, "Action required but unaffected thread is continuable",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
                MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
                ),
        MCESEV(
                AR, "Action required: data load in error recoverable area of kernel",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
                KERNEL_RECOV
                ),
        MCESEV(
                AR, "Action required: data load error in a user process",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
                USER
                ),
        MCESEV(
                AR, "Action required: instruction fetch error in a user process",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
                USER
                ),
        MCESEV(
                AR, "Data load error in SEAM non-root mode",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
                MCGMASK(MCG_STATUS_SEAM_NR, MCG_STATUS_SEAM_NR),
                KERNEL
                ),
        MCESEV(
                AR, "Instruction fetch error in SEAM non-root mode",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
                MCGMASK(MCG_STATUS_SEAM_NR, MCG_STATUS_SEAM_NR),
                KERNEL
                ),
        MCESEV(
                PANIC, "Data load in unrecoverable area of kernel",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
                KERNEL
                ),
        MCESEV(
                PANIC, "Instruction fetch error in kernel",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
                KERNEL
                ),
#endif
        MCESEV(
                PANIC, "Action required: unknown MCACOD",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
                ),
        MCESEV(
                SOME, "Action optional: unknown MCACOD",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
                ),
        MCESEV(
                SOME, "Action optional with lost events",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
                ),
        MCESEV(
                PANIC, "Overflowed uncorrected",
                BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
                ),
        MCESEV(
                PANIC, "Uncorrected in kernel",
                BITSET(MCI_STATUS_UC),
                KERNEL
                ),
        MCESEV(
                UC, "Uncorrected",
                BITSET(MCI_STATUS_UC)
                ),
        MCESEV(
                SOME, "No match",
                BITSET(0)
                )       /* always matches. keep at end */
};
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
                             (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
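
/*
 * Editor's note (not in the original source): mc_recoverable() is true only
 * when MCG_STATUS reports both a valid restart IP (RIPV) and a valid error
 * IP (EIPV), i.e. the saved context is trustworthy enough for
 * error_context() below to go looking for an exception fixup.
 */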
static bool is_copy_from_user(struct pt_regs *regs)
{
        u8 insn_buf[MAX_INSN_SIZE];
        unsigned long addr;
        struct insn insn;
        int ret;

        if (!regs)
                return false;

        if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
                return false;

        ret = insn_decode_kernel(&insn, insn_buf);
        if (ret < 0)
                return false;

        switch (insn.opcode.value) {
        /* MOV mem,reg */
        case 0x8A: case 0x8B:
        /* MOVZ mem,reg */
        case 0xB60F: case 0xB70F:
                addr = (unsigned long)insn_get_addr_ref(&insn, regs);
                break;
        /* REP MOVS */
        case 0xA4: case 0xA5:
                addr = regs->si;
                break;
        default:
                return false;
        }

        if (fault_in_kernel_space(addr))
                return false;

        current->mce_vaddr = (void __user *)addr;

        return true;
}
/*
 * If mcgstatus indicated that ip/cs on the stack were
 * no good, then "m->cs" will be zero and we will have
 * to assume the worst case (IN_KERNEL) as we actually
 * have no idea what we were executing when the machine
 * check hit.
 * If we do have a good "m->cs" (or a faked one in the
 * case we were executing in VM86 mode) we can use it to
 * distinguish an exception taken in user space from one
 * taken in the kernel.
 */
static noinstr int error_context(struct mce *m, struct pt_regs *regs)
{
        int fixup_type;
        bool copy_user;

        if ((m->cs & 3) == 3)
                return IN_USER;

        if (!mc_recoverable(m->mcgstatus))
                return IN_KERNEL;

        /* Allow instrumentation around external facilities usage. */
        instrumentation_begin();
        fixup_type = ex_get_fixup_type(m->ip);
        copy_user  = is_copy_from_user(regs);
        instrumentation_end();

        switch (fixup_type) {
        case EX_TYPE_UACCESS:
                if (!copy_user)
                        return IN_KERNEL;
                m->kflags |= MCE_IN_KERNEL_COPYIN;
                fallthrough;

        case EX_TYPE_FAULT_MCE_SAFE:
        case EX_TYPE_DEFAULT_MCE_SAFE:
                m->kflags |= MCE_IN_KERNEL_RECOV;
                return IN_KERNEL_RECOV;

        default:
                return IN_KERNEL;
        }
}
/* See AMD PPR(s) section Machine Check Error Handling. */
static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
{
        char *panic_msg = NULL;
        int ret;

        /*
         * Default return value: Action required, the error must be handled
         * immediately.
         */
        ret = MCE_AR_SEVERITY;

        /* Processor Context Corrupt, no need to fumble too much, die! */
        if (m->status & MCI_STATUS_PCC) {
                panic_msg = "Processor Context Corrupt";
                ret = MCE_PANIC_SEVERITY;
                goto out;
        }

        if (m->status & MCI_STATUS_DEFERRED) {
                ret = MCE_DEFERRED_SEVERITY;
                goto out;
        }

        /*
         * If the UC bit is not set, the system either corrected or deferred
         * the error. No action will be required after logging the error.
         */
        if (!(m->status & MCI_STATUS_UC)) {
                ret = MCE_KEEP_SEVERITY;
                goto out;
        }

        /*
         * On MCA overflow, without the MCA overflow recovery feature the
         * system will not be able to recover, panic.
         */
        if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) {
                panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery";
                ret = MCE_PANIC_SEVERITY;
                goto out;
        }

        if (!mce_flags.succor) {
                panic_msg = "Uncorrected error without MCA Recovery";
                ret = MCE_PANIC_SEVERITY;
                goto out;
        }

        if (error_context(m, regs) == IN_KERNEL) {
                panic_msg = "Uncorrected unrecoverable error in kernel context";
                ret = MCE_PANIC_SEVERITY;
        }

out:
        if (msg && panic_msg)
                *msg = panic_msg;

        return ret;
}
static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
{
        enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
        enum context ctx = error_context(m, regs);
        struct severity *s;

        for (s = severities;; s++) {
                if ((m->status & s->mask) != s->result)
                        continue;
                if ((m->mcgstatus & s->mcgmask) != s->mcgres)
                        continue;
                if (s->ser == SER_REQUIRED && !mca_cfg.ser)
                        continue;
                if (s->ser == NO_SER && mca_cfg.ser)
                        continue;
                if (s->context && ctx != s->context)
                        continue;
                if (s->excp && excp != s->excp)
                        continue;
                if (s->cpu_vfm && boot_cpu_data.x86_vfm != s->cpu_vfm)
                        continue;
                if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
                        continue;
                if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
                        continue;
                if (msg)
                        *msg = s->msg;
                s->covered = 1;

                return s->sev;
        }
}
int noinstr mce_severity(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
            boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
                return mce_severity_amd(m, regs, msg, is_excp);
        else
                return mce_severity_intel(m, regs, msg, is_excp);
}
#ifdef CONFIG_DEBUG_FS
static void *s_start(struct seq_file *f, loff_t *pos)
{
        if (*pos >= ARRAY_SIZE(severities))
                return NULL;

        return &severities[*pos];
}

static void *s_next(struct seq_file *f, void *data, loff_t *pos)
{
        if (++(*pos) >= ARRAY_SIZE(severities))
                return NULL;

        return &severities[*pos];
}
static void s_stop(struct seq_file *f, void *data)
{
}

static int s_show(struct seq_file *f, void *data)
{
        struct severity *ser = data;

        seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
        return 0;
}

static const struct seq_operations severities_seq_ops = {
        .start  = s_start,
        .next   = s_next,
        .stop   = s_stop,
        .show   = s_show,
};

static int severities_coverage_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &severities_seq_ops);
}
static ssize_t severities_coverage_write(struct file *file,
                                         const char __user *ubuf,
                                         size_t count, loff_t *ppos)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(severities); i++)
                severities[i].covered = 0;

        return count;
}
static const struct file_operations severities_coverage_fops = {
        .open           = severities_coverage_open,
        .release        = seq_release,
        .read           = seq_read,
        .write          = severities_coverage_write,
        .llseek         = seq_lseek,
};
static int __init severities_debugfs_init(void)
{
        struct dentry *dmce;

        dmce = mce_get_debugfs_dir();

        debugfs_create_file("severities-coverage", 0444, dmce, NULL,
                            &severities_coverage_fops);
        return 0;
}
late_initcall(severities_debugfs_init);
#endif /* CONFIG_DEBUG_FS */