// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

static struct irq_work mce_ue_event_irq_work = {
	.func = machine_check_ue_irq_work,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
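
/*
 * Register / unregister callbacks on this list to be notified, from
 * process context, of machine check events queued by the UE handling
 * path below.
 */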
int mce_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_register_notifier);

int mce_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_notifier);
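
/*
 * Copy the error type and the type-specific sub-type from the decoded
 * mce_error_info into the per-cpu machine_check_event record.
 */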
static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			mce->u.ue_error.ignore_event = mce_err->ignore_event;
			machine_check_ue_event(mce);
		}
	}
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 * return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handle routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}
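
/* Drop the most recently logged MCE event without copying it out. */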
void release_mce_event(void)
{
	get_mce_event(NULL, true);
}
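
/* irq_work callback: defer UE processing to the workqueue (process context). */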
static void machine_check_ue_irq_work(struct irq_work *work)
{
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	irq_work_queue(&mce_ue_event_irq_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}
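
/*
 * If the NIP that took the UE has an exception table fixup, mark the
 * event to be ignored and branch to the fixup so the caller can
 * recover (e.g. a machine-check-safe copy from a poisoned page).
 */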
void mce_common_process_ue(struct pt_regs *regs,
			   struct mce_error_info *mce_err)
{
	const struct exception_table_entry *entry;

	entry = search_kernel_exception_table(regs->nip);
	if (entry) {
		mce_err->ignore_event = true;
		regs->nip = extable_fixup(entry);
	}
}

/*
 * Process pending MCE events from the MCE event queue. This function will be
 * called during syscall exit.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well.
		 *
		 * Don't report this machine check because the caller has
		 * asked us to ignore the event, it has a fixup handler which
		 * will do the appropriate error handling and reporting.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.ignore_event) {
				__this_cpu_dec(mce_ue_count);
				continue;
			}

			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}

/*
 * Process pending MCE events from the MCE event queue. This function will be
 * called during syscall exit.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);

		if (evt->error_type == MCE_ERROR_TYPE_UE &&
		    evt->u.ue_error.ignore_event) {
			__this_cpu_dec(mce_queue_count);
			continue;
		}
		machine_check_print_event_info(evt, false, false);
		__this_cpu_dec(mce_queue_count);
	}
}
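
/*
 * Print a human-readable description of a machine check event to the
 * console. Severity selects the printk level; effective and physical
 * addresses are printed only when the event provides them.
 */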
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type, *initiator;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->initiator) {
	case MCE_INITIATOR_CPU:
		initiator = "CPU";
		break;
	case MCE_INITIATOR_PCI:
		initiator = "PCI";
		break;
	case MCE_INITIATOR_ISA:
		initiator = "ISA";
		break;
	case MCE_INITIATOR_MEMORY:
		initiator = "Memory";
		break;
	case MCE_INITIATOR_POWERMGM:
		initiator = "Power Management";
		break;
	case MCE_INITIATOR_UNKNOWN:
	default:
		initiator = "Unknown";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	case MCE_ERROR_TYPE_DCACHE:
		err_type = "D-Cache";
		subtype = "Unknown";
		break;
	case MCE_ERROR_TYPE_ICACHE:
		err_type = "I-Cache";
		subtype = "Unknown";
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
		level, evt->cpu, sevstr, in_guest ? "Guest" : "",
		err_type, subtype, dar_str,
		evt->disposition == MCE_DISPOSITION_RECOVERED ?
		"Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
			level, evt->cpu, current->pid, current->comm,
			in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
			level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);

#ifdef CONFIG_PPC_BOOK3S_64
	/* Display faulty slb contents for SLB errors. */
	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contains srr0 and srr1.
 */
long notrace machine_check_early(struct pt_regs *regs)
{
	long handled = 0;
	u8 ftrace_enabled = this_cpu_get_ftrace_enabled();

	this_cpu_set_ftrace_enabled(0);
	/* Do not use nmi_enter/exit for pseries hpte guest */
	if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
		nmi_enter();

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);

	if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
		nmi_exit();

	this_cpu_set_ftrace_enabled(ftrace_enabled);

	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;
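
/*
 * Decide how the HMER_DEBUG_TRIG bit is used on this CPU: first from the
 * "ibm,hmi-special-triggers" device tree property, then by falling back
 * to the POWER9 PVR revision.
 */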
static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	local_paca->hmi_irqs++;

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}