// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
                                        mce_ue_event_queue);
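
/*
 * Event flow in this file: the real-mode machine check handler logs events
 * into mce_event via save_mce_event(). Anything that cannot be done safely
 * from that context (printing, memory_failure()) is deferred by copying the
 * event into mce_event_queue or mce_ue_event_queue and processing it later
 * from irq_work and workqueue context.
 */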

static void machine_check_process_queued_event(struct irq_work *work);
static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
        .func = machine_check_process_queued_event,
};

static struct irq_work mce_ue_event_irq_work = {
        .func = machine_check_ue_irq_work,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static void mce_set_error_info(struct machine_check_event *mce,
                               struct mce_error_info *mce_err)
{
        mce->error_type = mce_err->error_type;
        switch (mce_err->error_type) {
        case MCE_ERROR_TYPE_UE:
                mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
                break;
        case MCE_ERROR_TYPE_SLB:
                mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
                break;
        case MCE_ERROR_TYPE_ERAT:
                mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
                break;
        case MCE_ERROR_TYPE_TLB:
                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                break;
        case MCE_ERROR_TYPE_USER:
                mce->u.user_error.user_error_type = mce_err->u.user_error_type;
                break;
        case MCE_ERROR_TYPE_RA:
                mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
                break;
        case MCE_ERROR_TYPE_LINK:
                mce->u.link_error.link_error_type = mce_err->u.link_error_type;
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                break;
        }
}
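
/*
 * Only the union member selected by error_type above is valid; consumers of
 * struct machine_check_event must switch on error_type before reading the
 * type-specific fields.
 */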

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
                    struct mce_error_info *mce_err,
                    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
        int index = __this_cpu_inc_return(mce_nest_count) - 1;
        struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

        /*
         * Return if we don't have enough space to log mce event.
         * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
         * the check below will stop buffer overrun.
         */
        if (index >= MAX_MC_EVT)
                return;

        /* Populate generic machine check info */
        mce->version = MCE_V1;
        mce->srr0 = nip;
        mce->srr1 = regs->msr;
        mce->gpr3 = regs->gpr[3];
        mce->in_use = 1;
        mce->cpu = get_paca()->paca_index;

        /* Mark it recovered if we have handled it and MSR(RI=1). */
        if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

        mce->initiator = mce_err->initiator;
        mce->severity = mce_err->severity;
        mce->sync_error = mce_err->sync_error;
        mce->error_class = mce_err->error_class;

        /*
         * Populate the mce error_type and type-specific error_type.
         */
        mce_set_error_info(mce, mce_err);

        if (!addr)
                return;

        if (mce->error_type == MCE_ERROR_TYPE_TLB) {
                mce->u.tlb_error.effective_address_provided = true;
                mce->u.tlb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
                mce->u.slb_error.effective_address_provided = true;
                mce->u.slb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                mce->u.erat_error.effective_address_provided = true;
                mce->u.erat_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
                mce->u.user_error.effective_address_provided = true;
                mce->u.user_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
                mce->u.ra_error.effective_address_provided = true;
                mce->u.ra_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
                mce->u.link_error.effective_address_provided = true;
                mce->u.link_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                mce->u.ue_error.effective_address_provided = true;
                mce->u.ue_error.effective_address = addr;
                if (phys_addr != ULONG_MAX) {
                        mce->u.ue_error.physical_address_provided = true;
                        mce->u.ue_error.physical_address = phys_addr;
                        mce->u.ue_error.ignore_event = mce_err->ignore_event;
                        machine_check_ue_event(mce);
                }
        }
}
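
/*
 * Illustrative call sequence (a sketch, not taken verbatim from any platform
 * handler): a platform's real-mode machine check code decodes the error into
 * a struct mce_error_info and then records the event, e.g.
 *
 *      struct mce_error_info mce_err = { .severity = MCE_SEV_WARNING,
 *                                        .error_type = MCE_ERROR_TYPE_UE };
 *
 *      save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
 *
 * Only fields defined in asm/mce.h are implied here.
 */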

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 * return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by the platform-specific machine check
 * handling routine and by KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
        int index = __this_cpu_read(mce_nest_count) - 1;
        struct machine_check_event *mc_evt;
        int ret = 0;

        /* Sanity check */
        if (index < 0)
                return ret;

        /* Check if we have MCE info to process. */
        if (index < MAX_MC_EVT) {
                mc_evt = this_cpu_ptr(&mce_event[index]);
                /* Copy the event structure and release the original */
                if (mce)
                        *mce = *mc_evt;
                if (release)
                        mc_evt->in_use = 0;
                ret = 1;
        }
        /* Decrement the count to free the slot. */
        if (release)
                __this_cpu_dec(mce_nest_count);

        return ret;
}
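
/*
 * Typical consumption pattern (illustrative sketch only): a caller that wants
 * to inspect the most recent event without freeing its slot passes
 * release = false and frees it later:
 *
 *      struct machine_check_event evt;
 *
 *      if (get_mce_event(&evt, false)) {
 *              ... examine evt ...
 *              release_mce_event();
 *      }
 */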

void release_mce_event(void)
{
        get_mce_event(NULL, true);
}

static void machine_check_ue_irq_work(struct irq_work *work)
{
        schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE UE event which can then be handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
        int index;

        index = __this_cpu_inc_return(mce_ue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_ue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

        /* Queue work to process this event later. */
        irq_work_queue(&mce_ue_event_irq_work);
}

/*
 * Queue up the MCE event which can then be handled later.
 */
void machine_check_queue_event(void)
{
        int index;
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return;

        index = __this_cpu_inc_return(mce_queue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_queue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

        /* Queue irq work to process this event later. */
        irq_work_queue(&mce_event_process_work);
}
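
/*
 * Note on the two-stage deferral for UE events: machine check context cannot
 * call schedule_work() directly, so machine_check_ue_event() raises an
 * irq_work whose handler (machine_check_ue_irq_work) then schedules
 * mce_ue_event_work, so that memory_failure() runs in process context.
 */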

/*
 * Process pending UE MCE events from the MCE UE event queue. This runs from
 * workqueue (process) context, where it is safe to call memory_failure().
 */
static void machine_process_ue_event(struct work_struct *work)
{
        int index;
        struct machine_check_event *evt;

        while (__this_cpu_read(mce_ue_count) > 0) {
                index = __this_cpu_read(mce_ue_count) - 1;
                evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
                /*
                 * This should probably be queued elsewhere, but oh well.
                 *
                 * Don't report this machine check because the caller has
                 * asked us to ignore the event, it has a fixup handler which
                 * will do the appropriate error handling and reporting.
                 */
                if (evt->error_type == MCE_ERROR_TYPE_UE) {
                        if (evt->u.ue_error.ignore_event) {
                                __this_cpu_dec(mce_ue_count);
                                continue;
                        }

                        if (evt->u.ue_error.physical_address_provided) {
                                unsigned long pfn;

                                pfn = evt->u.ue_error.physical_address >>
                                        PAGE_SHIFT;
                                memory_failure(pfn, 0);
                        } else
                                pr_warn("Failed to identify bad address from "
                                        "where the uncorrectable error (UE) "
                                        "was generated\n");
                }
#endif
                __this_cpu_dec(mce_ue_count);
        }
}
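
/*
 * The physical address recorded in the event is byte-granular; shifting it
 * right by PAGE_SHIFT yields the page frame number that memory_failure()
 * expects, so recovery is then handled at page granularity.
 */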

/*
 * Process pending MCE events from the MCE event queue. This is the irq_work
 * handler queued by machine_check_queue_event(); it runs from irq_work
 * context once normal interrupts can be taken again.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
        int index;
        struct machine_check_event *evt;

        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

        /*
         * For now just print it to console.
         * TODO: log this error event to FSP or nvram.
         */
        while (__this_cpu_read(mce_queue_count) > 0) {
                index = __this_cpu_read(mce_queue_count) - 1;
                evt = this_cpu_ptr(&mce_event_queue[index]);

                if (evt->error_type == MCE_ERROR_TYPE_UE &&
                    evt->u.ue_error.ignore_event) {
                        __this_cpu_dec(mce_queue_count);
                        continue;
                }
                machine_check_print_event_info(evt, false, false);
                __this_cpu_dec(mce_queue_count);
        }
}
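
/*
 * Events flagged with ignore_event come from callers that have their own
 * fixup handler to do the error handling and reporting, so both queue
 * processors above drop such events instead of printing them.
 */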

void machine_check_print_event_info(struct machine_check_event *evt,
                                    bool user_mode, bool in_guest)
{
        const char *level, *sevstr, *subtype, *err_type, *initiator;
        uint64_t ea = 0, pa = 0;
        int n = 0;
        char dar_str[50];
        char pa_str[50];
        static const char *mc_ue_types[] = {
                "Indeterminate",
                "Instruction fetch",
                "Page table walk ifetch",
                "Load/Store",
                "Page table walk Load/Store",
        };
        static const char *mc_slb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_erat_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_tlb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_user_types[] = {
                "Indeterminate",
                "tlbie(l) invalid",
        };
        static const char *mc_ra_types[] = {
                "Indeterminate",
                "Instruction fetch (bad)",
                "Instruction fetch (foreign)",
                "Page table walk ifetch (bad)",
                "Page table walk ifetch (foreign)",
                "Load (bad)",
                "Store (bad)",
                "Page table walk Load/Store (bad)",
                "Page table walk Load/Store (foreign)",
                "Load/Store (foreign)",
        };
        static const char *mc_link_types[] = {
                "Indeterminate",
                "Instruction fetch (timeout)",
                "Page table walk ifetch (timeout)",
                "Load (timeout)",
                "Store (timeout)",
                "Page table walk Load/Store (timeout)",
        };
        static const char *mc_error_class[] = {
                "Unknown",
                "Hardware error",
                "Probable Hardware error (some chance of software cause)",
                "Software error",
                "Probable Software error (some chance of hardware cause)",
        };

        /* Print things out */
        if (evt->version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt->version);
                return;
        }
        switch (evt->severity) {
        case MCE_SEV_NO_ERROR:
                level = KERN_INFO;
                sevstr = "Harmless";
                break;
        case MCE_SEV_WARNING:
                level = KERN_WARNING;
                sevstr = "Warning";
                break;
        case MCE_SEV_SEVERE:
                level = KERN_ERR;
                sevstr = "Severe";
                break;
        case MCE_SEV_FATAL:
        default:
                level = KERN_ERR;
                sevstr = "Fatal";
                break;
        }

        switch (evt->initiator) {
        case MCE_INITIATOR_CPU:
                initiator = "CPU";
                break;
        case MCE_INITIATOR_PCI:
                initiator = "PCI";
                break;
        case MCE_INITIATOR_ISA:
                initiator = "ISA";
                break;
        case MCE_INITIATOR_MEMORY:
                initiator = "Memory";
                break;
        case MCE_INITIATOR_POWERMGM:
                initiator = "Power Management";
                break;
        case MCE_INITIATOR_UNKNOWN:
        default:
                initiator = "Unknown";
                break;
        }

        switch (evt->error_type) {
        case MCE_ERROR_TYPE_UE:
                err_type = "UE";
                subtype = evt->u.ue_error.ue_error_type <
                        ARRAY_SIZE(mc_ue_types) ?
                        mc_ue_types[evt->u.ue_error.ue_error_type]
                        : "Unknown";
                if (evt->u.ue_error.effective_address_provided)
                        ea = evt->u.ue_error.effective_address;
                if (evt->u.ue_error.physical_address_provided)
                        pa = evt->u.ue_error.physical_address;
                break;
        case MCE_ERROR_TYPE_SLB:
                err_type = "SLB";
                subtype = evt->u.slb_error.slb_error_type <
                        ARRAY_SIZE(mc_slb_types) ?
                        mc_slb_types[evt->u.slb_error.slb_error_type]
                        : "Unknown";
                if (evt->u.slb_error.effective_address_provided)
                        ea = evt->u.slb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_ERAT:
                err_type = "ERAT";
                subtype = evt->u.erat_error.erat_error_type <
                        ARRAY_SIZE(mc_erat_types) ?
                        mc_erat_types[evt->u.erat_error.erat_error_type]
                        : "Unknown";
                if (evt->u.erat_error.effective_address_provided)
                        ea = evt->u.erat_error.effective_address;
                break;
        case MCE_ERROR_TYPE_TLB:
                err_type = "TLB";
                subtype = evt->u.tlb_error.tlb_error_type <
                        ARRAY_SIZE(mc_tlb_types) ?
                        mc_tlb_types[evt->u.tlb_error.tlb_error_type]
                        : "Unknown";
                if (evt->u.tlb_error.effective_address_provided)
                        ea = evt->u.tlb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_USER:
                err_type = "User";
                subtype = evt->u.user_error.user_error_type <
                        ARRAY_SIZE(mc_user_types) ?
                        mc_user_types[evt->u.user_error.user_error_type]
                        : "Unknown";
                if (evt->u.user_error.effective_address_provided)
                        ea = evt->u.user_error.effective_address;
                break;
        case MCE_ERROR_TYPE_RA:
                err_type = "Real address";
                subtype = evt->u.ra_error.ra_error_type <
                        ARRAY_SIZE(mc_ra_types) ?
                        mc_ra_types[evt->u.ra_error.ra_error_type]
                        : "Unknown";
                if (evt->u.ra_error.effective_address_provided)
                        ea = evt->u.ra_error.effective_address;
                break;
        case MCE_ERROR_TYPE_LINK:
                err_type = "Link";
                subtype = evt->u.link_error.link_error_type <
                        ARRAY_SIZE(mc_link_types) ?
                        mc_link_types[evt->u.link_error.link_error_type]
                        : "Unknown";
                if (evt->u.link_error.effective_address_provided)
                        ea = evt->u.link_error.effective_address;
                break;
        case MCE_ERROR_TYPE_DCACHE:
                err_type = "D-Cache";
                subtype = "Unknown";
                break;
        case MCE_ERROR_TYPE_ICACHE:
                err_type = "I-Cache";
                subtype = "Unknown";
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                err_type = "Unknown";
                subtype = "";
                break;
        }

        dar_str[0] = pa_str[0] = '\0';
        if (ea && evt->srr0 != ea) {
                /* Load/Store address */
                n = sprintf(dar_str, "DAR: %016llx ", ea);
                if (pa)
                        sprintf(dar_str + n, "paddr: %016llx ", pa);
        } else if (pa) {
                sprintf(pa_str, " paddr: %016llx", pa);
        }

        printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
                level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
                err_type, subtype, dar_str,
                evt->disposition == MCE_DISPOSITION_RECOVERED ?
                "Recovered" : "Not recovered");

        if (in_guest || user_mode) {
                printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
                        level, evt->cpu, current->pid, current->comm,
                        in_guest ? "Guest " : "", evt->srr0, pa_str);
        } else {
                printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
                        level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
        }

        printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

        subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
                mc_error_class[evt->error_class] : "Unknown";
        printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);

#ifdef CONFIG_PPC_BOOK3S_64
        /* Display faulty slb contents for SLB errors. */
        if (evt->error_type == MCE_ERROR_TYPE_SLB)
                slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
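
/*
 * Example of the resulting console output (illustrative values only):
 *
 *   MCE: CPU0: machine check (Warning) Host SLB Multihit DAR: c000000001234560 [Recovered]
 *   MCE: CPU0: NIP: [c000000000123456] some_function+0x26/0x80
 *   MCE: CPU0: Initiator CPU
 *   MCE: CPU0: Hardware error
 */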

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1 respectively.
 */
long machine_check_early(struct pt_regs *regs)
{
        long handled = 0;

        hv_nmi_check_nonrecoverable(regs);

        /*
         * See if platform is capable of handling machine check.
         */
        if (ppc_md.machine_check_early)
                handled = ppc_md.machine_check_early(regs);

        return handled;
}
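
/*
 * In real mode the MMU is off, so only linearly-mapped data can be touched
 * safely and console output must wait; that is why events are only recorded
 * here and printed later from virtual mode via the queues above.
 */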

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
        DTRIG_UNKNOWN,
        DTRIG_VECTOR_CI,        /* need to emulate vector CI load instr */
        DTRIG_SUSPEND_ESCAPE,   /* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
        int pvr;
        struct device_node *cpun;
        struct property *prop = NULL;
        const char *str;

        /* First look in the device tree */
        preempt_disable();
        cpun = of_get_cpu_node(smp_processor_id(), NULL);
        if (cpun) {
                of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
                                            prop, str) {
                        if (strcmp(str, "bit17-vector-ci-load") == 0)
                                hmer_debug_trig_function = DTRIG_VECTOR_CI;
                        else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
                                hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                }
                of_node_put(cpun);
        }
        preempt_enable();

        /* If we found the property, don't look at PVR */
        if (prop)
                goto out;

        pvr = mfspr(SPRN_PVR);
        /* Check for POWER9 Nimbus (scale-out) */
        if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
                /* DD2.2 and later */
                if ((pvr & 0xfff) >= 0x202)
                        hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                /* DD2.0 and DD2.1 - used for vector CI load emulation */
                else if ((pvr & 0xfff) >= 0x200)
                        hmer_debug_trig_function = DTRIG_VECTOR_CI;
        }

 out:
        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                pr_debug("HMI debug trigger used for vector CI load\n");
                break;
        case DTRIG_SUSPEND_ESCAPE:
                pr_debug("HMI debug trigger used for TM suspend escape\n");
                break;
        default:
                break;
        }
        return 0;
}
__initcall(init_debug_trig_function);
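
/*
 * Note on the PVR decode above: PVR_VER() gives the processor version, and
 * the low 12 bits (masked with 0xfff) carry the chip revision, hence the
 * 0x200/0x202 comparisons for DD2.0/DD2.1 versus DD2.2 and later.
 */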

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
        unsigned long hmer = mfspr(SPRN_HMER);
        long ret = 0;

        /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
        if (!((hmer & HMER_DEBUG_TRIG)
              && hmer_debug_trig_function != DTRIG_UNKNOWN))
                return -1;

        hmer &= ~HMER_DEBUG_TRIG;
        /* HMER is a write-AND register */
        mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                /*
                 * Now to avoid problems with soft-disable we
                 * only do the emulation if we are coming from
                 * host user space.
                 */
                if (regs && user_mode(regs))
                        ret = local_paca->hmi_p9_special_emu = 1;
                break;

        default:
                break;
        }

        /*
         * See if any other HMI causes remain to be handled
         */
        if (hmer & mfspr(SPRN_HMEER))
                return -1;

        return ret;
}
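
/*
 * On the write-AND behaviour noted above: the value written to HMER is ANDed
 * with its current contents, so writing ~HMER_DEBUG_TRIG (all ones except the
 * debug-trigger bit) clears only that bit and leaves every other pending HMI
 * cause intact for the check against HMEER.
 */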

long hmi_exception_realmode(struct pt_regs *regs)
{
        int ret;

        __this_cpu_inc(irq_stat.hmi_exceptions);

        ret = hmi_handle_debugtrig(regs);
        if (ret >= 0)
                return ret;

        wait_for_subcore_guest_exit();

        if (ppc_md.hmi_exception_early)
                ppc_md.hmi_exception_early(regs);

        wait_for_tb_resync();

        return 1;
}