/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
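
/*
 * Per-CPU buffering: the "nest" buffer below is filled from the early,
 * real-mode machine check path via save_mce_event(). The two queues hold
 * copies of events that are replayed later, once it is safe to call into
 * the rest of the kernel: mce_event_queue is drained from irq_work to
 * print events, and mce_ue_event_queue is drained from a workqueue so
 * that memory_failure() can be invoked for uncorrectable errors.
 */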
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
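
/*
 * Two deferral mechanisms are used above: irq_work_queue() is safe to call
 * from the machine check interrupt itself and is used to get events printed
 * shortly after the fact, while the regular workqueue item runs in process
 * context, which is needed because memory_failure() may sleep.
 */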

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 * return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handler routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}
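
/*
 * Typical caller sketch (illustrative only, not a caller in this file): a
 * platform machine check handler or KVM can consume the last saved event
 * with something like:
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_RELEASE))
 *		machine_check_print_event_info(&evt, user_mode(regs));
 */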

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}

/*
 * Process pending MCE UE events from the mce_ue_event_queue. This runs
 * later from a regular workqueue, where memory_failure() may be called.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}

/*
 * Process pending MCE events from the mce_event_queue. This function is
 * invoked later via irq_work, once we are out of the machine check handler.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		machine_check_print_event_info(evt, false);
		__this_cpu_dec(mce_queue_count);
	}
}

void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode)
{
	const char *level, *sevstr, *subtype;
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case MCE_SEV_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (user_mode) {
		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
			evt->srr0, current->pid, current->comm);
	} else {
		printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
			(void *)evt->srr0);
	}

	printk("%s  Initiator: %s\n", level,
	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		printk("%s  Error type: UE [%s]\n", level, subtype);
		if (evt->u.ue_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.ue_error.effective_address);
		if (evt->u.ue_error.physical_address_provided)
			printk("%s    Physical address:  %016llx\n",
			       level, evt->u.ue_error.physical_address);
		break;
	case MCE_ERROR_TYPE_SLB:
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		printk("%s  Error type: SLB [%s]\n", level, subtype);
		if (evt->u.slb_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.slb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_ERAT:
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		printk("%s  Error type: ERAT [%s]\n", level, subtype);
		if (evt->u.erat_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.erat_error.effective_address);
		break;
	case MCE_ERROR_TYPE_TLB:
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		printk("%s  Error type: TLB [%s]\n", level, subtype);
		if (evt->u.tlb_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.tlb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_USER:
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		printk("%s  Error type: User [%s]\n", level, subtype);
		if (evt->u.user_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.user_error.effective_address);
		break;
	case MCE_ERROR_TYPE_RA:
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		printk("%s  Error type: Real address [%s]\n", level, subtype);
		if (evt->u.ra_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.ra_error.effective_address);
		break;
	case MCE_ERROR_TYPE_LINK:
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		printk("%s  Error type: Link [%s]\n", level, subtype);
		if (evt->u.link_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.link_error.effective_address);
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		printk("%s  Error type: Unknown\n", level);
		break;
	}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain srr0 and srr1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	__this_cpu_inc(irq_stat.mce_exceptions);

	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
		handled = cur_cpu_spec->machine_check_early(regs);
	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);
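
/*
 * Worked example of the PVR decode above (the value is illustrative): for a
 * PVR of 0x004e1202, PVR_VER() gives 0x004e (POWER9), (pvr & 0xe000) == 0
 * identifies Nimbus, and (pvr & 0xfff) == 0x202 means DD2.2 or later, so the
 * HMI debug trigger is used for TM suspend escape.
 */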

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * user space.
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;
		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	__this_cpu_inc(irq_stat.hmi_exceptions);

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}