/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */
23 #define pr_fmt(fmt) "mce: " fmt
25 #include <linux/types.h>
26 #include <linux/ptrace.h>
27 #include <linux/percpu.h>
28 #include <linux/export.h>
29 #include <linux/irq_work.h>
32 static DEFINE_PER_CPU(int, mce_nest_count
);
33 static DEFINE_PER_CPU(struct machine_check_event
[MAX_MC_EVT
], mce_event
);
35 /* Queue for delayed MCE events. */
36 static DEFINE_PER_CPU(int, mce_queue_count
);
37 static DEFINE_PER_CPU(struct machine_check_event
[MAX_MC_EVT
], mce_event_queue
);
39 static void machine_check_process_queued_event(struct irq_work
*work
);
40 static struct irq_work mce_event_process_work
= {
41 .func
= machine_check_process_queued_event
,
44 static void mce_set_error_info(struct machine_check_event
*mce
,
45 struct mce_error_info
*mce_err
)
47 mce
->error_type
= mce_err
->error_type
;
48 switch (mce_err
->error_type
) {
49 case MCE_ERROR_TYPE_UE
:
50 mce
->u
.ue_error
.ue_error_type
= mce_err
->u
.ue_error_type
;
52 case MCE_ERROR_TYPE_SLB
:
53 mce
->u
.slb_error
.slb_error_type
= mce_err
->u
.slb_error_type
;
55 case MCE_ERROR_TYPE_ERAT
:
56 mce
->u
.erat_error
.erat_error_type
= mce_err
->u
.erat_error_type
;
58 case MCE_ERROR_TYPE_TLB
:
59 mce
->u
.tlb_error
.tlb_error_type
= mce_err
->u
.tlb_error_type
;
61 case MCE_ERROR_TYPE_USER
:
62 mce
->u
.user_error
.user_error_type
= mce_err
->u
.user_error_type
;
64 case MCE_ERROR_TYPE_RA
:
65 mce
->u
.ra_error
.ra_error_type
= mce_err
->u
.ra_error_type
;
67 case MCE_ERROR_TYPE_LINK
:
68 mce
->u
.link_error
.link_error_type
= mce_err
->u
.link_error_type
;
70 case MCE_ERROR_TYPE_UNKNOWN
:
77 * Decode and save high level MCE information into per cpu buffer which
78 * is an array of machine_check_event structure.
80 void save_mce_event(struct pt_regs
*regs
, long handled
,
81 struct mce_error_info
*mce_err
,
82 uint64_t nip
, uint64_t addr
)
84 int index
= __this_cpu_inc_return(mce_nest_count
) - 1;
85 struct machine_check_event
*mce
= this_cpu_ptr(&mce_event
[index
]);
88 * Return if we don't have enough space to log mce event.
89 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
90 * the check below will stop buffer overrun.
92 if (index
>= MAX_MC_EVT
)
95 /* Populate generic machine check info */
96 mce
->version
= MCE_V1
;
98 mce
->srr1
= regs
->msr
;
99 mce
->gpr3
= regs
->gpr
[3];
102 /* Mark it recovered if we have handled it and MSR(RI=1). */
103 if (handled
&& (regs
->msr
& MSR_RI
))
104 mce
->disposition
= MCE_DISPOSITION_RECOVERED
;
106 mce
->disposition
= MCE_DISPOSITION_NOT_RECOVERED
;
108 mce
->initiator
= mce_err
->initiator
;
109 mce
->severity
= mce_err
->severity
;
112 * Populate the mce error_type and type-specific error_type.
114 mce_set_error_info(mce
, mce_err
);
119 if (mce
->error_type
== MCE_ERROR_TYPE_TLB
) {
120 mce
->u
.tlb_error
.effective_address_provided
= true;
121 mce
->u
.tlb_error
.effective_address
= addr
;
122 } else if (mce
->error_type
== MCE_ERROR_TYPE_SLB
) {
123 mce
->u
.slb_error
.effective_address_provided
= true;
124 mce
->u
.slb_error
.effective_address
= addr
;
125 } else if (mce
->error_type
== MCE_ERROR_TYPE_ERAT
) {
126 mce
->u
.erat_error
.effective_address_provided
= true;
127 mce
->u
.erat_error
.effective_address
= addr
;
128 } else if (mce
->error_type
== MCE_ERROR_TYPE_USER
) {
129 mce
->u
.user_error
.effective_address_provided
= true;
130 mce
->u
.user_error
.effective_address
= addr
;
131 } else if (mce
->error_type
== MCE_ERROR_TYPE_RA
) {
132 mce
->u
.ra_error
.effective_address_provided
= true;
133 mce
->u
.ra_error
.effective_address
= addr
;
134 } else if (mce
->error_type
== MCE_ERROR_TYPE_LINK
) {
135 mce
->u
.link_error
.effective_address_provided
= true;
136 mce
->u
.link_error
.effective_address
= addr
;
137 } else if (mce
->error_type
== MCE_ERROR_TYPE_UE
) {
138 mce
->u
.ue_error
.effective_address_provided
= true;
139 mce
->u
.ue_error
.effective_address
= addr
;
146 * mce Pointer to machine_check_event structure to be filled.
147 * release Flag to indicate whether to free the event slot or not.
148 * 0 <= do not release the mce event. Caller will invoke
149 * release_mce_event() once event has been consumed.
150 * 1 <= release the slot.
155 * get_mce_event() will be called by platform specific machine check
156 * handle routine and in KVM.
157 * When we call get_mce_event(), we are still in interrupt context and
158 * preemption will not be scheduled until ret_from_expect() routine
161 int get_mce_event(struct machine_check_event
*mce
, bool release
)
163 int index
= __this_cpu_read(mce_nest_count
) - 1;
164 struct machine_check_event
*mc_evt
;
171 /* Check if we have MCE info to process. */
172 if (index
< MAX_MC_EVT
) {
173 mc_evt
= this_cpu_ptr(&mce_event
[index
]);
174 /* Copy the event structure and release the original */
181 /* Decrement the count to free the slot. */
183 __this_cpu_dec(mce_nest_count
);
188 void release_mce_event(void)
190 get_mce_event(NULL
, true);
194 * Queue up the MCE event which then can be handled later.
196 void machine_check_queue_event(void)
199 struct machine_check_event evt
;
201 if (!get_mce_event(&evt
, MCE_EVENT_RELEASE
))
204 index
= __this_cpu_inc_return(mce_queue_count
) - 1;
205 /* If queue is full, just return for now. */
206 if (index
>= MAX_MC_EVT
) {
207 __this_cpu_dec(mce_queue_count
);
210 memcpy(this_cpu_ptr(&mce_event_queue
[index
]), &evt
, sizeof(evt
));
212 /* Queue irq work to process this event later. */
213 irq_work_queue(&mce_event_process_work
);
217 * process pending MCE event from the mce event queue. This function will be
218 * called during syscall exit.
220 static void machine_check_process_queued_event(struct irq_work
*work
)
224 add_taint(TAINT_MACHINE_CHECK
, LOCKDEP_NOW_UNRELIABLE
);
227 * For now just print it to console.
228 * TODO: log this error event to FSP or nvram.
230 while (__this_cpu_read(mce_queue_count
) > 0) {
231 index
= __this_cpu_read(mce_queue_count
) - 1;
232 machine_check_print_event_info(
233 this_cpu_ptr(&mce_event_queue
[index
]), false);
234 __this_cpu_dec(mce_queue_count
);
238 void machine_check_print_event_info(struct machine_check_event
*evt
,
241 const char *level
, *sevstr
, *subtype
;
242 static const char *mc_ue_types
[] = {
245 "Page table walk ifetch",
247 "Page table walk Load/Store",
249 static const char *mc_slb_types
[] = {
254 static const char *mc_erat_types
[] = {
259 static const char *mc_tlb_types
[] = {
264 static const char *mc_user_types
[] = {
268 static const char *mc_ra_types
[] = {
270 "Instruction fetch (bad)",
271 "Instruction fetch (foreign)",
272 "Page table walk ifetch (bad)",
273 "Page table walk ifetch (foreign)",
276 "Page table walk Load/Store (bad)",
277 "Page table walk Load/Store (foreign)",
278 "Load/Store (foreign)",
280 static const char *mc_link_types
[] = {
282 "Instruction fetch (timeout)",
283 "Page table walk ifetch (timeout)",
286 "Page table walk Load/Store (timeout)",
289 /* Print things out */
290 if (evt
->version
!= MCE_V1
) {
291 pr_err("Machine Check Exception, Unknown event version %d !\n",
295 switch (evt
->severity
) {
296 case MCE_SEV_NO_ERROR
:
300 case MCE_SEV_WARNING
:
301 level
= KERN_WARNING
;
304 case MCE_SEV_ERROR_SYNC
:
315 printk("%s%s Machine check interrupt [%s]\n", level
, sevstr
,
316 evt
->disposition
== MCE_DISPOSITION_RECOVERED
?
317 "Recovered" : "Not recovered");
320 printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level
,
321 evt
->srr0
, current
->pid
, current
->comm
);
323 printk("%s NIP [%016llx]: %pS\n", level
, evt
->srr0
,
327 printk("%s Initiator: %s\n", level
,
328 evt
->initiator
== MCE_INITIATOR_CPU
? "CPU" : "Unknown");
329 switch (evt
->error_type
) {
330 case MCE_ERROR_TYPE_UE
:
331 subtype
= evt
->u
.ue_error
.ue_error_type
<
332 ARRAY_SIZE(mc_ue_types
) ?
333 mc_ue_types
[evt
->u
.ue_error
.ue_error_type
]
335 printk("%s Error type: UE [%s]\n", level
, subtype
);
336 if (evt
->u
.ue_error
.effective_address_provided
)
337 printk("%s Effective address: %016llx\n",
338 level
, evt
->u
.ue_error
.effective_address
);
339 if (evt
->u
.ue_error
.physical_address_provided
)
340 printk("%s Physical address: %016llx\n",
341 level
, evt
->u
.ue_error
.physical_address
);
343 case MCE_ERROR_TYPE_SLB
:
344 subtype
= evt
->u
.slb_error
.slb_error_type
<
345 ARRAY_SIZE(mc_slb_types
) ?
346 mc_slb_types
[evt
->u
.slb_error
.slb_error_type
]
348 printk("%s Error type: SLB [%s]\n", level
, subtype
);
349 if (evt
->u
.slb_error
.effective_address_provided
)
350 printk("%s Effective address: %016llx\n",
351 level
, evt
->u
.slb_error
.effective_address
);
353 case MCE_ERROR_TYPE_ERAT
:
354 subtype
= evt
->u
.erat_error
.erat_error_type
<
355 ARRAY_SIZE(mc_erat_types
) ?
356 mc_erat_types
[evt
->u
.erat_error
.erat_error_type
]
358 printk("%s Error type: ERAT [%s]\n", level
, subtype
);
359 if (evt
->u
.erat_error
.effective_address_provided
)
360 printk("%s Effective address: %016llx\n",
361 level
, evt
->u
.erat_error
.effective_address
);
363 case MCE_ERROR_TYPE_TLB
:
364 subtype
= evt
->u
.tlb_error
.tlb_error_type
<
365 ARRAY_SIZE(mc_tlb_types
) ?
366 mc_tlb_types
[evt
->u
.tlb_error
.tlb_error_type
]
368 printk("%s Error type: TLB [%s]\n", level
, subtype
);
369 if (evt
->u
.tlb_error
.effective_address_provided
)
370 printk("%s Effective address: %016llx\n",
371 level
, evt
->u
.tlb_error
.effective_address
);
373 case MCE_ERROR_TYPE_USER
:
374 subtype
= evt
->u
.user_error
.user_error_type
<
375 ARRAY_SIZE(mc_user_types
) ?
376 mc_user_types
[evt
->u
.user_error
.user_error_type
]
378 printk("%s Error type: User [%s]\n", level
, subtype
);
379 if (evt
->u
.user_error
.effective_address_provided
)
380 printk("%s Effective address: %016llx\n",
381 level
, evt
->u
.user_error
.effective_address
);
383 case MCE_ERROR_TYPE_RA
:
384 subtype
= evt
->u
.ra_error
.ra_error_type
<
385 ARRAY_SIZE(mc_ra_types
) ?
386 mc_ra_types
[evt
->u
.ra_error
.ra_error_type
]
388 printk("%s Error type: Real address [%s]\n", level
, subtype
);
389 if (evt
->u
.ra_error
.effective_address_provided
)
390 printk("%s Effective address: %016llx\n",
391 level
, evt
->u
.ra_error
.effective_address
);
393 case MCE_ERROR_TYPE_LINK
:
394 subtype
= evt
->u
.link_error
.link_error_type
<
395 ARRAY_SIZE(mc_link_types
) ?
396 mc_link_types
[evt
->u
.link_error
.link_error_type
]
398 printk("%s Error type: Link [%s]\n", level
, subtype
);
399 if (evt
->u
.link_error
.effective_address_provided
)
400 printk("%s Effective address: %016llx\n",
401 level
, evt
->u
.link_error
.effective_address
);
404 case MCE_ERROR_TYPE_UNKNOWN
:
405 printk("%s Error type: Unknown\n", level
);
409 EXPORT_SYMBOL_GPL(machine_check_print_event_info
);
411 uint64_t get_mce_fault_addr(struct machine_check_event
*evt
)
413 switch (evt
->error_type
) {
414 case MCE_ERROR_TYPE_UE
:
415 if (evt
->u
.ue_error
.effective_address_provided
)
416 return evt
->u
.ue_error
.effective_address
;
418 case MCE_ERROR_TYPE_SLB
:
419 if (evt
->u
.slb_error
.effective_address_provided
)
420 return evt
->u
.slb_error
.effective_address
;
422 case MCE_ERROR_TYPE_ERAT
:
423 if (evt
->u
.erat_error
.effective_address_provided
)
424 return evt
->u
.erat_error
.effective_address
;
426 case MCE_ERROR_TYPE_TLB
:
427 if (evt
->u
.tlb_error
.effective_address_provided
)
428 return evt
->u
.tlb_error
.effective_address
;
430 case MCE_ERROR_TYPE_USER
:
431 if (evt
->u
.user_error
.effective_address_provided
)
432 return evt
->u
.user_error
.effective_address
;
434 case MCE_ERROR_TYPE_RA
:
435 if (evt
->u
.ra_error
.effective_address_provided
)
436 return evt
->u
.ra_error
.effective_address
;
438 case MCE_ERROR_TYPE_LINK
:
439 if (evt
->u
.link_error
.effective_address_provided
)
440 return evt
->u
.link_error
.effective_address
;
443 case MCE_ERROR_TYPE_UNKNOWN
:
448 EXPORT_SYMBOL(get_mce_fault_addr
);