/*
 * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright 2014 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <asm/cputable.h>
#include <asm/machdep.h>
35 static int opal_hmi_handler_nb_init
;
36 struct OpalHmiEvtNode
{
37 struct list_head list
;
38 struct OpalHMIEvent hmi_evt
;
42 uint32_t xstop_reason
;
43 const char *unit_failed
;
44 const char *description
;
47 static LIST_HEAD(opal_hmi_evt_list
);
48 static DEFINE_SPINLOCK(opal_hmi_evt_lock
);
50 static void print_core_checkstop_reason(const char *level
,
51 struct OpalHMIEvent
*hmi_evt
)
54 static const struct xstop_reason xstop_reason
[] = {
55 { CORE_CHECKSTOP_IFU_REGFILE
, "IFU",
56 "RegFile core check stop" },
57 { CORE_CHECKSTOP_IFU_LOGIC
, "IFU", "Logic core check stop" },
58 { CORE_CHECKSTOP_PC_DURING_RECOV
, "PC",
59 "Core checkstop during recovery" },
60 { CORE_CHECKSTOP_ISU_REGFILE
, "ISU",
61 "RegFile core check stop (mapper error)" },
62 { CORE_CHECKSTOP_ISU_LOGIC
, "ISU", "Logic core check stop" },
63 { CORE_CHECKSTOP_FXU_LOGIC
, "FXU", "Logic core check stop" },
64 { CORE_CHECKSTOP_VSU_LOGIC
, "VSU", "Logic core check stop" },
65 { CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE
, "PC",
66 "Recovery in maintenance mode" },
67 { CORE_CHECKSTOP_LSU_REGFILE
, "LSU",
68 "RegFile core check stop" },
69 { CORE_CHECKSTOP_PC_FWD_PROGRESS
, "PC",
70 "Forward Progress Error" },
71 { CORE_CHECKSTOP_LSU_LOGIC
, "LSU", "Logic core check stop" },
72 { CORE_CHECKSTOP_PC_LOGIC
, "PC", "Logic core check stop" },
73 { CORE_CHECKSTOP_PC_HYP_RESOURCE
, "PC",
74 "Hypervisor Resource error - core check stop" },
75 { CORE_CHECKSTOP_PC_HANG_RECOV_FAILED
, "PC",
76 "Hang Recovery Failed (core check stop)" },
77 { CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED
, "PC",
78 "Ambiguous Hang Detected (unknown source)" },
79 { CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ
, "PC",
80 "Debug Trigger Error inject" },
81 { CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ
, "PC",
82 "Hypervisor check stop via SPRC/SPRD" },
86 if (!hmi_evt
->u
.xstop_error
.xstop_reason
) {
87 printk("%s Unknown Core check stop.\n", level
);
91 printk("%s CPU PIR: %08x\n", level
,
92 be32_to_cpu(hmi_evt
->u
.xstop_error
.u
.pir
));
93 for (i
= 0; i
< ARRAY_SIZE(xstop_reason
); i
++)
94 if (be32_to_cpu(hmi_evt
->u
.xstop_error
.xstop_reason
) &
95 xstop_reason
[i
].xstop_reason
)
96 printk("%s [Unit: %-3s] %s\n", level
,
97 xstop_reason
[i
].unit_failed
,
98 xstop_reason
[i
].description
);
101 static void print_nx_checkstop_reason(const char *level
,
102 struct OpalHMIEvent
*hmi_evt
)
105 static const struct xstop_reason xstop_reason
[] = {
106 { NX_CHECKSTOP_SHM_INVAL_STATE_ERR
, "DMA & Engine",
107 "SHM invalid state error" },
108 { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1
, "DMA & Engine",
109 "DMA invalid state error bit 15" },
110 { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2
, "DMA & Engine",
111 "DMA invalid state error bit 16" },
112 { NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR
, "DMA & Engine",
113 "Channel 0 invalid state error" },
114 { NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR
, "DMA & Engine",
115 "Channel 1 invalid state error" },
116 { NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR
, "DMA & Engine",
117 "Channel 2 invalid state error" },
118 { NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR
, "DMA & Engine",
119 "Channel 3 invalid state error" },
120 { NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR
, "DMA & Engine",
121 "Channel 4 invalid state error" },
122 { NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR
, "DMA & Engine",
123 "Channel 5 invalid state error" },
124 { NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR
, "DMA & Engine",
125 "Channel 6 invalid state error" },
126 { NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR
, "DMA & Engine",
127 "Channel 7 invalid state error" },
128 { NX_CHECKSTOP_DMA_CRB_UE
, "DMA & Engine",
129 "UE error on CRB(CSB address, CCB)" },
130 { NX_CHECKSTOP_DMA_CRB_SUE
, "DMA & Engine",
131 "SUE error on CRB(CSB address, CCB)" },
132 { NX_CHECKSTOP_PBI_ISN_UE
, "PowerBus Interface",
133 "CRB Kill ISN received while holding ISN with UE error" },
137 if (!hmi_evt
->u
.xstop_error
.xstop_reason
) {
138 printk("%s Unknown NX check stop.\n", level
);
142 printk("%s NX checkstop on CHIP ID: %x\n", level
,
143 be32_to_cpu(hmi_evt
->u
.xstop_error
.u
.chip_id
));
144 for (i
= 0; i
< ARRAY_SIZE(xstop_reason
); i
++)
145 if (be32_to_cpu(hmi_evt
->u
.xstop_error
.xstop_reason
) &
146 xstop_reason
[i
].xstop_reason
)
147 printk("%s [Unit: %-3s] %s\n", level
,
148 xstop_reason
[i
].unit_failed
,
149 xstop_reason
[i
].description
);
152 static void print_checkstop_reason(const char *level
,
153 struct OpalHMIEvent
*hmi_evt
)
155 uint8_t type
= hmi_evt
->u
.xstop_error
.xstop_type
;
157 case CHECKSTOP_TYPE_CORE
:
158 print_core_checkstop_reason(level
, hmi_evt
);
160 case CHECKSTOP_TYPE_NX
:
161 print_nx_checkstop_reason(level
, hmi_evt
);
164 printk("%s Unknown Malfunction Alert of type %d\n",
/*
 * print_hmi_event_info - log one HMI event.
 * @hmi_evt: the event to report
 *
 * An event whose version is below OpalHMIEvt_V1 is reported through pr_err()
 * as having an unknown version.  Otherwise the severity selects the printk
 * level, and the function prints the disposition (recovered or not), an
 * error-detail string looked up by event type, and the HMER value; TFMR is
 * printed too for OpalHMI_ERROR_TFAC and OpalHMI_ERROR_TFMR_PARITY events.
 * OpalHMI_ERROR_MALFUNC_ALERT events are additionally decoded by
 * print_checkstop_reason(); a check against OpalHMIEvt_V2 precedes that.
 *
 * NOTE(review): several statements of this function (the severity case
 * bodies, the fallback of the error-detail lookup, the statements guarded
 * by the version checks) are not visible in the reviewed text - confirm
 * against the complete file before relying on this summary.
 */
170 static void print_hmi_event_info(struct OpalHMIEvent
*hmi_evt
)
172 const char *level
, *sevstr
, *error_info
;
/* Descriptions of the HMI error types; indexed by hmi_evt->type below. */
173 static const char *hmi_error_types
[] = {
175 "Processor Recovery done",
176 "Processor recovery occurred again",
177 "Processor recovery occurred for masked error",
178 "Timer facility experienced an error",
179 "TFMR SPR is corrupted",
180 "UPS (Uniterrupted Power System) Overflow indication",
181 "An XSCOM operation failure",
182 "An XSCOM operation completed",
183 "SCOM has set a reserved FIR bit to cause recovery",
184 "Debug trigger has set a reserved FIR bit to cause recovery",
185 "A hypervisor resource error occurred",
186 "CAPP recovery process is in progress",
189 /* Print things out */
190 if (hmi_evt
->version
< OpalHMIEvt_V1
) {
191 pr_err("HMI Interrupt, Unknown event version %d !\n",
/* The event severity picks the printk level used for every later line. */
195 switch (hmi_evt
->severity
) {
196 case OpalHMI_SEV_NO_ERROR
:
200 case OpalHMI_SEV_WARNING
:
201 level
= KERN_WARNING
;
204 case OpalHMI_SEV_ERROR_SYNC
:
208 case OpalHMI_SEV_FATAL
:
215 printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
217 hmi_evt
->disposition
== OpalHMI_DISPOSITION_RECOVERED
?
218 "Recovered" : "Not recovered");
/* Only index the description table with a type that lies inside it. */
219 error_info
= hmi_evt
->type
< ARRAY_SIZE(hmi_error_types
) ?
220 hmi_error_types
[hmi_evt
->type
]
222 printk("%s Error detail: %s\n", level
, error_info
);
/* hmer and tfmr are passed through be64_to_cpu() before being printed. */
223 printk("%s HMER: %016llx\n", level
, be64_to_cpu(hmi_evt
->hmer
));
224 if ((hmi_evt
->type
== OpalHMI_ERROR_TFAC
) ||
225 (hmi_evt
->type
== OpalHMI_ERROR_TFMR_PARITY
))
226 printk("%s TFMR: %016llx\n", level
,
227 be64_to_cpu(hmi_evt
->tfmr
));
229 if (hmi_evt
->version
< OpalHMIEvt_V2
)
232 /* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
233 if (hmi_evt
->type
== OpalHMI_ERROR_MALFUNC_ALERT
)
234 print_checkstop_reason(level
, hmi_evt
);
237 static void hmi_event_handler(struct work_struct
*work
)
240 struct OpalHMIEvent
*hmi_evt
;
241 struct OpalHmiEvtNode
*msg_node
;
244 int unrecoverable
= 0;
246 spin_lock_irqsave(&opal_hmi_evt_lock
, flags
);
247 while (!list_empty(&opal_hmi_evt_list
)) {
248 msg_node
= list_entry(opal_hmi_evt_list
.next
,
249 struct OpalHmiEvtNode
, list
);
250 list_del(&msg_node
->list
);
251 spin_unlock_irqrestore(&opal_hmi_evt_lock
, flags
);
253 hmi_evt
= (struct OpalHMIEvent
*) &msg_node
->hmi_evt
;
254 print_hmi_event_info(hmi_evt
);
255 disposition
= hmi_evt
->disposition
;
259 * Check if HMI event has been recovered or not. If not
260 * then kernel can't continue, we need to panic.
261 * But before we do that, display all the HMI event
262 * available on the list and set unrecoverable flag to 1.
264 if (disposition
!= OpalHMI_DISPOSITION_RECOVERED
)
267 spin_lock_irqsave(&opal_hmi_evt_lock
, flags
);
269 spin_unlock_irqrestore(&opal_hmi_evt_lock
, flags
);
272 /* Pull all HMI events from OPAL before we panic. */
273 while (opal_get_msg(__pa(&msg
), sizeof(msg
)) == OPAL_SUCCESS
) {
276 type
= be32_to_cpu(msg
.msg_type
);
278 /* skip if not HMI event */
279 if (type
!= OPAL_MSG_HMI_EVT
)
282 /* HMI event info starts from param[0] */
283 hmi_evt
= (struct OpalHMIEvent
*)&msg
.params
[0];
284 print_hmi_event_info(hmi_evt
);
287 pnv_platform_error_reboot(NULL
, "Unrecoverable HMI exception");
291 static DECLARE_WORK(hmi_event_work
, hmi_event_handler
);
293 * opal_handle_hmi_event - notifier handler that queues up HMI events
294 * to be preocessed later.
296 static int opal_handle_hmi_event(struct notifier_block
*nb
,
297 unsigned long msg_type
, void *msg
)
300 struct OpalHMIEvent
*hmi_evt
;
301 struct opal_msg
*hmi_msg
= msg
;
302 struct OpalHmiEvtNode
*msg_node
;
305 if (msg_type
!= OPAL_MSG_HMI_EVT
)
308 /* HMI event info starts from param[0] */
309 hmi_evt
= (struct OpalHMIEvent
*)&hmi_msg
->params
[0];
311 /* Delay the logging of HMI events to workqueue. */
312 msg_node
= kzalloc(sizeof(*msg_node
), GFP_ATOMIC
);
314 pr_err("HMI: out of memory, Opal message event not handled\n");
317 memcpy(&msg_node
->hmi_evt
, hmi_evt
, sizeof(struct OpalHMIEvent
));
319 spin_lock_irqsave(&opal_hmi_evt_lock
, flags
);
320 list_add(&msg_node
->list
, &opal_hmi_evt_list
);
321 spin_unlock_irqrestore(&opal_hmi_evt_lock
, flags
);
323 schedule_work(&hmi_event_work
);
327 static struct notifier_block opal_hmi_handler_nb
= {
328 .notifier_call
= opal_handle_hmi_event
,
333 int __init
opal_hmi_handler_init(void)
337 if (!opal_hmi_handler_nb_init
) {
338 ret
= opal_message_notifier_register(
339 OPAL_MSG_HMI_EVT
, &opal_hmi_handler_nb
);
341 pr_err("%s: Can't register OPAL event notifier (%d)\n",
345 opal_hmi_handler_nb_init
= 1;