/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)	"opal: " fmt

#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <linux/kthread.h>
#include <linux/freezer.h>

#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>

/* /sys/firmware/opal */
struct kobject *opal_kobj;

struct opal {
	u64 base;
	u64 entry;
	u64 size;
} opal;

struct mcheck_recoverable_range {
	u64 start_addr;
	u64 end_addr;
	u64 recover_addr;
};

static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static uint32_t opal_heartbeat;

static void opal_reinit_cores(void)
{
	/* Do the actual re-init. This will clobber all FPRs, VRs, etc...
	 *
	 * It will preserve non volatile GPRs and HSPRG0/1. It will
	 * also restore HIDs and other SPRs to their original value
	 * but it might clobber a bunch.
	 */
#ifdef __BIG_ENDIAN__
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
#else
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
#endif
}

int __init early_init_dt_scan_opal(unsigned long node,
				   const char *uname, int depth, void *data)
{
	const void *basep, *entryp, *sizep;
	int basesz, entrysz, runtimesz;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
	sizep  = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

	if (!basep || !entryp || !sizep)
		return 1;

	opal.base = of_read_number(basep, basesz/4);
	opal.entry = of_read_number(entryp, entrysz/4);
	opal.size = of_read_number(sizep, runtimesz/4);

	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
		 opal.base, basep, basesz);
	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
		 opal.entry, entryp, entrysz);
	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
		 opal.size, sizep, runtimesz);

	powerpc_firmware_features |= FW_FEATURE_OPAL;
	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		powerpc_firmware_features |= FW_FEATURE_OPALv3;
		pr_info("OPAL V3 detected!\n");
	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		pr_info("OPAL V2 detected!\n");
	} else {
		pr_info("OPAL V1 detected!\n");
	}

	/* Reinit all cores with the right endianness */
	opal_reinit_cores();

	/* Restore some bits */
	if (cur_cpu_spec->cpu_restore)
		cur_cpu_spec->cpu_restore();

	return 1;
}

int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
				   const char *uname, int depth, void *data)
{
	int i, psize, size;
	const __be32 *prop;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
	if (!prop)
		return 1;

	pr_debug("Found machine check recoverable ranges.\n");

	/*
	 * Calculate number of available entries.
	 *
	 * Each recoverable address range entry is (start address, len,
	 * recovery address), 2 cells each for start and recovery address,
	 * 1 cell for len, totalling 5 cells per entry.
	 */
	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);

	/* Sanity check */
	if (!mc_recoverable_range_len)
		return 1;

	/* Size required to hold all the entries. */
	size = mc_recoverable_range_len *
			sizeof(struct mcheck_recoverable_range);

	/*
	 * Allocate a buffer to hold the MC recoverable ranges. We would be
	 * accessing them in real mode, hence it needs to be within the
	 * RMO region.
	 */
	mc_recoverable_range = __va(memblock_alloc_base(size, __alignof__(u64),
							ppc64_rma_size));
	memset(mc_recoverable_range, 0, size);

	for (i = 0; i < mc_recoverable_range_len; i++) {
		mc_recoverable_range[i].start_addr =
					of_read_number(prop + (i * 5) + 0, 2);
		mc_recoverable_range[i].end_addr =
					mc_recoverable_range[i].start_addr +
					of_read_number(prop + (i * 5) + 2, 1);
		mc_recoverable_range[i].recover_addr =
					of_read_number(prop + (i * 5) + 3, 2);

		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
				mc_recoverable_range[i].start_addr,
				mc_recoverable_range[i].end_addr,
				mc_recoverable_range[i].recover_addr);
	}
	return 1;
}

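/*
 * For illustration only (not from the original source): with the 5-cell
 * layout described above, a property such as
 *
 *	mcheck-recoverable-ranges = <0x0 0x3000 0x100 0x0 0x4000>;
 *
 * (values invented for this sketch) describes one range starting at 0x3000,
 * 0x100 bytes long, whose recovery handler lives at 0x4000.
 */
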
static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
	u64 glue;

	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
		return -ENODEV;

	/* Hookup some exception handlers except machine check. We use the
	 * fwnmi area at 0x7000 to provide the glue space to OPAL
	 */
	glue = 0x7000;

	/*
	 * Check if we are running on newer firmware that exports the
	 * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
	 * the HMI interrupt and we catch it directly in Linux.
	 *
	 * For older firmware (i.e. currently released POWER8 System Firmware
	 * as of today <= SV810_087), we fall back to the old behavior and let
	 * OPAL patch the HMI vector and handle it inside OPAL firmware.
	 *
	 * For newer firmware (in development/yet to be released) we will
	 * start catching/handling HMIs directly in Linux.
	 */
	if (!opal_check_token(OPAL_HANDLE_HMI)) {
		pr_info("Old firmware detected, OPAL handles HMIs.\n");
		opal_register_exception_handler(
				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
				0, glue);
		glue += 128;
	}

	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

	return 0;
}
machine_early_initcall(powernv, opal_register_exception_handlers);

/*
 * OPAL message notifier, keyed by message type. Allows subscribers to be
 * notified of specific message types.
 */
int opal_message_notifier_register(enum opal_msg_type msg_type,
					struct notifier_block *nb)
{
	if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Invalid arguments, msg_type:%d\n",
			   __func__, msg_type);
		return -EINVAL;
	}

	return atomic_notifier_chain_register(
				&opal_msg_notifier_head[msg_type], nb);
}

int opal_message_notifier_unregister(enum opal_msg_type msg_type,
				     struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(
			&opal_msg_notifier_head[msg_type], nb);
}

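/*
 * Example (sketch, not part of the original code): a consumer interested in
 * EPOW messages could subscribe as follows. The callback name and its body
 * are illustrative only.
 *
 *	static int example_epow_notify(struct notifier_block *nb,
 *				       unsigned long msg_type, void *msg)
 *	{
 *		struct opal_msg *m = msg;
 *
 *		pr_info("EPOW message received, type %u\n",
 *			be32_to_cpu(m->msg_type));
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block example_epow_nb = {
 *		.notifier_call = example_epow_notify,
 *	};
 *
 *	opal_message_notifier_register(OPAL_MSG_EPOW, &example_epow_nb);
 */
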
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
	/* notify subscribers */
	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
					msg_type, msg);
}

static void opal_handle_message(void)
{
	s64 ret;
	/*
	 * TODO: pre-allocate a message buffer depending on the opal-msg-size
	 * value in /proc/device-tree.
	 */
	static struct opal_msg msg;
	u32 type;

	ret = opal_get_msg(__pa(&msg), sizeof(msg));
	/* No opal message pending. */
	if (ret == OPAL_RESOURCE)
		return;

	/* check for errors. */
	if (ret) {
		pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
				__func__, ret);
		return;
	}

	type = be32_to_cpu(msg.msg_type);

	/* Sanity check */
	if (type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Unknown message type: %u\n", __func__, type);
		return;
	}
	opal_message_do_notify(type, (void *)&msg);
}

static irqreturn_t opal_message_notify(int irq, void *data)
{
	opal_handle_message();
	return IRQ_HANDLED;
}

static int __init opal_message_init(void)
{
	int ret, i, irq;

	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

	irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
	if (!irq) {
		pr_err("%s: Can't register OPAL event irq (%d)\n",
		       __func__, irq);
		return irq;
	}

	ret = request_irq(irq, opal_message_notify,
			IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
	if (ret) {
		pr_err("%s: Can't request OPAL event irq (%d)\n",
		       __func__, ret);
		return ret;
	}

	return 0;
}

int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
	s64 rc;
	__be64 evt, len;

	if (!opal.entry)
		return -ENODEV;
	opal_poll_events(&evt);
	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
		return 0;
	len = cpu_to_be64(count);
	rc = opal_console_read(vtermno, &len, buf);
	if (rc == OPAL_SUCCESS)
		return be64_to_cpu(len);
	return 0;
}

int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
	int written = 0;
	__be64 olen;
	s64 len, rc;
	unsigned long flags;
	__be64 evt;

	if (!opal.entry)
		return -ENODEV;

	/* We want put_chars to be atomic to avoid mangling of hvsi
	 * packets. To do that, we first test for room and return
	 * -EAGAIN if there isn't enough.
	 *
	 * Unfortunately, opal_console_write_buffer_space() doesn't
	 * appear to work on opal v1, so we just assume there is
	 * enough room and be done with it.
	 */
	spin_lock_irqsave(&opal_write_lock, flags);
	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
		rc = opal_console_write_buffer_space(vtermno, &olen);
		len = be64_to_cpu(olen);
		if (rc || len < total_len) {
			spin_unlock_irqrestore(&opal_write_lock, flags);
			/* Closed -> drop characters */
			if (rc)
				return total_len;
			opal_poll_events(NULL);
			return -EAGAIN;
		}
	}

	/* We still try to handle partial completions, though they
	 * should no longer happen.
	 */
	rc = OPAL_BUSY;
	while (total_len > 0 && (rc == OPAL_BUSY ||
				 rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
		olen = cpu_to_be64(total_len);
		rc = opal_console_write(vtermno, &olen, data);
		len = be64_to_cpu(olen);

		/* Closed or other error drop */
		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
		    rc != OPAL_BUSY_EVENT) {
			written = total_len;
			break;
		}
		if (rc == OPAL_SUCCESS) {
			total_len -= len;
			data += len;
			written += len;
		}
		/* This is a bit nasty but we need that for the console to
		 * flush when there aren't any interrupts. We will clean
		 * things up a bit later to limit that to synchronous paths
		 * such as the kernel console and xmon/udbg.
		 */
		do
			opal_poll_events(&evt);
		while (rc == OPAL_SUCCESS &&
			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
	}
	spin_unlock_irqrestore(&opal_write_lock, flags);
	return written;
}

static int opal_recover_mce(struct pt_regs *regs,
					struct machine_check_event *evt)
{
	int recovered = 0;
	uint64_t ea = get_mce_fault_addr(evt);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		recovered = 0;
	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;
	} else if (ea && !is_kernel_addr(ea)) {
		/*
		 * Faulting address is not in kernel text. We should be fine.
		 * We need to find which process uses this address.
		 * For now, kill the task if we have received the exception
		 * while in userspace.
		 *
		 * TODO: Queue up this address for hwpoisoning later.
		 */
		if (user_mode(regs) && !is_global_init(current)) {
			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
			recovered = 1;
		} else
			recovered = 0;
	} else if (user_mode(regs) && !is_global_init(current) &&
		   evt->severity == MCE_SEV_ERROR_SYNC) {
		/*
		 * If we have received a synchronous error when in userspace
		 * kill the task.
		 */
		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}
	return recovered;
}

int opal_machine_check(struct pt_regs *regs)
{
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return 0;

	/* Print things out */
	if (evt.version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt.version);
		return 0;
	}
	machine_check_print_event_info(&evt);

	if (opal_recover_mce(regs, &evt))
		return 1;
	return 0;
}

/* Early HMI handler called in real mode. */
int opal_hmi_exception_early(struct pt_regs *regs)
{
	s64 rc;

	/*
	 * Call the OPAL HMI handler. A return value of OPAL_SUCCESS
	 * indicates that there is an HMI event generated and waiting to
	 * be pulled by Linux.
	 */
	rc = opal_handle_hmi();
	if (rc == OPAL_SUCCESS) {
		local_paca->hmi_event_available = 1;
		hmi_exception_realmode(regs);
	}
	return 0;
}

/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
	s64 rc;
	__be64 evt = 0;

	/*
	 * Check if an HMI event is available.
	 * If yes, then call opal_poll_events to pull opal messages and
	 * process them.
	 */
	if (!local_paca->hmi_event_available)
		return 0;

	local_paca->hmi_event_available = 0;
	rc = opal_poll_events(&evt);
	if (rc == OPAL_SUCCESS && evt)
		opal_handle_events(be64_to_cpu(evt));

	return 1;
}

static uint64_t find_recovery_address(uint64_t nip)
{
	int i;

	for (i = 0; i < mc_recoverable_range_len; i++)
		if ((nip >= mc_recoverable_range[i].start_addr) &&
		    (nip < mc_recoverable_range[i].end_addr))
			return mc_recoverable_range[i].recover_addr;
	return 0;
}

bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
	uint64_t recover_addr = 0;

	if (!opal.base || !opal.size)
		goto out;

	if ((regs->nip >= opal.base) &&
	    (regs->nip <= (opal.base + opal.size)))
		recover_addr = find_recovery_address(regs->nip);

	/*
	 * Setup regs->nip to rfi into the fixup address.
	 */
	if (recover_addr)
		regs->nip = recover_addr;

out:
	return !!recover_addr;
}

static int opal_sysfs_init(void)
{
	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
	if (!opal_kobj) {
		pr_warn("kobject_create_and_add opal failed\n");
		return -ENOMEM;
	}

	return 0;
}

static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
			       struct bin_attribute *bin_attr,
			       char *buf, loff_t off, size_t count)
{
	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
				       bin_attr->size);
}

static BIN_ATTR_RO(symbol_map, 0);

static void opal_export_symmap(void)
{
	const __be64 *syms;
	int size;
	struct device_node *fw;
	int rc;

	fw = of_find_node_by_path("/ibm,opal/firmware");
	if (!fw)
		return;
	syms = of_get_property(fw, "symbol-map", &size);
	if (!syms || size != 2 * sizeof(__be64))
		return;

	/* Setup attributes */
	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);

	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
	if (rc)
		pr_warn("Error %d creating OPAL symbols file\n", rc);
}

static void __init opal_dump_region_init(void)
{
	void *addr;
	uint64_t size;
	int rc;

	if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
		return;

	/* Register kernel log buffer */
	addr = log_buf_addr_get();
	if (addr == NULL)
		return;

	size = log_buf_len_get();
	if (size == 0)
		return;

	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
				       __pa(addr), size);
	/* Don't warn if this is just an older OPAL that doesn't
	 * know about that call.
	 */
	if (rc && rc != OPAL_UNSUPPORTED)
		pr_warn("DUMP: Failed to register kernel log buffer. "
			"rc = %d\n", rc);
}

static void opal_pdev_init(struct device_node *opal_node,
			   const char *compatible)
{
	struct device_node *np;

	for_each_child_of_node(opal_node, np)
		if (of_device_is_compatible(np, compatible))
			of_platform_device_create(np, NULL, NULL);
}

static void opal_i2c_create_devs(void)
{
	struct device_node *np;

	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
		of_platform_device_create(np, NULL, NULL);
}

static int kopald(void *unused)
{
	__be64 events;

	set_freezable();
	do {
		try_to_freeze();
		opal_poll_events(&events);
		opal_handle_events(be64_to_cpu(events));
		msleep_interruptible(opal_heartbeat);
	} while (!kthread_should_stop());

	return 0;
}

static void opal_init_heartbeat(void)
{
	/* Old firmware, we assume the HVC heartbeat is sufficient */
	if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
				 &opal_heartbeat) != 0)
		opal_heartbeat = 0;

	if (opal_heartbeat)
		kthread_run(kopald, NULL, "kopald");
}

static int __init opal_init(void)
{
	struct device_node *np, *consoles;
	int rc;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("Device node not found\n");
		return -ENODEV;
	}

	/* Initialise OPAL events */
	opal_event_init();

	/* Register OPAL consoles if any ports */
	if (firmware_has_feature(FW_FEATURE_OPALv2))
		consoles = of_find_node_by_path("/ibm,opal/consoles");
	else
		consoles = of_node_get(opal_node);
	if (consoles) {
		for_each_child_of_node(consoles, np) {
			if (strcmp(np->name, "serial"))
				continue;
			of_platform_device_create(np, NULL, NULL);
		}
		of_node_put(consoles);
	}

	/* Initialise OPAL messaging system */
	opal_message_init();

	/* Initialise OPAL asynchronous completion interface */
	opal_async_comp_init();

	/* Initialise OPAL sensor interface */
	opal_sensor_init();

	/* Initialise OPAL hypervisor maintenance interrupt handling */
	opal_hmi_handler_init();

	/* Create i2c platform devices */
	opal_i2c_create_devs();

	/* Setup a heartbeat thread if requested by OPAL */
	opal_init_heartbeat();

	/* Create "opal" kobject under /sys/firmware */
	rc = opal_sysfs_init();
	if (rc == 0) {
		/* Export symbol map to userspace */
		opal_export_symmap();
		/* Setup dump region interface */
		opal_dump_region_init();
		/* Setup error log interface */
		rc = opal_elog_init();
		/* Setup code update interface */
		opal_flash_update_init();
		/* Setup platform dump extract interface */
		opal_platform_dump_init();
		/* Setup system parameters interface */
		opal_sys_param_init();
		/* Setup message log interface. */
		opal_msglog_init();
	}

	/* Initialize platform devices: IPMI backend & flash interface */
	opal_pdev_init(opal_node, "ibm,opal-ipmi");
	opal_pdev_init(opal_node, "ibm,opal-flash");

	return 0;
}
machine_subsys_initcall(powernv, opal_init);

void opal_shutdown(void)
{
	long rc = OPAL_BUSY;

	opal_event_shutdown();

	/*
	 * Then sync with OPAL to ensure anything that can potentially write
	 * to our memory, such as an ongoing dump retrieval, has completed.
	 */
	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_sync_host_reboot();
		if (rc == OPAL_BUSY_EVENT)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}

	/* Unregister memory dump region */
	if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
		opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}

/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);

/* Convert a region of vmalloc memory to an opal sg list */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
					     unsigned long vmalloc_size)
{
	struct opal_sg_list *sg, *first = NULL;
	unsigned long i = 0;

	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sg)
		goto nomem;

	first = sg;

	while (vmalloc_size > 0) {
		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
		uint64_t length = min(vmalloc_size, PAGE_SIZE);

		sg->entry[i].data = cpu_to_be64(data);
		sg->entry[i].length = cpu_to_be64(length);
		i++;

		if (i >= SG_ENTRIES_PER_NODE) {
			struct opal_sg_list *next;

			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!next)
				goto nomem;

			sg->length = cpu_to_be64(
					i * sizeof(struct opal_sg_entry) + 16);
			i = 0;
			sg->next = cpu_to_be64(__pa(next));
			sg = next;
		}

		vmalloc_addr += length;
		vmalloc_size -= length;
	}

	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);

	return first;

nomem:
	pr_err("%s : Failed to allocate memory\n", __func__);
	opal_free_sg_list(first);
	return NULL;
}

void opal_free_sg_list(struct opal_sg_list *sg)
{
	while (sg) {
		uint64_t next = be64_to_cpu(sg->next);

		kfree(sg);

		if (next)
			sg = __va(next);
		else
			sg = NULL;
	}
}

int opal_error_code(int rc)
{
	switch (rc) {
	case OPAL_SUCCESS:		return 0;

	case OPAL_PARAMETER:		return -EINVAL;
	case OPAL_ASYNC_COMPLETION:	return -EINPROGRESS;
	case OPAL_BUSY_EVENT:		return -EBUSY;
	case OPAL_NO_MEM:		return -ENOMEM;
	case OPAL_PERMISSION:		return -EPERM;

	case OPAL_UNSUPPORTED:		return -EIO;
	case OPAL_HARDWARE:		return -EIO;
	case OPAL_INTERNAL_ERROR:	return -EIO;
	default:
		pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
		return -EIO;
	}
}

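/*
 * Example (sketch, not part of the original code): callers typically use this
 * helper to turn an OPAL status into a Linux errno before returning it up the
 * stack, e.g.
 *
 *	rc = opal_some_call(token);
 *	return opal_error_code(rc);
 *
 * where opal_some_call() stands in for any OPAL API returning an OPAL status.
 */
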
EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);