1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Procedures for interfacing to the RTAS on CHRP machines.
6 * Peter Bergner, IBM March 2001.
7 * Copyright (C) 2001 IBM.
10 #define pr_fmt(fmt) "rtas: " fmt
12 #include <linux/bsearch.h>
13 #include <linux/capability.h>
14 #include <linux/delay.h>
15 #include <linux/export.h>
16 #include <linux/init.h>
17 #include <linux/kconfig.h>
18 #include <linux/kernel.h>
19 #include <linux/lockdep.h>
20 #include <linux/memblock.h>
21 #include <linux/mutex.h>
22 #include <linux/nospec.h>
24 #include <linux/of_fdt.h>
25 #include <linux/reboot.h>
26 #include <linux/sched.h>
27 #include <linux/security.h>
28 #include <linux/slab.h>
29 #include <linux/spinlock.h>
30 #include <linux/stdarg.h>
31 #include <linux/syscalls.h>
32 #include <linux/types.h>
33 #include <linux/uaccess.h>
34 #include <linux/xarray.h>
36 #include <asm/delay.h>
37 #include <asm/firmware.h>
38 #include <asm/interrupt.h>
39 #include <asm/machdep.h>
42 #include <asm/rtas-work-area.h>
45 #include <asm/trace.h>
49 /* Indexes into the args buffer, -1 if not used */
55 * Assumed buffer size per the spec if the function does not
56 * have a size parameter, e.g. ibm,errinjct. 0 if unused.
62 * struct rtas_function - Descriptor for RTAS functions.
64 * @token: Value of @name if it exists under the /rtas node.
65 * @name: Function name.
66 * @filter: If non-NULL, invoking this function via the rtas syscall is
67 * generally allowed, and @filter describes constraints on the
68 * arguments. See also @banned_for_syscall_on_le.
69 * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed
70 * but specifically restricted on ppc64le. Such
71 * functions are believed to have no users on
72 * ppc64le, and we want to keep it that way. It does
73 * not make sense for this to be set when @filter
75 * @lock: Pointer to an optional dedicated per-function mutex. This
76 * should be set for functions that require multiple calls in
77 * sequence to complete a single operation, and such sequences
78 * will disrupt each other if allowed to interleave. Users of
79 * this function are required to hold the associated lock for
80 * the duration of the call sequence. Add an explanatory
81 * comment to the function table entry if setting this member.
83 struct rtas_function
{
85 const bool banned_for_syscall_on_le
:1;
86 const char * const name
;
87 const struct rtas_filter
*filter
;
92 * Per-function locks for sequence-based RTAS functions.
94 static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock
);
95 static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock
);
96 static DEFINE_MUTEX(rtas_ibm_get_indices_lock
);
97 static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock
);
98 static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock
);
99 static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock
);
100 DEFINE_MUTEX(rtas_ibm_get_vpd_lock
);
102 static struct rtas_function rtas_function_table
[] __ro_after_init
= {
103 [RTAS_FNIDX__CHECK_EXCEPTION
] = {
104 .name
= "check-exception",
106 [RTAS_FNIDX__DISPLAY_CHARACTER
] = {
107 .name
= "display-character",
108 .filter
= &(const struct rtas_filter
) {
109 .buf_idx1
= -1, .size_idx1
= -1,
110 .buf_idx2
= -1, .size_idx2
= -1,
113 [RTAS_FNIDX__EVENT_SCAN
] = {
114 .name
= "event-scan",
116 [RTAS_FNIDX__FREEZE_TIME_BASE
] = {
117 .name
= "freeze-time-base",
119 [RTAS_FNIDX__GET_POWER_LEVEL
] = {
120 .name
= "get-power-level",
121 .filter
= &(const struct rtas_filter
) {
122 .buf_idx1
= -1, .size_idx1
= -1,
123 .buf_idx2
= -1, .size_idx2
= -1,
126 [RTAS_FNIDX__GET_SENSOR_STATE
] = {
127 .name
= "get-sensor-state",
128 .filter
= &(const struct rtas_filter
) {
129 .buf_idx1
= -1, .size_idx1
= -1,
130 .buf_idx2
= -1, .size_idx2
= -1,
133 [RTAS_FNIDX__GET_TERM_CHAR
] = {
134 .name
= "get-term-char",
136 [RTAS_FNIDX__GET_TIME_OF_DAY
] = {
137 .name
= "get-time-of-day",
138 .filter
= &(const struct rtas_filter
) {
139 .buf_idx1
= -1, .size_idx1
= -1,
140 .buf_idx2
= -1, .size_idx2
= -1,
143 [RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE
] = {
144 .name
= "ibm,activate-firmware",
145 .filter
= &(const struct rtas_filter
) {
146 .buf_idx1
= -1, .size_idx1
= -1,
147 .buf_idx2
= -1, .size_idx2
= -1,
150 * PAPR+ as of v2.13 doesn't explicitly impose any
151 * restriction, but this typically requires multiple
152 * calls before success, and there's no reason to
153 * allow sequences to interleave.
155 .lock
= &rtas_ibm_activate_firmware_lock
,
157 [RTAS_FNIDX__IBM_CBE_START_PTCAL
] = {
158 .name
= "ibm,cbe-start-ptcal",
160 [RTAS_FNIDX__IBM_CBE_STOP_PTCAL
] = {
161 .name
= "ibm,cbe-stop-ptcal",
163 [RTAS_FNIDX__IBM_CHANGE_MSI
] = {
164 .name
= "ibm,change-msi",
166 [RTAS_FNIDX__IBM_CLOSE_ERRINJCT
] = {
167 .name
= "ibm,close-errinjct",
168 .filter
= &(const struct rtas_filter
) {
169 .buf_idx1
= -1, .size_idx1
= -1,
170 .buf_idx2
= -1, .size_idx2
= -1,
173 [RTAS_FNIDX__IBM_CONFIGURE_BRIDGE
] = {
174 .name
= "ibm,configure-bridge",
176 [RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR
] = {
177 .name
= "ibm,configure-connector",
178 .filter
= &(const struct rtas_filter
) {
179 .buf_idx1
= 0, .size_idx1
= -1,
180 .buf_idx2
= 1, .size_idx2
= -1,
184 [RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP
] = {
185 .name
= "ibm,configure-kernel-dump",
187 [RTAS_FNIDX__IBM_CONFIGURE_PE
] = {
188 .name
= "ibm,configure-pe",
190 [RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW
] = {
191 .name
= "ibm,create-pe-dma-window",
193 [RTAS_FNIDX__IBM_DISPLAY_MESSAGE
] = {
194 .name
= "ibm,display-message",
195 .filter
= &(const struct rtas_filter
) {
196 .buf_idx1
= 0, .size_idx1
= -1,
197 .buf_idx2
= -1, .size_idx2
= -1,
200 [RTAS_FNIDX__IBM_ERRINJCT
] = {
201 .name
= "ibm,errinjct",
202 .filter
= &(const struct rtas_filter
) {
203 .buf_idx1
= 2, .size_idx1
= -1,
204 .buf_idx2
= -1, .size_idx2
= -1,
208 [RTAS_FNIDX__IBM_EXTI2C
] = {
209 .name
= "ibm,exti2c",
211 [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO
] = {
212 .name
= "ibm,get-config-addr-info",
214 [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2
] = {
215 .name
= "ibm,get-config-addr-info2",
216 .filter
= &(const struct rtas_filter
) {
217 .buf_idx1
= -1, .size_idx1
= -1,
218 .buf_idx2
= -1, .size_idx2
= -1,
221 [RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE
] = {
222 .name
= "ibm,get-dynamic-sensor-state",
223 .filter
= &(const struct rtas_filter
) {
224 .buf_idx1
= 1, .size_idx1
= -1,
225 .buf_idx2
= -1, .size_idx2
= -1,
228 * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS
229 * must not call ibm,get-dynamic-sensor-state with
230 * different inputs until a non-retry status has been
233 .lock
= &rtas_ibm_get_dynamic_sensor_state_lock
,
235 [RTAS_FNIDX__IBM_GET_INDICES
] = {
236 .name
= "ibm,get-indices",
237 .filter
= &(const struct rtas_filter
) {
238 .buf_idx1
= 2, .size_idx1
= 3,
239 .buf_idx2
= -1, .size_idx2
= -1,
242 * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not
243 * interleave ibm,get-indices call sequences with
246 .lock
= &rtas_ibm_get_indices_lock
,
248 [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY
] = {
249 .name
= "ibm,get-rio-topology",
251 [RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER
] = {
252 .name
= "ibm,get-system-parameter",
253 .filter
= &(const struct rtas_filter
) {
254 .buf_idx1
= 1, .size_idx1
= 2,
255 .buf_idx2
= -1, .size_idx2
= -1,
258 [RTAS_FNIDX__IBM_GET_VPD
] = {
259 .name
= "ibm,get-vpd",
260 .filter
= &(const struct rtas_filter
) {
261 .buf_idx1
= 0, .size_idx1
= -1,
262 .buf_idx2
= 1, .size_idx2
= 2,
265 * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences
266 * should not be allowed to interleave.
268 .lock
= &rtas_ibm_get_vpd_lock
,
270 [RTAS_FNIDX__IBM_GET_XIVE
] = {
271 .name
= "ibm,get-xive",
273 [RTAS_FNIDX__IBM_INT_OFF
] = {
274 .name
= "ibm,int-off",
276 [RTAS_FNIDX__IBM_INT_ON
] = {
277 .name
= "ibm,int-on",
279 [RTAS_FNIDX__IBM_IO_QUIESCE_ACK
] = {
280 .name
= "ibm,io-quiesce-ack",
282 [RTAS_FNIDX__IBM_LPAR_PERFTOOLS
] = {
283 .name
= "ibm,lpar-perftools",
284 .filter
= &(const struct rtas_filter
) {
285 .buf_idx1
= 2, .size_idx1
= 3,
286 .buf_idx2
= -1, .size_idx2
= -1,
289 * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow
290 * only one call sequence in progress at a time.
292 .lock
= &rtas_ibm_lpar_perftools_lock
,
294 [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE
] = {
295 .name
= "ibm,manage-flash-image",
297 [RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION
] = {
298 .name
= "ibm,manage-storage-preservation",
300 [RTAS_FNIDX__IBM_NMI_INTERLOCK
] = {
301 .name
= "ibm,nmi-interlock",
303 [RTAS_FNIDX__IBM_NMI_REGISTER
] = {
304 .name
= "ibm,nmi-register",
306 [RTAS_FNIDX__IBM_OPEN_ERRINJCT
] = {
307 .name
= "ibm,open-errinjct",
308 .filter
= &(const struct rtas_filter
) {
309 .buf_idx1
= -1, .size_idx1
= -1,
310 .buf_idx2
= -1, .size_idx2
= -1,
313 [RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE
] = {
314 .name
= "ibm,open-sriov-allow-unfreeze",
316 [RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER
] = {
317 .name
= "ibm,open-sriov-map-pe-number",
319 [RTAS_FNIDX__IBM_OS_TERM
] = {
320 .name
= "ibm,os-term",
322 [RTAS_FNIDX__IBM_PARTNER_CONTROL
] = {
323 .name
= "ibm,partner-control",
325 [RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION
] = {
326 .name
= "ibm,physical-attestation",
327 .filter
= &(const struct rtas_filter
) {
328 .buf_idx1
= 0, .size_idx1
= 1,
329 .buf_idx2
= -1, .size_idx2
= -1,
332 * This follows a sequence-based pattern similar to
333 * ibm,get-vpd et al. Since PAPR+ restricts
334 * interleaving call sequences for other functions of
335 * this style, assume the restriction applies here,
336 * even though it's not explicit in the spec.
338 .lock
= &rtas_ibm_physical_attestation_lock
,
340 [RTAS_FNIDX__IBM_PLATFORM_DUMP
] = {
341 .name
= "ibm,platform-dump",
342 .filter
= &(const struct rtas_filter
) {
343 .buf_idx1
= 4, .size_idx1
= 5,
344 .buf_idx2
= -1, .size_idx2
= -1,
347 * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent
348 * sequences of ibm,platform-dump are allowed if they
349 * are operating on different dump tags. So leave the
350 * lock pointer unset for now. This may need
351 * reconsideration if kernel-internal users appear.
354 [RTAS_FNIDX__IBM_POWER_OFF_UPS
] = {
355 .name
= "ibm,power-off-ups",
357 [RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER
] = {
358 .name
= "ibm,query-interrupt-source-number",
360 [RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW
] = {
361 .name
= "ibm,query-pe-dma-window",
363 [RTAS_FNIDX__IBM_READ_PCI_CONFIG
] = {
364 .name
= "ibm,read-pci-config",
366 [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE
] = {
367 .name
= "ibm,read-slot-reset-state",
368 .filter
= &(const struct rtas_filter
) {
369 .buf_idx1
= -1, .size_idx1
= -1,
370 .buf_idx2
= -1, .size_idx2
= -1,
373 [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2
] = {
374 .name
= "ibm,read-slot-reset-state2",
376 [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW
] = {
377 .name
= "ibm,remove-pe-dma-window",
379 [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW
] = {
381 * Note: PAPR+ v2.13 7.3.31.4.1 spells this as
382 * "ibm,reset-pe-dma-windows" (plural), but RTAS
383 * implementations use the singular form in practice.
385 .name
= "ibm,reset-pe-dma-window",
387 [RTAS_FNIDX__IBM_SCAN_LOG_DUMP
] = {
388 .name
= "ibm,scan-log-dump",
389 .filter
= &(const struct rtas_filter
) {
390 .buf_idx1
= 0, .size_idx1
= 1,
391 .buf_idx2
= -1, .size_idx2
= -1,
394 [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR
] = {
395 .name
= "ibm,set-dynamic-indicator",
396 .filter
= &(const struct rtas_filter
) {
397 .buf_idx1
= 2, .size_idx1
= -1,
398 .buf_idx2
= -1, .size_idx2
= -1,
401 * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call
402 * this function with different inputs until a
403 * non-retry status has been returned.
405 .lock
= &rtas_ibm_set_dynamic_indicator_lock
,
407 [RTAS_FNIDX__IBM_SET_EEH_OPTION
] = {
408 .name
= "ibm,set-eeh-option",
409 .filter
= &(const struct rtas_filter
) {
410 .buf_idx1
= -1, .size_idx1
= -1,
411 .buf_idx2
= -1, .size_idx2
= -1,
414 [RTAS_FNIDX__IBM_SET_SLOT_RESET
] = {
415 .name
= "ibm,set-slot-reset",
417 [RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER
] = {
418 .name
= "ibm,set-system-parameter",
419 .filter
= &(const struct rtas_filter
) {
420 .buf_idx1
= 1, .size_idx1
= -1,
421 .buf_idx2
= -1, .size_idx2
= -1,
424 [RTAS_FNIDX__IBM_SET_XIVE
] = {
425 .name
= "ibm,set-xive",
427 [RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL
] = {
428 .name
= "ibm,slot-error-detail",
430 [RTAS_FNIDX__IBM_SUSPEND_ME
] = {
431 .name
= "ibm,suspend-me",
432 .banned_for_syscall_on_le
= true,
433 .filter
= &(const struct rtas_filter
) {
434 .buf_idx1
= -1, .size_idx1
= -1,
435 .buf_idx2
= -1, .size_idx2
= -1,
438 [RTAS_FNIDX__IBM_TUNE_DMA_PARMS
] = {
439 .name
= "ibm,tune-dma-parms",
441 [RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT
] = {
442 .name
= "ibm,update-flash-64-and-reboot",
444 [RTAS_FNIDX__IBM_UPDATE_NODES
] = {
445 .name
= "ibm,update-nodes",
446 .banned_for_syscall_on_le
= true,
447 .filter
= &(const struct rtas_filter
) {
448 .buf_idx1
= 0, .size_idx1
= -1,
449 .buf_idx2
= -1, .size_idx2
= -1,
453 [RTAS_FNIDX__IBM_UPDATE_PROPERTIES
] = {
454 .name
= "ibm,update-properties",
455 .banned_for_syscall_on_le
= true,
456 .filter
= &(const struct rtas_filter
) {
457 .buf_idx1
= 0, .size_idx1
= -1,
458 .buf_idx2
= -1, .size_idx2
= -1,
462 [RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE
] = {
463 .name
= "ibm,validate-flash-image",
465 [RTAS_FNIDX__IBM_WRITE_PCI_CONFIG
] = {
466 .name
= "ibm,write-pci-config",
468 [RTAS_FNIDX__NVRAM_FETCH
] = {
469 .name
= "nvram-fetch",
471 [RTAS_FNIDX__NVRAM_STORE
] = {
472 .name
= "nvram-store",
474 [RTAS_FNIDX__POWER_OFF
] = {
477 [RTAS_FNIDX__PUT_TERM_CHAR
] = {
478 .name
= "put-term-char",
480 [RTAS_FNIDX__QUERY_CPU_STOPPED_STATE
] = {
481 .name
= "query-cpu-stopped-state",
483 [RTAS_FNIDX__READ_PCI_CONFIG
] = {
484 .name
= "read-pci-config",
486 [RTAS_FNIDX__RTAS_LAST_ERROR
] = {
487 .name
= "rtas-last-error",
489 [RTAS_FNIDX__SET_INDICATOR
] = {
490 .name
= "set-indicator",
491 .filter
= &(const struct rtas_filter
) {
492 .buf_idx1
= -1, .size_idx1
= -1,
493 .buf_idx2
= -1, .size_idx2
= -1,
496 [RTAS_FNIDX__SET_POWER_LEVEL
] = {
497 .name
= "set-power-level",
498 .filter
= &(const struct rtas_filter
) {
499 .buf_idx1
= -1, .size_idx1
= -1,
500 .buf_idx2
= -1, .size_idx2
= -1,
503 [RTAS_FNIDX__SET_TIME_FOR_POWER_ON
] = {
504 .name
= "set-time-for-power-on",
505 .filter
= &(const struct rtas_filter
) {
506 .buf_idx1
= -1, .size_idx1
= -1,
507 .buf_idx2
= -1, .size_idx2
= -1,
510 [RTAS_FNIDX__SET_TIME_OF_DAY
] = {
511 .name
= "set-time-of-day",
512 .filter
= &(const struct rtas_filter
) {
513 .buf_idx1
= -1, .size_idx1
= -1,
514 .buf_idx2
= -1, .size_idx2
= -1,
517 [RTAS_FNIDX__START_CPU
] = {
520 [RTAS_FNIDX__STOP_SELF
] = {
523 [RTAS_FNIDX__SYSTEM_REBOOT
] = {
524 .name
= "system-reboot",
526 [RTAS_FNIDX__THAW_TIME_BASE
] = {
527 .name
= "thaw-time-base",
529 [RTAS_FNIDX__WRITE_PCI_CONFIG
] = {
530 .name
= "write-pci-config",
534 #define for_each_rtas_function(funcp) \
535 for (funcp = &rtas_function_table[0]; \
536 funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \
540 * Nearly all RTAS calls need to be serialized. All uses of the
541 * default rtas_args block must hold rtas_lock.
543 * Exceptions to the RTAS serialization requirement (e.g. stop-self)
544 * must use a separate rtas_args structure.
546 static DEFINE_RAW_SPINLOCK(rtas_lock
);
547 static struct rtas_args rtas_args
;
550 * rtas_function_token() - RTAS function token lookup.
551 * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
553 * Context: Any context.
554 * Return: the token value for the function if implemented by this platform,
555 * otherwise RTAS_UNKNOWN_SERVICE.
557 s32
rtas_function_token(const rtas_fn_handle_t handle
)
559 const size_t index
= handle
.index
;
560 const bool out_of_bounds
= index
>= ARRAY_SIZE(rtas_function_table
);
562 if (WARN_ONCE(out_of_bounds
, "invalid function index %zu", index
))
563 return RTAS_UNKNOWN_SERVICE
;
565 * Various drivers attempt token lookups on non-RTAS
569 return RTAS_UNKNOWN_SERVICE
;
571 return rtas_function_table
[index
].token
;
573 EXPORT_SYMBOL_GPL(rtas_function_token
);
575 static int rtas_function_cmp(const void *a
, const void *b
)
577 const struct rtas_function
*f1
= a
;
578 const struct rtas_function
*f2
= b
;
580 return strcmp(f1
->name
, f2
->name
);
584 * Boot-time initialization of the function table needs the lookup to
585 * return a non-const-qualified object. Use rtas_name_to_function()
586 * in all other contexts.
588 static struct rtas_function
*__rtas_name_to_function(const char *name
)
590 const struct rtas_function key
= {
593 struct rtas_function
*found
;
595 found
= bsearch(&key
, rtas_function_table
, ARRAY_SIZE(rtas_function_table
),
596 sizeof(rtas_function_table
[0]), rtas_function_cmp
);
/*
 * Const-qualified front end to __rtas_name_to_function(): hands back
 * whatever that lookup returns for @name.
 */
static const struct rtas_function *rtas_name_to_function(const char *name)
{
	const struct rtas_function *match = __rtas_name_to_function(name);

	return match;
}
606 static DEFINE_XARRAY(rtas_token_to_function_xarray
);
608 static int __init
rtas_token_to_function_xarray_init(void)
610 const struct rtas_function
*func
;
613 for_each_rtas_function(func
) {
614 const s32 token
= func
->token
;
616 if (token
== RTAS_UNKNOWN_SERVICE
)
619 err
= xa_err(xa_store(&rtas_token_to_function_xarray
,
620 token
, (void *)func
, GFP_KERNEL
));
627 arch_initcall(rtas_token_to_function_xarray_init
);
630 * For use by sys_rtas(), where the token value is provided by user
631 * space and we don't want to warn on failed lookups.
633 static const struct rtas_function
*rtas_token_to_function_untrusted(s32 token
)
635 return xa_load(&rtas_token_to_function_xarray
, token
);
639 * Reverse lookup for deriving the function descriptor from a
640 * known-good token value in contexts where the former is not already
641 * available. @token must be valid, e.g. derived from the result of a
642 * prior lookup against the function table.
644 static const struct rtas_function
*rtas_token_to_function(s32 token
)
646 const struct rtas_function
*func
;
648 if (WARN_ONCE(token
< 0, "invalid token %d", token
))
651 func
= rtas_token_to_function_untrusted(token
);
655 * Fall back to linear scan in case the reverse mapping hasn't
656 * been initialized yet.
658 if (xa_empty(&rtas_token_to_function_xarray
)) {
659 for_each_rtas_function(func
) {
660 if (func
->token
== token
)
665 WARN_ONCE(true, "unexpected failed lookup for token %d", token
);
669 /* This is here deliberately so it's only used in this file */
670 void enter_rtas(unsigned long);
/*
 * Lowest-level entry: pass the physical address of @args to enter_rtas(),
 * then mark the SRR registers as clobbered.
 */
static void __do_enter_rtas(struct rtas_args *args)
{
	const unsigned long args_pa = __pa(args);

	enter_rtas(args_pa);
	srr_regs_clobbered(); /* rtas uses SRRs, invalidate */
}
678 static void __do_enter_rtas_trace(struct rtas_args
*args
)
680 const struct rtas_function
*func
= rtas_token_to_function(be32_to_cpu(args
->token
));
683 * If there is a per-function lock, it must be held by the
687 lockdep_assert_held(func
->lock
);
689 if (args
== &rtas_args
)
690 lockdep_assert_held(&rtas_lock
);
692 trace_rtas_input(args
, func
->name
);
693 trace_rtas_ll_entry(args
);
695 __do_enter_rtas(args
);
697 trace_rtas_ll_exit(args
);
698 trace_rtas_output(args
, func
->name
);
701 static void do_enter_rtas(struct rtas_args
*args
)
703 const unsigned long msr
= mfmsr();
705 * Situations where we want to skip any active tracepoints for
708 * 1. The last code executed on an offline CPU as it stops,
709 * i.e. we're about to call stop-self. The tracepoints'
710 * function name lookup uses xarray, which uses RCU, which
711 * isn't valid to call on an offline CPU. Any events
712 * emitted on an offline CPU will be discarded anyway.
714 * 2. In real mode, as when invoking ibm,nmi-interlock from
715 * the pseries MCE handler. We cannot count on trace
716 * buffers or the entries in rtas_token_to_function_xarray
717 * to be contained in the RMO.
719 const unsigned long mask
= MSR_IR
| MSR_DR
;
720 const bool can_trace
= likely(cpu_online(raw_smp_processor_id()) &&
721 (msr
& mask
) == mask
);
723 * Make sure MSR[RI] is currently enabled as it will be forced later
726 BUG_ON(!(msr
& MSR_RI
));
728 BUG_ON(!irqs_disabled());
730 hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */
733 __do_enter_rtas_trace(args
);
735 __do_enter_rtas(args
);
740 DEFINE_SPINLOCK(rtas_data_buf_lock
);
741 EXPORT_SYMBOL_GPL(rtas_data_buf_lock
);
743 char rtas_data_buf
[RTAS_DATA_BUF_SIZE
] __aligned(SZ_4K
);
744 EXPORT_SYMBOL_GPL(rtas_data_buf
);
746 unsigned long rtas_rmo_buf
;
749 * If non-NULL, this gets called when the kernel terminates.
750 * This is done like this so rtas_flash can be a module.
752 void (*rtas_flash_term_hook
)(int);
753 EXPORT_SYMBOL_GPL(rtas_flash_term_hook
);
756 * call_rtas_display_status and call_rtas_display_status_delay
757 * are designed only for very early low-level debugging, which
758 * is why the token is hard-coded to 10.
760 static void call_rtas_display_status(unsigned char c
)
767 raw_spin_lock_irqsave(&rtas_lock
, flags
);
768 rtas_call_unlocked(&rtas_args
, 10, 1, 1, NULL
, c
);
769 raw_spin_unlock_irqrestore(&rtas_lock
, flags
);
772 static void call_rtas_display_status_delay(char c
)
774 static int pending_newline
= 0; /* did last write end with unprinted newline? */
775 static int width
= 16;
779 call_rtas_display_status(' ');
784 if (pending_newline
) {
785 call_rtas_display_status('\r');
786 call_rtas_display_status('\n');
790 call_rtas_display_status(c
);
796 void __init
udbg_init_rtas_panel(void)
798 udbg_putc
= call_rtas_display_status_delay
;
801 #ifdef CONFIG_UDBG_RTAS_CONSOLE
803 /* If you think you're dying before early_init_dt_scan_rtas() does its
804 * work, you can hard code the token values for your firmware here and
805 * hardcode rtas.base/entry etc.
807 static unsigned int rtas_putchar_token
= RTAS_UNKNOWN_SERVICE
;
808 static unsigned int rtas_getchar_token
= RTAS_UNKNOWN_SERVICE
;
810 static void udbg_rtascon_putc(char c
)
817 /* Add CRs before LFs */
819 udbg_rtascon_putc('\r');
821 /* if there is more than one character to be displayed, wait a bit */
822 for (tries
= 0; tries
< 16; tries
++) {
823 if (rtas_call(rtas_putchar_token
, 1, 1, NULL
, c
) == 0)
829 static int udbg_rtascon_getc_poll(void)
836 if (rtas_call(rtas_getchar_token
, 0, 2, &c
))
842 static int udbg_rtascon_getc(void)
846 while ((c
= udbg_rtascon_getc_poll()) == -1)
853 void __init
udbg_init_rtas_console(void)
855 udbg_putc
= udbg_rtascon_putc
;
856 udbg_getc
= udbg_rtascon_getc
;
857 udbg_getc_poll
= udbg_rtascon_getc_poll
;
859 #endif /* CONFIG_UDBG_RTAS_CONSOLE */
861 void rtas_progress(char *s
, unsigned short hex
)
863 struct device_node
*root
;
867 static int display_character
, set_indicator
;
868 static int display_width
, display_lines
, form_feed
;
869 static const int *row_width
;
870 static DEFINE_SPINLOCK(progress_lock
);
871 static int current_line
;
872 static int pending_newline
= 0; /* did last write end with unprinted newline? */
877 if (display_width
== 0) {
878 display_width
= 0x10;
879 if ((root
= of_find_node_by_path("/rtas"))) {
880 if ((p
= of_get_property(root
,
881 "ibm,display-line-length", NULL
)))
882 display_width
= be32_to_cpu(*p
);
883 if ((p
= of_get_property(root
,
884 "ibm,form-feed", NULL
)))
885 form_feed
= be32_to_cpu(*p
);
886 if ((p
= of_get_property(root
,
887 "ibm,display-number-of-lines", NULL
)))
888 display_lines
= be32_to_cpu(*p
);
889 row_width
= of_get_property(root
,
890 "ibm,display-truncation-length", NULL
);
893 display_character
= rtas_function_token(RTAS_FN_DISPLAY_CHARACTER
);
894 set_indicator
= rtas_function_token(RTAS_FN_SET_INDICATOR
);
897 if (display_character
== RTAS_UNKNOWN_SERVICE
) {
898 /* use hex display if available */
899 if (set_indicator
!= RTAS_UNKNOWN_SERVICE
)
900 rtas_call(set_indicator
, 3, 1, NULL
, 6, 0, hex
);
904 spin_lock(&progress_lock
);
907 * Last write ended with newline, but we didn't print it since
908 * it would just clear the bottom line of output. Print it now
911 * If no newline is pending and form feed is supported, clear the
912 * display with a form feed; otherwise, print a CR to start output
913 * at the beginning of the line.
915 if (pending_newline
) {
916 rtas_call(display_character
, 1, 1, NULL
, '\r');
917 rtas_call(display_character
, 1, 1, NULL
, '\n');
922 rtas_call(display_character
, 1, 1, NULL
,
925 rtas_call(display_character
, 1, 1, NULL
, '\r');
929 width
= row_width
[current_line
];
931 width
= display_width
;
934 if (*os
== '\n' || *os
== '\r') {
935 /* If newline is the last character, save it
936 * until next call to avoid bumping up the
939 if (*os
== '\n' && !os
[1]) {
942 if (current_line
> display_lines
-1)
943 current_line
= display_lines
-1;
944 spin_unlock(&progress_lock
);
948 /* RTAS wants CR-LF, not just LF */
951 rtas_call(display_character
, 1, 1, NULL
, '\r');
952 rtas_call(display_character
, 1, 1, NULL
, '\n');
954 /* CR might be used to re-draw a line, so we'll
955 * leave it alone and not add LF.
957 rtas_call(display_character
, 1, 1, NULL
, *os
);
961 width
= row_width
[current_line
];
963 width
= display_width
;
966 rtas_call(display_character
, 1, 1, NULL
, *os
);
971 /* if we overwrite the screen length */
973 while ((*os
!= 0) && (*os
!= '\n') && (*os
!= '\r'))
977 spin_unlock(&progress_lock
);
979 EXPORT_SYMBOL_GPL(rtas_progress
); /* needed by rtas_flash module */
981 int rtas_token(const char *service
)
983 const struct rtas_function
*func
;
986 if (rtas
.dev
== NULL
)
987 return RTAS_UNKNOWN_SERVICE
;
989 func
= rtas_name_to_function(service
);
993 * The caller is looking up a name that is not known to be an
994 * RTAS function. Either it's a function that needs to be
995 * added to the table, or they're misusing rtas_token() to
996 * access non-function properties of the /rtas node. Warn and
997 * fall back to the legacy behavior.
999 WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n",
1002 tokp
= of_get_property(rtas
.dev
, service
, NULL
);
1003 return tokp
? be32_to_cpu(*tokp
) : RTAS_UNKNOWN_SERVICE
;
1005 EXPORT_SYMBOL_GPL(rtas_token
);
1007 #ifdef CONFIG_RTAS_ERROR_LOGGING
1009 static u32 rtas_error_log_max __ro_after_init
= RTAS_ERROR_LOG_MAX
;
1012 * Return the firmware-specified size of the error log buffer
1013 * for all rtas calls that require an error buffer argument.
1014 * This includes 'check-exception' and 'rtas-last-error'.
1016 int rtas_get_error_log_max(void)
1018 return rtas_error_log_max
;
1021 static void __init
init_error_log_max(void)
1023 static const char propname
[] __initconst
= "rtas-error-log-max";
1026 if (of_property_read_u32(rtas
.dev
, propname
, &max
)) {
1027 pr_warn("%s not found, using default of %u\n",
1028 propname
, RTAS_ERROR_LOG_MAX
);
1029 max
= RTAS_ERROR_LOG_MAX
;
1032 if (max
> RTAS_ERROR_LOG_MAX
) {
1033 pr_warn("%s = %u, clamping max error log size to %u\n",
1034 propname
, max
, RTAS_ERROR_LOG_MAX
);
1035 max
= RTAS_ERROR_LOG_MAX
;
1038 rtas_error_log_max
= max
;
1042 static char rtas_err_buf
[RTAS_ERROR_LOG_MAX
];
1044 /** Return a copy of the detailed error text associated with the
1045 * most recent failed call to rtas. Because the error text
1046 * might go stale if there are any other intervening rtas calls,
1047 * this routine must be called atomically with whatever produced
1048 * the error (i.e. with rtas_lock still held from the previous call).
1050 static char *__fetch_rtas_last_error(char *altbuf
)
1052 const s32 token
= rtas_function_token(RTAS_FN_RTAS_LAST_ERROR
);
1053 struct rtas_args err_args
, save_args
;
1057 lockdep_assert_held(&rtas_lock
);
1062 bufsz
= rtas_get_error_log_max();
1064 err_args
.token
= cpu_to_be32(token
);
1065 err_args
.nargs
= cpu_to_be32(2);
1066 err_args
.nret
= cpu_to_be32(1);
1067 err_args
.args
[0] = cpu_to_be32(__pa(rtas_err_buf
));
1068 err_args
.args
[1] = cpu_to_be32(bufsz
);
1069 err_args
.args
[2] = 0;
1071 save_args
= rtas_args
;
1072 rtas_args
= err_args
;
1074 do_enter_rtas(&rtas_args
);
1076 err_args
= rtas_args
;
1077 rtas_args
= save_args
;
1079 /* Log the error in the unlikely case that there was one. */
1080 if (unlikely(err_args
.args
[2] == 0)) {
1085 if (slab_is_available())
1086 buf
= kmalloc(RTAS_ERROR_LOG_MAX
, GFP_ATOMIC
);
1089 memmove(buf
, rtas_err_buf
, RTAS_ERROR_LOG_MAX
);
1095 #define get_errorlog_buffer() kmalloc(RTAS_ERROR_LOG_MAX, GFP_KERNEL)
1097 #else /* CONFIG_RTAS_ERROR_LOGGING */
1098 #define __fetch_rtas_last_error(x) NULL
1099 #define get_errorlog_buffer() NULL
1100 static void __init
init_error_log_max(void) {}
1105 va_rtas_call_unlocked(struct rtas_args
*args
, int token
, int nargs
, int nret
,
1110 args
->token
= cpu_to_be32(token
);
1111 args
->nargs
= cpu_to_be32(nargs
);
1112 args
->nret
= cpu_to_be32(nret
);
1113 args
->rets
= &(args
->args
[nargs
]);
1115 for (i
= 0; i
< nargs
; ++i
)
1116 args
->args
[i
] = cpu_to_be32(va_arg(list
, __u32
));
1118 for (i
= 0; i
< nret
; ++i
)
1121 do_enter_rtas(args
);
/**
 * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization.
 * @args: RTAS parameter block to be used for the call, must obey RTAS addressing
 *        constraints.
 * @token: Identifies the function being invoked.
 * @nargs: Number of input parameters. Does not include token.
 * @nret: Number of output parameters, including the call status.
 * @....: List of @nargs input parameters.
 *
 * Invokes the RTAS function indicated by @token, which the caller
 * should obtain via rtas_function_token().
 *
 * This function is similar to rtas_call(), but must be used with a
 * limited set of RTAS calls specifically exempted from the general
 * requirement that only one RTAS call may be in progress at any
 * time. Examples include stop-self and ibm,nmi-interlock.
 */
void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
{
	va_list list;

	va_start(list, nret);
	va_rtas_call_unlocked(args, token, nargs, nret, list);
	/* Every va_start() must be paired with va_end() before returning. */
	va_end(list);
}
1150 static bool token_is_restricted_errinjct(s32 token
)
1152 return token
== rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT
) ||
1153 token
== rtas_function_token(RTAS_FN_IBM_ERRINJCT
);
1157 * rtas_call() - Invoke an RTAS firmware function.
1158 * @token: Identifies the function being invoked.
1159 * @nargs: Number of input parameters. Does not include token.
1160 * @nret: Number of output parameters, including the call status.
1161 * @outputs: Array of @nret output words.
1162 * @....: List of @nargs input parameters.
1164 * Invokes the RTAS function indicated by @token, which the caller
1165 * should obtain via rtas_function_token().
1167 * The @nargs and @nret arguments must match the number of input and
1168 * output parameters specified for the RTAS function.
1170 * rtas_call() returns RTAS status codes, not conventional Linux errno
1171 * values. Callers must translate any failure to an appropriate errno
1172 * in syscall context. Most callers of RTAS functions that can return
1173 * -2 or 990x should use rtas_busy_delay() to correctly handle those
1174 * statuses before calling again.
1176 * The return value descriptions are adapted from 7.2.8 [RTAS] Return
1177 * Codes of the PAPR and CHRP specifications.
1179 * Context: Process context preferably, interrupt context if
1180 * necessary. Acquires an internal spinlock and may perform
1181 * GFP_ATOMIC slab allocation in error path. Unsafe for NMI
1184 * * 0 - RTAS function call succeeded.
1185 * * -1 - RTAS function encountered a hardware or
1186 * platform error, or the token is invalid,
1187 * or the function is restricted by kernel policy.
1188 * * -2 - Specs say "A necessary hardware device was busy,
1189 * and the requested function could not be
1190 * performed. The operation should be retried at
1191 * a later time." This is misleading, at least with
1192 * respect to current RTAS implementations. What it
1193 * usually means in practice is that the function
1194 * could not be completed while meeting RTAS's
1195 * deadline for returning control to the OS (250us
1196 * for PAPR/PowerVM, typically), but the call may be
1197 * immediately reattempted to resume work on it.
1198 * * -3 - Parameter error.
1199 * * -7 - Unexpected state change.
1200 * * 9000...9899 - Vendor-specific success codes.
1201 * * 9900...9905 - Advisory extended delay. Caller should try
1202 * again after ~10^x ms has elapsed, where x is
1203 * the last digit of the status [0-5]. Again going
1204 * beyond the PAPR text, 990x on PowerVM indicates
1205 * contention for RTAS-internal resources. Other
1206 * RTAS call sequences in progress should be
1207 * allowed to complete before reattempting the
1209 * * -9000 - Multi-level isolation error.
1210 * * -9999...-9004 - Vendor-specific error codes.
1211 * * Additional negative values - Function-specific error.
1212 * * Additional positive values - Function-specific success.
1214 int rtas_call(int token
, int nargs
, int nret
, int *outputs
, ...)
1216 struct pin_cookie cookie
;
1219 unsigned long flags
;
1220 struct rtas_args
*args
;
1221 char *buff_copy
= NULL
;
1224 if (!rtas
.entry
|| token
== RTAS_UNKNOWN_SERVICE
)
1227 if (token_is_restricted_errinjct(token
)) {
1229 * It would be nicer to not discard the error value
1230 * from security_locked_down(), but callers expect an
1231 * RTAS status, not an errno.
1233 if (security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION
))
1237 if ((mfmsr() & (MSR_IR
|MSR_DR
)) != (MSR_IR
|MSR_DR
)) {
1242 raw_spin_lock_irqsave(&rtas_lock
, flags
);
1243 cookie
= lockdep_pin_lock(&rtas_lock
);
1245 /* We use the global rtas args buffer */
1248 va_start(list
, outputs
);
1249 va_rtas_call_unlocked(args
, token
, nargs
, nret
, list
);
1252 /* A -1 return code indicates that the last command couldn't
1253 be completed due to a hardware error. */
1254 if (be32_to_cpu(args
->rets
[0]) == -1)
1255 buff_copy
= __fetch_rtas_last_error(NULL
);
1257 if (nret
> 1 && outputs
!= NULL
)
1258 for (i
= 0; i
< nret
-1; ++i
)
1259 outputs
[i
] = be32_to_cpu(args
->rets
[i
+ 1]);
1260 ret
= (nret
> 0) ? be32_to_cpu(args
->rets
[0]) : 0;
1262 lockdep_unpin_lock(&rtas_lock
, cookie
);
1263 raw_spin_unlock_irqrestore(&rtas_lock
, flags
);
1266 log_error(buff_copy
, ERR_TYPE_RTAS_LOG
, 0);
1267 if (slab_is_available())
1272 EXPORT_SYMBOL_GPL(rtas_call
);
1275 * rtas_busy_delay_time() - From an RTAS status value, calculate the
1276 * suggested delay time in milliseconds.
1278 * @status: a value returned from rtas_call() or similar APIs which return
1279 * the status of a RTAS function call.
1281 * Context: Any context.
1284 * * 100000 - If @status is 9905.
1285 * * 10000 - If @status is 9904.
1286 * * 1000 - If @status is 9903.
1287 * * 100 - If @status is 9902.
1288 * * 10 - If @status is 9901.
1289 * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but
1290 * some callers depend on this behavior, and the worst outcome
1291 * is that they will delay for longer than necessary.
1292 * * 0 - If @status is not a busy or extended delay value.
1294 unsigned int rtas_busy_delay_time(int status
)
1297 unsigned int ms
= 0;
1299 if (status
== RTAS_BUSY
) {
1301 } else if (status
>= RTAS_EXTENDED_DELAY_MIN
&&
1302 status
<= RTAS_EXTENDED_DELAY_MAX
) {
1303 order
= status
- RTAS_EXTENDED_DELAY_MIN
;
1304 for (ms
= 1; order
> 0; order
--)
1312 * Early boot fallback for rtas_busy_delay().
1314 static bool __init
rtas_busy_delay_early(int status
)
1316 static size_t successive_ext_delays __initdata
;
1320 case RTAS_EXTENDED_DELAY_MIN
...RTAS_EXTENDED_DELAY_MAX
:
1322 * In the unlikely case that we receive an extended
1323 * delay status in early boot, the OS is probably not
1324 * the cause, and there's nothing we can do to clear
1325 * the condition. Best we can do is delay for a bit
1326 * and hope it's transient. Lie to the caller if it
1327 * seems like we're stuck in a retry loop.
1331 successive_ext_delays
+= 1;
1332 if (successive_ext_delays
> 1000) {
1333 pr_err("too many extended delays, giving up\n");
1336 successive_ext_delays
= 0;
1341 successive_ext_delays
= 0;
1345 successive_ext_delays
= 0;
1353 * rtas_busy_delay() - helper for RTAS busy and extended delay statuses
1355 * @status: a value returned from rtas_call() or similar APIs which return
1356 * the status of a RTAS function call.
1358 * Context: Process context. May sleep or schedule.
1361 * * true - @status is RTAS_BUSY or an extended delay hint. The
1362 * caller may assume that the CPU has been yielded if necessary,
1363 * and that an appropriate delay for @status has elapsed.
1364 * Generally the caller should reattempt the RTAS call which
1367 * * false - @status is not @RTAS_BUSY nor an extended delay hint. The
1368 * caller is responsible for handling @status.
1370 bool __ref
rtas_busy_delay(int status
)
1376 * Can't do timed sleeps before timekeeping is up.
1378 if (system_state
< SYSTEM_SCHEDULING
)
1379 return rtas_busy_delay_early(status
);
1382 case RTAS_EXTENDED_DELAY_MIN
...RTAS_EXTENDED_DELAY_MAX
:
1384 ms
= rtas_busy_delay_time(status
);
1386 * The extended delay hint can be as high as 100 seconds.
1387 * Surely any function returning such a status is either
1388 * buggy or isn't going to be significantly slowed by us
1389 * polling at 1HZ. Clamp the sleep time to one second.
1391 ms
= clamp(ms
, 1U, 1000U);
1393 * The delay hint is an order-of-magnitude suggestion, not a
1394 * minimum. It is fine, possibly even advantageous, for us to
1395 * pause for less time than hinted. To make sure pause time will
1396 * not be way longer than requested independent of HZ
1397 * configuration, use fsleep(). See fsleep() for details of
1398 * used sleeping functions.
1405 * We should call again immediately if there's no other
1413 * Not a busy or extended delay status; the caller should
1414 * handle @status itself. Ensure we warn on misuses in
1415 * atomic context regardless.
1423 EXPORT_SYMBOL_GPL(rtas_busy_delay
);
1425 int rtas_error_rc(int rtas_rc
)
1430 case RTAS_HARDWARE_ERROR
: /* Hardware Error */
1433 case RTAS_INVALID_PARAMETER
: /* Bad indicator/domain/etc */
1436 case -9000: /* Isolation error */
1439 case -9001: /* Outstanding TCE/PTE */
1442 case -9002: /* No usable slot */
1446 pr_err("%s: unexpected error %d\n", __func__
, rtas_rc
);
1452 EXPORT_SYMBOL_GPL(rtas_error_rc
);
1454 int rtas_get_power_level(int powerdomain
, int *level
)
1456 int token
= rtas_function_token(RTAS_FN_GET_POWER_LEVEL
);
1459 if (token
== RTAS_UNKNOWN_SERVICE
)
1462 while ((rc
= rtas_call(token
, 1, 2, level
, powerdomain
)) == RTAS_BUSY
)
1466 return rtas_error_rc(rc
);
1469 EXPORT_SYMBOL_GPL(rtas_get_power_level
);
1471 int rtas_set_power_level(int powerdomain
, int level
, int *setlevel
)
1473 int token
= rtas_function_token(RTAS_FN_SET_POWER_LEVEL
);
1476 if (token
== RTAS_UNKNOWN_SERVICE
)
1480 rc
= rtas_call(token
, 2, 2, setlevel
, powerdomain
, level
);
1481 } while (rtas_busy_delay(rc
));
1484 return rtas_error_rc(rc
);
1487 EXPORT_SYMBOL_GPL(rtas_set_power_level
);
1489 int rtas_get_sensor(int sensor
, int index
, int *state
)
1491 int token
= rtas_function_token(RTAS_FN_GET_SENSOR_STATE
);
1494 if (token
== RTAS_UNKNOWN_SERVICE
)
1498 rc
= rtas_call(token
, 2, 2, state
, sensor
, index
);
1499 } while (rtas_busy_delay(rc
));
1502 return rtas_error_rc(rc
);
1505 EXPORT_SYMBOL_GPL(rtas_get_sensor
);
1507 int rtas_get_sensor_fast(int sensor
, int index
, int *state
)
1509 int token
= rtas_function_token(RTAS_FN_GET_SENSOR_STATE
);
1512 if (token
== RTAS_UNKNOWN_SERVICE
)
1515 rc
= rtas_call(token
, 2, 2, state
, sensor
, index
);
1516 WARN_ON(rc
== RTAS_BUSY
|| (rc
>= RTAS_EXTENDED_DELAY_MIN
&&
1517 rc
<= RTAS_EXTENDED_DELAY_MAX
));
1520 return rtas_error_rc(rc
);
1524 bool rtas_indicator_present(int token
, int *maxindex
)
1526 int proplen
, count
, i
;
1527 const struct indicator_elem
{
1532 indicators
= of_get_property(rtas
.dev
, "rtas-indicators", &proplen
);
1536 count
= proplen
/ sizeof(struct indicator_elem
);
1538 for (i
= 0; i
< count
; i
++) {
1539 if (__be32_to_cpu(indicators
[i
].token
) != token
)
1542 *maxindex
= __be32_to_cpu(indicators
[i
].maxindex
);
1549 int rtas_set_indicator(int indicator
, int index
, int new_value
)
1551 int token
= rtas_function_token(RTAS_FN_SET_INDICATOR
);
1554 if (token
== RTAS_UNKNOWN_SERVICE
)
1558 rc
= rtas_call(token
, 3, 1, NULL
, indicator
, index
, new_value
);
1559 } while (rtas_busy_delay(rc
));
1562 return rtas_error_rc(rc
);
1565 EXPORT_SYMBOL_GPL(rtas_set_indicator
);
1568 * Ignoring RTAS extended delay
1570 int rtas_set_indicator_fast(int indicator
, int index
, int new_value
)
1572 int token
= rtas_function_token(RTAS_FN_SET_INDICATOR
);
1575 if (token
== RTAS_UNKNOWN_SERVICE
)
1578 rc
= rtas_call(token
, 3, 1, NULL
, indicator
, index
, new_value
);
1580 WARN_ON(rc
== RTAS_BUSY
|| (rc
>= RTAS_EXTENDED_DELAY_MIN
&&
1581 rc
<= RTAS_EXTENDED_DELAY_MAX
));
1584 return rtas_error_rc(rc
);
1590 * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR.
1592 * @fw_status: RTAS call status will be placed here if not NULL.
1594 * rtas_ibm_suspend_me() should be called only on a CPU which has
1595 * received H_CONTINUE from the H_JOIN hcall. All other active CPUs
1596 * should be waiting to return from H_JOIN.
1598 * rtas_ibm_suspend_me() may suspend execution of the OS
1599 * indefinitely. Callers should take appropriate measures upon return, such as
1600 * resetting watchdog facilities.
1602 * Callers may choose to retry this call if @fw_status is
1603 * %RTAS_THREADS_ACTIVE.
1606 * 0 - The partition has resumed from suspend, possibly after
1607 * migration to a different host.
1608 * -ECANCELED - The operation was aborted.
1609 * -EAGAIN - There were other CPUs not in H_JOIN at the time of the call.
1610 * -EBUSY - Some other condition prevented the suspend from succeeding.
1611 * -EIO - Hardware/platform error.
1613 int rtas_ibm_suspend_me(int *fw_status
)
1615 int token
= rtas_function_token(RTAS_FN_IBM_SUSPEND_ME
);
1619 fwrc
= rtas_call(token
, 0, 1, NULL
);
1625 case RTAS_SUSPEND_ABORTED
:
1628 case RTAS_THREADS_ACTIVE
:
1631 case RTAS_NOT_SUSPENDABLE
:
1632 case RTAS_OUTSTANDING_COPROC
:
1647 void __noreturn
rtas_restart(char *cmd
)
1649 if (rtas_flash_term_hook
)
1650 rtas_flash_term_hook(SYS_RESTART
);
1651 pr_emerg("system-reboot returned %d\n",
1652 rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT
), 0, 1, NULL
));
1656 void rtas_power_off(void)
1658 if (rtas_flash_term_hook
)
1659 rtas_flash_term_hook(SYS_POWER_OFF
);
1660 /* allow power on only with power button press */
1661 pr_emerg("power-off returned %d\n",
1662 rtas_call(rtas_function_token(RTAS_FN_POWER_OFF
), 2, 1, NULL
, -1, -1));
1666 void __noreturn
rtas_halt(void)
1668 if (rtas_flash_term_hook
)
1669 rtas_flash_term_hook(SYS_HALT
);
1670 /* allow power on only with power button press */
1671 pr_emerg("power-off returned %d\n",
1672 rtas_call(rtas_function_token(RTAS_FN_POWER_OFF
), 2, 1, NULL
, -1, -1));
1676 /* Must be in the RMO region, so we place it here */
1677 static char rtas_os_term_buf
[2048];
1678 static bool ibm_extended_os_term
;
1680 void rtas_os_term(char *str
)
1682 s32 token
= rtas_function_token(RTAS_FN_IBM_OS_TERM
);
1683 static struct rtas_args args
;
1687 * Firmware with the ibm,extended-os-term property is guaranteed
1688 * to always return from an ibm,os-term call. Earlier versions without
1689 * this property may terminate the partition which we want to avoid
1690 * since it interferes with panic_timeout.
1693 if (token
== RTAS_UNKNOWN_SERVICE
|| !ibm_extended_os_term
)
1696 snprintf(rtas_os_term_buf
, 2048, "OS panic: %s", str
);
1699 * Keep calling as long as RTAS returns a "try again" status,
1700 * but don't use rtas_busy_delay(), which potentially
1704 rtas_call_unlocked(&args
, token
, 1, 1, NULL
, __pa(rtas_os_term_buf
));
1705 status
= be32_to_cpu(args
.rets
[0]);
1706 } while (rtas_busy_delay_time(status
));
1709 pr_emerg("ibm,os-term call failed %d\n", status
);
1713 * rtas_activate_firmware() - Activate a new version of firmware.
1715 * Context: This function may sleep.
1717 * Activate a new version of partition firmware. The OS must call this
1718 * after resuming from a partition hibernation or migration in order
1719 * to maintain the ability to perform live firmware updates. It's not
1720 * catastrophic for this method to be absent or to fail; just log the
1721 * condition in that case.
1723 void rtas_activate_firmware(void)
1725 int token
= rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE
);
1728 if (token
== RTAS_UNKNOWN_SERVICE
) {
1729 pr_notice("ibm,activate-firmware method unavailable\n");
1733 mutex_lock(&rtas_ibm_activate_firmware_lock
);
1736 fwrc
= rtas_call(token
, 0, 1, NULL
);
1737 } while (rtas_busy_delay(fwrc
));
1739 mutex_unlock(&rtas_ibm_activate_firmware_lock
);
1742 pr_err("ibm,activate-firmware failed (%i)\n", fwrc
);
1746 * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
1747 * extended event log.
1748 * @log: RTAS error/event log
1749 * @section_id: two character section identifier
1751 * Return: A pointer to the specified errorlog or NULL if not found.
1753 noinstr
struct pseries_errorlog
*get_pseries_errorlog(struct rtas_error_log
*log
,
1754 uint16_t section_id
)
1756 struct rtas_ext_event_log_v6
*ext_log
=
1757 (struct rtas_ext_event_log_v6
*)log
->buffer
;
1758 struct pseries_errorlog
*sect
;
1759 unsigned char *p
, *log_end
;
1760 uint32_t ext_log_length
= rtas_error_extended_log_length(log
);
1761 uint8_t log_format
= rtas_ext_event_log_format(ext_log
);
1762 uint32_t company_id
= rtas_ext_event_company_id(ext_log
);
1764 /* Check that we understand the format */
1765 if (ext_log_length
< sizeof(struct rtas_ext_event_log_v6
) ||
1766 log_format
!= RTAS_V6EXT_LOG_FORMAT_EVENT_LOG
||
1767 company_id
!= RTAS_V6EXT_COMPANY_ID_IBM
)
1770 log_end
= log
->buffer
+ ext_log_length
;
1771 p
= ext_log
->vendor_log
;
1773 while (p
< log_end
) {
1774 sect
= (struct pseries_errorlog
*)p
;
1775 if (pseries_errorlog_id(sect
) == section_id
)
1777 p
+= pseries_errorlog_length(sect
);
1784 * The sys_rtas syscall, as originally designed, allows root to pass
1785 * arbitrary physical addresses to RTAS calls. A number of RTAS calls
1786 * can be abused to write to arbitrary memory and do other things that
1787 * are potentially harmful to system integrity, and thus should only
1788 * be used inside the kernel and not exposed to userspace.
1790 * All known legitimate users of the sys_rtas syscall will only ever
1791 * pass addresses that fall within the RMO buffer, and use a known
1792 * subset of RTAS calls.
1794 * Accordingly, we filter RTAS requests to check that the call is
1795 * permitted, and that provided pointers fall within the RMO buffer.
1796 * If a function is allowed to be invoked via the syscall, then its
1797 * entry in the rtas_functions table points to a rtas_filter that
1798 * describes its constraints, with the indexes of the parameters which
1799 * are expected to contain addresses and sizes of buffers allocated
1800 * inside the RMO buffer.
1803 static bool in_rmo_buf(u32 base
, u32 end
)
1805 return base
>= rtas_rmo_buf
&&
1806 base
< (rtas_rmo_buf
+ RTAS_USER_REGION_SIZE
) &&
1808 end
>= rtas_rmo_buf
&&
1809 end
< (rtas_rmo_buf
+ RTAS_USER_REGION_SIZE
);
1812 static bool block_rtas_call(const struct rtas_function
*func
, int nargs
,
1813 struct rtas_args
*args
)
1815 const struct rtas_filter
*f
;
1816 const bool is_platform_dump
=
1817 func
== &rtas_function_table
[RTAS_FNIDX__IBM_PLATFORM_DUMP
];
1818 const bool is_config_conn
=
1819 func
== &rtas_function_table
[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR
];
1820 u32 base
, size
, end
;
1823 * Only functions with filters attached are allowed.
1829 * And some functions aren't allowed on LE.
1831 if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN
) && func
->banned_for_syscall_on_le
)
1834 if (f
->buf_idx1
!= -1) {
1835 base
= be32_to_cpu(args
->args
[f
->buf_idx1
]);
1836 if (f
->size_idx1
!= -1)
1837 size
= be32_to_cpu(args
->args
[f
->size_idx1
]);
1838 else if (f
->fixed_size
)
1839 size
= f
->fixed_size
;
1843 end
= base
+ size
- 1;
1846 * Special case for ibm,platform-dump - NULL buffer
1847 * address is used to indicate end of dump processing
1849 if (is_platform_dump
&& base
== 0)
1852 if (!in_rmo_buf(base
, end
))
1856 if (f
->buf_idx2
!= -1) {
1857 base
= be32_to_cpu(args
->args
[f
->buf_idx2
]);
1858 if (f
->size_idx2
!= -1)
1859 size
= be32_to_cpu(args
->args
[f
->size_idx2
]);
1860 else if (f
->fixed_size
)
1861 size
= f
->fixed_size
;
1864 end
= base
+ size
- 1;
1867 * Special case for ibm,configure-connector where the
1870 if (is_config_conn
&& base
== 0)
1873 if (!in_rmo_buf(base
, end
))
1879 pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
1880 pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n",
1881 func
->name
, nargs
, current
->comm
);
1885 /* We assume to be passed big endian arguments */
1886 SYSCALL_DEFINE1(rtas
, struct rtas_args __user
*, uargs
)
1888 const struct rtas_function
*func
;
1889 struct pin_cookie cookie
;
1890 struct rtas_args args
;
1891 unsigned long flags
;
1892 char *buff_copy
, *errbuf
= NULL
;
1893 int nargs
, nret
, token
;
1895 if (!capable(CAP_SYS_ADMIN
))
1901 if (copy_from_user(&args
, uargs
, 3 * sizeof(u32
)) != 0)
1904 nargs
= be32_to_cpu(args
.nargs
);
1905 nret
= be32_to_cpu(args
.nret
);
1906 token
= be32_to_cpu(args
.token
);
1908 if (nargs
>= ARRAY_SIZE(args
.args
)
1909 || nret
> ARRAY_SIZE(args
.args
)
1910 || nargs
+ nret
> ARRAY_SIZE(args
.args
))
1913 nargs
= array_index_nospec(nargs
, ARRAY_SIZE(args
.args
));
1914 nret
= array_index_nospec(nret
, ARRAY_SIZE(args
.args
) - nargs
);
1917 if (copy_from_user(args
.args
, uargs
->args
,
1918 nargs
* sizeof(rtas_arg_t
)) != 0)
1922 * If this token doesn't correspond to a function the kernel
1923 * understands, you're not allowed to call it.
1925 func
= rtas_token_to_function_untrusted(token
);
1929 args
.rets
= &args
.args
[nargs
];
1930 memset(args
.rets
, 0, nret
* sizeof(rtas_arg_t
));
1932 if (block_rtas_call(func
, nargs
, &args
))
1935 if (token_is_restricted_errinjct(token
)) {
1938 err
= security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION
);
1943 /* Need to handle ibm,suspend_me call specially */
1944 if (token
== rtas_function_token(RTAS_FN_IBM_SUSPEND_ME
)) {
1947 * rtas_ibm_suspend_me assumes the streamid handle is in cpu
1948 * endian, or at least the hcall within it requires it.
1951 u64 handle
= ((u64
)be32_to_cpu(args
.args
[0]) << 32)
1952 | be32_to_cpu(args
.args
[1]);
1953 rc
= rtas_syscall_dispatch_ibm_suspend_me(handle
);
1955 args
.rets
[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE
);
1956 else if (rc
== -EIO
)
1957 args
.rets
[0] = cpu_to_be32(-1);
1963 buff_copy
= get_errorlog_buffer();
1966 * If this function has a mutex assigned to it, we must
1967 * acquire it to avoid interleaving with any kernel-based uses
1968 * of the same function. Kernel-based sequences acquire the
1969 * appropriate mutex explicitly.
1972 mutex_lock(func
->lock
);
1974 raw_spin_lock_irqsave(&rtas_lock
, flags
);
1975 cookie
= lockdep_pin_lock(&rtas_lock
);
1978 do_enter_rtas(&rtas_args
);
1981 /* A -1 return code indicates that the last command couldn't
1982 be completed due to a hardware error. */
1983 if (be32_to_cpu(args
.rets
[0]) == -1)
1984 errbuf
= __fetch_rtas_last_error(buff_copy
);
1986 lockdep_unpin_lock(&rtas_lock
, cookie
);
1987 raw_spin_unlock_irqrestore(&rtas_lock
, flags
);
1990 mutex_unlock(func
->lock
);
1994 log_error(errbuf
, ERR_TYPE_RTAS_LOG
, 0);
1999 /* Copy out args. */
2000 if (copy_to_user(uargs
->args
+ nargs
,
2002 nret
* sizeof(rtas_arg_t
)) != 0)
2008 static void __init
rtas_function_table_init(void)
2010 struct property
*prop
;
2012 for (size_t i
= 0; i
< ARRAY_SIZE(rtas_function_table
); ++i
) {
2013 struct rtas_function
*curr
= &rtas_function_table
[i
];
2014 struct rtas_function
*prior
;
2017 curr
->token
= RTAS_UNKNOWN_SERVICE
;
2022 * Ensure table is sorted correctly for binary search
2023 * on function names.
2025 prior
= &rtas_function_table
[i
- 1];
2027 cmp
= strcmp(prior
->name
, curr
->name
);
2032 pr_err("'%s' has duplicate function table entries\n",
2035 pr_err("function table unsorted: '%s' wrongly precedes '%s'\n",
2036 prior
->name
, curr
->name
);
2040 for_each_property_of_node(rtas
.dev
, prop
) {
2041 struct rtas_function
*func
;
2043 if (prop
->length
!= sizeof(u32
))
2046 func
= __rtas_name_to_function(prop
->name
);
2050 func
->token
= be32_to_cpup((__be32
*)prop
->value
);
2052 pr_debug("function %s has token %u\n", func
->name
, func
->token
);
2057 * Call early during boot, before mem init, to retrieve the RTAS
2058 * information from the device-tree and allocate the RMO buffer for userland
2061 void __init
rtas_initialize(void)
2063 unsigned long rtas_region
= RTAS_INSTANTIATE_MAX
;
2064 u32 base
, size
, entry
;
2065 int no_base
, no_size
, no_entry
;
2067 /* Get RTAS dev node and fill up our "rtas" structure with infos
2070 rtas
.dev
= of_find_node_by_name(NULL
, "rtas");
2074 no_base
= of_property_read_u32(rtas
.dev
, "linux,rtas-base", &base
);
2075 no_size
= of_property_read_u32(rtas
.dev
, "rtas-size", &size
);
2076 if (no_base
|| no_size
) {
2077 of_node_put(rtas
.dev
);
2084 no_entry
= of_property_read_u32(rtas
.dev
, "linux,rtas-entry", &entry
);
2085 rtas
.entry
= no_entry
? rtas
.base
: entry
;
2087 init_error_log_max();
2089 /* Must be called before any function token lookups */
2090 rtas_function_table_init();
2093 * Discover this now to avoid a device tree lookup in the
2096 ibm_extended_os_term
= of_property_read_bool(rtas
.dev
, "ibm,extended-os-term");
2098 /* If RTAS was found, allocate the RMO buffer for it and look for
2099 * the stop-self token if any
2102 if (firmware_has_feature(FW_FEATURE_LPAR
))
2103 rtas_region
= min(ppc64_rma_size
, RTAS_INSTANTIATE_MAX
);
2105 rtas_rmo_buf
= memblock_phys_alloc_range(RTAS_USER_REGION_SIZE
, PAGE_SIZE
,
2108 panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
2109 PAGE_SIZE
, &rtas_region
);
2111 rtas_work_area_reserve_arena(rtas_region
);
2114 int __init
early_init_dt_scan_rtas(unsigned long node
,
2115 const char *uname
, int depth
, void *data
)
2117 const u32
*basep
, *entryp
, *sizep
;
2119 if (depth
!= 1 || strcmp(uname
, "rtas") != 0)
2122 basep
= of_get_flat_dt_prop(node
, "linux,rtas-base", NULL
);
2123 entryp
= of_get_flat_dt_prop(node
, "linux,rtas-entry", NULL
);
2124 sizep
= of_get_flat_dt_prop(node
, "rtas-size", NULL
);
2127 /* need this feature to decide the crashkernel offset */
2128 if (of_get_flat_dt_prop(node
, "ibm,hypertas-functions", NULL
))
2129 powerpc_firmware_features
|= FW_FEATURE_LPAR
;
2132 if (basep
&& entryp
&& sizep
) {
2134 rtas
.entry
= *entryp
;
2138 #ifdef CONFIG_UDBG_RTAS_CONSOLE
2139 basep
= of_get_flat_dt_prop(node
, "put-term-char", NULL
);
2141 rtas_putchar_token
= *basep
;
2143 basep
= of_get_flat_dt_prop(node
, "get-term-char", NULL
);
2145 rtas_getchar_token
= *basep
;
2147 if (rtas_putchar_token
!= RTAS_UNKNOWN_SERVICE
&&
2148 rtas_getchar_token
!= RTAS_UNKNOWN_SERVICE
)
2149 udbg_init_rtas_console();
2157 static DEFINE_RAW_SPINLOCK(timebase_lock
);
2158 static u64 timebase
= 0;
2160 void rtas_give_timebase(void)
2162 unsigned long flags
;
2164 raw_spin_lock_irqsave(&timebase_lock
, flags
);
2166 rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE
), 0, 1, NULL
);
2167 timebase
= get_tb();
2168 raw_spin_unlock(&timebase_lock
);
2172 rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE
), 0, 1, NULL
);
2173 local_irq_restore(flags
);
2176 void rtas_take_timebase(void)
2180 raw_spin_lock(&timebase_lock
);
2181 set_tb(timebase
>> 32, timebase
& 0xffffffff);
2183 raw_spin_unlock(&timebase_lock
);