// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/err.h>
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>
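
/* Syscall handlers are called through this C ABI: up to six register-sized arguments, register-sized return. */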
typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
				   long r6, long r7, long r8,
				   unsigned long r0, struct pt_regs *regs)
{
	syscall_fn f;

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

	trace_hardirqs_off(); /* finish reconciling */

	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(regs->softe != IRQS_ENABLED);
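
	/*
	 * With pkeys, the AMR/IAMR may hold arbitrary user values on entry.
	 * Save them to the regs frame and switch to the kernel's "blocked"
	 * values so KUAP/KUEP protection is engaged while in the kernel.
	 */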
#ifdef CONFIG_PPC_PKEY
	if (mmu_has_feature(MMU_FTR_PKEY)) {
		unsigned long amr, iamr;
		bool flush_needed = false;
		/*
		 * When entering from userspace we mostly have the AMR/IAMR
		 * different from kernel default values. Hence don't compare.
		 */
		amr = mfspr(SPRN_AMR);
		iamr = mfspr(SPRN_IAMR);
		regs->amr  = amr;
		regs->iamr = iamr;
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
			flush_needed = true;
		}
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
			flush_needed = true;
		}
		if (flush_needed)
			isync();
	} else
#endif
		kuap_check_amr();

	account_cpu_user_entry();
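
	/*
	 * On shared-processor LPARs, new dispatch trace log entries mean
	 * the hypervisor preempted us; fold that into stolen time now.
	 */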
#ifdef CONFIG_PPC_SPLPAR
	if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
	    firmware_has_feature(FW_FEATURE_SPLPAR)) {
		struct lppaca *lp = local_paca->lppaca_ptr;

		if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
			accumulate_stolen_time();
	}
#endif

	/*
	 * This is not required for the syscall exit path, but makes the
	 * stack frame look nicer. If this was initialised in the first stack
	 * frame, or if the unwinder was taught the first stack frame always
	 * returns to user with IRQS_ENABLED, this store could be avoided!
	 */
	regs->softe = IRQS_ENABLED;

	local_irq_enable();

	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
		if (unlikely(regs->trap == 0x7ff0)) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		/*
		 * We use the return value of do_syscall_trace_enter() as the
		 * syscall number. If the syscall was rejected for any reason
		 * do_syscall_trace_enter() returns an invalid syscall number
		 * and the test against NR_syscalls will fail and the return
		 * value to be used is in regs->gpr[3].
		 */
		r0 = do_syscall_trace_enter(regs);
		if (unlikely(r0 >= NR_syscalls))
			return regs->gpr[3];
	} else if (unlikely(r0 >= NR_syscalls)) {
		if (unlikely(regs->trap == 0x7ff0)) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		return -ENOSYS;
	}
	/* May be faster to do array_index_nospec? */
	barrier_nospec();
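
	/*
	 * Compat handlers expect 32-bit arguments: clear the upper halves
	 * so stale high bits in the 64-bit user registers can't leak in.
	 */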
	if (unlikely(is_32bit_task())) {
		f = (void *)compat_sys_call_table[r0];

		r3 &= 0x00000000ffffffffULL;
		r4 &= 0x00000000ffffffffULL;
		r5 &= 0x00000000ffffffffULL;
		r6 &= 0x00000000ffffffffULL;
		r7 &= 0x00000000ffffffffULL;
		r8 &= 0x00000000ffffffffULL;
	} else {
		f = (void *)sys_call_table[r0];
	}

	return f(r3, r4, r5, r6, r7, r8);
}

/*
 * local irqs must be disabled. Returns false if the caller must re-enable
 * them, check for new work, and try again.
 */
static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
{
	/* This must be done with RI=1 because tracing may touch vmaps */
	trace_hardirqs_on();

	/* This pattern matches prep_irq_for_idle */
	if (clear_ri)
		__hard_EE_RI_disable();
	else
		__hard_irq_disable();
	if (unlikely(lazy_irq_pending_nocheck())) {
		/* Took an interrupt, may have more exit work to do. */
		if (clear_ri)
			__hard_RI_enable();
		trace_hardirqs_off();
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

		return false;
	}
	local_paca->irq_happened = 0;
	irq_soft_mask_set(IRQS_ENABLED);

	return true;
}

/*
 * This should be called after a syscall returns, with r3 the return value
 * from the syscall. If this function returns non-zero, the system call
 * exit assembly should additionally load all GPR registers and CTR and XER
 * from the interrupt frame.
 *
 * The function graph tracer can not trace the return side of this function,
 * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
 */
notrace unsigned long syscall_exit_prepare(unsigned long r3,
					   struct pt_regs *regs,
					   long scv)
{
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long ti_flags;
	unsigned long ret = 0;

	kuap_check_amr();

	regs->result = r3;

	/* Check whether the syscall is issued inside a restartable sequence */
	rseq_syscall(regs);

	ti_flags = *ti_flagsp;
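
	/*
	 * The legacy sc ABI reports failure via the CR0.SO bit with a
	 * positive errno in r3; scv follows the usual negative-errno
	 * convention, so its return value passes through unmodified.
	 */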
	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
			r3 = -r3;
			regs->ccr |= 0x10000000; /* Set SO bit in CR */
		}
	}

	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
		if (ti_flags & _TIF_RESTOREALL)
			ret = _TIF_RESTOREALL;
		else
			regs->gpr[3] = r3;
		clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
	} else {
		regs->gpr[3] = r3;
	}

	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
		do_syscall_trace_leave(regs);
		ret |= _TIF_RESTOREALL;
	}
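
	/*
	 * Handle reschedule and signal work with interrupts enabled, then
	 * re-check the flags with interrupts disabled: anything that became
	 * pending meanwhile must be processed before returning to user.
	 */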
	local_irq_disable();
again:
	ti_flags = READ_ONCE(*ti_flagsp);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable();
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			/*
			 * SIGPENDING must restore signal handler function
			 * argument GPRs, and some non-volatiles (e.g., r1).
			 * Restore all for now. This could be made lighter.
			 */
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(*ti_flagsp);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/*
			 * If userspace MSR has all available FP bits set,
			 * then they are live and no need to restore. If not,
			 * it means the regs were given up and restore_math
			 * may decide to restore them (to avoid taking an FP
			 * fault).
			 */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	/* scv need not set RI=0 because SRRs are not used */
	if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
	/*
	 * We do this at the end so that we do context switch with KERNEL AMR
	 */
	kuap_user_restore(regs);
#endif
	return ret;
}

#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
#ifdef CONFIG_PPC_BOOK3E
	struct thread_struct *ts = &current->thread;
#endif
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long ti_flags;
	unsigned long flags;
	unsigned long ret = 0;

	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(regs->softe != IRQS_ENABLED);

	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * AMR can only have been unlocked if we interrupted the kernel.
	 */
	kuap_check_amr();

	local_irq_save(flags);
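
	/* Same user-work loop as the syscall exit path above. */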
again:
	ti_flags = READ_ONCE(*ti_flagsp);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable(); /* returning to user: may enable */
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(*ti_flagsp);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/* See above restore_math comment */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	if (unlikely(!prep_irq_for_enabled_exit(true))) {
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

#ifdef CONFIG_PPC_BOOK3E
	if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
		/*
		 * Check to see if the dbcr0 register is set up to debug.
		 * Use the internal debug mode bit to do this.
		 */
		mtmsr(mfmsr() & ~MSR_DE);
		mtspr(SPRN_DBCR0, ts->debug.dbcr0);
		mtspr(SPRN_DBSR, -1);
	}
#endif

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	/*
	 * We do this at the end so that we do context switch with KERNEL AMR
	 */
	kuap_user_restore(regs);
	return ret;
}

void unrecoverable_exception(struct pt_regs *regs);
void preempt_schedule_irq(void);
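
/*
 * Exit path for interrupts taken in kernel mode. Decides whether to
 * preempt, replays soft-masked interrupts that arrived while we ran,
 * and restores the kernel AMR.
 */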
notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long *ti_flagsp = &current_thread_info()->flags;
	unsigned long flags;
	unsigned long ret = 0;
	unsigned long amr;

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
		unrecoverable_exception(regs);
	BUG_ON(regs->msr & MSR_PR);
	BUG_ON(!FULL_REGS(regs));

	amr = kuap_get_and_check_amr();
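
	/*
	 * A stack store from instruction emulation was deferred; a non-zero
	 * return tells the exit assembly to perform it with regs restored.
	 */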
	if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
		clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
		ret = 1;
	}

	local_irq_save(flags);
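
	/*
	 * Returning to a soft-enabled kernel context: any lazily masked
	 * interrupts must be replayed before the final hard-disable. For a
	 * soft-disabled context, hard-disable and drop PACA_IRQ_HARD_DIS if
	 * the interrupted context ran with MSR[EE] set, since the return
	 * will restore EE.
	 */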
	if (regs->softe == IRQS_ENABLED) {
		/* Returning to a kernel context with local irqs enabled. */
		WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
		if (IS_ENABLED(CONFIG_PREEMPT)) {
			/* Return to preemptible kernel context */
			if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
				if (preempt_count() == 0)
					preempt_schedule_irq();
			}
		}

		if (unlikely(!prep_irq_for_enabled_exit(true))) {
			/*
			 * Can't local_irq_restore to replay if we were in
			 * interrupt context. Must replay directly.
			 */
			if (irqs_disabled_flags(flags)) {
				replay_soft_interrupts();
			} else {
				local_irq_restore(flags);
				local_irq_save(flags);
			}
			/* Took an interrupt, may have more exit work to do. */
			goto again;
		}
	} else {
		/* Returning to a kernel context with local irqs disabled. */
		__hard_EE_RI_disable();
		if (regs->msr & MSR_EE)
			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	/*
	 * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr,
	 * which would cause Read-After-Write stalls. Hence, we take the AMR
	 * value from the check above.
	 */
	kuap_kernel_restore(regs, amr);

	return ret;
}
#endif