io_uring: ensure finish_wait() is always called in __io_uring_task_cancel()
[linux/fpc-iii.git] / arch / powerpc / kernel / syscall_64.c
blob7c85ed04a1641c9f68a93ba3d34bdfca55dd0969
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/err.h>
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>

/* Handler signature used for syscall dispatch: six long arguments, long result. */
typedef long (*syscall_fn)(long, long, long, long, long, long);

20 /* Has to run notrace because it is entered not completely "reconciled" */
21 notrace long system_call_exception(long r3, long r4, long r5,
22 long r6, long r7, long r8,
23 unsigned long r0, struct pt_regs *regs)
25 syscall_fn f;
27 if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
28 BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
30 trace_hardirqs_off(); /* finish reconciling */
32 if (IS_ENABLED(CONFIG_PPC_BOOK3S))
33 BUG_ON(!(regs->msr & MSR_RI));
34 BUG_ON(!(regs->msr & MSR_PR));
35 BUG_ON(!FULL_REGS(regs));
36 BUG_ON(regs->softe != IRQS_ENABLED);
38 #ifdef CONFIG_PPC_PKEY
39 if (mmu_has_feature(MMU_FTR_PKEY)) {
40 unsigned long amr, iamr;
41 bool flush_needed = false;
43 * When entering from userspace we mostly have the AMR/IAMR
44 * different from kernel default values. Hence don't compare.
46 amr = mfspr(SPRN_AMR);
47 iamr = mfspr(SPRN_IAMR);
48 regs->amr = amr;
49 regs->iamr = iamr;
50 if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
51 mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
52 flush_needed = true;
54 if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
55 mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
56 flush_needed = true;
58 if (flush_needed)
59 isync();
60 } else
61 #endif
62 kuap_check_amr();
64 account_cpu_user_entry();
66 #ifdef CONFIG_PPC_SPLPAR
67 if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
68 firmware_has_feature(FW_FEATURE_SPLPAR)) {
69 struct lppaca *lp = local_paca->lppaca_ptr;
71 if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
72 accumulate_stolen_time();
74 #endif
77 * This is not required for the syscall exit path, but makes the
78 * stack frame look nicer. If this was initialised in the first stack
79 * frame, or if the unwinder was taught the first stack frame always
80 * returns to user with IRQS_ENABLED, this store could be avoided!
82 regs->softe = IRQS_ENABLED;
84 local_irq_enable();
86 if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
87 if (unlikely(regs->trap == 0x7ff0)) {
88 /* Unsupported scv vector */
89 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
90 return regs->gpr[3];
93 * We use the return value of do_syscall_trace_enter() as the
94 * syscall number. If the syscall was rejected for any reason
95 * do_syscall_trace_enter() returns an invalid syscall number
96 * and the test against NR_syscalls will fail and the return
97 * value to be used is in regs->gpr[3].
99 r0 = do_syscall_trace_enter(regs);
100 if (unlikely(r0 >= NR_syscalls))
101 return regs->gpr[3];
102 r3 = regs->gpr[3];
103 r4 = regs->gpr[4];
104 r5 = regs->gpr[5];
105 r6 = regs->gpr[6];
106 r7 = regs->gpr[7];
107 r8 = regs->gpr[8];
109 } else if (unlikely(r0 >= NR_syscalls)) {
110 if (unlikely(regs->trap == 0x7ff0)) {
111 /* Unsupported scv vector */
112 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
113 return regs->gpr[3];
115 return -ENOSYS;
118 /* May be faster to do array_index_nospec? */
119 barrier_nospec();
121 if (unlikely(is_32bit_task())) {
122 f = (void *)compat_sys_call_table[r0];
124 r3 &= 0x00000000ffffffffULL;
125 r4 &= 0x00000000ffffffffULL;
126 r5 &= 0x00000000ffffffffULL;
127 r6 &= 0x00000000ffffffffULL;
128 r7 &= 0x00000000ffffffffULL;
129 r8 &= 0x00000000ffffffffULL;
131 } else {
132 f = (void *)sys_call_table[r0];
135 return f(r3, r4, r5, r6, r7, r8);
139 * local irqs must be disabled. Returns false if the caller must re-enable
140 * them, check for new work, and try again.
142 static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
144 /* This must be done with RI=1 because tracing may touch vmaps */
145 trace_hardirqs_on();
147 /* This pattern matches prep_irq_for_idle */
148 if (clear_ri)
149 __hard_EE_RI_disable();
150 else
151 __hard_irq_disable();
152 if (unlikely(lazy_irq_pending_nocheck())) {
153 /* Took an interrupt, may have more exit work to do. */
154 if (clear_ri)
155 __hard_RI_enable();
156 trace_hardirqs_off();
157 local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
159 return false;
161 local_paca->irq_happened = 0;
162 irq_soft_mask_set(IRQS_ENABLED);
164 return true;
168 * This should be called after a syscall returns, with r3 the return value
169 * from the syscall. If this function returns non-zero, the system call
170 * exit assembly should additionally load all GPR registers and CTR and XER
171 * from the interrupt frame.
173 * The function graph tracer can not trace the return side of this function,
174 * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
176 notrace unsigned long syscall_exit_prepare(unsigned long r3,
177 struct pt_regs *regs,
178 long scv)
180 unsigned long *ti_flagsp = &current_thread_info()->flags;
181 unsigned long ti_flags;
182 unsigned long ret = 0;
184 kuap_check_amr();
186 regs->result = r3;
188 /* Check whether the syscall is issued inside a restartable sequence */
189 rseq_syscall(regs);
191 ti_flags = *ti_flagsp;
193 if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
194 if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
195 r3 = -r3;
196 regs->ccr |= 0x10000000; /* Set SO bit in CR */
200 if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
201 if (ti_flags & _TIF_RESTOREALL)
202 ret = _TIF_RESTOREALL;
203 else
204 regs->gpr[3] = r3;
205 clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
206 } else {
207 regs->gpr[3] = r3;
210 if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
211 do_syscall_trace_leave(regs);
212 ret |= _TIF_RESTOREALL;
215 again:
216 local_irq_disable();
217 ti_flags = READ_ONCE(*ti_flagsp);
218 while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
219 local_irq_enable();
220 if (ti_flags & _TIF_NEED_RESCHED) {
221 schedule();
222 } else {
224 * SIGPENDING must restore signal handler function
225 * argument GPRs, and some non-volatiles (e.g., r1).
226 * Restore all for now. This could be made lighter.
228 if (ti_flags & _TIF_SIGPENDING)
229 ret |= _TIF_RESTOREALL;
230 do_notify_resume(regs, ti_flags);
232 local_irq_disable();
233 ti_flags = READ_ONCE(*ti_flagsp);
236 if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
237 if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
238 unlikely((ti_flags & _TIF_RESTORE_TM))) {
239 restore_tm_state(regs);
240 } else {
241 unsigned long mathflags = MSR_FP;
243 if (cpu_has_feature(CPU_FTR_VSX))
244 mathflags |= MSR_VEC | MSR_VSX;
245 else if (cpu_has_feature(CPU_FTR_ALTIVEC))
246 mathflags |= MSR_VEC;
249 * If userspace MSR has all available FP bits set,
250 * then they are live and no need to restore. If not,
251 * it means the regs were given up and restore_math
252 * may decide to restore them (to avoid taking an FP
253 * fault).
255 if ((regs->msr & mathflags) != mathflags)
256 restore_math(regs);
260 /* scv need not set RI=0 because SRRs are not used */
261 if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
262 local_irq_enable();
263 goto again;
266 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
267 local_paca->tm_scratch = regs->msr;
268 #endif
270 account_cpu_user_exit();
272 #ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
274 * We do this at the end so that we do context switch with KERNEL AMR
276 kuap_user_restore(regs);
277 #endif
278 return ret;
281 #ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
282 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
284 #ifdef CONFIG_PPC_BOOK3E
285 struct thread_struct *ts = &current->thread;
286 #endif
287 unsigned long *ti_flagsp = &current_thread_info()->flags;
288 unsigned long ti_flags;
289 unsigned long flags;
290 unsigned long ret = 0;
292 if (IS_ENABLED(CONFIG_PPC_BOOK3S))
293 BUG_ON(!(regs->msr & MSR_RI));
294 BUG_ON(!(regs->msr & MSR_PR));
295 BUG_ON(!FULL_REGS(regs));
296 BUG_ON(regs->softe != IRQS_ENABLED);
299 * We don't need to restore AMR on the way back to userspace for KUAP.
300 * AMR can only have been unlocked if we interrupted the kernel.
302 kuap_check_amr();
304 local_irq_save(flags);
306 again:
307 ti_flags = READ_ONCE(*ti_flagsp);
308 while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
309 local_irq_enable(); /* returning to user: may enable */
310 if (ti_flags & _TIF_NEED_RESCHED) {
311 schedule();
312 } else {
313 if (ti_flags & _TIF_SIGPENDING)
314 ret |= _TIF_RESTOREALL;
315 do_notify_resume(regs, ti_flags);
317 local_irq_disable();
318 ti_flags = READ_ONCE(*ti_flagsp);
321 if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
322 if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
323 unlikely((ti_flags & _TIF_RESTORE_TM))) {
324 restore_tm_state(regs);
325 } else {
326 unsigned long mathflags = MSR_FP;
328 if (cpu_has_feature(CPU_FTR_VSX))
329 mathflags |= MSR_VEC | MSR_VSX;
330 else if (cpu_has_feature(CPU_FTR_ALTIVEC))
331 mathflags |= MSR_VEC;
333 /* See above restore_math comment */
334 if ((regs->msr & mathflags) != mathflags)
335 restore_math(regs);
339 if (unlikely(!prep_irq_for_enabled_exit(true))) {
340 local_irq_enable();
341 local_irq_disable();
342 goto again;
345 #ifdef CONFIG_PPC_BOOK3E
346 if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
348 * Check to see if the dbcr0 register is set up to debug.
349 * Use the internal debug mode bit to do this.
351 mtmsr(mfmsr() & ~MSR_DE);
352 mtspr(SPRN_DBCR0, ts->debug.dbcr0);
353 mtspr(SPRN_DBSR, -1);
355 #endif
357 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
358 local_paca->tm_scratch = regs->msr;
359 #endif
361 account_cpu_user_exit();
364 * We do this at the end so that we do context switch with KERNEL AMR
366 kuap_user_restore(regs);
367 return ret;
/* Forward declarations for helpers called by interrupt_exit_kernel_prepare(). */
void unrecoverable_exception(struct pt_regs *regs);
void preempt_schedule_irq(void);

373 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
375 unsigned long *ti_flagsp = &current_thread_info()->flags;
376 unsigned long flags;
377 unsigned long ret = 0;
378 unsigned long amr;
380 if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
381 unrecoverable_exception(regs);
382 BUG_ON(regs->msr & MSR_PR);
383 BUG_ON(!FULL_REGS(regs));
385 amr = kuap_get_and_check_amr();
387 if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
388 clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
389 ret = 1;
392 local_irq_save(flags);
394 if (regs->softe == IRQS_ENABLED) {
395 /* Returning to a kernel context with local irqs enabled. */
396 WARN_ON_ONCE(!(regs->msr & MSR_EE));
397 again:
398 if (IS_ENABLED(CONFIG_PREEMPT)) {
399 /* Return to preemptible kernel context */
400 if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
401 if (preempt_count() == 0)
402 preempt_schedule_irq();
406 if (unlikely(!prep_irq_for_enabled_exit(true))) {
408 * Can't local_irq_restore to replay if we were in
409 * interrupt context. Must replay directly.
411 if (irqs_disabled_flags(flags)) {
412 replay_soft_interrupts();
413 } else {
414 local_irq_restore(flags);
415 local_irq_save(flags);
417 /* Took an interrupt, may have more exit work to do. */
418 goto again;
420 } else {
421 /* Returning to a kernel context with local irqs disabled. */
422 __hard_EE_RI_disable();
423 if (regs->msr & MSR_EE)
424 local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
428 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
429 local_paca->tm_scratch = regs->msr;
430 #endif
433 * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr,
434 * which would cause Read-After-Write stalls. Hence, we take the AMR
435 * value from the check above.
437 kuap_kernel_restore(regs, amr);
439 return ret;
441 #endif