/* SPDX-License-Identifier: GPL-2.0-or-later */
 *  Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
 *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
 *  Adapted for Power Macintosh by Paul Mackerras.
 *  Low-level exception handlers and MMU support
 *  rewritten by Paul Mackerras.
 *    Copyright (C) 1996 Paul Mackerras.
 *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
 *  This file contains the system call entry code, context switch
 *  code, and exception/interrupt return code for PowerPC.
#include <linux/errno.h>
#include <linux/err.h>
#include <asm/cache.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/code-patching-asm.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
#include <asm/ppc-opcode.h>
#include <asm/barrier.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/exception-64s.h>
#include <asm/exception-64e.h>
#include <asm/feature-fixups.h>
	.tc sys_call_table[TC],sys_call_table
COMPAT_SYS_CALL_TABLE:
	.tc compat_sys_call_table[TC],compat_sys_call_table
/* This value is used to mark exception frames on the stack. */
	.tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
#ifdef CONFIG_PPC_BOOK3S
.macro system_call_vectored name trapnr
	.globl system_call_vectored_\name
system_call_vectored_\name:
_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	extrdi.	r10, r12, 1, (63-MSR_TS_T_LG)	/* transaction active? */
END_FTR_SECTION_IFSET(CPU_FTR_TM)
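	/*
	 * For reference: r12 holds the MSR value captured by the scv entry
	 * vector, and extrdi. extracts the single MSR[TS] "transaction active"
	 * bit into r10 while setting CR0. Roughly, as a sketch:
	 *
	 *	tm_active = (msr >> MSR_TS_T_LG) & 1;
	 *
	 * so a conditional branch on CR0 can send transactional syscalls to
	 * the tabort path further down.
	 */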
	/* Can we avoid saving r3-r8 in common case? */
	/* Zero r9-r12, this should only be required when restoring all GPRs */
	addi	r10,r1,STACK_FRAME_OVERHEAD
	ld	r11,exception_marker@toc(r2)
	std	r11,-16(r10)		/* "regshere" marker */
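	/*
	 * Sketch (illustrative only, not the actual unwinder code): the marker
	 * word written above lets a stack walker recognize frames that carry a
	 * full pt_regs, along the lines of:
	 *
	 *	unsigned long *frame = (unsigned long *)sp;
	 *	if (frame[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER)
	 *		regs = (void *)(sp + STACK_FRAME_OVERHEAD);
	 *
	 * which is also why the interrupt return path below clears a stale
	 * marker before leaving the frame behind.
	 */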
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	 * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
	 * would clobber syscall parameters. Also we always enter with IRQs
	 * enabled and nothing pending. system_call_exception() will call
	 * trace_hardirqs_off().
	 * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The
	 * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED.
	/* Calling convention has r9 = orig r0, r10 = regs */
	bl	system_call_exception
.Lsyscall_vectored_\name\()_exit:
	addi	r4,r1,STACK_FRAME_OVERHEAD
	bl	syscall_exit_prepare
	stdcx.	r0,0,r1			/* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	bne	.Lsyscall_vectored_\name\()_restore_regs
	/* rfscv returns with LR->NIA and CTR->MSR */
	/* Could zero these as per ABI, but we may consider a stricter ABI
	 * which preserves these if libc implementations can benefit, so
	 * restore them for now until further measurement is done. */
	/* Zero volatile regs that may contain sensitive kernel data */
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * The value of AMR only matters while we're in the kernel.
	b	.	/* prevent speculative execution */
.Lsyscall_vectored_\name\()_restore_regs:
system_call_vectored common 0x3000
 * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0,
 * which is tested by system_call_exception when r0 is -1 (as set by vector
 * entry).
system_call_vectored sigill 0x7ff0
 * Entered via kernel return set up by kernel/sstep.c, must match entry regs
	.globl system_call_vectored_emulate
system_call_vectored_emulate:
_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
	li	r10,IRQS_ALL_DISABLED
	stb	r10,PACAIRQSOFTMASK(r13)
	b	system_call_vectored_common
	.balign IFETCH_ALIGN_BYTES
	.globl system_call_common
_ASM_NOKPROBE_SYMBOL(system_call_common)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	extrdi.	r10, r12, 1, (63-MSR_TS_T_LG)	/* transaction active? */
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
END_BTB_FLUSH_SECTION
	/* Can we avoid saving r3-r8 in common case? */
	/* Zero r9-r12, this should only be required when restoring all GPRs */
	 * This clears CR0.SO (bit 28), which is the error indication on
	 * return from this system call.
	rldimi	r12,r11,28,(63-28)
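	/*
	 * For reference, with r11 == 0 the rldimi above inserts a single zero
	 * bit into the saved CR image in r12: the insert mask runs from bit
	 * MB = 63-28 = 35 to bit 63-SH = 35, i.e. exactly one bit, which is
	 * 1 << 28 in the CR value. A rough C equivalent:
	 *
	 *	ccr &= ~(1UL << 28);
	 *
	 * so CR0.SO reads as "no error" unless the exit path sets it again.
	 */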
	addi	r10,r1,STACK_FRAME_OVERHEAD
	ld	r11,exception_marker@toc(r2)
	std	r11,-16(r10)		/* "regshere" marker */
	 * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
	 * would clobber syscall parameters. Also we always enter with IRQs
	 * enabled and nothing pending. system_call_exception() will call
	 * trace_hardirqs_off().
	li	r11,IRQS_ALL_DISABLED
	li	r12,PACA_IRQ_HARD_DIS
	stb	r11,PACAIRQSOFTMASK(r13)
	stb	r12,PACAIRQHAPPENED(r13)
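	/*
	 * Unlike the scv path above, the sc interrupt clears MSR[EE] on entry,
	 * so the hard disable is recorded here (PACA_IRQ_HARD_DIS in
	 * PACAIRQHAPPENED) in addition to soft-masking PACAIRQSOFTMASK.
	 */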
	/* Calling convention has r9 = orig r0, r10 = regs */
	bl	system_call_exception
	addi	r4,r1,STACK_FRAME_OVERHEAD
	bl	syscall_exit_prepare
	stdcx.	r0,0,r1			/* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	bne	.Lsyscall_restore_regs
	/* Zero volatile regs that may contain sensitive kernel data */
.Lsyscall_restore_regs_cont:
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * The value of AMR only matters while we're in the kernel.
	b	.	/* prevent speculative execution */
.Lsyscall_restore_regs:
	b	.Lsyscall_restore_regs_cont
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	/* Firstly we need to enable TM in the kernel */
	rldimi	r10, r9, MSR_TM_LG, 63-MSR_TM_LG
	/* tabort, this dooms the transaction, nothing else */
	li	r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
	 * Return directly to userspace. We have corrupted user register state,
	 * but userspace will never see that register state. Execution will
	 * resume after the tbegin of the aborted transaction with the
	 * checkpointed register state.
	b	.	/* prevent speculative execution */
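/*
 * Illustrative userspace view (a sketch, not part of this file): a system
 * call issued inside a hardware transaction is doomed by the tabort above,
 * so control resumes at the failure path of the enclosing tbegin. with the
 * checkpointed registers, e.g.:
 *
 *	tbegin.
 *	beq	1f		failure/abort path
 *	sc			kernel taborts; transaction never commits
 *	tend.
 *	...
 * 1:	do the work non-transactionally
 */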
#ifdef CONFIG_PPC_BOOK3S
_GLOBAL(ret_from_fork_scv)
	li	r3,0	/* fork() return value */
	b	.Lsyscall_vectored_common_exit
_GLOBAL(ret_from_fork)
	li	r3,0	/* fork() return value */
_GLOBAL(ret_from_kernel_thread)
#ifdef PPC64_ELF_ABI_v2
#ifdef CONFIG_PPC_BOOK3E
	/* Save non-volatile GPRs, if not already saved. */
_ASM_NOKPROBE_SYMBOL(save_nvgprs);
#ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE	\
	patch_site 1b, patch__call_flush_branch_caches1; \
	patch_site 1b, patch__call_flush_branch_caches2; \
	patch_site 1b, patch__call_flush_branch_caches3
.global flush_branch_caches
	/* Save LR into r9 */
	// Flush the link stack
	// If we're just flushing the link stack, return here
	patch_site 3b patch__flush_link_stack_return
	patch_site 2b patch__flush_count_cache_return
#define FLUSH_COUNT_CACHE
#endif /* CONFIG_PPC_BOOK3S_64 */
 * This routine switches between two different tasks. The process
 * state of one is saved on its kernel stack. Then the state
 * of the other is restored from its kernel stack. The memory
 * management hardware is updated to the second process's state.
 * Finally, we can return to the second process, via interrupt_return.
 * On entry, r3 points to the THREAD for the current task, r4
 * points to the THREAD for the new task.
 * Note: there are two ways to get to the "going out" portion
 * of this code; either by coming in via the entry (_switch)
 * or via "fork" which must set up an environment equivalent
 * to the "_switch" path. If you change this you'll have to change
 * the fork code also.
 * The code which creates the new task context is in 'copy_thread'
 * in arch/powerpc/kernel/process.c
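/*
 * Roughly, in C-like pseudocode (an illustrative sketch only; the real work
 * is the assembly below together with copy_thread() and the scheduler):
 *
 *	save NVGPRs, CR and the return NIP into a switch frame on the old stack;
 *	prev->thread.ksp = old_sp;		(the std r1,KSP(r3) below)
 *	PACACURRENT = next task;
 *	new_sp = next->thread.ksp;		(the ld r8,KSP(r4) below)
 *	restore NVGPRs, CR and NIP from the switch frame on the new stack;
 *	return into the new task.
 */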
	stdu	r1,-SWITCH_FRAME_SIZE(r1)
	/* r3-r13 are caller saved -- Cort */
	std	r0,_NIP(r1)	/* Return to switch caller */
	std	r1,KSP(r3)	/* Set old stack pointer */
	kuap_check_amr r9, r10
	FLUSH_COUNT_CACHE	/* Clobbers r9, ctr */
	 * On SMP kernels, care must be taken because a task may be
	 * scheduled off CPUx and on to CPUy. Memory ordering must be
	 * considered.
	 * Cacheable stores on CPUx will be visible when the task is
	 * scheduled on CPUy by virtue of the core scheduler barriers
	 * (see "Notes on Program-Order guarantees on SMP systems." in
	 * kernel/sched/core.c).
	 * Uncacheable stores in the case of involuntary preemption must
	 * be taken care of. The smp_mb__after_spinlock() in __schedule()
	 * is implemented as hwsync on powerpc, which orders MMIO too. So
	 * long as there is an hwsync in the context switch path, it will
	 * be executed on the source CPU after the task has performed
	 * all MMIO ops on that CPU, and on the destination CPU before the
	 * task performs any MMIO ops there.
	 * The kernel context switch path must contain a spin_lock,
	 * which contains larx/stcx, which will clear any reservation
	 * of the task being switched.
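	/*
	 * Rationale: reservations are per CPU, not per task. Without clearing
	 * it here, a stcx. executed early by the incoming task could succeed
	 * against a reservation left behind by the outgoing task, allowing an
	 * interrupted larx/stcx. sequence to "complete" with stale data. The
	 * larx/stcx. inside the scheduler's spin_lock is what kills it.
	 */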
#ifdef CONFIG_PPC_BOOK3S
	/* Cancel all explicit user streams, as they will have no use after the
	 * context switch, and stop the HW from creating streams itself.
	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
	addi	r6,r4,-THREAD		/* Convert THREAD to 'current' */
	std	r6,PACACURRENT(r13)	/* Set new 'current' */
#if defined(CONFIG_STACKPROTECTOR)
	ld	r6, TASK_CANARY(r6)
	std	r6, PACA_CANARY(r13)
	ld	r8,KSP(r4)	/* new stack pointer */
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
	clrrdi	r6,r8,28	/* get its ESID */
	clrrdi	r9,r1,28	/* get current sp ESID */
	clrrdi	r6,r8,40	/* get its 1T ESID */
	clrrdi	r9,r1,40	/* get current sp 1T ESID */
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
	clrldi.	r0,r6,2		/* is new ESID c00000000? */
	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
	beq	2f		/* if yes, don't slbie it */
	/* Bolt in the new stack SLB entry */
	ld	r7,KSP_VSID(r4)	/* Get new stack's VSID */
	oris	r0,r6,(SLB_ESID_V)@h
	ori	r0,r0,(SLB_NUM_BOLTED-1)@l
	li	r9,MMU_SEGSIZE_1T	/* insert B field */
	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
	/* Update the last bolted SLB. No write barriers are needed
	 * here, provided we only update the current CPU's SLB shadow
	 * buffer.
	ld	r9,PACA_SLBSHADOWPTR(r13)
	std	r12,SLBSHADOW_STACKESID(r9)	/* Clear ESID */
	li	r12,SLBSHADOW_STACKVSID
	STDX_BE	r7,r12,r9			/* Save VSID */
	li	r12,SLBSHADOW_STACKESID
	STDX_BE	r0,r12,r9			/* Save ESID */
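	/*
	 * The update order matters even without barriers: the bolted ESID slot
	 * is cleared first (invalidating the shadow entry), the new VSID is
	 * written next, and the valid ESID is written last, so a reader of the
	 * shadow buffer (e.g. the hypervisor reloading the SLB) can never see
	 * a valid entry pairing the new ESID with a stale VSID.
	 */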
	/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
	 * we have 1TB segments, the only CPUs known to have the erratum
	 * only support less than 1TB of system memory and we'll never
	 * actually hit this code path.
	slbie	r6	/* Workaround POWER5 < DD2.1 issue */
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#endif /* CONFIG_PPC_BOOK3S_64 */
	clrrdi	r7, r8, THREAD_SHIFT	/* base of new stack */
	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
	   because we don't need to leave the 288-byte ABI gap at the
	   top of the kernel stack. */
	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
	 * PMU interrupts in radix may come in here. They will use r1, not
	 * PACAKSAVE, so this stack switch will not cause a problem. They
	 * will store to the process stack, which may then be migrated to
	 * another CPU. However the rq lock release on this CPU paired with
	 * the rq lock acquire on the new CPU before the stack becomes
	 * active on the new CPU, will order those stores.
	mr	r1,r8		/* start using new stack pointer */
	std	r7,PACAKSAVE(r13)
	/* r3-r13 are destroyed -- Cort */
	/* convert old thread to its task_struct for return value */
	ld	r7,_NIP(r1)	/* Return to _switch caller in new task */
	addi	r1,r1,SWITCH_FRAME_SIZE
#ifdef CONFIG_PPC_BOOK3S
	 * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
	 * touched, no exit work created, then this can be used.
	.balign IFETCH_ALIGN_BYTES
	.globl fast_interrupt_return
fast_interrupt_return:
_ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
	kuap_check_amr r3, r4
	bne	.Lfast_user_interrupt_return_amr
	kuap_kernel_restore r3, r4
	li	r3,0	/* 0 return value, no EMULATE_STACK_STORE */
	bne+	.Lfast_kernel_interrupt_return
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	unrecoverable_exception
	b	.	/* should not get here */
	.balign IFETCH_ALIGN_BYTES
	.globl interrupt_return
_ASM_NOKPROBE_SYMBOL(interrupt_return)
	beq	.Lkernel_interrupt_return
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	interrupt_exit_user_prepare
	bne-	.Lrestore_nvgprs
.Lfast_user_interrupt_return_amr:
	kuap_user_restore r3, r4
.Lfast_user_interrupt_return:
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	stdcx.	r0,0,r1		/* to clear the reservation */
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	b	.	/* prevent speculative execution */
	b	.Lfast_user_interrupt_return
	.balign IFETCH_ALIGN_BYTES
.Lkernel_interrupt_return:
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	interrupt_exit_kernel_prepare
.Lfast_kernel_interrupt_return:
	stdcx.	r0,0,r1		/* to clear the reservation */
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	 * Leaving a stale exception_marker on the stack can confuse
	 * the reliable stack unwinder later on. Clear it.
	std	r0,STACK_FRAME_OVERHEAD-16(r1)
	bne-	cr1,1f		/* emulate stack store */
	b	.	/* prevent speculative execution */
	 * Emulate stack store with update. New r1 value was already calculated
	 * and updated in our interrupt regs by emulate_loadstore, but we can't
	 * store the previous value of r1 to the stack before re-loading our
	 * registers from it, otherwise they could be clobbered. Use
	 * PACA_EXGEN as temporary storage to hold the store data, as
	 * interrupts are disabled here so it won't be clobbered.
	std	r9,PACA_EXGEN+0(r13)
	addi	r9,r1,INT_FRAME_SIZE	/* get original r1 */
	std	r9,0(r1)	/* perform store component of stdu */
	ld	r9,PACA_EXGEN+0(r13)
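	/*
	 * Rough C-level sketch of the sequence above (illustrative only):
	 *
	 *	old_r1 = (unsigned long)interrupt_frame + INT_FRAME_SIZE;
	 *	new_r1 = regs->gpr[1];			set by emulate_loadstore
	 *	*(unsigned long *)new_r1 = old_r1;	store half of stdu r1,-N(r1)
	 *
	 * The interrupted context's r9 is parked in PACA_EXGEN while r9 is
	 * borrowed to carry old_r1, and reloaded just before the return.
	 */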
	b	.	/* prevent speculative execution */
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_RTAS
 * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
 * called with the MMU off.
 * In addition, we need to be in 32b mode, at least for now.
 * Note: r3 is an input parameter to rtas, so don't trash it...
	stdu	r1,-SWITCH_FRAME_SIZE(r1)	/* Save SP and create stack space. */
	/* Because RTAS is running in 32b mode, it clobbers the high order half
	 * of all registers that it saves. We therefore save those registers
	 * RTAS might touch to the stack. (r0, r3-r13 are caller saved)
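	/* In practice "32b mode" means RTAS runs with MSR[SF] clear, so only
	 * the low 32 bits of any register it touches are preserved; everything
	 * the kernel needs intact across the call (TOC, paca, non-volatiles)
	 * is therefore saved in this 64-bit stack frame first.
	 */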
	SAVE_GPR(2, r1)		/* Save the TOC */
	SAVE_GPR(13, r1)	/* Save paca */
	SAVE_NVGPRS(r1)		/* Save the non-volatiles */
	/* Temporary workaround to clear CR until RTAS can be modified to
	 * ignore all bits.
	/* There is no way it is acceptable to get here with interrupts enabled,
	 * check it with the asm equivalent of WARN_ON
	lbz	r0,PACAIRQSOFTMASK(r13)
1:	tdeqi	r0,IRQS_ENABLED
	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
	/* Hard-disable interrupts */
	/* Unfortunately, the stack pointer and the MSR are also clobbered,
	 * so they are saved in the PACA which allows us to restore
	 * our original state after RTAS returns.
	std	r6,PACASAVEDMSR(r13)
	/* Setup our real return addr */
	LOAD_REG_ADDR(r4,rtas_return_loc)
	clrldi	r4,r4,2		/* convert to realmode address */
	ori	r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
	rldicr	r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
	sync			/* disable interrupts so SRR0/1 */
	mtmsrd	r0		/* don't get trashed */
	LOAD_REG_ADDR(r4, rtas)
	ld	r5,RTASENTRY(r4)	/* get the rtas->entry value */
	ld	r4,RTASBASE(r4)		/* get the rtas->base value */
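	/* RTASENTRY and RTASBASE are asm-offsets into the kernel's RTAS
	 * descriptor (struct rtas_t), populated from the device tree /rtas
	 * node at boot; r5/r4 end up holding the real-mode entry point and
	 * the base of the RTAS image.
	 */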
	b	.	/* prevent speculative execution */
	 * Clear RI and set SF before anything.
	sldi	r0,r0,(MSR_SF_LG - MSR_RI_LG)
	/* relocation is off at this point */
	clrldi	r4,r4,2		/* convert to realmode address */
	ld	r3,(1f-0b)(r3)	/* get &rtas_restore_regs */
	ld	r1,PACAR1(r4)		/* Restore our SP */
	ld	r4,PACASAVEDMSR(r4)	/* Restore our MSR */
	b	.	/* prevent speculative execution */
_ASM_NOKPROBE_SYMBOL(__enter_rtas)
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
1:	.8byte	rtas_restore_regs
	/* relocation is on at this point */
	REST_GPR(2, r1)		/* Restore the TOC */
	REST_GPR(13, r1)	/* Restore paca */
	REST_NVGPRS(r1)		/* Restore the non-volatiles */
	addi	r1,r1,SWITCH_FRAME_SIZE	/* Unstack our frame */
	ld	r0,16(r1)	/* get return address */
	blr			/* return to caller */
#endif /* CONFIG_PPC_RTAS */
	stdu	r1,-SWITCH_FRAME_SIZE(r1)	/* Save SP and create stack space */
	/* Because PROM is running in 32b mode, it clobbers the high order half
	 * of all registers that it saves. We therefore save those registers
	 * PROM might touch to the stack. (r0, r3-r13 are caller saved)
	/* Put PROM address in SRR0 */
	/* Setup our trampoline return addr in LR */
	/* Prepare a 32-bit mode big endian MSR
#ifdef CONFIG_PPC_BOOK3E
	rlwinm	r11,r11,0,1,31
#else /* CONFIG_PPC_BOOK3E */
	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE)
#endif /* CONFIG_PPC_BOOK3E */
1:	/* Return from OF */
	/* Just make sure that r1 top 32 bits didn't get
	 * corrupted by OF
	/* Restore the MSR (back to 64 bits) */
	/* Restore other registers */
	addi	r1,r1,SWITCH_FRAME_SIZE