/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
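
/*
 * Caller sketch (illustrative, not part of this file): the C side first
 * enables VMX in the MSR, e.g. via msr_check_and_set(MSR_VEC), and then
 * calls load_vr_state(&current->thread.vr_state), as the restore_math()
 * path in process.c does.
 */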

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (i.e. no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	ori	r5,r5,MSR_RI
#endif
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something
	 * non-zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
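	/*
	 * Userspace sketch (illustrative, not from this file): a glibc-style
	 * context save can test VRSAVE and skip all 32 vector registers when
	 * it reads as zero:
	 *
	 *	mfspr	r0,256		# SPR 256 = VRSAVE
	 *	cmpwi	r0,0
	 *	beq	skip_vr_save
	 */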
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the vector registers to the task's thread_struct.
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
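
/*
 * Note on the destination above: when thread.vr_save_area is non-NULL the
 * PPC_LCMPI/bne pair redirects the save there; otherwise the registers
 * land in the default thread.vr_state.
 */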

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */
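	/*
	 * beql+ is a conditional branch-and-link: each helper runs only when
	 * the preceding andi./andis. found its MSR bit clear (cr0 EQ set),
	 * i.e. only when that register state still needs loading.
	 */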

#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	li	r5,MSR_RI
	mtmsrd	r5,1
#endif

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr

#endif /* CONFIG_VSX */

/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
	.data
#ifdef CONFIG_PPC32
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

fpzero:
	.quad	0
fpone:
	.quad	0x3ff0000000000000	/* 1.0 */
fphalf:
	.quad	0x3fe0000000000000	/* 0.5 */

#ifdef CONFIG_PPC_KERNEL_PCREL
#define LDCONST(fr, name)		\
	pla	r11,name@pcrel;		\
	lfd	fr,0(r11)
#else
#define LDCONST(fr, name)		\
	addis	r11,r2,name@toc@ha;	\
	lfd	fr,name@toc@l(r11)
#endif
#endif
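
/*
 * Illustrative expansion (not from the file): LDCONST(fr1, fpone)
 * materialises the address of fpone (immediate hi/lo pair on 32-bit,
 * TOC or pcrel addressing on 64-bit) and loads the 1.0 constant into fr1.
 */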
	.text

/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
SYM_FUNC_START_LOCAL(fpenable)
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr
SYM_FUNC_END(fpenable)
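
/*
 * Matching epilogue (as in the upstream file): restore the caller's FPSCR
 * and the FP regs saved by fpenable, then return through the link value
 * each entry point stashed in r12.
 */
fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr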

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
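
/*
 * Note: emulating with a true divide comfortably beats the accuracy the
 * architecture requires of the vrefp estimate (relative error <= 1/4096).
 */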

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
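
/*
 * Derivation (illustrative, not from the file): to refine r ~= 1/sqrt(s),
 * apply Newton-Raphson to f(r) = 1/r^2 - s, whose root is the value sought:
 *
 *	r' = r - f(r)/f'(r) = r + (1/r^2 - s) * r^3/2
 *	   = r + 0.5 * r * (1 - s * r * r)
 *
 * which is exactly the fmuls/fnmsubs/fmadds sequence above. frsqrte is
 * architecturally accurate to 1/32, and each iteration roughly squares the
 * error, so two iterations easily exceed the 1/4096 accuracy required of
 * the vrsqrtefp estimate.
 */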