// SPDX-License-Identifier: GPL-2.0
/*
 * Routines to emulate some Altivec/VMX instructions, specifically
 * those that can trap when given denormalized operands in Java mode.
 */
6 #include <linux/kernel.h>
7 #include <linux/errno.h>
8 #include <linux/sched.h>
9 #include <asm/ptrace.h>
10 #include <asm/processor.h>
11 #include <asm/switch_to.h>
12 #include <linux/uaccess.h>
14 /* Functions in vector.S */
15 extern void vaddfp(vector128
*dst
, vector128
*a
, vector128
*b
);
16 extern void vsubfp(vector128
*dst
, vector128
*a
, vector128
*b
);
17 extern void vmaddfp(vector128
*dst
, vector128
*a
, vector128
*b
, vector128
*c
);
18 extern void vnmsubfp(vector128
*dst
, vector128
*a
, vector128
*b
, vector128
*c
);
19 extern void vrefp(vector128
*dst
, vector128
*src
);
20 extern void vrsqrtefp(vector128
*dst
, vector128
*src
);
21 extern void vexptep(vector128
*dst
, vector128
*src
);
23 static unsigned int exp2s
[8] = {
/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 */
38 static unsigned int eexp2(unsigned int s
)
41 unsigned int mant
, frac
;
43 /* extract exponent field from input */
44 exp
= ((s
>> 23) & 0xff) - 127;
46 /* check for NaN input */
47 if (exp
== 128 && (s
& 0x7fffff) != 0)
48 return s
| 0x400000; /* return QNaN */
49 /* 2^-big = 0, 2^+big = +Inf */
50 return (s
& 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
53 return 0x3f800000; /* 1.0 */
55 /* convert to fixed point integer in 9.23 representation */
56 pwr
= (s
& 0x7fffff) | 0x800000;
64 /* extract integer part, which becomes exponent part of result */
65 exp
= (pwr
>> 23) + 126;
71 /* table lookup on top 3 bits of fraction to get mantissa */
72 mant
= exp2s
[(pwr
>> 20) & 7];
74 /* linear interpolation using remaining 20 bits of fraction */
75 asm("mulhwu %0,%1,%2" : "=r" (frac
)
76 : "r" (pwr
<< 12), "r" (0x172b83ff));
77 asm("mulhwu %0,%1,%2" : "=r" (frac
) : "r" (frac
), "r" (mant
));
81 return mant
+ (exp
<< 23);
83 /* denormalized result */
85 mant
+= 1 << (exp
- 1);
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 */
93 static unsigned int elog2(unsigned int s
)
95 int exp
, mant
, lz
, frac
;
99 if (exp
== 0x7f800000) { /* Inf or NaN */
101 s
|= 0x400000; /* turn NaN into QNaN */
104 if ((exp
| mant
) == 0) /* +0 or -0 */
105 return 0xff800000; /* return -Inf */
109 asm("cntlzw %0,%1" : "=r" (lz
) : "r" (mant
));
111 exp
= (-118 - lz
) << 23;
117 if (mant
>= 0xb504f3) { /* 2^0.5 * 2^23 */
118 exp
|= 0x400000; /* 0.5 * 2^23 */
119 asm("mulhwu %0,%1,%2" : "=r" (mant
)
120 : "r" (mant
), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */
122 if (mant
>= 0x9837f0) { /* 2^0.25 * 2^23 */
123 exp
|= 0x200000; /* 0.25 * 2^23 */
124 asm("mulhwu %0,%1,%2" : "=r" (mant
)
125 : "r" (mant
), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */
127 if (mant
>= 0x8b95c2) { /* 2^0.125 * 2^23 */
128 exp
|= 0x100000; /* 0.125 * 2^23 */
129 asm("mulhwu %0,%1,%2" : "=r" (mant
)
130 : "r" (mant
), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */
132 if (mant
> 0x800000) { /* 1.0 * 2^23 */
133 /* calculate (mant - 1) * 1.381097463 */
134 /* 1.381097463 == 0.125 / (2^0.125 - 1) */
135 asm("mulhwu %0,%1,%2" : "=r" (frac
)
136 : "r" ((mant
- 0x800000) << 1), "r" (0xb0c7cd3a));
139 s
= exp
& 0x80000000;
143 asm("cntlzw %0,%1" : "=r" (lz
) : "r" (exp
));
149 s
+= ((lz
+ 126) << 23) + exp
;
156 static int ctsxs(unsigned int x
, int scale
, unsigned int *vscrp
)
160 exp
= (x
>> 23) & 0xff;
162 if (exp
== 255 && mant
!= 0)
163 return 0; /* NaN -> 0 */
164 exp
= exp
- 127 + scale
;
166 return 0; /* round towards zero */
168 /* saturate, unless the result would be -2^31 */
169 if (x
+ (scale
<< 23) != 0xcf000000)
171 return (x
& 0x80000000)? 0x80000000: 0x7fffffff;
174 mant
= (mant
<< 7) >> (30 - exp
);
175 return (x
& 0x80000000)? -mant
: mant
;
178 static unsigned int ctuxs(unsigned int x
, int scale
, unsigned int *vscrp
)
183 exp
= (x
>> 23) & 0xff;
185 if (exp
== 255 && mant
!= 0)
186 return 0; /* NaN -> 0 */
187 exp
= exp
- 127 + scale
;
189 return 0; /* round towards zero */
190 if (x
& 0x80000000) {
191 /* negative => saturate to 0 */
201 mant
= (mant
<< 8) >> (31 - exp
);
/*
 * Round to floating integer, towards 0.
 *
 * @x: bit pattern of a 32-bit single-precision value (sign in bit 31,
 *     biased exponent in bits 30..23, fraction in bits 22..0).
 *
 * Returns the bit pattern of x with its fractional part discarded.
 * NaNs are returned with the quiet bit (0x400000) set; infinities and
 * values that are already integral are returned unchanged; any
 * magnitude below 1.0 becomes a zero carrying the sign of x.
 *
 * NOTE(review): the two range guards were restored from the comments
 * attached to their return statements -- confirm against upstream.
 */
static unsigned int rfiz(unsigned int x)
{
	int exp;

	/* unbiased exponent; 128 means Inf or NaN */
	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < 0)
		return x & 0x80000000;	/* |x| < 1.0 rounds to 0 */
	/* the low (23 - exp) fraction bits sit below the binary point */
	return x & ~(0x7fffff >> exp);
}
/*
 * Round to floating integer, towards +/- Inf (away from zero).
 *
 * @x: bit pattern of a 32-bit single-precision value.
 *
 * Returns the bit pattern of x rounded to the next integer of larger
 * magnitude.  NaNs are returned with the quiet bit (0x400000) set;
 * infinities, integral values and +/-0 are returned unchanged; any
 * non-zero magnitude below 1.0 becomes +/-1.0.
 *
 * NOTE(review): the two range guards were restored from the comments
 * attached to their return statements -- confirm against upstream.
 */
static unsigned int rfii(unsigned int x)
{
	int exp, mask;

	/* unbiased exponent; 128 means Inf or NaN */
	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if ((x & 0x7fffffff) == 0)
		return x;		/* +/-0 -> +/-0 */
	if (exp < 0)
		/* 0 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	/* fraction bits that lie below the binary point */
	mask = 0x7fffff >> exp;
	/* mantissa overflows into exponent - that's OK,
	   it can't overflow into the sign bit */
	return (x + mask) & ~mask;
}
/*
 * Round to floating integer, to nearest.
 *
 * @x: bit pattern of a 32-bit single-precision value.
 *
 * Returns the bit pattern of x rounded to the nearest integer by adding
 * 0.5 to the magnitude and truncating, so exact halves move away from
 * zero.  NaNs are returned with the quiet bit (0x400000) set;
 * infinities and integral values are returned unchanged; magnitudes
 * below 0.5 become a zero carrying the sign of x.
 *
 * NOTE(review): the range guards were restored from the comments
 * attached to their return statements -- confirm against upstream.
 */
static unsigned int rfin(unsigned int x)
{
	int exp, half;

	/* unbiased exponent; 128 means Inf or NaN */
	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1)
		/* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	/* fraction bit whose weight is exactly 0.5 at this exponent */
	half = 0x400000 >> exp;
	/* add 0.5 to the magnitude and chop off the fraction bits */
	return (x + half) & ~(0x7fffff >> exp);
}
261 int emulate_altivec(struct pt_regs
*regs
)
263 unsigned int instr
, i
;
264 unsigned int va
, vb
, vc
, vd
;
267 if (get_user(instr
, (unsigned int __user
*) regs
->nip
))
269 if ((instr
>> 26) != 4)
270 return -EINVAL
; /* not an altivec instruction */
271 vd
= (instr
>> 21) & 0x1f;
272 va
= (instr
>> 16) & 0x1f;
273 vb
= (instr
>> 11) & 0x1f;
274 vc
= (instr
>> 6) & 0x1f;
276 vrs
= current
->thread
.vr_state
.vr
;
277 switch (instr
& 0x3f) {
281 vaddfp(&vrs
[vd
], &vrs
[va
], &vrs
[vb
]);
284 vsubfp(&vrs
[vd
], &vrs
[va
], &vrs
[vb
]);
287 vrefp(&vrs
[vd
], &vrs
[vb
]);
289 case 5: /* vrsqrtefp */
290 vrsqrtefp(&vrs
[vd
], &vrs
[vb
]);
292 case 6: /* vexptefp */
293 for (i
= 0; i
< 4; ++i
)
294 vrs
[vd
].u
[i
] = eexp2(vrs
[vb
].u
[i
]);
296 case 7: /* vlogefp */
297 for (i
= 0; i
< 4; ++i
)
298 vrs
[vd
].u
[i
] = elog2(vrs
[vb
].u
[i
]);
301 for (i
= 0; i
< 4; ++i
)
302 vrs
[vd
].u
[i
] = rfin(vrs
[vb
].u
[i
]);
305 for (i
= 0; i
< 4; ++i
)
306 vrs
[vd
].u
[i
] = rfiz(vrs
[vb
].u
[i
]);
309 for (i
= 0; i
< 4; ++i
) {
310 u32 x
= vrs
[vb
].u
[i
];
311 x
= (x
& 0x80000000)? rfiz(x
): rfii(x
);
316 for (i
= 0; i
< 4; ++i
) {
317 u32 x
= vrs
[vb
].u
[i
];
318 x
= (x
& 0x80000000)? rfii(x
): rfiz(x
);
322 case 14: /* vctuxs */
323 for (i
= 0; i
< 4; ++i
)
324 vrs
[vd
].u
[i
] = ctuxs(vrs
[vb
].u
[i
], va
,
325 ¤t
->thread
.vr_state
.vscr
.u
[3]);
327 case 15: /* vctsxs */
328 for (i
= 0; i
< 4; ++i
)
329 vrs
[vd
].u
[i
] = ctsxs(vrs
[vb
].u
[i
], va
,
330 ¤t
->thread
.vr_state
.vscr
.u
[3]);
336 case 46: /* vmaddfp */
337 vmaddfp(&vrs
[vd
], &vrs
[va
], &vrs
[vb
], &vrs
[vc
]);
339 case 47: /* vnmsubfp */
340 vnmsubfp(&vrs
[vd
], &vrs
[va
], &vrs
[vb
], &vrs
[vc
]);