Automatic merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/gregkh/driver...
[linux-2.6/verdex.git] / arch / ppc / kernel / vecemu.c
blob604d0947cb20cd87dcad9f7e512dd5a63ddd05d4
1 /*
2 * Routines to emulate some Altivec/VMX instructions, specifically
3 * those that can trap when given denormalized operands in Java mode.
4 */
5 #include <linux/kernel.h>
6 #include <linux/errno.h>
7 #include <linux/sched.h>
8 #include <asm/ptrace.h>
9 #include <asm/processor.h>
10 #include <asm/uaccess.h>
12 /* Functions in vector.S */
13 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
14 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
15 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
16 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
17 extern void vrefp(vector128 *dst, vector128 *src);
18 extern void vrsqrtefp(vector128 *dst, vector128 *src);
19 extern void vexptep(vector128 *dst, vector128 *src);
21 static unsigned int exp2s[8] = {
22 0x800000,
23 0x8b95c2,
24 0x9837f0,
25 0xa5fed7,
26 0xb504f3,
27 0xc5672a,
28 0xd744fd,
29 0xeac0c7
33 * Computes an estimate of 2^x. The `s' argument is the 32-bit
34 * single-precision floating-point representation of x.
36 static unsigned int eexp2(unsigned int s)
38 int exp, pwr;
39 unsigned int mant, frac;
41 /* extract exponent field from input */
42 exp = ((s >> 23) & 0xff) - 127;
43 if (exp > 7) {
44 /* check for NaN input */
45 if (exp == 128 && (s & 0x7fffff) != 0)
46 return s | 0x400000; /* return QNaN */
47 /* 2^-big = 0, 2^+big = +Inf */
48 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
50 if (exp < -23)
51 return 0x3f800000; /* 1.0 */
53 /* convert to fixed point integer in 9.23 representation */
54 pwr = (s & 0x7fffff) | 0x800000;
55 if (exp > 0)
56 pwr <<= exp;
57 else
58 pwr >>= -exp;
59 if (s & 0x80000000)
60 pwr = -pwr;
62 /* extract integer part, which becomes exponent part of result */
63 exp = (pwr >> 23) + 126;
64 if (exp >= 254)
65 return 0x7f800000;
66 if (exp < -23)
67 return 0;
69 /* table lookup on top 3 bits of fraction to get mantissa */
70 mant = exp2s[(pwr >> 20) & 7];
72 /* linear interpolation using remaining 20 bits of fraction */
73 asm("mulhwu %0,%1,%2" : "=r" (frac)
74 : "r" (pwr << 12), "r" (0x172b83ff));
75 asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
76 mant += frac;
78 if (exp >= 0)
79 return mant + (exp << 23);
81 /* denormalized result */
82 exp = -exp;
83 mant += 1 << (exp - 1);
84 return mant >> exp;
88 * Computes an estimate of log_2(x). The `s' argument is the 32-bit
89 * single-precision floating-point representation of x.
91 static unsigned int elog2(unsigned int s)
93 int exp, mant, lz, frac;
95 exp = s & 0x7f800000;
96 mant = s & 0x7fffff;
97 if (exp == 0x7f800000) { /* Inf or NaN */
98 if (mant != 0)
99 s |= 0x400000; /* turn NaN into QNaN */
100 return s;
102 if ((exp | mant) == 0) /* +0 or -0 */
103 return 0xff800000; /* return -Inf */
105 if (exp == 0) {
106 /* denormalized */
107 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
108 mant <<= lz - 8;
109 exp = (-118 - lz) << 23;
110 } else {
111 mant |= 0x800000;
112 exp -= 127 << 23;
115 if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */
116 exp |= 0x400000; /* 0.5 * 2^23 */
117 asm("mulhwu %0,%1,%2" : "=r" (mant)
118 : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */
120 if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */
121 exp |= 0x200000; /* 0.25 * 2^23 */
122 asm("mulhwu %0,%1,%2" : "=r" (mant)
123 : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */
125 if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */
126 exp |= 0x100000; /* 0.125 * 2^23 */
127 asm("mulhwu %0,%1,%2" : "=r" (mant)
128 : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */
130 if (mant > 0x800000) { /* 1.0 * 2^23 */
131 /* calculate (mant - 1) * 1.381097463 */
132 /* 1.381097463 == 0.125 / (2^0.125 - 1) */
133 asm("mulhwu %0,%1,%2" : "=r" (frac)
134 : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
135 exp += frac;
137 s = exp & 0x80000000;
138 if (exp != 0) {
139 if (s)
140 exp = -exp;
141 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
142 lz = 8 - lz;
143 if (lz > 0)
144 exp >>= lz;
145 else if (lz < 0)
146 exp <<= -lz;
147 s += ((lz + 126) << 23) + exp;
149 return s;
152 #define VSCR_SAT 1
154 static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
156 int exp, mant;
158 exp = (x >> 23) & 0xff;
159 mant = x & 0x7fffff;
160 if (exp == 255 && mant != 0)
161 return 0; /* NaN -> 0 */
162 exp = exp - 127 + scale;
163 if (exp < 0)
164 return 0; /* round towards zero */
165 if (exp >= 31) {
166 /* saturate, unless the result would be -2^31 */
167 if (x + (scale << 23) != 0xcf000000)
168 *vscrp |= VSCR_SAT;
169 return (x & 0x80000000)? 0x80000000: 0x7fffffff;
171 mant |= 0x800000;
172 mant = (mant << 7) >> (30 - exp);
173 return (x & 0x80000000)? -mant: mant;
176 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
178 int exp;
179 unsigned int mant;
181 exp = (x >> 23) & 0xff;
182 mant = x & 0x7fffff;
183 if (exp == 255 && mant != 0)
184 return 0; /* NaN -> 0 */
185 exp = exp - 127 + scale;
186 if (exp < 0)
187 return 0; /* round towards zero */
188 if (x & 0x80000000) {
189 /* negative => saturate to 0 */
190 *vscrp |= VSCR_SAT;
191 return 0;
193 if (exp >= 32) {
194 /* saturate */
195 *vscrp |= VSCR_SAT;
196 return 0xffffffff;
198 mant |= 0x800000;
199 mant = (mant << 8) >> (31 - exp);
200 return mant;
203 /* Round to floating integer, towards 0 */
204 static unsigned int rfiz(unsigned int x)
206 int exp;
208 exp = ((x >> 23) & 0xff) - 127;
209 if (exp == 128 && (x & 0x7fffff) != 0)
210 return x | 0x400000; /* NaN -> make it a QNaN */
211 if (exp >= 23)
212 return x; /* it's an integer already (or Inf) */
213 if (exp < 0)
214 return x & 0x80000000; /* |x| < 1.0 rounds to 0 */
215 return x & ~(0x7fffff >> exp);
218 /* Round to floating integer, towards +/- Inf */
219 static unsigned int rfii(unsigned int x)
221 int exp, mask;
223 exp = ((x >> 23) & 0xff) - 127;
224 if (exp == 128 && (x & 0x7fffff) != 0)
225 return x | 0x400000; /* NaN -> make it a QNaN */
226 if (exp >= 23)
227 return x; /* it's an integer already (or Inf) */
228 if ((x & 0x7fffffff) == 0)
229 return x; /* +/-0 -> +/-0 */
230 if (exp < 0)
231 /* 0 < |x| < 1.0 rounds to +/- 1.0 */
232 return (x & 0x80000000) | 0x3f800000;
233 mask = 0x7fffff >> exp;
234 /* mantissa overflows into exponent - that's OK,
235 it can't overflow into the sign bit */
236 return (x + mask) & ~mask;
239 /* Round to floating integer, to nearest */
240 static unsigned int rfin(unsigned int x)
242 int exp, half;
244 exp = ((x >> 23) & 0xff) - 127;
245 if (exp == 128 && (x & 0x7fffff) != 0)
246 return x | 0x400000; /* NaN -> make it a QNaN */
247 if (exp >= 23)
248 return x; /* it's an integer already (or Inf) */
249 if (exp < -1)
250 return x & 0x80000000; /* |x| < 0.5 -> +/-0 */
251 if (exp == -1)
252 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
253 return (x & 0x80000000) | 0x3f800000;
254 half = 0x400000 >> exp;
255 /* add 0.5 to the magnitude and chop off the fraction bits */
256 return (x + half) & ~(0x7fffff >> exp);
259 int emulate_altivec(struct pt_regs *regs)
261 unsigned int instr, i;
262 unsigned int va, vb, vc, vd;
263 vector128 *vrs;
265 if (get_user(instr, (unsigned int __user *) regs->nip))
266 return -EFAULT;
267 if ((instr >> 26) != 4)
268 return -EINVAL; /* not an altivec instruction */
269 vd = (instr >> 21) & 0x1f;
270 va = (instr >> 16) & 0x1f;
271 vb = (instr >> 11) & 0x1f;
272 vc = (instr >> 6) & 0x1f;
274 vrs = current->thread.vr;
275 switch (instr & 0x3f) {
276 case 10:
277 switch (vc) {
278 case 0: /* vaddfp */
279 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
280 break;
281 case 1: /* vsubfp */
282 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
283 break;
284 case 4: /* vrefp */
285 vrefp(&vrs[vd], &vrs[vb]);
286 break;
287 case 5: /* vrsqrtefp */
288 vrsqrtefp(&vrs[vd], &vrs[vb]);
289 break;
290 case 6: /* vexptefp */
291 for (i = 0; i < 4; ++i)
292 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
293 break;
294 case 7: /* vlogefp */
295 for (i = 0; i < 4; ++i)
296 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
297 break;
298 case 8: /* vrfin */
299 for (i = 0; i < 4; ++i)
300 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
301 break;
302 case 9: /* vrfiz */
303 for (i = 0; i < 4; ++i)
304 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
305 break;
306 case 10: /* vrfip */
307 for (i = 0; i < 4; ++i) {
308 u32 x = vrs[vb].u[i];
309 x = (x & 0x80000000)? rfiz(x): rfii(x);
310 vrs[vd].u[i] = x;
312 break;
313 case 11: /* vrfim */
314 for (i = 0; i < 4; ++i) {
315 u32 x = vrs[vb].u[i];
316 x = (x & 0x80000000)? rfii(x): rfiz(x);
317 vrs[vd].u[i] = x;
319 break;
320 case 14: /* vctuxs */
321 for (i = 0; i < 4; ++i)
322 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
323 &current->thread.vscr.u[3]);
324 break;
325 case 15: /* vctsxs */
326 for (i = 0; i < 4; ++i)
327 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
328 &current->thread.vscr.u[3]);
329 break;
330 default:
331 return -EINVAL;
333 break;
334 case 46: /* vmaddfp */
335 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
336 break;
337 case 47: /* vnmsubfp */
338 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
339 break;
340 default:
341 return -EINVAL;
344 return 0;