Linux 2.6.28-rc5
[cris-mirror.git] / arch / sh / kernel / cpu / sh2a / fpu.c
blob6df2fb98eb30edb905c248290c0d64d754ed33cb
1 /*
2 * Save/restore floating point context for signal handlers.
4 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
10 * FIXME! These routines can be optimized in big endian case.
12 #include <linux/sched.h>
13 #include <linux/signal.h>
14 #include <asm/processor.h>
15 #include <asm/io.h>
16 #include <asm/fpu.h>
18 /* The PR (precision) bit in the FP Status Register must be clear when
19 * an frchg instruction is executed, otherwise the instruction is undefined.
20 * Executing frchg with PR set causes a trap on some SH4 implementations.
/*
 * FPSCR_RCHG is passed as an extra "r" input operand to the inline asm in
 * save_fpu(), restore_fpu() and fpu_init(); none of those asm templates
 * actually references the operand.
 */
23 #define FPSCR_RCHG 0x00000000
27 * Save FPU registers onto task structure.
28 * Assume called with FPU enabled (SR.FD=0).
/*
 * save_fpu - store the live FPU registers into @tsk's thread structure.
 * @tsk:  task whose tsk->thread.fpu.hard area receives the register values
 * @regs: register frame handed on to release_fpu()
 *
 * Clears TIF_USEDFPU on @tsk, enables the FPU, pushes FPUL, FPSCR and
 * fr15..fr0 (in that order) with pre-decrement stores, then loads FPSCR_INIT
 * into FPSCR, disables the FPU and calls release_fpu(@regs).
 */
30 void
31 save_fpu(struct task_struct *tsk, struct pt_regs *regs)
/* Receives the final value of the pointer register; never read afterwards. */
33 unsigned long dummy;
35 clear_tsk_thread_flag(tsk, TIF_USEDFPU);
36 enable_fpu();
/*
 * %0 starts at &tsk->thread.fpu.hard.status and is pre-decremented by every
 * store, so the values land below that address.
 * NOTE(review): this presumably relies on fp_regs[], fpscr and fpul being laid
 * out contiguously just before 'status' - confirm against the struct.
 * %2 (FPSCR_RCHG) is supplied but unused; %3 (FPSCR_INIT) is loaded to FPSCR.
 */
37 asm volatile("sts.l fpul, @-%0\n\t"
38 "sts.l fpscr, @-%0\n\t"
39 "fmov.s fr15, @-%0\n\t"
40 "fmov.s fr14, @-%0\n\t"
41 "fmov.s fr13, @-%0\n\t"
42 "fmov.s fr12, @-%0\n\t"
43 "fmov.s fr11, @-%0\n\t"
44 "fmov.s fr10, @-%0\n\t"
45 "fmov.s fr9, @-%0\n\t"
46 "fmov.s fr8, @-%0\n\t"
47 "fmov.s fr7, @-%0\n\t"
48 "fmov.s fr6, @-%0\n\t"
49 "fmov.s fr5, @-%0\n\t"
50 "fmov.s fr4, @-%0\n\t"
51 "fmov.s fr3, @-%0\n\t"
52 "fmov.s fr2, @-%0\n\t"
53 "fmov.s fr1, @-%0\n\t"
54 "fmov.s fr0, @-%0\n\t"
55 "lds %3, fpscr\n\t"
56 : "=r" (dummy)
57 : "0" ((char *)(&tsk->thread.fpu.hard.status)),
58 "r" (FPSCR_RCHG),
59 "r" (FPSCR_INIT)
60 : "memory");
62 disable_fpu();
63 release_fpu(regs);
/*
 * restore_fpu - reload the FPU registers from @tsk's thread structure.
 * @tsk: task whose tsk->thread.fpu area is read
 *
 * Enables the FPU, loads fr0..fr15, then FPSCR, then FPUL with
 * post-increment loads starting at &tsk->thread.fpu, and disables the FPU
 * again.  This is the mirror image of the store order used by save_fpu().
 */
66 static void
67 restore_fpu(struct task_struct *tsk)
/* Receives the final value of the pointer register; never read afterwards. */
69 unsigned long dummy;
71 enable_fpu();
/*
 * %0 walks upwards from &tsk->thread.fpu.  The FPSCR_RCHG input operand is
 * supplied but not referenced by the template.
 */
72 asm volatile("fmov.s @%0+, fr0\n\t"
73 "fmov.s @%0+, fr1\n\t"
74 "fmov.s @%0+, fr2\n\t"
75 "fmov.s @%0+, fr3\n\t"
76 "fmov.s @%0+, fr4\n\t"
77 "fmov.s @%0+, fr5\n\t"
78 "fmov.s @%0+, fr6\n\t"
79 "fmov.s @%0+, fr7\n\t"
80 "fmov.s @%0+, fr8\n\t"
81 "fmov.s @%0+, fr9\n\t"
82 "fmov.s @%0+, fr10\n\t"
83 "fmov.s @%0+, fr11\n\t"
84 "fmov.s @%0+, fr12\n\t"
85 "fmov.s @%0+, fr13\n\t"
86 "fmov.s @%0+, fr14\n\t"
87 "fmov.s @%0+, fr15\n\t"
88 "lds.l @%0+, fpscr\n\t"
89 "lds.l @%0+, fpul\n\t"
90 : "=r" (dummy)
91 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
92 : "memory");
93 disable_fpu();
97 * Load the FPU with signalling NaNs. The bit pattern we're using
98 * has the property that, no matter whether it is considered as single or as
99 * double precision, it represents signalling NaNs.
/*
 * fpu_init - give the FPU registers a known initial state.
 *
 * Enables the FPU, loads the constant 0 into FPUL, copies FPUL into each of
 * fr0..fr15, loads FPSCR_INIT into FPSCR and disables the FPU again.
 */
102 static void
103 fpu_init(void)
105 enable_fpu();
/*
 * Operands: %0 = 0 (value written to FPUL and then to every frN),
 * %1 = FPSCR_RCHG (not referenced by the template), %2 = FPSCR_INIT.
 */
106 asm volatile("lds %0, fpul\n\t"
107 "fsts fpul, fr0\n\t"
108 "fsts fpul, fr1\n\t"
109 "fsts fpul, fr2\n\t"
110 "fsts fpul, fr3\n\t"
111 "fsts fpul, fr4\n\t"
112 "fsts fpul, fr5\n\t"
113 "fsts fpul, fr6\n\t"
114 "fsts fpul, fr7\n\t"
115 "fsts fpul, fr8\n\t"
116 "fsts fpul, fr9\n\t"
117 "fsts fpul, fr10\n\t"
118 "fsts fpul, fr11\n\t"
119 "fsts fpul, fr12\n\t"
120 "fsts fpul, fr13\n\t"
121 "fsts fpul, fr14\n\t"
122 "fsts fpul, fr15\n\t"
123 "lds %2, fpscr\n\t"
124 : /* no output */
125 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
126 disable_fpu();
130 * Emulate arithmetic ops on denormalized number for some FPU insns.
/*
 * denormalized float * float
 *
 * Multiply two single-precision values given as raw bit patterns, where hx
 * is the denormal (or zero) operand and hy the other one; the callers in
 * this file swap the arguments to arrange that.  Returns the raw bit pattern
 * of the product.  A signed zero is returned when hx is zero or hy is itself
 * below the smallest normal number.  The mantissa is truncated, not rounded.
 */
static int denormal_mulf(int hx, int hy)
{
	const unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int frac_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	unsigned int result;
	unsigned long long prod, scan;
	int e, top;

	if (frac_x == 0 || mag_y < 0x00800000)
		return sign;

	e = (mag_y & 0x7f800000) >> 23;
	frac_x &= 0x007fffff;
	/* Make the implicit leading one of the normal operand explicit. */
	mag_y = (mag_y & 0x007fffff) | 0x00800000;
	prod = (unsigned long long)frac_x * mag_y;

	/* top = index of the most significant set bit of the product. */
	top = -1;
	for (scan = prod; scan != 0; scan >>= 1)
		top++;

	/* FIXME: use guard bits */
	e += top - 126 - 46;
	if (e > 0)		/* normal result */
		result = ((int) (prod >> (top - 23)) & 0x007fffff) | (e << 23);
	else if (e >= -22)	/* denormal result */
		result = (int) (prod >> (top - 22 - e)) & 0x007fffff;
	else			/* underflows to zero */
		result = 0;

	return result | sign;
}
/* denormalized double * double */

/*
 * mult64 - unsigned 64 x 64 -> 128 bit multiplication.
 * @x, @y: factors
 * @highp: receives bits 127..64 of the product
 * @lowp:  receives bits 63..0 of the product
 *
 * Built from four 32 x 32 -> 64 bit partial products.
 */
static void mult64(unsigned long long x, unsigned long long y,
		   unsigned long long *highp, unsigned long long *lowp)
{
	const unsigned long long x_hi = x >> 32;
	const unsigned long long x_lo = x & 0xffffffffLL;
	const unsigned long long y_hi = y >> 32;
	const unsigned long long y_lo = y & 0xffffffffLL;
	const unsigned long long hi_hi = x_hi * y_hi;
	const unsigned long long lo_hi = x_lo * y_hi;
	const unsigned long long hi_lo = x_hi * y_lo;
	const unsigned long long lo_lo = x_lo * y_lo;
	unsigned long long upper, partial, lower;

	upper = hi_hi + (lo_hi >> 32) + (hi_lo >> 32);

	/* Fold both cross terms into the low word, carrying on wrap-around. */
	partial = lo_lo + (lo_hi << 32);
	if (partial < lo_lo)
		upper++;
	lower = partial + (hi_lo << 32);
	if (lower < partial)
		upper++;

	*lowp = lower;
	*highp = upper;
}
/*
 * rshift64 - shift the 128-bit value (mh:ml) right by n bits.
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count
 *
 * Returns the low 64 bits of the shifted value.  A count of zero (or less)
 * returns @ml unchanged and a count of 128 or more returns 0; without these
 * guards the expression below would shift a 64-bit value by 64 or more bits,
 * which is undefined in C.
 */
static inline long long rshift64(unsigned long long mh,
				 unsigned long long ml, int n)
{
	if (n <= 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}
/*
 * denormal_muld - multiply two doubles given as raw 64-bit patterns.
 *
 * hx is the denormal (or zero) operand and hy the other one; the caller in
 * this file swaps the arguments to arrange that.  Returns the raw bit pattern
 * of the product.  A signed zero is returned when hx is zero or hy is itself
 * below the smallest normal number.  The mantissa is truncated, not rounded.
 */
static long long denormal_muld(long long hx, long long hy)
{
	const unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long frac_x = hx & 0x7fffffffffffffffLL;
	unsigned long long mag_y = hy & 0x7fffffffffffffffLL;
	unsigned long long prod_hi, prod_lo, scan, result;
	int e, top;

	if (frac_x == 0 || mag_y < 0x0010000000000000LL)
		return sign;

	e = (mag_y & 0x7ff0000000000000LL) >> 52;
	frac_x &= 0x000fffffffffffffLL;
	/* Make the implicit leading one of the normal operand explicit. */
	mag_y = (mag_y & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(frac_x, mag_y, &prod_hi, &prod_lo);

	/* top = index of the most significant set bit of the 128-bit product. */
	top = -1;
	if (prod_hi != 0) {
		for (scan = prod_hi; scan != 0; scan >>= 1)
			top++;
		top += 64;
	} else {
		for (scan = prod_lo; scan != 0; scan >>= 1)
			top++;
	}

	/* FIXME: use guard bits */
	e += top - 1022 - 52 * 2;
	if (e > 0)		/* normal result */
		result = (rshift64(prod_hi, prod_lo, top - 52)
			  & 0x000fffffffffffffLL)
			 | ((long long)e << 52);
	else if (e >= -51)	/* denormal result */
		result = rshift64(prod_hi, prod_lo, top - 51 - e)
			 & 0x000fffffffffffffLL;
	else			/* underflows to zero */
		result = 0;

	return result | sign;
}
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are raw single-precision magnitudes (sign bit clear).  The
 * result is a raw bit pattern; when both operands are denormal and iy > ix it
 * is the negative two's-complement difference, which the caller must handle.
 * iy is aligned to ix with a truncating right shift (no guard bits).
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int e;

	/* Two denormals share one scale: plain integer subtraction. */
	if (ix < 0x00800000)
		return ix - iy;

	e = (ix & 0x7f800000) >> 23;
	if (e > 32)		/* iy is shifted out completely */
		return ix;
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) - iy;

	/* Renormalize; hitting exponent 0 means the result is denormal. */
	for (; mant < 0x00800000; mant <<= 1) {
		if (--e == 0)
			return mant;
	}

	return (e << 23) | (mant & 0x007fffff);
}
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are raw single-precision magnitudes (sign bit clear); the
 * raw bit pattern of the sum is returned.  iy is aligned to ix with a
 * truncating right shift (no guard bits).
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int e;

	/* Two denormals share one scale: plain integer addition. */
	if (ix < 0x00800000)
		return ix + iy;

	e = (ix & 0x7f800000) >> 23;
	if (e > 32)		/* iy is shifted out completely */
		return ix;
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) + iy;
	if (mant >= 0x01000000) {
		/* Carry out of the mantissa: move up one binade. */
		mant >>= 1;
		e++;
	}

	return (e << 23) | (mant & 0x007fffff);
}
/*
 * denormal_addf - add two single-precision values given as raw bit patterns,
 * at least one of which is denormal.
 *
 * The magnitudes are passed to denormal_addf1() (equal signs) or
 * denormal_subf1() (opposite signs) with the denormal operand second, and
 * the sign of the result is patched in afterwards.
 */
static int denormal_addf(int hx, int hy)
{
	int sign = hx & 0x80000000;
	unsigned int mag_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	const int y_denormal = mag_y < 0x00800000;
	unsigned int res;

	if (!((hx ^ hy) & 0x80000000)) {
		/* Equal signs: add magnitudes, keep the sign of hx. */
		if (y_denormal)
			res = denormal_addf1(mag_x, mag_y);
		else
			res = denormal_addf1(mag_y, mag_x);
		return sign | res;
	}

	/* Opposite signs: subtract magnitudes. */
	if (y_denormal) {
		res = denormal_subf1(mag_x, mag_y);
		if ((int) res < 0) {
			/* |hy| was larger: result takes the sign of hy. */
			res = -res;
			sign ^= 0x80000000;
		}
	} else {
		/* Computed as |hy| - |hx|, so the result has the sign of hy. */
		res = denormal_subf1(mag_y, mag_x);
		sign ^= 0x80000000;
	}

	return sign | res;
}
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are raw double-precision magnitudes (sign bit clear).  The
 * result is a raw bit pattern; when both operands are denormal and iy > ix it
 * is the negative two's-complement difference, which the caller must handle.
 * iy is aligned to ix with a truncating right shift (no guard bits).
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	int e;

	/* Two denormals share one scale: plain integer subtraction. */
	if (ix < 0x0010000000000000LL)
		return ix - iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	if (e > 64)		/* iy is shifted out completely */
		return ix;
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) - iy;

	/* Renormalize; hitting exponent 0 means the result is denormal. */
	for (; mant < 0x0010000000000000LL; mant <<= 1) {
		if (--e == 0)
			return mant;
	}

	return ((long long)e << 52) | (mant & 0x000fffffffffffffLL);
}
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are raw double-precision magnitudes (sign bit clear); the
 * raw bit pattern of the sum is returned.  iy is aligned to ix with a
 * truncating right shift (no guard bits).
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	long long e;

	/* Two denormals share one scale: plain integer addition. */
	if (ix < 0x0010000000000000LL)
		return ix + iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	if (e > 64)		/* iy is shifted out completely */
		return ix;
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) + iy;
	if (mant >= 0x0020000000000000LL) {
		/* Carry out of the mantissa: move up one binade. */
		mant >>= 1;
		e++;
	}

	return (e << 52) | (mant & 0x000fffffffffffffLL);
}
/*
 * denormal_addd - add two doubles given as raw 64-bit patterns, at least one
 * of which is denormal.
 *
 * The magnitudes are passed to denormal_addd1() (equal signs) or
 * denormal_subd1() (opposite signs) with the denormal operand second, and
 * the sign of the result is patched in afterwards.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* Opposite signs: subtract magnitudes. */
		if (iy < 0x0010000000000000LL) {
			ix = denormal_subd1(ix, iy);
			/*
			 * The difference is a 64-bit quantity, so its sign
			 * lives in bit 63.  Casting to int would test bit 31
			 * of the low word instead and misjudge the sign.
			 */
			if ((long long) ix < 0) {
				ix = -ix;
				sign ^= 0x8000000000000000LL;
			}
		} else {
			/* Computed as |hy| - |hx|: result has the sign of hy. */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* Equal signs: add magnitudes, keep the sign of hx. */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
410 * denormal_to_double - Given denormalized float number,
411 * store double float
413 * @fpu: Pointer to sh_fpu_hard structure
414 * @n: Index to FP register
416 static void
417 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
419 unsigned long du, dl;
420 unsigned long x = fpu->fpul;
421 int exp = 1023 - 126;
423 if (x != 0 && (x & 0x7f800000) == 0) {
424 du = (x & 0x80000000);
425 while ((x & 0x00800000) == 0) {
426 x <<= 1;
427 exp--;
429 x &= 0x007fffff;
430 du |= (exp << 20) | (x >> 3);
431 dl = x << 29;
433 fpu->fp_regs[n] = du;
434 fpu->fp_regs[n+1] = dl;
439 * ieee_fpe_handler - Handle denormalized number exception
441 * @regs: Pointer to register structure
443 * Returns 1 when it's handled (should not cause exception).
/*
 * Decode the instruction that raised the FPU exception and, if it is an
 * fcnvsd, fmul, fadd or fsub whose saved FPSCR has FPSCR_FPU_ERROR set and
 * (for the arithmetic ops) an operand below the smallest normal number,
 * compute the result in software on the register image in
 * current->thread.fpu.hard.
 *
 * On success the result is written into the saved fp_regs[], regs->pc is
 * advanced to the next instruction (the branch target when the FPU
 * instruction sat in a delay slot) and 1 is returned; otherwise 0.
 */
445 static int
446 ieee_fpe_handler (struct pt_regs *regs)
448 unsigned short insn = *(unsigned short *) regs->pc;
/* finsn: the FPU instruction to emulate; nextpc: where to resume afterwards. */
449 unsigned short finsn;
450 unsigned long nextpc;
/* The four 4-bit fields of the opcode, most significant first. */
451 int nib[4] = {
452 (insn >> 12) & 0xf,
453 (insn >> 8) & 0xf,
454 (insn >> 4) & 0xf,
455 insn & 0xf};
/*
 * Subroutine calls record the return address.  regs->pr is written here even
 * if the function later returns 0.
 * NOTE(review): bsrf is decoded below as a delayed branch but does not update
 * regs->pr here - confirm whether that is intended.
 */
457 if (nib[0] == 0xb ||
458 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
459 regs->pr = regs->pc + 4;
/*
 * If the instruction at pc is a delayed branch, the FPU instruction is the
 * one in its delay slot (pc + 2) and execution resumes at the branch target.
 */
460 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
/* Sign-extend the 12-bit displacement and scale it by 2. */
461 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
462 finsn = *(unsigned short *) (regs->pc + 2);
463 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
/* Bit 0 of sr decides whether the conditional branch is taken. */
464 if (regs->sr & 1)
465 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
466 else
467 nextpc = regs->pc + 4;
468 finsn = *(unsigned short *) (regs->pc + 2);
469 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
470 if (regs->sr & 1)
471 nextpc = regs->pc + 4;
472 else
473 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
474 finsn = *(unsigned short *) (regs->pc + 2);
475 } else if (nib[0] == 0x4 && nib[3] == 0xb &&
476 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
477 nextpc = regs->regs[nib[1]];
478 finsn = *(unsigned short *) (regs->pc + 2);
479 } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
480 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
481 nextpc = regs->pc + 4 + regs->regs[nib[1]];
482 finsn = *(unsigned short *) (regs->pc + 2);
483 } else if (insn == 0x000b) { /* rts */
484 nextpc = regs->pr;
485 finsn = *(unsigned short *) (regs->pc + 2);
486 } else {
/* Not a delayed branch: the faulting instruction is the one at pc. */
487 nextpc = regs->pc + 2;
488 finsn = insn;
/* Bit 17 of the saved FPSCR, tested before any emulation is attempted. */
491 #define FPSCR_FPU_ERROR (1 << 17)
493 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
494 struct task_struct *tsk = current;
496 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
497 /* FPU error */
498 denormal_to_double (&tsk->thread.fpu.hard,
499 (finsn >> 8) & 0xf);
500 } else
501 return 0;
503 regs->pc = nextpc;
504 return 1;
505 } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
506 struct task_struct *tsk = current;
507 int fpscr;
508 int n, m, prec;
509 unsigned int hx, hy;
/* n: destination/first operand register, m: second operand register. */
511 n = (finsn >> 8) & 0xf;
512 m = (finsn >> 4) & 0xf;
513 hx = tsk->thread.fpu.hard.fp_regs[n];
514 hy = tsk->thread.fpu.hard.fp_regs[m];
515 fpscr = tsk->thread.fpu.hard.fpscr;
/* Bit 19 of fpscr selects the 64-bit (register pair) path below. */
516 prec = fpscr & (1 << 19);
518 if ((fpscr & FPSCR_FPU_ERROR)
519 && (prec && ((hx & 0x7fffffff) < 0x00100000
520 || (hy & 0x7fffffff) < 0x00100000))) {
521 long long llx, lly;
523 /* FPU error because of denormal */
524 llx = ((long long) hx << 32)
525 | tsk->thread.fpu.hard.fp_regs[n+1];
526 lly = ((long long) hy << 32)
527 | tsk->thread.fpu.hard.fp_regs[m+1];
/* denormal_muld() wants the denormal operand first. */
528 if ((hx & 0x7fffffff) >= 0x00100000)
529 llx = denormal_muld(lly, llx);
530 else
531 llx = denormal_muld(llx, lly);
532 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
533 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
534 } else if ((fpscr & FPSCR_FPU_ERROR)
535 && (!prec && ((hx & 0x7fffffff) < 0x00800000
536 || (hy & 0x7fffffff) < 0x00800000))) {
537 /* FPU error because of denormal */
/* denormal_mulf() wants the denormal operand first. */
538 if ((hx & 0x7fffffff) >= 0x00800000)
539 hx = denormal_mulf(hy, hx);
540 else
541 hx = denormal_mulf(hx, hy);
542 tsk->thread.fpu.hard.fp_regs[n] = hx;
543 } else
544 return 0;
546 regs->pc = nextpc;
547 return 1;
548 } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
549 struct task_struct *tsk = current;
550 int fpscr;
551 int n, m, prec;
552 unsigned int hx, hy;
554 n = (finsn >> 8) & 0xf;
555 m = (finsn >> 4) & 0xf;
556 hx = tsk->thread.fpu.hard.fp_regs[n];
557 hy = tsk->thread.fpu.hard.fp_regs[m];
558 fpscr = tsk->thread.fpu.hard.fpscr;
559 prec = fpscr & (1 << 19);
561 if ((fpscr & FPSCR_FPU_ERROR)
562 && (prec && ((hx & 0x7fffffff) < 0x00100000
563 || (hy & 0x7fffffff) < 0x00100000))) {
564 long long llx, lly;
566 /* FPU error because of denormal */
567 llx = ((long long) hx << 32)
568 | tsk->thread.fpu.hard.fp_regs[n+1];
569 lly = ((long long) hy << 32)
570 | tsk->thread.fpu.hard.fp_regs[m+1];
/* Low nibble 0 is fadd; otherwise (fsub) add with hy's sign flipped. */
571 if ((finsn & 0xf00f) == 0xf000)
572 llx = denormal_addd(llx, lly);
573 else
574 llx = denormal_addd(llx, lly ^ (1LL << 63));
575 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
576 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
577 } else if ((fpscr & FPSCR_FPU_ERROR)
578 && (!prec && ((hx & 0x7fffffff) < 0x00800000
579 || (hy & 0x7fffffff) < 0x00800000))) {
580 /* FPU error because of denormal */
581 if ((finsn & 0xf00f) == 0xf000)
582 hx = denormal_addf(hx, hy);
583 else
584 hx = denormal_addf(hx, hy ^ 0x80000000);
585 tsk->thread.fpu.hard.fp_regs[n] = hx;
586 } else
587 return 0;
589 regs->pc = nextpc;
590 return 1;
/* Any other instruction is not emulated. */
593 return 0;
/*
 * Trap handler for FPU error exceptions.
 *
 * Saves the FPU registers into current's thread structure and lets
 * ieee_fpe_handler() try to emulate the faulting instruction.  If that
 * succeeds, the cause and flag bits are cleared in the saved FPSCR and the
 * (patched) register image is loaded back into the FPU; otherwise SIGFPE is
 * sent to the task.
 * NOTE(review): 'regs' is not declared in this body; presumably it comes from
 * BUILD_TRAP_HANDLER / TRAP_HANDLER_DECL - confirm.
 */
596 BUILD_TRAP_HANDLER(fpu_error)
598 struct task_struct *tsk = current;
599 TRAP_HANDLER_DECL;
/* The emulation works on the saved register image, so capture it first. */
601 save_fpu(tsk, regs);
602 if (ieee_fpe_handler(regs)) {
603 tsk->thread.fpu.hard.fpscr &=
604 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
/* save_fpu() cleared TIF_USEDFPU; restore the registers and set it again. */
605 grab_fpu(regs);
606 restore_fpu(tsk);
607 set_tsk_thread_flag(tsk, TIF_USEDFPU);
608 return;
/* Not something we can emulate: report it to the task. */
611 force_sig(SIGFPE, tsk);
/*
 * Trap handler that makes the FPU usable for the current task.
 *
 * After grab_fpu(), a trap taken outside user mode is only logged.  For a
 * user-mode trap the FPU is either reloaded from the task's saved state
 * (task has used math before) or initialised via fpu_init(), and
 * TIF_USEDFPU is set on the task.
 * NOTE(review): 'regs' is not declared in this body; presumably it comes from
 * BUILD_TRAP_HANDLER / TRAP_HANDLER_DECL - confirm.
 */
614 BUILD_TRAP_HANDLER(fpu_state_restore)
616 struct task_struct *tsk = current;
617 TRAP_HANDLER_DECL;
619 grab_fpu(regs);
/* Kernel-mode FPU use is reported and otherwise ignored. */
620 if (!user_mode(regs)) {
621 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
622 return;
625 if (used_math()) {
626 /* Using the FPU again. */
627 restore_fpu(tsk);
628 } else {
629 /* First time FPU user. */
630 fpu_init();
631 set_used_math();
633 set_tsk_thread_flag(tsk, TIF_USEDFPU);