Some modifications to files needed to succesfully compile ;)
[wrt350n-kernel.git] / arch / sh / kernel / cpu / sh2a / fpu.c
blobff99562456fbe50f80308f479c960d617fb662bb
1 /*
2 * Save/restore floating point context for signal handlers.
4 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
10 * FIXME! These routines can be optimized in big endian case.
12 #include <linux/sched.h>
13 #include <linux/signal.h>
14 #include <asm/processor.h>
15 #include <asm/io.h>
17 /* The PR (precision) bit in the FP Status Register must be clear when
18 * an frchg instruction is executed, otherwise the instruction is undefined.
19 * Executing frchg with PR set causes a trap on some SH4 implementations.
22 #define FPSCR_RCHG 0x00000000
26 * Save FPU registers onto task structure.
27 * Assume called with FPU enabled (SR.FD=0).
29 void
30 save_fpu(struct task_struct *tsk, struct pt_regs *regs)
32 unsigned long dummy;
34 clear_tsk_thread_flag(tsk, TIF_USEDFPU);
35 enable_fpu();
36 asm volatile("sts.l fpul, @-%0\n\t"
37 "sts.l fpscr, @-%0\n\t"
38 "fmov.s fr15, @-%0\n\t"
39 "fmov.s fr14, @-%0\n\t"
40 "fmov.s fr13, @-%0\n\t"
41 "fmov.s fr12, @-%0\n\t"
42 "fmov.s fr11, @-%0\n\t"
43 "fmov.s fr10, @-%0\n\t"
44 "fmov.s fr9, @-%0\n\t"
45 "fmov.s fr8, @-%0\n\t"
46 "fmov.s fr7, @-%0\n\t"
47 "fmov.s fr6, @-%0\n\t"
48 "fmov.s fr5, @-%0\n\t"
49 "fmov.s fr4, @-%0\n\t"
50 "fmov.s fr3, @-%0\n\t"
51 "fmov.s fr2, @-%0\n\t"
52 "fmov.s fr1, @-%0\n\t"
53 "fmov.s fr0, @-%0\n\t"
54 "lds %3, fpscr\n\t"
55 : "=r" (dummy)
56 : "0" ((char *)(&tsk->thread.fpu.hard.status)),
57 "r" (FPSCR_RCHG),
58 "r" (FPSCR_INIT)
59 : "memory");
61 disable_fpu();
62 release_fpu(regs);
65 static void
66 restore_fpu(struct task_struct *tsk)
68 unsigned long dummy;
70 enable_fpu();
71 asm volatile("fmov.s @%0+, fr0\n\t"
72 "fmov.s @%0+, fr1\n\t"
73 "fmov.s @%0+, fr2\n\t"
74 "fmov.s @%0+, fr3\n\t"
75 "fmov.s @%0+, fr4\n\t"
76 "fmov.s @%0+, fr5\n\t"
77 "fmov.s @%0+, fr6\n\t"
78 "fmov.s @%0+, fr7\n\t"
79 "fmov.s @%0+, fr8\n\t"
80 "fmov.s @%0+, fr9\n\t"
81 "fmov.s @%0+, fr10\n\t"
82 "fmov.s @%0+, fr11\n\t"
83 "fmov.s @%0+, fr12\n\t"
84 "fmov.s @%0+, fr13\n\t"
85 "fmov.s @%0+, fr14\n\t"
86 "fmov.s @%0+, fr15\n\t"
87 "lds.l @%0+, fpscr\n\t"
88 "lds.l @%0+, fpul\n\t"
89 : "=r" (dummy)
90 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
91 : "memory");
92 disable_fpu();
96 * Load the FPU with signalling NANS. This bit pattern we're using
97 * has the property that no matter wether considered as single or as
98 * double precission represents signaling NANS.
101 static void
102 fpu_init(void)
104 enable_fpu();
105 asm volatile("lds %0, fpul\n\t"
106 "fsts fpul, fr0\n\t"
107 "fsts fpul, fr1\n\t"
108 "fsts fpul, fr2\n\t"
109 "fsts fpul, fr3\n\t"
110 "fsts fpul, fr4\n\t"
111 "fsts fpul, fr5\n\t"
112 "fsts fpul, fr6\n\t"
113 "fsts fpul, fr7\n\t"
114 "fsts fpul, fr8\n\t"
115 "fsts fpul, fr9\n\t"
116 "fsts fpul, fr10\n\t"
117 "fsts fpul, fr11\n\t"
118 "fsts fpul, fr12\n\t"
119 "fsts fpul, fr13\n\t"
120 "fsts fpul, fr14\n\t"
121 "fsts fpul, fr15\n\t"
122 "lds %2, fpscr\n\t"
123 : /* no output */
124 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
125 disable_fpu();
129 * Emulate arithmetic ops on denormalized number for some FPU insns.
132 /* denormalized float * float */
133 static int denormal_mulf(int hx, int hy)
135 unsigned int ix, iy;
136 unsigned long long m, n;
137 int exp, w;
139 ix = hx & 0x7fffffff;
140 iy = hy & 0x7fffffff;
141 if (iy < 0x00800000 || ix == 0)
142 return ((hx ^ hy) & 0x80000000);
144 exp = (iy & 0x7f800000) >> 23;
145 ix &= 0x007fffff;
146 iy = (iy & 0x007fffff) | 0x00800000;
147 m = (unsigned long long)ix * iy;
148 n = m;
149 w = -1;
150 while (n) { n >>= 1; w++; }
152 /* FIXME: use guard bits */
153 exp += w - 126 - 46;
154 if (exp > 0)
155 ix = ((int) (m >> (w - 23)) & 0x007fffff) | (exp << 23);
156 else if (exp + 22 >= 0)
157 ix = (int) (m >> (w - 22 - exp)) & 0x007fffff;
158 else
159 ix = 0;
161 ix |= (hx ^ hy) & 0x80000000;
162 return ix;
165 /* denormalized double * double */
166 static void mult64(unsigned long long x, unsigned long long y,
167 unsigned long long *highp, unsigned long long *lowp)
169 unsigned long long sub0, sub1, sub2, sub3;
170 unsigned long long high, low;
172 sub0 = (x >> 32) * (unsigned long) (y >> 32);
173 sub1 = (x & 0xffffffffLL) * (unsigned long) (y >> 32);
174 sub2 = (x >> 32) * (unsigned long) (y & 0xffffffffLL);
175 sub3 = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL);
176 low = sub3;
177 high = 0LL;
178 sub3 += (sub1 << 32);
179 if (low > sub3)
180 high++;
181 low = sub3;
182 sub3 += (sub2 << 32);
183 if (low > sub3)
184 high++;
185 low = sub3;
186 high += (sub1 >> 32) + (sub2 >> 32);
187 high += sub0;
188 *lowp = low;
189 *highp = high;
192 static inline long long rshift64(unsigned long long mh,
193 unsigned long long ml, int n)
195 if (n >= 64)
196 return mh >> (n - 64);
197 return (mh << (64 - n)) | (ml >> n);
200 static long long denormal_muld(long long hx, long long hy)
202 unsigned long long ix, iy;
203 unsigned long long mh, ml, nh, nl;
204 int exp, w;
206 ix = hx & 0x7fffffffffffffffLL;
207 iy = hy & 0x7fffffffffffffffLL;
208 if (iy < 0x0010000000000000LL || ix == 0)
209 return ((hx ^ hy) & 0x8000000000000000LL);
211 exp = (iy & 0x7ff0000000000000LL) >> 52;
212 ix &= 0x000fffffffffffffLL;
213 iy = (iy & 0x000fffffffffffffLL) | 0x0010000000000000LL;
214 mult64(ix, iy, &mh, &ml);
215 nh = mh;
216 nl = ml;
217 w = -1;
218 if (nh) {
219 while (nh) { nh >>= 1; w++;}
220 w += 64;
221 } else
222 while (nl) { nl >>= 1; w++;}
224 /* FIXME: use guard bits */
225 exp += w - 1022 - 52 * 2;
226 if (exp > 0)
227 ix = (rshift64(mh, ml, w - 52) & 0x000fffffffffffffLL)
228 | ((long long)exp << 52);
229 else if (exp + 51 >= 0)
230 ix = rshift64(mh, ml, w - 51 - exp) & 0x000fffffffffffffLL;
231 else
232 ix = 0;
234 ix |= (hx ^ hy) & 0x8000000000000000LL;
235 return ix;
238 /* ix - iy where iy: denormal and ix, iy >= 0 */
239 static int denormal_subf1(unsigned int ix, unsigned int iy)
241 int frac;
242 int exp;
244 if (ix < 0x00800000)
245 return ix - iy;
247 exp = (ix & 0x7f800000) >> 23;
248 if (exp - 1 > 31)
249 return ix;
250 iy >>= exp - 1;
251 if (iy == 0)
252 return ix;
254 frac = (ix & 0x007fffff) | 0x00800000;
255 frac -= iy;
256 while (frac < 0x00800000) {
257 if (--exp == 0)
258 return frac;
259 frac <<= 1;
262 return (exp << 23) | (frac & 0x007fffff);
265 /* ix + iy where iy: denormal and ix, iy >= 0 */
266 static int denormal_addf1(unsigned int ix, unsigned int iy)
268 int frac;
269 int exp;
271 if (ix < 0x00800000)
272 return ix + iy;
274 exp = (ix & 0x7f800000) >> 23;
275 if (exp - 1 > 31)
276 return ix;
277 iy >>= exp - 1;
278 if (iy == 0)
279 return ix;
281 frac = (ix & 0x007fffff) | 0x00800000;
282 frac += iy;
283 if (frac >= 0x01000000) {
284 frac >>= 1;
285 ++exp;
288 return (exp << 23) | (frac & 0x007fffff);
291 static int denormal_addf(int hx, int hy)
293 unsigned int ix, iy;
294 int sign;
296 if ((hx ^ hy) & 0x80000000) {
297 sign = hx & 0x80000000;
298 ix = hx & 0x7fffffff;
299 iy = hy & 0x7fffffff;
300 if (iy < 0x00800000) {
301 ix = denormal_subf1(ix, iy);
302 if (ix < 0) {
303 ix = -ix;
304 sign ^= 0x80000000;
306 } else {
307 ix = denormal_subf1(iy, ix);
308 sign ^= 0x80000000;
310 } else {
311 sign = hx & 0x80000000;
312 ix = hx & 0x7fffffff;
313 iy = hy & 0x7fffffff;
314 if (iy < 0x00800000)
315 ix = denormal_addf1(ix, iy);
316 else
317 ix = denormal_addf1(iy, ix);
320 return sign | ix;
323 /* ix - iy where iy: denormal and ix, iy >= 0 */
324 static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
326 long long frac;
327 int exp;
329 if (ix < 0x0010000000000000LL)
330 return ix - iy;
332 exp = (ix & 0x7ff0000000000000LL) >> 52;
333 if (exp - 1 > 63)
334 return ix;
335 iy >>= exp - 1;
336 if (iy == 0)
337 return ix;
339 frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
340 frac -= iy;
341 while (frac < 0x0010000000000000LL) {
342 if (--exp == 0)
343 return frac;
344 frac <<= 1;
347 return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL);
350 /* ix + iy where iy: denormal and ix, iy >= 0 */
351 static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
353 long long frac;
354 long long exp;
356 if (ix < 0x0010000000000000LL)
357 return ix + iy;
359 exp = (ix & 0x7ff0000000000000LL) >> 52;
360 if (exp - 1 > 63)
361 return ix;
362 iy >>= exp - 1;
363 if (iy == 0)
364 return ix;
366 frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
367 frac += iy;
368 if (frac >= 0x0020000000000000LL) {
369 frac >>= 1;
370 ++exp;
373 return (exp << 52) | (frac & 0x000fffffffffffffLL);
376 static long long denormal_addd(long long hx, long long hy)
378 unsigned long long ix, iy;
379 long long sign;
381 if ((hx ^ hy) & 0x8000000000000000LL) {
382 sign = hx & 0x8000000000000000LL;
383 ix = hx & 0x7fffffffffffffffLL;
384 iy = hy & 0x7fffffffffffffffLL;
385 if (iy < 0x0010000000000000LL) {
386 ix = denormal_subd1(ix, iy);
387 if (ix < 0) {
388 ix = -ix;
389 sign ^= 0x8000000000000000LL;
391 } else {
392 ix = denormal_subd1(iy, ix);
393 sign ^= 0x8000000000000000LL;
395 } else {
396 sign = hx & 0x8000000000000000LL;
397 ix = hx & 0x7fffffffffffffffLL;
398 iy = hy & 0x7fffffffffffffffLL;
399 if (iy < 0x0010000000000000LL)
400 ix = denormal_addd1(ix, iy);
401 else
402 ix = denormal_addd1(iy, ix);
405 return sign | ix;
409 * denormal_to_double - Given denormalized float number,
410 * store double float
412 * @fpu: Pointer to sh_fpu_hard structure
413 * @n: Index to FP register
415 static void
416 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
418 unsigned long du, dl;
419 unsigned long x = fpu->fpul;
420 int exp = 1023 - 126;
422 if (x != 0 && (x & 0x7f800000) == 0) {
423 du = (x & 0x80000000);
424 while ((x & 0x00800000) == 0) {
425 x <<= 1;
426 exp--;
428 x &= 0x007fffff;
429 du |= (exp << 20) | (x >> 3);
430 dl = x << 29;
432 fpu->fp_regs[n] = du;
433 fpu->fp_regs[n+1] = dl;
438 * ieee_fpe_handler - Handle denormalized number exception
440 * @regs: Pointer to register structure
442 * Returns 1 when it's handled (should not cause exception).
444 static int
445 ieee_fpe_handler (struct pt_regs *regs)
447 unsigned short insn = *(unsigned short *) regs->pc;
448 unsigned short finsn;
449 unsigned long nextpc;
450 int nib[4] = {
451 (insn >> 12) & 0xf,
452 (insn >> 8) & 0xf,
453 (insn >> 4) & 0xf,
454 insn & 0xf};
456 if (nib[0] == 0xb ||
457 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
458 regs->pr = regs->pc + 4;
459 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
460 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
461 finsn = *(unsigned short *) (regs->pc + 2);
462 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
463 if (regs->sr & 1)
464 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
465 else
466 nextpc = regs->pc + 4;
467 finsn = *(unsigned short *) (regs->pc + 2);
468 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
469 if (regs->sr & 1)
470 nextpc = regs->pc + 4;
471 else
472 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
473 finsn = *(unsigned short *) (regs->pc + 2);
474 } else if (nib[0] == 0x4 && nib[3] == 0xb &&
475 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
476 nextpc = regs->regs[nib[1]];
477 finsn = *(unsigned short *) (regs->pc + 2);
478 } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
479 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
480 nextpc = regs->pc + 4 + regs->regs[nib[1]];
481 finsn = *(unsigned short *) (regs->pc + 2);
482 } else if (insn == 0x000b) { /* rts */
483 nextpc = regs->pr;
484 finsn = *(unsigned short *) (regs->pc + 2);
485 } else {
486 nextpc = regs->pc + 2;
487 finsn = insn;
490 #define FPSCR_FPU_ERROR (1 << 17)
492 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
493 struct task_struct *tsk = current;
495 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
496 /* FPU error */
497 denormal_to_double (&tsk->thread.fpu.hard,
498 (finsn >> 8) & 0xf);
499 } else
500 return 0;
502 regs->pc = nextpc;
503 return 1;
504 } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
505 struct task_struct *tsk = current;
506 int fpscr;
507 int n, m, prec;
508 unsigned int hx, hy;
510 n = (finsn >> 8) & 0xf;
511 m = (finsn >> 4) & 0xf;
512 hx = tsk->thread.fpu.hard.fp_regs[n];
513 hy = tsk->thread.fpu.hard.fp_regs[m];
514 fpscr = tsk->thread.fpu.hard.fpscr;
515 prec = fpscr & (1 << 19);
517 if ((fpscr & FPSCR_FPU_ERROR)
518 && (prec && ((hx & 0x7fffffff) < 0x00100000
519 || (hy & 0x7fffffff) < 0x00100000))) {
520 long long llx, lly;
522 /* FPU error because of denormal */
523 llx = ((long long) hx << 32)
524 | tsk->thread.fpu.hard.fp_regs[n+1];
525 lly = ((long long) hy << 32)
526 | tsk->thread.fpu.hard.fp_regs[m+1];
527 if ((hx & 0x7fffffff) >= 0x00100000)
528 llx = denormal_muld(lly, llx);
529 else
530 llx = denormal_muld(llx, lly);
531 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
532 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
533 } else if ((fpscr & FPSCR_FPU_ERROR)
534 && (!prec && ((hx & 0x7fffffff) < 0x00800000
535 || (hy & 0x7fffffff) < 0x00800000))) {
536 /* FPU error because of denormal */
537 if ((hx & 0x7fffffff) >= 0x00800000)
538 hx = denormal_mulf(hy, hx);
539 else
540 hx = denormal_mulf(hx, hy);
541 tsk->thread.fpu.hard.fp_regs[n] = hx;
542 } else
543 return 0;
545 regs->pc = nextpc;
546 return 1;
547 } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
548 struct task_struct *tsk = current;
549 int fpscr;
550 int n, m, prec;
551 unsigned int hx, hy;
553 n = (finsn >> 8) & 0xf;
554 m = (finsn >> 4) & 0xf;
555 hx = tsk->thread.fpu.hard.fp_regs[n];
556 hy = tsk->thread.fpu.hard.fp_regs[m];
557 fpscr = tsk->thread.fpu.hard.fpscr;
558 prec = fpscr & (1 << 19);
560 if ((fpscr & FPSCR_FPU_ERROR)
561 && (prec && ((hx & 0x7fffffff) < 0x00100000
562 || (hy & 0x7fffffff) < 0x00100000))) {
563 long long llx, lly;
565 /* FPU error because of denormal */
566 llx = ((long long) hx << 32)
567 | tsk->thread.fpu.hard.fp_regs[n+1];
568 lly = ((long long) hy << 32)
569 | tsk->thread.fpu.hard.fp_regs[m+1];
570 if ((finsn & 0xf00f) == 0xf000)
571 llx = denormal_addd(llx, lly);
572 else
573 llx = denormal_addd(llx, lly ^ (1LL << 63));
574 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
575 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
576 } else if ((fpscr & FPSCR_FPU_ERROR)
577 && (!prec && ((hx & 0x7fffffff) < 0x00800000
578 || (hy & 0x7fffffff) < 0x00800000))) {
579 /* FPU error because of denormal */
580 if ((finsn & 0xf00f) == 0xf000)
581 hx = denormal_addf(hx, hy);
582 else
583 hx = denormal_addf(hx, hy ^ 0x80000000);
584 tsk->thread.fpu.hard.fp_regs[n] = hx;
585 } else
586 return 0;
588 regs->pc = nextpc;
589 return 1;
592 return 0;
595 BUILD_TRAP_HANDLER(fpu_error)
597 struct task_struct *tsk = current;
598 TRAP_HANDLER_DECL;
600 save_fpu(tsk, regs);
601 if (ieee_fpe_handler(regs)) {
602 tsk->thread.fpu.hard.fpscr &=
603 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
604 grab_fpu(regs);
605 restore_fpu(tsk);
606 set_tsk_thread_flag(tsk, TIF_USEDFPU);
607 return;
610 force_sig(SIGFPE, tsk);
613 BUILD_TRAP_HANDLER(fpu_state_restore)
615 struct task_struct *tsk = current;
616 TRAP_HANDLER_DECL;
618 grab_fpu(regs);
619 if (!user_mode(regs)) {
620 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
621 return;
624 if (used_math()) {
625 /* Using the FPU again. */
626 restore_fpu(tsk);
627 } else {
628 /* First time FPU user. */
629 fpu_init();
630 set_used_math();
632 set_tsk_thread_flag(tsk, TIF_USEDFPU);