OMAP: DSS2: fix irq-stats compilation
[linux/fpc-iii.git] / arch / sh / kernel / cpu / sh2a / fpu.c
blobd395ce5740e7dff1cd66c3f786e592a0ba9c97b2
/*
 * Save/restore floating point context for signal handlers.
 *
 * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * FIXME! These routines can be optimized in big endian case.
 */
12 #include <linux/sched.h>
13 #include <linux/signal.h>
14 #include <asm/processor.h>
15 #include <asm/io.h>
16 #include <asm/fpu.h>
/*
 * The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 *
 * In this file the value is only passed as an input operand to the inline
 * assembly blocks; none of the visible templates reference that operand.
 */
#define FPSCR_RCHG 0x00000000
27 * Save FPU registers onto task structure.
29 void
30 save_fpu(struct task_struct *tsk)
32 unsigned long dummy;
34 enable_fpu();
35 asm volatile("sts.l fpul, @-%0\n\t"
36 "sts.l fpscr, @-%0\n\t"
37 "fmov.s fr15, @-%0\n\t"
38 "fmov.s fr14, @-%0\n\t"
39 "fmov.s fr13, @-%0\n\t"
40 "fmov.s fr12, @-%0\n\t"
41 "fmov.s fr11, @-%0\n\t"
42 "fmov.s fr10, @-%0\n\t"
43 "fmov.s fr9, @-%0\n\t"
44 "fmov.s fr8, @-%0\n\t"
45 "fmov.s fr7, @-%0\n\t"
46 "fmov.s fr6, @-%0\n\t"
47 "fmov.s fr5, @-%0\n\t"
48 "fmov.s fr4, @-%0\n\t"
49 "fmov.s fr3, @-%0\n\t"
50 "fmov.s fr2, @-%0\n\t"
51 "fmov.s fr1, @-%0\n\t"
52 "fmov.s fr0, @-%0\n\t"
53 "lds %3, fpscr\n\t"
54 : "=r" (dummy)
55 : "0" ((char *)(&tsk->thread.fpu.hard.status)),
56 "r" (FPSCR_RCHG),
57 "r" (FPSCR_INIT)
58 : "memory");
60 disable_fpu();
63 static void
64 restore_fpu(struct task_struct *tsk)
66 unsigned long dummy;
68 enable_fpu();
69 asm volatile("fmov.s @%0+, fr0\n\t"
70 "fmov.s @%0+, fr1\n\t"
71 "fmov.s @%0+, fr2\n\t"
72 "fmov.s @%0+, fr3\n\t"
73 "fmov.s @%0+, fr4\n\t"
74 "fmov.s @%0+, fr5\n\t"
75 "fmov.s @%0+, fr6\n\t"
76 "fmov.s @%0+, fr7\n\t"
77 "fmov.s @%0+, fr8\n\t"
78 "fmov.s @%0+, fr9\n\t"
79 "fmov.s @%0+, fr10\n\t"
80 "fmov.s @%0+, fr11\n\t"
81 "fmov.s @%0+, fr12\n\t"
82 "fmov.s @%0+, fr13\n\t"
83 "fmov.s @%0+, fr14\n\t"
84 "fmov.s @%0+, fr15\n\t"
85 "lds.l @%0+, fpscr\n\t"
86 "lds.l @%0+, fpul\n\t"
87 : "=r" (dummy)
88 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
89 : "memory");
90 disable_fpu();
94 * Load the FPU with signalling NANS. This bit pattern we're using
95 * has the property that no matter wether considered as single or as
96 * double precission represents signaling NANS.
99 static void
100 fpu_init(void)
102 enable_fpu();
103 asm volatile("lds %0, fpul\n\t"
104 "fsts fpul, fr0\n\t"
105 "fsts fpul, fr1\n\t"
106 "fsts fpul, fr2\n\t"
107 "fsts fpul, fr3\n\t"
108 "fsts fpul, fr4\n\t"
109 "fsts fpul, fr5\n\t"
110 "fsts fpul, fr6\n\t"
111 "fsts fpul, fr7\n\t"
112 "fsts fpul, fr8\n\t"
113 "fsts fpul, fr9\n\t"
114 "fsts fpul, fr10\n\t"
115 "fsts fpul, fr11\n\t"
116 "fsts fpul, fr12\n\t"
117 "fsts fpul, fr13\n\t"
118 "fsts fpul, fr14\n\t"
119 "fsts fpul, fr15\n\t"
120 "lds %2, fpscr\n\t"
121 : /* no output */
122 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
123 disable_fpu();
/*
 * Emulate arithmetic ops on denormalized number for some FPU insns.
 */

/*
 * denormalized float * float
 *
 * @hx: raw bits of the denormalized single-precision operand
 * @hy: raw bits of the other (normalized) operand
 *
 * Returns the raw bits of the truncated product.  A signed zero is returned
 * when @hx is zero or when @hy is itself below the smallest normal number.
 * Callers are expected to pass the denormal operand first; the fraction of
 * @hx is used without an implicit leading one.
 */
static int denormal_mulf(int hx, int hy)
{
	unsigned int ix, iy;
	unsigned long long m, n;
	int exp, w;

	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;
	if (iy < 0x00800000 || ix == 0)
		return ((hx ^ hy) & 0x80000000);

	exp = (iy & 0x7f800000) >> 23;
	ix &= 0x007fffff;
	iy = (iy & 0x007fffff) | 0x00800000;
	m = (unsigned long long)ix * iy;

	/* w = index of the most significant set bit of the product */
	n = m;
	w = -1;
	while (n) {
		n >>= 1;
		w++;
	}

	/* FIXME: use guard bits */
	exp += w - 126 - 46;
	if (exp > 0)
		/* normalized result: drop the leading one, insert exponent */
		ix = ((int) (m >> (w - 23)) & 0x007fffff) | (exp << 23);
	else if (exp + 22 >= 0)
		/* denormalized result: keep the leading one in the fraction */
		ix = (int) (m >> (w - 22 - exp)) & 0x007fffff;
	else
		/* underflow to zero */
		ix = 0;

	ix |= (hx ^ hy) & 0x80000000;
	return ix;
}
/* denormalized double * double */

/*
 * Full 64x64 -> 128 bit unsigned multiplication.
 *
 * @x, @y:  factors
 * @highp:  receives the upper 64 bits of the product
 * @lowp:   receives the lower 64 bits of the product
 *
 * The product is assembled from the four 32x32 partial products; carries
 * out of the low word are detected by checking for wrap-around after each
 * addition.
 */
static void mult64(unsigned long long x, unsigned long long y,
		   unsigned long long *highp, unsigned long long *lowp)
{
	unsigned long long sub0, sub1, sub2, sub3;
	unsigned long long high, low;

	sub0 = (x >> 32) * (unsigned long) (y >> 32);
	sub1 = (x & 0xffffffffLL) * (unsigned long) (y >> 32);
	sub2 = (x >> 32) * (unsigned long) (y & 0xffffffffLL);
	sub3 = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL);

	low = sub3;
	high = 0LL;

	/* add the low halves of the cross terms, tracking carries */
	sub3 += (sub1 << 32);
	if (low > sub3)
		high++;
	low = sub3;
	sub3 += (sub2 << 32);
	if (low > sub3)
		high++;
	low = sub3;

	/* upper halves of the cross terms and the high partial product */
	high += (sub1 >> 32) + (sub2 >> 32);
	high += sub0;

	*lowp = low;
	*highp = high;
}
/*
 * Return the low 64 bits of the 128-bit value (mh:ml) shifted right by n.
 *
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count, expected to be non-negative
 *
 * n == 0 and n >= 128 are handled explicitly: the generic expressions would
 * otherwise shift a 64-bit value by 64 or more, which is undefined in C.
 */
static inline long long rshift64(unsigned long long mh,
				 unsigned long long ml, int n)
{
	if (n == 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}
/*
 * denormalized double * double
 *
 * @hx: raw bits of the denormalized double-precision operand
 * @hy: raw bits of the other (normalized) operand
 *
 * Returns the raw bits of the truncated product.  A signed zero is returned
 * when @hx is zero or when @hy is itself below the smallest normal number.
 * Callers are expected to pass the denormal operand first; the fraction of
 * @hx is used without an implicit leading one.
 */
static long long denormal_muld(long long hx, long long hy)
{
	unsigned long long ix, iy;
	unsigned long long mh, ml, nh, nl;
	int exp, w;

	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;
	if (iy < 0x0010000000000000LL || ix == 0)
		return ((hx ^ hy) & 0x8000000000000000LL);

	exp = (iy & 0x7ff0000000000000LL) >> 52;
	ix &= 0x000fffffffffffffLL;
	iy = (iy & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(ix, iy, &mh, &ml);

	/* w = index of the most significant set bit of the 128-bit product */
	nh = mh;
	nl = ml;
	w = -1;
	if (nh) {
		while (nh) {
			nh >>= 1;
			w++;
		}
		w += 64;
	} else {
		while (nl) {
			nl >>= 1;
			w++;
		}
	}

	/* FIXME: use guard bits */
	exp += w - 1022 - 52 * 2;
	if (exp > 0)
		/* normalized result: drop the leading one, insert exponent */
		ix = (rshift64(mh, ml, w - 52) & 0x000fffffffffffffLL)
			| ((long long)exp << 52);
	else if (exp + 51 >= 0)
		/* denormalized result: keep the leading one in the fraction */
		ix = rshift64(mh, ml, w - 51 - exp) & 0x000fffffffffffffLL;
	else
		/* underflow to zero */
		ix = 0;

	ix |= (hx ^ hy) & 0x8000000000000000LL;
	return ix;
}
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * @ix: raw bits (sign cleared) of the minuend
 * @iy: raw bits (sign cleared) of the denormal subtrahend
 *
 * Returns the raw bits of the difference.  When @ix is itself denormal the
 * plain integer difference is returned, which is negative if iy > ix.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int frac;
	int exp;

	if (ix < 0x00800000)
		return ix - iy;

	exp = (ix & 0x7f800000) >> 23;
	/* iy is too small to affect ix once aligned */
	if (exp - 1 > 31)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	frac = (ix & 0x007fffff) | 0x00800000;
	frac -= iy;
	/* renormalize; stop early if the result becomes denormal */
	while (frac < 0x00800000) {
		if (--exp == 0)
			return frac;
		frac <<= 1;
	}

	return (exp << 23) | (frac & 0x007fffff);
}
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * @ix: raw bits (sign cleared) of the first addend
 * @iy: raw bits (sign cleared) of the denormal addend
 *
 * Returns the raw bits of the sum.  When @ix is itself denormal the plain
 * integer sum is returned.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int frac;
	int exp;

	if (ix < 0x00800000)
		return ix + iy;

	exp = (ix & 0x7f800000) >> 23;
	/* iy is too small to affect ix once aligned */
	if (exp - 1 > 31)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	frac = (ix & 0x007fffff) | 0x00800000;
	frac += iy;
	/* carry out of the fraction: renormalize by one bit */
	if (frac >= 0x01000000) {
		frac >>= 1;
		++exp;
	}

	return (exp << 23) | (frac & 0x007fffff);
}
/*
 * Single-precision addition where at least one operand is denormal.
 *
 * @hx, @hy: raw bits of the two operands (sign included)
 *
 * Splits both operands into sign and magnitude, then dispatches to
 * denormal_subf1() when the signs differ or denormal_addf1() when they
 * agree, always passing the denormal operand as the second argument.
 * Returns the raw bits of the result.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix = hx & 0x7fffffff;
	unsigned int iy = hy & 0x7fffffff;
	int sign = hx & 0x80000000;

	if ((hx ^ hy) & 0x80000000) {
		/* opposite signs: subtract magnitudes */
		if (iy < 0x00800000) {
			ix = denormal_subf1(ix, iy);
			if ((int) ix < 0) {
				/* |hy| > |hx|: result takes hy's sign */
				ix = -ix;
				sign ^= 0x80000000;
			}
		} else {
			/* hx is the denormal one: compute |hy| - |hx| */
			ix = denormal_subf1(iy, ix);
			sign ^= 0x80000000;
		}
	} else {
		/* same sign: add magnitudes */
		if (iy < 0x00800000)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
	}

	return sign | ix;
}
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * @ix: raw bits (sign cleared) of the minuend
 * @iy: raw bits (sign cleared) of the denormal subtrahend
 *
 * Returns the raw bits of the difference.  When @ix is itself denormal the
 * plain integer difference is returned, which is negative if iy > ix.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long frac;
	int exp;

	if (ix < 0x0010000000000000LL)
		return ix - iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	/* iy is too small to affect ix once aligned */
	if (exp - 1 > 63)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	frac -= iy;
	/* renormalize; stop early if the result becomes denormal */
	while (frac < 0x0010000000000000LL) {
		if (--exp == 0)
			return frac;
		frac <<= 1;
	}

	return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL);
}

/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * @ix: raw bits (sign cleared) of the first addend
 * @iy: raw bits (sign cleared) of the denormal addend
 *
 * Returns the raw bits of the sum.  When @ix is itself denormal the plain
 * integer sum is returned.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long frac;
	long long exp;

	if (ix < 0x0010000000000000LL)
		return ix + iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	/* iy is too small to affect ix once aligned */
	if (exp - 1 > 63)
		return ix;
	iy >>= exp - 1;
	if (iy == 0)
		return ix;

	frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	frac += iy;
	/* carry out of the fraction: renormalize by one bit */
	if (frac >= 0x0020000000000000LL) {
		frac >>= 1;
		++exp;
	}

	return (exp << 52) | (frac & 0x000fffffffffffffLL);
}

/*
 * Double-precision addition where at least one operand is denormal.
 *
 * @hx, @hy: raw bits of the two operands (sign included)
 *
 * Splits both operands into sign and magnitude, then dispatches to
 * denormal_subd1() when the signs differ or denormal_addd1() when they
 * agree, always passing the denormal operand as the second argument.
 * Returns the raw bits of the result.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix = hx & 0x7fffffffffffffffLL;
	unsigned long long iy = hy & 0x7fffffffffffffffLL;
	long long sign = hx & 0x8000000000000000LL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* opposite signs: subtract magnitudes */
		if (iy < 0x0010000000000000LL) {
			ix = denormal_subd1(ix, iy);
			/*
			 * The sign test must look at all 64 bits; testing
			 * only the low 32 bits misclassifies results whose
			 * bit 31 differs from bit 63.
			 */
			if ((long long) ix < 0) {
				/* |hy| > |hx|: result takes hy's sign */
				ix = -ix;
				sign ^= 0x8000000000000000LL;
			}
		} else {
			/* hx is the denormal one: compute |hy| - |hx| */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* same sign: add magnitudes */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
407 * denormal_to_double - Given denormalized float number,
408 * store double float
410 * @fpu: Pointer to sh_fpu_hard structure
411 * @n: Index to FP register
413 static void
414 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
416 unsigned long du, dl;
417 unsigned long x = fpu->fpul;
418 int exp = 1023 - 126;
420 if (x != 0 && (x & 0x7f800000) == 0) {
421 du = (x & 0x80000000);
422 while ((x & 0x00800000) == 0) {
423 x <<= 1;
424 exp--;
426 x &= 0x007fffff;
427 du |= (exp << 20) | (x >> 3);
428 dl = x << 29;
430 fpu->fp_regs[n] = du;
431 fpu->fp_regs[n+1] = dl;
436 * ieee_fpe_handler - Handle denormalized number exception
438 * @regs: Pointer to register structure
440 * Returns 1 when it's handled (should not cause exception).
442 static int
443 ieee_fpe_handler (struct pt_regs *regs)
445 unsigned short insn = *(unsigned short *) regs->pc;
446 unsigned short finsn;
447 unsigned long nextpc;
448 int nib[4] = {
449 (insn >> 12) & 0xf,
450 (insn >> 8) & 0xf,
451 (insn >> 4) & 0xf,
452 insn & 0xf};
454 if (nib[0] == 0xb ||
455 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
456 regs->pr = regs->pc + 4;
457 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
458 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
459 finsn = *(unsigned short *) (regs->pc + 2);
460 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
461 if (regs->sr & 1)
462 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
463 else
464 nextpc = regs->pc + 4;
465 finsn = *(unsigned short *) (regs->pc + 2);
466 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
467 if (regs->sr & 1)
468 nextpc = regs->pc + 4;
469 else
470 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
471 finsn = *(unsigned short *) (regs->pc + 2);
472 } else if (nib[0] == 0x4 && nib[3] == 0xb &&
473 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
474 nextpc = regs->regs[nib[1]];
475 finsn = *(unsigned short *) (regs->pc + 2);
476 } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
477 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
478 nextpc = regs->pc + 4 + regs->regs[nib[1]];
479 finsn = *(unsigned short *) (regs->pc + 2);
480 } else if (insn == 0x000b) { /* rts */
481 nextpc = regs->pr;
482 finsn = *(unsigned short *) (regs->pc + 2);
483 } else {
484 nextpc = regs->pc + 2;
485 finsn = insn;
488 #define FPSCR_FPU_ERROR (1 << 17)
490 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
491 struct task_struct *tsk = current;
493 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
494 /* FPU error */
495 denormal_to_double (&tsk->thread.fpu.hard,
496 (finsn >> 8) & 0xf);
497 } else
498 return 0;
500 regs->pc = nextpc;
501 return 1;
502 } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
503 struct task_struct *tsk = current;
504 int fpscr;
505 int n, m, prec;
506 unsigned int hx, hy;
508 n = (finsn >> 8) & 0xf;
509 m = (finsn >> 4) & 0xf;
510 hx = tsk->thread.fpu.hard.fp_regs[n];
511 hy = tsk->thread.fpu.hard.fp_regs[m];
512 fpscr = tsk->thread.fpu.hard.fpscr;
513 prec = fpscr & (1 << 19);
515 if ((fpscr & FPSCR_FPU_ERROR)
516 && (prec && ((hx & 0x7fffffff) < 0x00100000
517 || (hy & 0x7fffffff) < 0x00100000))) {
518 long long llx, lly;
520 /* FPU error because of denormal */
521 llx = ((long long) hx << 32)
522 | tsk->thread.fpu.hard.fp_regs[n+1];
523 lly = ((long long) hy << 32)
524 | tsk->thread.fpu.hard.fp_regs[m+1];
525 if ((hx & 0x7fffffff) >= 0x00100000)
526 llx = denormal_muld(lly, llx);
527 else
528 llx = denormal_muld(llx, lly);
529 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
530 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
531 } else if ((fpscr & FPSCR_FPU_ERROR)
532 && (!prec && ((hx & 0x7fffffff) < 0x00800000
533 || (hy & 0x7fffffff) < 0x00800000))) {
534 /* FPU error because of denormal */
535 if ((hx & 0x7fffffff) >= 0x00800000)
536 hx = denormal_mulf(hy, hx);
537 else
538 hx = denormal_mulf(hx, hy);
539 tsk->thread.fpu.hard.fp_regs[n] = hx;
540 } else
541 return 0;
543 regs->pc = nextpc;
544 return 1;
545 } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
546 struct task_struct *tsk = current;
547 int fpscr;
548 int n, m, prec;
549 unsigned int hx, hy;
551 n = (finsn >> 8) & 0xf;
552 m = (finsn >> 4) & 0xf;
553 hx = tsk->thread.fpu.hard.fp_regs[n];
554 hy = tsk->thread.fpu.hard.fp_regs[m];
555 fpscr = tsk->thread.fpu.hard.fpscr;
556 prec = fpscr & (1 << 19);
558 if ((fpscr & FPSCR_FPU_ERROR)
559 && (prec && ((hx & 0x7fffffff) < 0x00100000
560 || (hy & 0x7fffffff) < 0x00100000))) {
561 long long llx, lly;
563 /* FPU error because of denormal */
564 llx = ((long long) hx << 32)
565 | tsk->thread.fpu.hard.fp_regs[n+1];
566 lly = ((long long) hy << 32)
567 | tsk->thread.fpu.hard.fp_regs[m+1];
568 if ((finsn & 0xf00f) == 0xf000)
569 llx = denormal_addd(llx, lly);
570 else
571 llx = denormal_addd(llx, lly ^ (1LL << 63));
572 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
573 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
574 } else if ((fpscr & FPSCR_FPU_ERROR)
575 && (!prec && ((hx & 0x7fffffff) < 0x00800000
576 || (hy & 0x7fffffff) < 0x00800000))) {
577 /* FPU error because of denormal */
578 if ((finsn & 0xf00f) == 0xf000)
579 hx = denormal_addf(hx, hy);
580 else
581 hx = denormal_addf(hx, hy ^ 0x80000000);
582 tsk->thread.fpu.hard.fp_regs[n] = hx;
583 } else
584 return 0;
586 regs->pc = nextpc;
587 return 1;
590 return 0;
593 BUILD_TRAP_HANDLER(fpu_error)
595 struct task_struct *tsk = current;
596 TRAP_HANDLER_DECL;
598 __unlazy_fpu(tsk, regs);
599 if (ieee_fpe_handler(regs)) {
600 tsk->thread.fpu.hard.fpscr &=
601 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
602 grab_fpu(regs);
603 restore_fpu(tsk);
604 task_thread_info(tsk)->status |= TS_USEDFPU;
605 return;
608 force_sig(SIGFPE, tsk);
611 void fpu_state_restore(struct pt_regs *regs)
613 struct task_struct *tsk = current;
615 grab_fpu(regs);
616 if (unlikely(!user_mode(regs))) {
617 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
618 BUG();
619 return;
622 if (likely(used_math())) {
623 /* Using the FPU again. */
624 restore_fpu(tsk);
625 } else {
626 /* First time FPU user. */
627 fpu_init();
628 set_used_math();
630 task_thread_info(tsk)->status |= TS_USEDFPU;
631 tsk->fpu_counter++;
634 BUILD_TRAP_HANDLER(fpu_state_restore)
636 TRAP_HANDLER_DECL;
638 fpu_state_restore(regs);