arch/sh/kernel/cpu/sh4/fpu.c

   1 /*
   2  * Save/restore floating point context for signal handlers.
   3  *
   4  * This file is subject to the terms and conditions of the GNU General Public
   5  * License.  See the file "COPYING" in the main directory of this archive
   6  * for more details.
   7  *
   8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
   9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
  10  *
  11  * FIXME! These routines have not been tested for big endian case.
  12  */
  13 #include <linux/sched.h>
  14 #include <linux/signal.h>
  15 #include <linux/io.h>
  16 #include <cpu/fpu.h>
  17 #include <asm/processor.h>
  18 #include <asm/system.h>
  19 #include <asm/fpu.h>
  20
/* The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 */

/*
 * FPSCR value that save_fpu(), restore_fpu() and fpu_init() load before
 * executing frchg.  Every bit is zero, so PR is clear as required above.
 */
#define FPSCR_RCHG 0x00000000
/*
 * Software floating-point routines defined outside this file.
 * ieee_fpe_handler() calls them with raw register bit patterns to emulate
 * an instruction whose operand is a denormalized number.
 */
extern unsigned long long float64_div(unsigned long long a,
                                      unsigned long long b);
extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
extern unsigned long long float64_mul(unsigned long long a,
                                      unsigned long long b);
extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
extern unsigned long long float64_add(unsigned long long a,
                                      unsigned long long b);
extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
extern unsigned long long float64_sub(unsigned long long a,
                                      unsigned long long b);
extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
extern unsigned long int float64_to_float32(unsigned long long a);
/*
 * Exception flags collected through float_raise().  The fpu_error trap
 * handler zeroes this before emulating an instruction and merges the
 * result into the task's saved FPSCR afterwards.
 */
static unsigned int fpu_exception_flags;
  41
/*
 * Save FPU registers onto task structure.
 *
 * @tsk: task whose thread.fpu.hard area receives the register contents.
 *
 * The asm starts with a pointer to tsk->thread.fpu.hard.status and stores
 * everything with pre-decrement addressing, i.e. downwards from that
 * address: FPUL, FPSCR, sixteen registers of one bank and then sixteen of
 * the other.
 * NOTE(review): this presumably relies on the save area being laid out as
 * fp_regs[16], second bank[16], fpscr, fpul, status -- confirm against the
 * structure definition.
 *
 * FPSCR is left holding FPSCR_INIT when the function returns.
 */
void save_fpu(struct task_struct *tsk)
{
        /* Receives the final value of the moving store pointer; unused. */
        unsigned long dummy;

        enable_fpu();
        asm volatile ("sts.l    fpul, @-%0\n\t"
                      "sts.l    fpscr, @-%0\n\t"
                      /* PR must be clear before frchg (see FPSCR_RCHG) */
                      "lds      %2, fpscr\n\t"
                      /* switch banks, so the other bank is stored first */
                      "frchg\n\t"
                      "fmov.s   fr15, @-%0\n\t"
                      "fmov.s   fr14, @-%0\n\t"
                      "fmov.s   fr13, @-%0\n\t"
                      "fmov.s   fr12, @-%0\n\t"
                      "fmov.s   fr11, @-%0\n\t"
                      "fmov.s   fr10, @-%0\n\t"
                      "fmov.s   fr9, @-%0\n\t"
                      "fmov.s   fr8, @-%0\n\t"
                      "fmov.s   fr7, @-%0\n\t"
                      "fmov.s   fr6, @-%0\n\t"
                      "fmov.s   fr5, @-%0\n\t"
                      "fmov.s   fr4, @-%0\n\t"
                      "fmov.s   fr3, @-%0\n\t"
                      "fmov.s   fr2, @-%0\n\t"
                      "fmov.s   fr1, @-%0\n\t"
                      "fmov.s   fr0, @-%0\n\t"
                      /* switch back and store the original bank */
                      "frchg\n\t"
                      "fmov.s   fr15, @-%0\n\t"
                      "fmov.s   fr14, @-%0\n\t"
                      "fmov.s   fr13, @-%0\n\t"
                      "fmov.s   fr12, @-%0\n\t"
                      "fmov.s   fr11, @-%0\n\t"
                      "fmov.s   fr10, @-%0\n\t"
                      "fmov.s   fr9, @-%0\n\t"
                      "fmov.s   fr8, @-%0\n\t"
                      "fmov.s   fr7, @-%0\n\t"
                      "fmov.s   fr6, @-%0\n\t"
                      "fmov.s   fr5, @-%0\n\t"
                      "fmov.s   fr4, @-%0\n\t"
                      "fmov.s   fr3, @-%0\n\t"
                      "fmov.s   fr2, @-%0\n\t"
                      "fmov.s   fr1, @-%0\n\t"
                      "fmov.s   fr0, @-%0\n\t"
                      /* leave the FPU with FPSCR = FPSCR_INIT */
                      "lds      %3, fpscr\n\t":"=r" (dummy)
                      :"0"((char *)(&tsk->thread.fpu.hard.status)),
                      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
                      :"memory");

        disable_fpu();
}
  94
/*
 * Reload the FPU registers from the task structure.
 *
 * @tsk: task whose thread.fpu area holds the saved register contents.
 *
 * The asm walks upwards from the start of tsk->thread.fpu with
 * post-increment loads: sixteen registers of the current bank, sixteen of
 * the other bank, then FPSCR and finally FPUL.  This is the reverse of the
 * order in which save_fpu() stores them.
 */
static void restore_fpu(struct task_struct *tsk)
{
        /* Receives the final value of the moving load pointer; unused. */
        unsigned long dummy;

        enable_fpu();
                      /* PR must be clear before frchg (see FPSCR_RCHG) */
        asm volatile ("lds      %2, fpscr\n\t"
                      "fmov.s   @%0+, fr0\n\t"
                      "fmov.s   @%0+, fr1\n\t"
                      "fmov.s   @%0+, fr2\n\t"
                      "fmov.s   @%0+, fr3\n\t"
                      "fmov.s   @%0+, fr4\n\t"
                      "fmov.s   @%0+, fr5\n\t"
                      "fmov.s   @%0+, fr6\n\t"
                      "fmov.s   @%0+, fr7\n\t"
                      "fmov.s   @%0+, fr8\n\t"
                      "fmov.s   @%0+, fr9\n\t"
                      "fmov.s   @%0+, fr10\n\t"
                      "fmov.s   @%0+, fr11\n\t"
                      "fmov.s   @%0+, fr12\n\t"
                      "fmov.s   @%0+, fr13\n\t"
                      "fmov.s   @%0+, fr14\n\t"
                      "fmov.s   @%0+, fr15\n\t"
                      /* switch banks and fill the other sixteen registers */
                      "frchg\n\t"
                      "fmov.s   @%0+, fr0\n\t"
                      "fmov.s   @%0+, fr1\n\t"
                      "fmov.s   @%0+, fr2\n\t"
                      "fmov.s   @%0+, fr3\n\t"
                      "fmov.s   @%0+, fr4\n\t"
                      "fmov.s   @%0+, fr5\n\t"
                      "fmov.s   @%0+, fr6\n\t"
                      "fmov.s   @%0+, fr7\n\t"
                      "fmov.s   @%0+, fr8\n\t"
                      "fmov.s   @%0+, fr9\n\t"
                      "fmov.s   @%0+, fr10\n\t"
                      "fmov.s   @%0+, fr11\n\t"
                      "fmov.s   @%0+, fr12\n\t"
                      "fmov.s   @%0+, fr13\n\t"
                      "fmov.s   @%0+, fr14\n\t"
                      "fmov.s   @%0+, fr15\n\t"
                      /* switch back before the saved FPSCR is reinstated */
                      "frchg\n\t"
                      "lds.l    @%0+, fpscr\n\t"
                      "lds.l    @%0+, fpul\n\t"
                      :"=r" (dummy)
                      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
                      :"memory");
        disable_fpu();
}
 142
/*
 * Put the FPU register file into a known state for a first-time user.
 *
 * FPUL is loaded with the first asm operand and then copied into fr0-fr15
 * of both register banks; finally FPSCR is set to FPSCR_INIT.
 *
 * NOTE(review): this comment used to say that the registers are loaded
 * with signalling NaNs, using a bit pattern that is a signalling NaN
 * whether it is read as single or as double precision.  The operand
 * actually passed below is 0, so every register ends up holding the
 * all-zero pattern.  Confirm which of the two is intended.
 */

static void fpu_init(void)
{
        enable_fpu();
        asm volatile (  "lds    %0, fpul\n\t"
                        /* PR must be clear before frchg (see FPSCR_RCHG) */
                        "lds    %1, fpscr\n\t"
                        "fsts   fpul, fr0\n\t"
                        "fsts   fpul, fr1\n\t"
                        "fsts   fpul, fr2\n\t"
                        "fsts   fpul, fr3\n\t"
                        "fsts   fpul, fr4\n\t"
                        "fsts   fpul, fr5\n\t"
                        "fsts   fpul, fr6\n\t"
                        "fsts   fpul, fr7\n\t"
                        "fsts   fpul, fr8\n\t"
                        "fsts   fpul, fr9\n\t"
                        "fsts   fpul, fr10\n\t"
                        "fsts   fpul, fr11\n\t"
                        "fsts   fpul, fr12\n\t"
                        "fsts   fpul, fr13\n\t"
                        "fsts   fpul, fr14\n\t"
                        "fsts   fpul, fr15\n\t"
                        /* same again for the other bank */
                        "frchg\n\t"
                        "fsts   fpul, fr0\n\t"
                        "fsts   fpul, fr1\n\t"
                        "fsts   fpul, fr2\n\t"
                        "fsts   fpul, fr3\n\t"
                        "fsts   fpul, fr4\n\t"
                        "fsts   fpul, fr5\n\t"
                        "fsts   fpul, fr6\n\t"
                        "fsts   fpul, fr7\n\t"
                        "fsts   fpul, fr8\n\t"
                        "fsts   fpul, fr9\n\t"
                        "fsts   fpul, fr10\n\t"
                        "fsts   fpul, fr11\n\t"
                        "fsts   fpul, fr12\n\t"
                        "fsts   fpul, fr13\n\t"
                        "fsts   fpul, fr14\n\t"
                        "fsts   fpul, fr15\n\t"
                        /* back to the first bank, then the initial FPSCR */
                        "frchg\n\t"
                        "lds    %2, fpscr\n\t"
                        :       /* no output */
                        :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
        disable_fpu();
}
 193
 194 /**
 195  *      denormal_to_double - Given denormalized float number,
 196  *                           store double float
 197  *
 198  *      @fpu: Pointer to sh_fpu_hard structure
 199  *      @n: Index to FP register
 200  */
 201 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
 202 {
 203         unsigned long du, dl;
 204         unsigned long x = fpu->fpul;
 205         int exp = 1023 - 126;
 206
 207         if (x != 0 && (x & 0x7f800000) == 0) {
 208                 du = (x & 0x80000000);
 209                 while ((x & 0x00800000) == 0) {
 210                         x <<= 1;
 211                         exp--;
 212                 }
 213                 x &= 0x007fffff;
 214                 du |= (exp << 20) | (x >> 3);
 215                 dl = x << 29;
 216
 217                 fpu->fp_regs[n] = du;
 218                 fpu->fp_regs[n + 1] = dl;
 219         }
 220 }
 221
 222 /**
 223  *      ieee_fpe_handler - Handle denormalized number exception
 224  *
 225  *      @regs: Pointer to register structure
 226  *
 227  *      Returns 1 when it's handled (should not cause exception).
 228  */
 229 static int ieee_fpe_handler(struct pt_regs *regs)
 230 {
 231         unsigned short insn = *(unsigned short *)regs->pc;
 232         unsigned short finsn;
 233         unsigned long nextpc;
 234         int nib[4] = {
 235                 (insn >> 12) & 0xf,
 236                 (insn >> 8) & 0xf,
 237                 (insn >> 4) & 0xf,
 238                 insn & 0xf
 239         };
 240
 241         if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
 242                 regs->pr = regs->pc + 4;  /* bsr & jsr */
 243
 244         if (nib[0] == 0xa || nib[0] == 0xb) {
 245                 /* bra & bsr */
 246                 nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
 247                 finsn = *(unsigned short *)(regs->pc + 2);
 248         } else if (nib[0] == 0x8 && nib[1] == 0xd) {
 249                 /* bt/s */
 250                 if (regs->sr & 1)
 251                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
 252                 else
 253                         nextpc = regs->pc + 4;
 254                 finsn = *(unsigned short *)(regs->pc + 2);
 255         } else if (nib[0] == 0x8 && nib[1] == 0xf) {
 256                 /* bf/s */
 257                 if (regs->sr & 1)
 258                         nextpc = regs->pc + 4;
 259                 else
 260                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
 261                 finsn = *(unsigned short *)(regs->pc + 2);
 262         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
 263                    (nib[2] == 0x0 || nib[2] == 0x2)) {
 264                 /* jmp & jsr */
 265                 nextpc = regs->regs[nib[1]];
 266                 finsn = *(unsigned short *)(regs->pc + 2);
 267         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
 268                    (nib[2] == 0x0 || nib[2] == 0x2)) {
 269                 /* braf & bsrf */
 270                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
 271                 finsn = *(unsigned short *)(regs->pc + 2);
 272         } else if (insn == 0x000b) {
 273                 /* rts */
 274                 nextpc = regs->pr;
 275                 finsn = *(unsigned short *)(regs->pc + 2);
 276         } else {
 277                 nextpc = regs->pc + instruction_size(insn);
 278                 finsn = insn;
 279         }
 280
 281         if ((finsn & 0xf1ff) == 0xf0ad) {
 282                 /* fcnvsd */
 283                 struct task_struct *tsk = current;
 284
 285                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
 286                         /* FPU error */
 287                         denormal_to_double(&tsk->thread.fpu.hard,
 288                                            (finsn >> 8) & 0xf);
 289                 else
 290                         return 0;
 291
 292                 regs->pc = nextpc;
 293                 return 1;
 294         } else if ((finsn & 0xf00f) == 0xf002) {
 295                 /* fmul */
 296                 struct task_struct *tsk = current;
 297                 int fpscr;
 298                 int n, m, prec;
 299                 unsigned int hx, hy;
 300
 301                 n = (finsn >> 8) & 0xf;
 302                 m = (finsn >> 4) & 0xf;
 303                 hx = tsk->thread.fpu.hard.fp_regs[n];
 304                 hy = tsk->thread.fpu.hard.fp_regs[m];
 305                 fpscr = tsk->thread.fpu.hard.fpscr;
 306                 prec = fpscr & FPSCR_DBL_PRECISION;
 307
 308                 if ((fpscr & FPSCR_CAUSE_ERROR)
 309                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 310                                  || (hy & 0x7fffffff) < 0x00100000))) {
 311                         long long llx, lly;
 312
 313                         /* FPU error because of denormal (doubles) */
 314                         llx = ((long long)hx << 32)
 315                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 316                         lly = ((long long)hy << 32)
 317                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 318                         llx = float64_mul(llx, lly);
 319                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 320                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 321                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 322                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 323                                          || (hy & 0x7fffffff) < 0x00800000))) {
 324                         /* FPU error because of denormal (floats) */
 325                         hx = float32_mul(hx, hy);
 326                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 327                 } else
 328                         return 0;
 329
 330                 regs->pc = nextpc;
 331                 return 1;
 332         } else if ((finsn & 0xf00e) == 0xf000) {
 333                 /* fadd, fsub */
 334                 struct task_struct *tsk = current;
 335                 int fpscr;
 336                 int n, m, prec;
 337                 unsigned int hx, hy;
 338
 339                 n = (finsn >> 8) & 0xf;
 340                 m = (finsn >> 4) & 0xf;
 341                 hx = tsk->thread.fpu.hard.fp_regs[n];
 342                 hy = tsk->thread.fpu.hard.fp_regs[m];
 343                 fpscr = tsk->thread.fpu.hard.fpscr;
 344                 prec = fpscr & FPSCR_DBL_PRECISION;
 345
 346                 if ((fpscr & FPSCR_CAUSE_ERROR)
 347                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 348                                  || (hy & 0x7fffffff) < 0x00100000))) {
 349                         long long llx, lly;
 350
 351                         /* FPU error because of denormal (doubles) */
 352                         llx = ((long long)hx << 32)
 353                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 354                         lly = ((long long)hy << 32)
 355                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 356                         if ((finsn & 0xf00f) == 0xf000)
 357                                 llx = float64_add(llx, lly);
 358                         else
 359                                 llx = float64_sub(llx, lly);
 360                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 361                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 362                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 363                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 364                                          || (hy & 0x7fffffff) < 0x00800000))) {
 365                         /* FPU error because of denormal (floats) */
 366                         if ((finsn & 0xf00f) == 0xf000)
 367                                 hx = float32_add(hx, hy);
 368                         else
 369                                 hx = float32_sub(hx, hy);
 370                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 371                 } else
 372                         return 0;
 373
 374                 regs->pc = nextpc;
 375                 return 1;
 376         } else if ((finsn & 0xf003) == 0xf003) {
 377                 /* fdiv */
 378                 struct task_struct *tsk = current;
 379                 int fpscr;
 380                 int n, m, prec;
 381                 unsigned int hx, hy;
 382
 383                 n = (finsn >> 8) & 0xf;
 384                 m = (finsn >> 4) & 0xf;
 385                 hx = tsk->thread.fpu.hard.fp_regs[n];
 386                 hy = tsk->thread.fpu.hard.fp_regs[m];
 387                 fpscr = tsk->thread.fpu.hard.fpscr;
 388                 prec = fpscr & FPSCR_DBL_PRECISION;
 389
 390                 if ((fpscr & FPSCR_CAUSE_ERROR)
 391                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 392                                  || (hy & 0x7fffffff) < 0x00100000))) {
 393                         long long llx, lly;
 394
 395                         /* FPU error because of denormal (doubles) */
 396                         llx = ((long long)hx << 32)
 397                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 398                         lly = ((long long)hy << 32)
 399                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 400
 401                         llx = float64_div(llx, lly);
 402
 403                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 404                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 405                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 406                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 407                                          || (hy & 0x7fffffff) < 0x00800000))) {
 408                         /* FPU error because of denormal (floats) */
 409                         hx = float32_div(hx, hy);
 410                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 411                 } else
 412                         return 0;
 413
 414                 regs->pc = nextpc;
 415                 return 1;
 416         } else if ((finsn & 0xf0bd) == 0xf0bd) {
 417                 /* fcnvds - double to single precision convert */
 418                 struct task_struct *tsk = current;
 419                 int m;
 420                 unsigned int hx;
 421
 422                 m = (finsn >> 8) & 0x7;
 423                 hx = tsk->thread.fpu.hard.fp_regs[m];
 424
 425                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
 426                         && ((hx & 0x7fffffff) < 0x00100000)) {
 427                         /* subnormal double to float conversion */
 428                         long long llx;
 429
 430                         llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
 431                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 432
 433                         tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
 434                 } else
 435                         return 0;
 436
 437                 regs->pc = nextpc;
 438                 return 1;
 439         }
 440
 441         return 0;
 442 }
 443
 444 void float_raise(unsigned int flags)
 445 {
 446         fpu_exception_flags |= flags;
 447 }
 448
 449 int float_rounding_mode(void)
 450 {
 451         struct task_struct *tsk = current;
 452         int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
 453         return roundingMode;
 454 }
 455
 456 BUILD_TRAP_HANDLER(fpu_error)
 457 {
 458         struct task_struct *tsk = current;
 459         TRAP_HANDLER_DECL;
 460
 461         __unlazy_fpu(tsk, regs);
 462         fpu_exception_flags = 0;
 463         if (ieee_fpe_handler(regs)) {
 464                 tsk->thread.fpu.hard.fpscr &=
 465                     ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
 466                 tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
 467                 /* Set the FPSCR flag as well as cause bits - simply
 468                  * replicate the cause */
 469                 tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
 470                 grab_fpu(regs);
 471                 restore_fpu(tsk);
 472                 task_thread_info(tsk)->status |= TS_USEDFPU;
 473                 if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
 474                      (fpu_exception_flags >> 2)) == 0) {
 475                         return;
 476                 }
 477         }
 478
 479         force_sig(SIGFPE, tsk);
 480 }
 481
 482 void fpu_state_restore(struct pt_regs *regs)
 483 {
 484         struct task_struct *tsk = current;
 485
 486         grab_fpu(regs);
 487         if (unlikely(!user_mode(regs))) {
 488                 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
 489                 BUG();
 490                 return;
 491         }
 492
 493         if (likely(used_math())) {
 494                 /* Using the FPU again.  */
 495                 restore_fpu(tsk);
 496         } else {
 497                 /* First time FPU user.  */
 498                 fpu_init();
 499                 set_used_math();
 500         }
 501         task_thread_info(tsk)->status |= TS_USEDFPU;
 502         tsk->fpu_counter++;
 503 }
 504
/*
 * Trap-handler entry point that simply forwards to fpu_state_restore().
 * NOTE(review): `regs` is not declared here; presumably TRAP_HANDLER_DECL
 * provides it -- confirm against the macro definition.
 */
BUILD_TRAP_HANDLER(fpu_state_restore)
{
        TRAP_HANDLER_DECL;

        fpu_state_restore(regs);
}