arch/sh/kernel/cpu/sh2a/fpu.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Save/restore floating point context for signal handlers.
   4  *
   5  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
   6  *
   7  * FIXME! These routines can be optimized in big endian case.
   8  */
   9 #include <linux/sched/signal.h>
  10 #include <linux/signal.h>
  11 #include <asm/processor.h>
  12 #include <asm/io.h>
  13 #include <asm/fpu.h>
  14 #include <asm/traps.h>
  15
/* The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 *
 * FPSCR_RCHG is defined as zero here.  Within this file it is only passed
 * to the inline asm in save_fpu() and restore_fpu() as an input operand,
 * and neither asm template references that operand.
 */

#define FPSCR_RCHG 0x00000000
  22
  23
/*
 * Save FPU registers onto task structure.
 *
 * @tsk: task whose thread.xstate->hardfpu area receives the register dump.
 *
 * The asm starts with its pointer at &hardfpu.status and uses pre-decrement
 * stores, so the values are written to descending addresses in this order:
 * fpul, fpscr, fr15, fr14, ... fr0.  Once everything is stored, fpscr is
 * loaded with FPSCR_INIT.  The whole sequence is bracketed by
 * enable_fpu()/disable_fpu().
 */
void save_fpu(struct task_struct *tsk)
{
        /* Output operand only: receives the final value of the walking pointer. */
        unsigned long dummy;

        enable_fpu();
        /*
         * Operands: %0 = store pointer (tied to input "0"),
         *           %2 = FPSCR_RCHG (not referenced by the template),
         *           %3 = FPSCR_INIT (loaded into fpscr by the last insn).
         */
        asm volatile("sts.l     fpul, @-%0\n\t"
                     "sts.l     fpscr, @-%0\n\t"
                     "fmov.s    fr15, @-%0\n\t"
                     "fmov.s    fr14, @-%0\n\t"
                     "fmov.s    fr13, @-%0\n\t"
                     "fmov.s    fr12, @-%0\n\t"
                     "fmov.s    fr11, @-%0\n\t"
                     "fmov.s    fr10, @-%0\n\t"
                     "fmov.s    fr9, @-%0\n\t"
                     "fmov.s    fr8, @-%0\n\t"
                     "fmov.s    fr7, @-%0\n\t"
                     "fmov.s    fr6, @-%0\n\t"
                     "fmov.s    fr5, @-%0\n\t"
                     "fmov.s    fr4, @-%0\n\t"
                     "fmov.s    fr3, @-%0\n\t"
                     "fmov.s    fr2, @-%0\n\t"
                     "fmov.s    fr1, @-%0\n\t"
                     "fmov.s    fr0, @-%0\n\t"
                     "lds       %3, fpscr\n\t"
                     : "=r" (dummy)
                     : "0" ((char *)(&tsk->thread.xstate->hardfpu.status)),
                       "r" (FPSCR_RCHG),
                       "r" (FPSCR_INIT)
                     : "memory");

        disable_fpu();
}
  59
/*
 * Reload FPU registers from the task structure.
 *
 * @tsk: task whose thread.xstate area supplies the register values.
 *
 * The asm starts with its pointer at tsk->thread.xstate and uses
 * post-increment loads, reading ascending addresses in this order:
 * fr0, fr1, ... fr15, fpscr, fpul.  This is the reverse of the store
 * order used by save_fpu().  The sequence is bracketed by
 * enable_fpu()/disable_fpu().
 */
void restore_fpu(struct task_struct *tsk)
{
        /* Output operand only: receives the final value of the walking pointer. */
        unsigned long dummy;

        enable_fpu();
        /*
         * Operands: %0 = load pointer (tied to input "0"),
         *           %2 = FPSCR_RCHG (not referenced by the template).
         */
        asm volatile("fmov.s    @%0+, fr0\n\t"
                     "fmov.s    @%0+, fr1\n\t"
                     "fmov.s    @%0+, fr2\n\t"
                     "fmov.s    @%0+, fr3\n\t"
                     "fmov.s    @%0+, fr4\n\t"
                     "fmov.s    @%0+, fr5\n\t"
                     "fmov.s    @%0+, fr6\n\t"
                     "fmov.s    @%0+, fr7\n\t"
                     "fmov.s    @%0+, fr8\n\t"
                     "fmov.s    @%0+, fr9\n\t"
                     "fmov.s    @%0+, fr10\n\t"
                     "fmov.s    @%0+, fr11\n\t"
                     "fmov.s    @%0+, fr12\n\t"
                     "fmov.s    @%0+, fr13\n\t"
                     "fmov.s    @%0+, fr14\n\t"
                     "fmov.s    @%0+, fr15\n\t"
                     "lds.l     @%0+, fpscr\n\t"
                     "lds.l     @%0+, fpul\n\t"
                     : "=r" (dummy)
                     : "0" (tsk->thread.xstate), "r" (FPSCR_RCHG)
                     : "memory");
        disable_fpu();
}
  88
  89 /*
  90  *      Emulate arithmetic ops on denormalized number for some FPU insns.
  91  */
  92
  93 /* denormalized float * float */
  94 static int denormal_mulf(int hx, int hy)
  95 {
  96         unsigned int ix, iy;
  97         unsigned long long m, n;
  98         int exp, w;
  99
 100         ix = hx & 0x7fffffff;
 101         iy = hy & 0x7fffffff;
 102         if (iy < 0x00800000 || ix == 0)
 103                 return ((hx ^ hy) & 0x80000000);
 104
 105         exp = (iy & 0x7f800000) >> 23;
 106         ix &= 0x007fffff;
 107         iy = (iy & 0x007fffff) | 0x00800000;
 108         m = (unsigned long long)ix * iy;
 109         n = m;
 110         w = -1;
 111         while (n) { n >>= 1; w++; }
 112
 113         /* FIXME: use guard bits */
 114         exp += w - 126 - 46;
 115         if (exp > 0)
 116                 ix = ((int) (m >> (w - 23)) & 0x007fffff) | (exp << 23);
 117         else if (exp + 22 >= 0)
 118                 ix = (int) (m >> (w - 22 - exp)) & 0x007fffff;
 119         else
 120                 ix = 0;
 121
 122         ix |= (hx ^ hy) & 0x80000000;
 123         return ix;
 124 }
 125
 126 /* denormalized double * double */
 127 static void mult64(unsigned long long x, unsigned long long y,
 128                 unsigned long long *highp, unsigned long long *lowp)
 129 {
 130         unsigned long long sub0, sub1, sub2, sub3;
 131         unsigned long long high, low;
 132
 133         sub0 = (x >> 32) * (unsigned long) (y >> 32);
 134         sub1 = (x & 0xffffffffLL) * (unsigned long) (y >> 32);
 135         sub2 = (x >> 32) * (unsigned long) (y & 0xffffffffLL);
 136         sub3 = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL);
 137         low = sub3;
 138         high = 0LL;
 139         sub3 += (sub1 << 32);
 140         if (low > sub3)
 141                 high++;
 142         low = sub3;
 143         sub3 += (sub2 << 32);
 144         if (low > sub3)
 145                 high++;
 146         low = sub3;
 147         high += (sub1 >> 32) + (sub2 >> 32);
 148         high += sub0;
 149         *lowp = low;
 150         *highp = high;
 151 }
 152
 153 static inline long long rshift64(unsigned long long mh,
 154                 unsigned long long ml, int n)
 155 {
 156         if (n >= 64)
 157                 return mh >> (n - 64);
 158         return (mh << (64 - n)) | (ml >> n);
 159 }
 160
 161 static long long denormal_muld(long long hx, long long hy)
 162 {
 163         unsigned long long ix, iy;
 164         unsigned long long mh, ml, nh, nl;
 165         int exp, w;
 166
 167         ix = hx & 0x7fffffffffffffffLL;
 168         iy = hy & 0x7fffffffffffffffLL;
 169         if (iy < 0x0010000000000000LL || ix == 0)
 170                 return ((hx ^ hy) & 0x8000000000000000LL);
 171
 172         exp = (iy & 0x7ff0000000000000LL) >> 52;
 173         ix &= 0x000fffffffffffffLL;
 174         iy = (iy & 0x000fffffffffffffLL) | 0x0010000000000000LL;
 175         mult64(ix, iy, &mh, &ml);
 176         nh = mh;
 177         nl = ml;
 178         w = -1;
 179         if (nh) {
 180                 while (nh) { nh >>= 1; w++;}
 181                 w += 64;
 182         } else
 183                 while (nl) { nl >>= 1; w++;}
 184
 185         /* FIXME: use guard bits */
 186         exp += w - 1022 - 52 * 2;
 187         if (exp > 0)
 188                 ix = (rshift64(mh, ml, w - 52) & 0x000fffffffffffffLL)
 189                         | ((long long)exp << 52);
 190         else if (exp + 51 >= 0)
 191                 ix = rshift64(mh, ml, w - 51 - exp) & 0x000fffffffffffffLL;
 192         else
 193                 ix = 0;
 194
 195         ix |= (hx ^ hy) & 0x8000000000000000LL;
 196         return ix;
 197 }
 198
 199 /* ix - iy where iy: denormal and ix, iy >= 0 */
 200 static int denormal_subf1(unsigned int ix, unsigned int iy)
 201 {
 202         int frac;
 203         int exp;
 204
 205         if (ix < 0x00800000)
 206                 return ix - iy;
 207
 208         exp = (ix & 0x7f800000) >> 23;
 209         if (exp - 1 > 31)
 210                 return ix;
 211         iy >>= exp - 1;
 212         if (iy == 0)
 213                 return ix;
 214
 215         frac = (ix & 0x007fffff) | 0x00800000;
 216         frac -= iy;
 217         while (frac < 0x00800000) {
 218                 if (--exp == 0)
 219                         return frac;
 220                 frac <<= 1;
 221         }
 222
 223         return (exp << 23) | (frac & 0x007fffff);
 224 }
 225
 226 /* ix + iy where iy: denormal and ix, iy >= 0 */
 227 static int denormal_addf1(unsigned int ix, unsigned int iy)
 228 {
 229         int frac;
 230         int exp;
 231
 232         if (ix < 0x00800000)
 233                 return ix + iy;
 234
 235         exp = (ix & 0x7f800000) >> 23;
 236         if (exp - 1 > 31)
 237                 return ix;
 238         iy >>= exp - 1;
 239         if (iy == 0)
 240           return ix;
 241
 242         frac = (ix & 0x007fffff) | 0x00800000;
 243         frac += iy;
 244         if (frac >= 0x01000000) {
 245                 frac >>= 1;
 246                 ++exp;
 247         }
 248
 249         return (exp << 23) | (frac & 0x007fffff);
 250 }
 251
 252 static int denormal_addf(int hx, int hy)
 253 {
 254         unsigned int ix, iy;
 255         int sign;
 256
 257         if ((hx ^ hy) & 0x80000000) {
 258                 sign = hx & 0x80000000;
 259                 ix = hx & 0x7fffffff;
 260                 iy = hy & 0x7fffffff;
 261                 if (iy < 0x00800000) {
 262                         ix = denormal_subf1(ix, iy);
 263                         if ((int) ix < 0) {
 264                                 ix = -ix;
 265                                 sign ^= 0x80000000;
 266                         }
 267                 } else {
 268                         ix = denormal_subf1(iy, ix);
 269                         sign ^= 0x80000000;
 270                 }
 271         } else {
 272                 sign = hx & 0x80000000;
 273                 ix = hx & 0x7fffffff;
 274                 iy = hy & 0x7fffffff;
 275                 if (iy < 0x00800000)
 276                         ix = denormal_addf1(ix, iy);
 277                 else
 278                         ix = denormal_addf1(iy, ix);
 279         }
 280
 281         return sign | ix;
 282 }
 283
 284 /* ix - iy where iy: denormal and ix, iy >= 0 */
 285 static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
 286 {
 287         long long frac;
 288         int exp;
 289
 290         if (ix < 0x0010000000000000LL)
 291                 return ix - iy;
 292
 293         exp = (ix & 0x7ff0000000000000LL) >> 52;
 294         if (exp - 1 > 63)
 295                 return ix;
 296         iy >>= exp - 1;
 297         if (iy == 0)
 298                 return ix;
 299
 300         frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
 301         frac -= iy;
 302         while (frac < 0x0010000000000000LL) {
 303                 if (--exp == 0)
 304                         return frac;
 305                 frac <<= 1;
 306         }
 307
 308         return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL);
 309 }
 310
 311 /* ix + iy where iy: denormal and ix, iy >= 0 */
 312 static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
 313 {
 314         long long frac;
 315         long long exp;
 316
 317         if (ix < 0x0010000000000000LL)
 318                 return ix + iy;
 319
 320         exp = (ix & 0x7ff0000000000000LL) >> 52;
 321         if (exp - 1 > 63)
 322                 return ix;
 323         iy >>= exp - 1;
 324         if (iy == 0)
 325           return ix;
 326
 327         frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
 328         frac += iy;
 329         if (frac >= 0x0020000000000000LL) {
 330                 frac >>= 1;
 331                 ++exp;
 332         }
 333
 334         return (exp << 52) | (frac & 0x000fffffffffffffLL);
 335 }
 336
 337 static long long denormal_addd(long long hx, long long hy)
 338 {
 339         unsigned long long ix, iy;
 340         long long sign;
 341
 342         if ((hx ^ hy) & 0x8000000000000000LL) {
 343                 sign = hx & 0x8000000000000000LL;
 344                 ix = hx & 0x7fffffffffffffffLL;
 345                 iy = hy & 0x7fffffffffffffffLL;
 346                 if (iy < 0x0010000000000000LL) {
 347                         ix = denormal_subd1(ix, iy);
 348                         if ((int) ix < 0) {
 349                                 ix = -ix;
 350                                 sign ^= 0x8000000000000000LL;
 351                         }
 352                 } else {
 353                         ix = denormal_subd1(iy, ix);
 354                         sign ^= 0x8000000000000000LL;
 355                 }
 356         } else {
 357                 sign = hx & 0x8000000000000000LL;
 358                 ix = hx & 0x7fffffffffffffffLL;
 359                 iy = hy & 0x7fffffffffffffffLL;
 360                 if (iy < 0x0010000000000000LL)
 361                         ix = denormal_addd1(ix, iy);
 362                 else
 363                         ix = denormal_addd1(iy, ix);
 364         }
 365
 366         return sign | ix;
 367 }
 368
 369 /**
 370  *      denormal_to_double - Given denormalized float number,
 371  *                           store double float
 372  *
 373  *      @fpu: Pointer to sh_fpu_hard structure
 374  *      @n: Index to FP register
 375  */
 376 static void
 377 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
 378 {
 379         unsigned long du, dl;
 380         unsigned long x = fpu->fpul;
 381         int exp = 1023 - 126;
 382
 383         if (x != 0 && (x & 0x7f800000) == 0) {
 384                 du = (x & 0x80000000);
 385                 while ((x & 0x00800000) == 0) {
 386                         x <<= 1;
 387                         exp--;
 388                 }
 389                 x &= 0x007fffff;
 390                 du |= (exp << 20) | (x >> 3);
 391                 dl = x << 29;
 392
 393                 fpu->fp_regs[n] = du;
 394                 fpu->fp_regs[n+1] = dl;
 395         }
 396 }
 397
 398 /**
 399  *      ieee_fpe_handler - Handle denormalized number exception
 400  *
 401  *      @regs: Pointer to register structure
 402  *
 403  *      Returns 1 when it's handled (should not cause exception).
 404  */
 405 static int
 406 ieee_fpe_handler (struct pt_regs *regs)
 407 {
 408         unsigned short insn = *(unsigned short *) regs->pc;
 409         unsigned short finsn;
 410         unsigned long nextpc;
 411         int nib[4] = {
 412                 (insn >> 12) & 0xf,
 413                 (insn >> 8) & 0xf,
 414                 (insn >> 4) & 0xf,
 415                 insn & 0xf};
 416
 417         if (nib[0] == 0xb ||
 418             (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
 419                 regs->pr = regs->pc + 4;
 420         if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
 421                 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
 422                 finsn = *(unsigned short *) (regs->pc + 2);
 423         } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
 424                 if (regs->sr & 1)
 425                         nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
 426                 else
 427                         nextpc = regs->pc + 4;
 428                 finsn = *(unsigned short *) (regs->pc + 2);
 429         } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
 430                 if (regs->sr & 1)
 431                         nextpc = regs->pc + 4;
 432                 else
 433                         nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
 434                 finsn = *(unsigned short *) (regs->pc + 2);
 435         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
 436                  (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
 437                 nextpc = regs->regs[nib[1]];
 438                 finsn = *(unsigned short *) (regs->pc + 2);
 439         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
 440                  (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
 441                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
 442                 finsn = *(unsigned short *) (regs->pc + 2);
 443         } else if (insn == 0x000b) { /* rts */
 444                 nextpc = regs->pr;
 445                 finsn = *(unsigned short *) (regs->pc + 2);
 446         } else {
 447                 nextpc = regs->pc + 2;
 448                 finsn = insn;
 449         }
 450
 451 #define FPSCR_FPU_ERROR (1 << 17)
 452
 453         if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
 454                 struct task_struct *tsk = current;
 455
 456                 if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_FPU_ERROR)) {
 457                         /* FPU error */
 458                         denormal_to_double (&tsk->thread.xstate->hardfpu,
 459                                             (finsn >> 8) & 0xf);
 460                 } else
 461                         return 0;
 462
 463                 regs->pc = nextpc;
 464                 return 1;
 465         } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
 466                 struct task_struct *tsk = current;
 467                 int fpscr;
 468                 int n, m, prec;
 469                 unsigned int hx, hy;
 470
 471                 n = (finsn >> 8) & 0xf;
 472                 m = (finsn >> 4) & 0xf;
 473                 hx = tsk->thread.xstate->hardfpu.fp_regs[n];
 474                 hy = tsk->thread.xstate->hardfpu.fp_regs[m];
 475                 fpscr = tsk->thread.xstate->hardfpu.fpscr;
 476                 prec = fpscr & (1 << 19);
 477
 478                 if ((fpscr & FPSCR_FPU_ERROR)
 479                      && (prec && ((hx & 0x7fffffff) < 0x00100000
 480                                    || (hy & 0x7fffffff) < 0x00100000))) {
 481                         long long llx, lly;
 482
 483                         /* FPU error because of denormal */
 484                         llx = ((long long) hx << 32)
 485                                | tsk->thread.xstate->hardfpu.fp_regs[n+1];
 486                         lly = ((long long) hy << 32)
 487                                | tsk->thread.xstate->hardfpu.fp_regs[m+1];
 488                         if ((hx & 0x7fffffff) >= 0x00100000)
 489                                 llx = denormal_muld(lly, llx);
 490                         else
 491                                 llx = denormal_muld(llx, lly);
 492                         tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
 493                         tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff;
 494                 } else if ((fpscr & FPSCR_FPU_ERROR)
 495                      && (!prec && ((hx & 0x7fffffff) < 0x00800000
 496                                    || (hy & 0x7fffffff) < 0x00800000))) {
 497                         /* FPU error because of denormal */
 498                         if ((hx & 0x7fffffff) >= 0x00800000)
 499                                 hx = denormal_mulf(hy, hx);
 500                         else
 501                                 hx = denormal_mulf(hx, hy);
 502                         tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
 503                 } else
 504                         return 0;
 505
 506                 regs->pc = nextpc;
 507                 return 1;
 508         } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
 509                 struct task_struct *tsk = current;
 510                 int fpscr;
 511                 int n, m, prec;
 512                 unsigned int hx, hy;
 513
 514                 n = (finsn >> 8) & 0xf;
 515                 m = (finsn >> 4) & 0xf;
 516                 hx = tsk->thread.xstate->hardfpu.fp_regs[n];
 517                 hy = tsk->thread.xstate->hardfpu.fp_regs[m];
 518                 fpscr = tsk->thread.xstate->hardfpu.fpscr;
 519                 prec = fpscr & (1 << 19);
 520
 521                 if ((fpscr & FPSCR_FPU_ERROR)
 522                      && (prec && ((hx & 0x7fffffff) < 0x00100000
 523                                    || (hy & 0x7fffffff) < 0x00100000))) {
 524                         long long llx, lly;
 525
 526                         /* FPU error because of denormal */
 527                         llx = ((long long) hx << 32)
 528                                | tsk->thread.xstate->hardfpu.fp_regs[n+1];
 529                         lly = ((long long) hy << 32)
 530                                | tsk->thread.xstate->hardfpu.fp_regs[m+1];
 531                         if ((finsn & 0xf00f) == 0xf000)
 532                                 llx = denormal_addd(llx, lly);
 533                         else
 534                                 llx = denormal_addd(llx, lly ^ (1LL << 63));
 535                         tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
 536                         tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff;
 537                 } else if ((fpscr & FPSCR_FPU_ERROR)
 538                      && (!prec && ((hx & 0x7fffffff) < 0x00800000
 539                                    || (hy & 0x7fffffff) < 0x00800000))) {
 540                         /* FPU error because of denormal */
 541                         if ((finsn & 0xf00f) == 0xf000)
 542                                 hx = denormal_addf(hx, hy);
 543                         else
 544                                 hx = denormal_addf(hx, hy ^ 0x80000000);
 545                         tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
 546                 } else
 547                         return 0;
 548
 549                 regs->pc = nextpc;
 550                 return 1;
 551         }
 552
 553         return 0;
 554 }
 555
 556 BUILD_TRAP_HANDLER(fpu_error)
 557 {
 558         struct task_struct *tsk = current;
 559         TRAP_HANDLER_DECL;
 560
 561         __unlazy_fpu(tsk, regs);
 562         if (ieee_fpe_handler(regs)) {
 563                 tsk->thread.xstate->hardfpu.fpscr &=
 564                         ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
 565                 grab_fpu(regs);
 566                 restore_fpu(tsk);
 567                 task_thread_info(tsk)->status |= TS_USEDFPU;
 568                 return;
 569         }
 570
 571         force_sig(SIGFPE);
 572 }