arch/arm/vfp/vfpsingle.c

   1 /*
   2  *  linux/arch/arm/vfp/vfpsingle.c
   3  *
   4  * This code is derived in part from John R. Hauser's softfloat library, which
   5  * carries the following notice:
   6  *
   7  * ===========================================================================
   8  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
   9  * Arithmetic Package, Release 2.
  10  *
  11  * Written by John R. Hauser.  This work was made possible in part by the
  12  * International Computer Science Institute, located at Suite 600, 1947 Center
  13  * Street, Berkeley, California 94704.  Funding was partially provided by the
  14  * National Science Foundation under grant MIP-9311980.  The original version
  15  * of this code was written as part of a project to build a fixed-point vector
  16  * processor in collaboration with the University of California at Berkeley,
  17  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  18  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  19  * arithmetic/softfloat.html'.
  20  *
  21  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  22  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  23  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  24  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  25  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  26  *
  27  * Derivative works are acceptable, even for commercial purposes, so long as
  28  * (1) they include prominent notice that the work is derivative, and (2) they
  29  * include prominent notice akin to these three paragraphs for those parts of
  30  * this code that are retained.
  31  * ===========================================================================
  32  */
  33 #include <linux/kernel.h>
  34 #include <linux/bitops.h>
  35
  36 #include <asm/div64.h>
  37 #include <asm/vfp.h>
  38
  39 #include "vfpinstr.h"
  40 #include "vfp.h"
  41
  42 static struct vfp_single vfp_single_default_qnan = {
  43         .exponent       = 255,
  44         .sign           = 0,
  45         .significand    = VFP_SINGLE_SIGNIFICAND_QNAN,
  46 };
  47
  48 static void vfp_single_dump(const char *str, struct vfp_single *s)
  49 {
  50         pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
  51                  str, s->sign != 0, s->exponent, s->significand);
  52 }
  53
  54 static void vfp_single_normalise_denormal(struct vfp_single *vs)
  55 {
  56         int bits = 31 - fls(vs->significand);
  57
  58         vfp_single_dump("normalise_denormal: in", vs);
  59
  60         if (bits) {
  61                 vs->exponent -= bits - 1;
  62                 vs->significand <<= bits;
  63         }
  64
  65         vfp_single_dump("normalise_denormal: out", vs);
  66 }
  67
/*
 * Normalise, round and pack the unpacked value 'vs', then store the
 * packed result in single-precision register 'sd'.
 *
 *  sd         - destination register index, passed to vfp_put_float()
 *  vs         - unpacked value; modified in place
 *  fpscr      - only the rounding-mode field is examined here
 *  exceptions - exception flags accumulated so far by the caller
 *  func       - caller name for the trace (DEBUG builds only)
 *
 * Returns 'exceptions' with FPSCR_IXC, FPSCR_OFC and/or FPSCR_UFC OR'd
 * in as required by the rounding performed.
 *
 * Without DEBUG, vfp_single_normaliseround() is a macro that drops the
 * 'func' argument and calls __vfp_single_normaliseround().
 */
#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
	u32 significand, incr, rmode;
	int exponent, shift, underflow;

	vfp_single_dump("pack: in", vs);

	/*
	 * Infinities and NaNs are a special case.  Exponent 255 with a
	 * zero significand is always packed untouched; with a non-zero
	 * significand it is packed untouched only when the caller has
	 * already recorded exceptions.
	 */
	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vs->significand == 0) {
		vs->exponent = 0;
		goto pack;
	}

	/* Work on local copies; 'vs' is only written back at the end. */
	exponent = vs->exponent;
	significand = vs->significand;

	/*
	 * Normalise first.  Note that we shift the significand up to
	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
	 * significant bit.
	 */
	shift = 32 - fls(significand);
	/* shift == 0: already normalised; the significand is non-zero here. */
	if (shift < 32 && shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vs->exponent = exponent;
	vs->significand = significand;
	vfp_single_dump("pack: normalised", vs);
#endif

	/*
	 * Tiny number?  A negative exponent is folded back to zero by
	 * shifting the significand right by the same amount.
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright32jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: tiny number", vs);
#endif
		/* No bits below the result LSB: the tiny result is exact. */
		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		/*
		 * Half of the result LSB; one less when the result LSB is
		 * clear, so that an exact tie does not carry into it.
		 */
		incr = 1 << VFP_SINGLE_LOW_BITS;
		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
		/* Directed rounding away from zero: any low bit carries. */
		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08x\n", incr);

	/*
	 * Is our rounding going to overflow?  If the addition would wrap
	 * 32 bits, halve both operands first, keeping the shifted-out
	 * significand bit sticky.
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: overflow", vs);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 254) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			/*
			 * The rounding mode never rounds this value up, so
			 * clamp to exponent 253 / significand 0x7fffffff
			 * instead of returning infinity.
			 */
			vs->exponent = 253;
			vs->significand = 0x7fffffff;
		} else {
			vs->exponent = 255;		/* infinity */
			vs->significand = 0;
		}
	} else {
		/* Nothing left above the low bits: the exponent is zero. */
		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
			exponent = 0;
		/* Underflow is only reported if the rounded result is still tiny. */
		if (exponent || significand > 0x80000000)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vs->exponent = exponent;
		vs->significand = significand >> 1;
	}

 pack:
	vfp_single_dump("pack: final", vs);
	{
		s32 d = vfp_single_pack(vs);
#ifdef DEBUG
		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
			 sd, d, exceptions);
#endif
		vfp_put_float(d, sd);
	}

	return exceptions;
}
 209
 210 /*
 211  * Propagate the NaN, setting exceptions if it is signalling.
 212  * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 213  */
 214 static u32
 215 vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
 216                   struct vfp_single *vsm, u32 fpscr)
 217 {
 218         struct vfp_single *nan;
 219         int tn, tm = 0;
 220
 221         tn = vfp_single_type(vsn);
 222
 223         if (vsm)
 224                 tm = vfp_single_type(vsm);
 225
 226         if (fpscr & FPSCR_DEFAULT_NAN)
 227                 /*
 228                  * Default NaN mode - always returns a quiet NaN
 229                  */
 230                 nan = &vfp_single_default_qnan;
 231         else {
 232                 /*
 233                  * Contemporary mode - select the first signalling
 234                  * NAN, or if neither are signalling, the first
 235                  * quiet NAN.
 236                  */
 237                 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
 238                         nan = vsn;
 239                 else
 240                         nan = vsm;
 241                 /*
 242                  * Make the NaN quiet.
 243                  */
 244                 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
 245         }
 246
 247         *vsd = *nan;
 248
 249         /*
 250          * If one was a signalling NAN, raise invalid operation.
 251          */
 252         return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
 253 }
 254
 255
 256 /*
 257  * Extended operations
 258  */
 259 static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
 260 {
 261         vfp_put_float(vfp_single_packed_abs(m), sd);
 262         return 0;
 263 }
 264
 265 static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
 266 {
 267         vfp_put_float(m, sd);
 268         return 0;
 269 }
 270
 271 static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
 272 {
 273         vfp_put_float(vfp_single_packed_negate(m), sd);
 274         return 0;
 275 }
 276
 277 static const u16 sqrt_oddadjust[] = {
 278         0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
 279         0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
 280 };
 281
 282 static const u16 sqrt_evenadjust[] = {
 283         0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
 284         0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
 285 };
 286
 287 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
 288 {
 289         int index;
 290         u32 z, a;
 291
 292         if ((significand & 0xc0000000) != 0x40000000) {
 293                 pr_warn("VFP: estimate_sqrt: invalid significand\n");
 294         }
 295
 296         a = significand << 1;
 297         index = (a >> 27) & 15;
 298         if (exponent & 1) {
 299                 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
 300                 z = ((a / z) << 14) + (z << 15);
 301                 a >>= 1;
 302         } else {
 303                 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
 304                 z = a / z + z;
 305                 z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
 306                 if (z <= a)
 307                         return (s32)a >> 1;
 308         }
 309         {
 310                 u64 v = (u64)a << 31;
 311                 do_div(v, z);
 312                 return v + (z >> 1);
 313         }
 314 }
 315
/*
 * FSQRT: compute the square root of the packed operand 'm' and store it
 * in register 'sd'.
 *
 * NaNs are propagated, +infinity and +/-0 are copied through, and
 * -infinity or any other negative input yields the default quiet NaN
 * with FPSCR_IOC.  All other inputs are estimated, corrected and then
 * rounded by vfp_single_normaliseround().
 *
 * Returns the exception flags raised.
 */
static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm, vsd;
	int ret, tm;

	vfp_single_unpack(&vsm, m);
	tm = vfp_single_type(&vsm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_single *vsp = &vsd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
		else if (vsm.sign == 0) {
			/*
			 * +infinity.  Also the jump target for zero inputs
			 * below: the operand is returned unchanged.
			 */
 sqrt_copy:
			vsp = &vsm;
			ret = 0;
		} else {
			/*
			 * -infinity.  Also the jump target for negative
			 * numbers below: invalid operation.
			 */
 sqrt_invalid:
			vsp = &vfp_single_default_qnan;
			ret = FPSCR_IOC;
		}
		/* Shared exit for every special case, including the gotos. */
		vfp_put_float(vfp_single_pack(vsp), sd);
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vsm.sign)
		goto sqrt_invalid;

	vfp_single_dump("sqrt", &vsm);

	/*
	 * Estimate the square root.  The unbiased exponent is halved and
	 * re-biased; the significand estimate is nudged up by 2.
	 */
	vsd.sign = 0;
	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

	vfp_single_dump("sqrt estimate", &vsd);

	/*
	 * And now adjust.  The exact remainder is only computed when the
	 * low bits of the estimate are small (<= 5).
	 */
	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
		if (vsd.significand < 2) {
			/* The "+ 2" above wrapped past 32 bits: saturate. */
			vsd.significand = 0xffffffff;
		} else {
			u64 term;
			s64 rem;
			/* Double the operand significand for even exponents. */
			vsm.significand <<= !(vsm.exponent & 1);
			term = (u64)vsd.significand * vsd.significand;
			rem = ((u64)vsm.significand << 32) - term;

			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

			/*
			 * Estimate too large: step it down by one, adding
			 * back e^2 - (e-1)^2 = 2(e-1) + 1 each time.
			 */
			while (rem < 0) {
				vsd.significand -= 1;
				rem += ((u64)vsd.significand << 1) | 1;
			}
			/* Sticky bit: a non-zero remainder means inexact. */
			vsd.significand |= rem != 0;
		}
	}
	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}
 396
 397 /*
 398  * Equal        := ZC
 399  * Less than    := N
 400  * Greater than := C
 401  * Unordered    := CV
 402  */
 403 static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
 404 {
 405         s32 d;
 406         u32 ret = 0;
 407
 408         d = vfp_get_float(sd);
 409         if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
 410                 ret |= FPSCR_C | FPSCR_V;
 411                 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 412                         /*
 413                          * Signalling NaN, or signalling on quiet NaN
 414                          */
 415                         ret |= FPSCR_IOC;
 416         }
 417
 418         if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
 419                 ret |= FPSCR_C | FPSCR_V;
 420                 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 421                         /*
 422                          * Signalling NaN, or signalling on quiet NaN
 423                          */
 424                         ret |= FPSCR_IOC;
 425         }
 426
 427         if (ret == 0) {
 428                 if (d == m || vfp_single_packed_abs(d | m) == 0) {
 429                         /*
 430                          * equal
 431                          */
 432                         ret |= FPSCR_Z | FPSCR_C;
 433                 } else if (vfp_single_packed_sign(d ^ m)) {
 434                         /*
 435                          * different signs
 436                          */
 437                         if (vfp_single_packed_sign(d))
 438                                 /*
 439                                  * d is negative, so d < m
 440                                  */
 441                                 ret |= FPSCR_N;
 442                         else
 443                                 /*
 444                                  * d is positive, so d > m
 445                                  */
 446                                 ret |= FPSCR_C;
 447                 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
 448                         /*
 449                          * d < m
 450                          */
 451                         ret |= FPSCR_N;
 452                 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
 453                         /*
 454                          * d > m
 455                          */
 456                         ret |= FPSCR_C;
 457                 }
 458         }
 459         return ret;
 460 }
 461
 462 static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
 463 {
 464         return vfp_compare(sd, 0, m, fpscr);
 465 }
 466
 467 static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
 468 {
 469         return vfp_compare(sd, 1, m, fpscr);
 470 }
 471
 472 static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
 473 {
 474         return vfp_compare(sd, 0, 0, fpscr);
 475 }
 476
 477 static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
 478 {
 479         return vfp_compare(sd, 1, 0, fpscr);
 480 }
 481
 482 static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
 483 {
 484         struct vfp_single vsm;
 485         struct vfp_double vdd;
 486         int tm;
 487         u32 exceptions = 0;
 488
 489         vfp_single_unpack(&vsm, m);
 490
 491         tm = vfp_single_type(&vsm);
 492
 493         /*
 494          * If we have a signalling NaN, signal invalid operation.
 495          */
 496         if (tm == VFP_SNAN)
 497                 exceptions = FPSCR_IOC;
 498
 499         if (tm & VFP_DENORMAL)
 500                 vfp_single_normalise_denormal(&vsm);
 501
 502         vdd.sign = vsm.sign;
 503         vdd.significand = (u64)vsm.significand << 32;
 504
 505         /*
 506          * If we have an infinity or NaN, the exponent must be 2047.
 507          */
 508         if (tm & (VFP_INFINITY|VFP_NAN)) {
 509                 vdd.exponent = 2047;
 510                 if (tm == VFP_QNAN)
 511                         vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
 512                 goto pack_nan;
 513         } else if (tm & VFP_ZERO)
 514                 vdd.exponent = 0;
 515         else
 516                 vdd.exponent = vsm.exponent + (1023 - 127);
 517
 518         return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");
 519
 520  pack_nan:
 521         vfp_put_double(vfp_double_pack(&vdd), dd);
 522         return exceptions;
 523 }
 524
 525 static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
 526 {
 527         struct vfp_single vs;
 528
 529         vs.sign = 0;
 530         vs.exponent = 127 + 31 - 1;
 531         vs.significand = (u32)m;
 532
 533         return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
 534 }
 535
 536 static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
 537 {
 538         struct vfp_single vs;
 539
 540         vs.sign = (m & 0x80000000) >> 16;
 541         vs.exponent = 127 + 31 - 1;
 542         vs.significand = vs.sign ? -m : m;
 543
 544         return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
 545 }
 546
 547 static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
 548 {
 549         struct vfp_single vsm;
 550         u32 d, exceptions = 0;
 551         int rmode = fpscr & FPSCR_RMODE_MASK;
 552         int tm;
 553
 554         vfp_single_unpack(&vsm, m);
 555         vfp_single_dump("VSM", &vsm);
 556
 557         /*
 558          * Do we have a denormalised number?
 559          */
 560         tm = vfp_single_type(&vsm);
 561         if (tm & VFP_DENORMAL)
 562                 exceptions |= FPSCR_IDC;
 563
 564         if (tm & VFP_NAN)
 565                 vsm.sign = 0;
 566
 567         if (vsm.exponent >= 127 + 32) {
 568                 d = vsm.sign ? 0 : 0xffffffff;
 569                 exceptions = FPSCR_IOC;
 570         } else if (vsm.exponent >= 127 - 1) {
 571                 int shift = 127 + 31 - vsm.exponent;
 572                 u32 rem, incr = 0;
 573
 574                 /*
 575                  * 2^0 <= m < 2^32-2^8
 576                  */
 577                 d = (vsm.significand << 1) >> shift;
 578                 rem = vsm.significand << (33 - shift);
 579
 580                 if (rmode == FPSCR_ROUND_NEAREST) {
 581                         incr = 0x80000000;
 582                         if ((d & 1) == 0)
 583                                 incr -= 1;
 584                 } else if (rmode == FPSCR_ROUND_TOZERO) {
 585                         incr = 0;
 586                 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 587                         incr = ~0;
 588                 }
 589
 590                 if ((rem + incr) < rem) {
 591                         if (d < 0xffffffff)
 592                                 d += 1;
 593                         else
 594                                 exceptions |= FPSCR_IOC;
 595                 }
 596
 597                 if (d && vsm.sign) {
 598                         d = 0;
 599                         exceptions |= FPSCR_IOC;
 600                 } else if (rem)
 601                         exceptions |= FPSCR_IXC;
 602         } else {
 603                 d = 0;
 604                 if (vsm.exponent | vsm.significand) {
 605                         exceptions |= FPSCR_IXC;
 606                         if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 607                                 d = 1;
 608                         else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
 609                                 d = 0;
 610                                 exceptions |= FPSCR_IOC;
 611                         }
 612                 }
 613         }
 614
 615         pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 616
 617         vfp_put_float(d, sd);
 618
 619         return exceptions;
 620 }
 621
 622 static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
 623 {
 624         return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
 625 }
 626
 627 static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
 628 {
 629         struct vfp_single vsm;
 630         u32 d, exceptions = 0;
 631         int rmode = fpscr & FPSCR_RMODE_MASK;
 632         int tm;
 633
 634         vfp_single_unpack(&vsm, m);
 635         vfp_single_dump("VSM", &vsm);
 636
 637         /*
 638          * Do we have a denormalised number?
 639          */
 640         tm = vfp_single_type(&vsm);
 641         if (vfp_single_type(&vsm) & VFP_DENORMAL)
 642                 exceptions |= FPSCR_IDC;
 643
 644         if (tm & VFP_NAN) {
 645                 d = 0;
 646                 exceptions |= FPSCR_IOC;
 647         } else if (vsm.exponent >= 127 + 32) {
 648                 /*
 649                  * m >= 2^31-2^7: invalid
 650                  */
 651                 d = 0x7fffffff;
 652                 if (vsm.sign)
 653                         d = ~d;
 654                 exceptions |= FPSCR_IOC;
 655         } else if (vsm.exponent >= 127 - 1) {
 656                 int shift = 127 + 31 - vsm.exponent;
 657                 u32 rem, incr = 0;
 658
 659                 /* 2^0 <= m <= 2^31-2^7 */
 660                 d = (vsm.significand << 1) >> shift;
 661                 rem = vsm.significand << (33 - shift);
 662
 663                 if (rmode == FPSCR_ROUND_NEAREST) {
 664                         incr = 0x80000000;
 665                         if ((d & 1) == 0)
 666                                 incr -= 1;
 667                 } else if (rmode == FPSCR_ROUND_TOZERO) {
 668                         incr = 0;
 669                 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 670                         incr = ~0;
 671                 }
 672
 673                 if ((rem + incr) < rem && d < 0xffffffff)
 674                         d += 1;
 675                 if (d > 0x7fffffff + (vsm.sign != 0)) {
 676                         d = 0x7fffffff + (vsm.sign != 0);
 677                         exceptions |= FPSCR_IOC;
 678                 } else if (rem)
 679                         exceptions |= FPSCR_IXC;
 680
 681                 if (vsm.sign)
 682                         d = -d;
 683         } else {
 684                 d = 0;
 685                 if (vsm.exponent | vsm.significand) {
 686                         exceptions |= FPSCR_IXC;
 687                         if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 688                                 d = 1;
 689                         else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
 690                                 d = -1;
 691                 }
 692         }
 693
 694         pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 695
 696         vfp_put_float((s32)d, sd);
 697
 698         return exceptions;
 699 }
 700
 701 static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
 702 {
 703         return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
 704 }
 705
/*
 * Dispatch table for the extended single-precision operations.  Each
 * slot is selected by FEXT_TO_IDX(<opcode>) and holds the handler plus
 * its operation flags; slots not listed here are zero-initialised.
 *
 * NOTE(review): the meaning of OP_SCALAR and OP_DD is defined outside
 * this file - presumably "operate on scalars only" and "destination is
 * a double register"; confirm against the header.
 */
static struct op fops_ext[32] = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_single_fcpy,   0 },
	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_single_fabs,   0 },
	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_single_fneg,   0 },
	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_single_fsqrt,  0 },
	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_single_fcmp,   OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_single_fcmpe,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_single_fcmpz,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_single_fcmpez, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_single_fcvtd,  OP_SCALAR|OP_DD },
	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_single_fuito,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_single_fsito,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_single_ftoui,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_single_ftouiz, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_single_ftosi,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_single_ftosiz, OP_SCALAR },
};
 723
 724
 725
 726
 727
 728 static u32
 729 vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
 730                           struct vfp_single *vsm, u32 fpscr)
 731 {
 732         struct vfp_single *vsp;
 733         u32 exceptions = 0;
 734         int tn, tm;
 735
 736         tn = vfp_single_type(vsn);
 737         tm = vfp_single_type(vsm);
 738
 739         if (tn & tm & VFP_INFINITY) {
 740                 /*
 741                  * Two infinities.  Are they different signs?
 742                  */
 743                 if (vsn->sign ^ vsm->sign) {
 744                         /*
 745                          * different signs -> invalid
 746                          */
 747                         exceptions = FPSCR_IOC;
 748                         vsp = &vfp_single_default_qnan;
 749                 } else {
 750                         /*
 751                          * same signs -> valid
 752                          */
 753                         vsp = vsn;
 754                 }
 755         } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
 756                 /*
 757                  * One infinity and one number -> infinity
 758                  */
 759                 vsp = vsn;
 760         } else {
 761                 /*
 762                  * 'n' is a NaN of some type
 763                  */
 764                 return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 765         }
 766         *vsd = *vsp;
 767         return exceptions;
 768 }
 769
 770 static u32
 771 vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
 772                struct vfp_single *vsm, u32 fpscr)
 773 {
 774         u32 exp_diff, m_sig;
 775
 776         if (vsn->significand & 0x80000000 ||
 777             vsm->significand & 0x80000000) {
 778                 pr_info("VFP: bad FP values in %s\n", __func__);
 779                 vfp_single_dump("VSN", vsn);
 780                 vfp_single_dump("VSM", vsm);
 781         }
 782
 783         /*
 784          * Ensure that 'n' is the largest magnitude number.  Note that
 785          * if 'n' and 'm' have equal exponents, we do not swap them.
 786          * This ensures that NaN propagation works correctly.
 787          */
 788         if (vsn->exponent < vsm->exponent) {
 789                 struct vfp_single *t = vsn;
 790                 vsn = vsm;
 791                 vsm = t;
 792         }
 793
 794         /*
 795          * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
 796          * infinity or a NaN here.
 797          */
 798         if (vsn->exponent == 255)
 799                 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
 800
 801         /*
 802          * We have two proper numbers, where 'vsn' is the larger magnitude.
 803          *
 804          * Copy 'n' to 'd' before doing the arithmetic.
 805          */
 806         *vsd = *vsn;
 807
 808         /*
 809          * Align both numbers.
 810          */
 811         exp_diff = vsn->exponent - vsm->exponent;
 812         m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
 813
 814         /*
 815          * If the signs are different, we are really subtracting.
 816          */
 817         if (vsn->sign ^ vsm->sign) {
 818                 m_sig = vsn->significand - m_sig;
 819                 if ((s32)m_sig < 0) {
 820                         vsd->sign = vfp_sign_negate(vsd->sign);
 821                         m_sig = -m_sig;
 822                 } else if (m_sig == 0) {
 823                         vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
 824                                       FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 825                 }
 826         } else {
 827                 m_sig = vsn->significand + m_sig;
 828         }
 829         vsd->significand = m_sig;
 830
 831         return 0;
 832 }
 833
 834 static u32
 835 vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
 836 {
 837         vfp_single_dump("VSN", vsn);
 838         vfp_single_dump("VSM", vsm);
 839
 840         /*
 841          * Ensure that 'n' is the largest magnitude number.  Note that
 842          * if 'n' and 'm' have equal exponents, we do not swap them.
 843          * This ensures that NaN propagation works correctly.
 844          */
 845         if (vsn->exponent < vsm->exponent) {
 846                 struct vfp_single *t = vsn;
 847                 vsn = vsm;
 848                 vsm = t;
 849                 pr_debug("VFP: swapping M <-> N\n");
 850         }
 851
 852         vsd->sign = vsn->sign ^ vsm->sign;
 853
 854         /*
 855          * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
 856          */
 857         if (vsn->exponent == 255) {
 858                 if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
 859                         return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 860                 if ((vsm->exponent | vsm->significand) == 0) {
 861                         *vsd = vfp_single_default_qnan;
 862                         return FPSCR_IOC;
 863                 }
 864                 vsd->exponent = vsn->exponent;
 865                 vsd->significand = 0;
 866                 return 0;
 867         }
 868
 869         /*
 870          * If 'm' is zero, the result is always zero.  In this case,
 871          * 'n' may be zero or a number, but it doesn't matter which.
 872          */
 873         if ((vsm->exponent | vsm->significand) == 0) {
 874                 vsd->exponent = 0;
 875                 vsd->significand = 0;
 876                 return 0;
 877         }
 878
 879         /*
 880          * We add 2 to the destination exponent for the same reason as
 881          * the addition case - though this time we have +1 from each
 882          * input operand.
 883          */
 884         vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
 885         vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
 886
 887         vfp_single_dump("VSD", vsd);
 888         return 0;
 889 }
 890
 891 #define NEG_MULTIPLY    (1 << 0)
 892 #define NEG_SUBTRACT    (1 << 1)
 893
 894 static u32
 895 vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
 896 {
 897         struct vfp_single vsd, vsp, vsn, vsm;
 898         u32 exceptions;
 899         s32 v;
 900
 901         v = vfp_get_float(sn);
 902         pr_debug("VFP: s%u = %08x\n", sn, v);
 903         vfp_single_unpack(&vsn, v);
 904         if (vsn.exponent == 0 && vsn.significand)
 905                 vfp_single_normalise_denormal(&vsn);
 906
 907         vfp_single_unpack(&vsm, m);
 908         if (vsm.exponent == 0 && vsm.significand)
 909                 vfp_single_normalise_denormal(&vsm);
 910
 911         exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
 912         if (negate & NEG_MULTIPLY)
 913                 vsp.sign = vfp_sign_negate(vsp.sign);
 914
 915         v = vfp_get_float(sd);
 916         pr_debug("VFP: s%u = %08x\n", sd, v);
 917         vfp_single_unpack(&vsn, v);
 918         if (vsn.exponent == 0 && vsn.significand)
 919                 vfp_single_normalise_denormal(&vsn);
 920         if (negate & NEG_SUBTRACT)
 921                 vsn.sign = vfp_sign_negate(vsn.sign);
 922
 923         exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
 924
 925         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
 926 }
 927
 928 /*
 929  * Standard operations
 930  */
 931
 932 /*
 933  * sd = sd + (sn * sm)
 934  */
 935 static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
 936 {
 937         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
 938 }
 939
 940 /*
 941  * sd = sd - (sn * sm)
 942  */
 943 static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
 944 {
 945         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
 946 }
 947
 948 /*
 949  * sd = -sd + (sn * sm)
 950  */
 951 static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
 952 {
 953         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
 954 }
 955
 956 /*
 957  * sd = -sd - (sn * sm)
 958  */
 959 static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
 960 {
 961         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
 962 }
 963
 964 /*
 965  * sd = sn * sm
 966  */
 967 static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
 968 {
 969         struct vfp_single vsd, vsn, vsm;
 970         u32 exceptions;
 971         s32 n = vfp_get_float(sn);
 972
 973         pr_debug("VFP: s%u = %08x\n", sn, n);
 974
 975         vfp_single_unpack(&vsn, n);
 976         if (vsn.exponent == 0 && vsn.significand)
 977                 vfp_single_normalise_denormal(&vsn);
 978
 979         vfp_single_unpack(&vsm, m);
 980         if (vsm.exponent == 0 && vsm.significand)
 981                 vfp_single_normalise_denormal(&vsm);
 982
 983         exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
 984         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
 985 }
 986
 987 /*
 988  * sd = -(sn * sm)
 989  */
 990 static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
 991 {
 992         struct vfp_single vsd, vsn, vsm;
 993         u32 exceptions;
 994         s32 n = vfp_get_float(sn);
 995
 996         pr_debug("VFP: s%u = %08x\n", sn, n);
 997
 998         vfp_single_unpack(&vsn, n);
 999         if (vsn.exponent == 0 && vsn.significand)
1000                 vfp_single_normalise_denormal(&vsn);
1001
1002         vfp_single_unpack(&vsm, m);
1003         if (vsm.exponent == 0 && vsm.significand)
1004                 vfp_single_normalise_denormal(&vsm);
1005
1006         exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1007         vsd.sign = vfp_sign_negate(vsd.sign);
1008         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
1009 }
1010
1011 /*
1012  * sd = sn + sm
1013  */
1014 static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1015 {
1016         struct vfp_single vsd, vsn, vsm;
1017         u32 exceptions;
1018         s32 n = vfp_get_float(sn);
1019
1020         pr_debug("VFP: s%u = %08x\n", sn, n);
1021
1022         /*
1023          * Unpack and normalise denormals.
1024          */
1025         vfp_single_unpack(&vsn, n);
1026         if (vsn.exponent == 0 && vsn.significand)
1027                 vfp_single_normalise_denormal(&vsn);
1028
1029         vfp_single_unpack(&vsm, m);
1030         if (vsm.exponent == 0 && vsm.significand)
1031                 vfp_single_normalise_denormal(&vsm);
1032
1033         exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1034
1035         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1036 }
1037
1038 /*
1039  * sd = sn - sm
1040  */
1041 static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1042 {
1043         /*
1044          * Subtraction is addition with one sign inverted.
1045          */
1046         return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1047 }
1048
1049 /*
1050  * sd = sn / sm
1051  */
1052 static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
1053 {
1054         struct vfp_single vsd, vsn, vsm;
1055         u32 exceptions = 0;
1056         s32 n = vfp_get_float(sn);
1057         int tm, tn;
1058
1059         pr_debug("VFP: s%u = %08x\n", sn, n);
1060
1061         vfp_single_unpack(&vsn, n);
1062         vfp_single_unpack(&vsm, m);
1063
1064         vsd.sign = vsn.sign ^ vsm.sign;
1065
1066         tn = vfp_single_type(&vsn);
1067         tm = vfp_single_type(&vsm);
1068
1069         /*
1070          * Is n a NAN?
1071          */
1072         if (tn & VFP_NAN)
1073                 goto vsn_nan;
1074
1075         /*
1076          * Is m a NAN?
1077          */
1078         if (tm & VFP_NAN)
1079                 goto vsm_nan;
1080
1081         /*
1082          * If n and m are infinity, the result is invalid
1083          * If n and m are zero, the result is invalid
1084          */
1085         if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1086                 goto invalid;
1087
1088         /*
1089          * If n is infinity, the result is infinity
1090          */
1091         if (tn & VFP_INFINITY)
1092                 goto infinity;
1093
1094         /*
1095          * If m is zero, raise div0 exception
1096          */
1097         if (tm & VFP_ZERO)
1098                 goto divzero;
1099
1100         /*
1101          * If m is infinity, or n is zero, the result is zero
1102          */
1103         if (tm & VFP_INFINITY || tn & VFP_ZERO)
1104                 goto zero;
1105
1106         if (tn & VFP_DENORMAL)
1107                 vfp_single_normalise_denormal(&vsn);
1108         if (tm & VFP_DENORMAL)
1109                 vfp_single_normalise_denormal(&vsm);
1110
1111         /*
1112          * Ok, we have two numbers, we can perform division.
1113          */
1114         vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1115         vsm.significand <<= 1;
1116         if (vsm.significand <= (2 * vsn.significand)) {
1117                 vsn.significand >>= 1;
1118                 vsd.exponent++;
1119         }
1120         {
1121                 u64 significand = (u64)vsn.significand << 32;
1122                 do_div(significand, vsm.significand);
1123                 vsd.significand = significand;
1124         }
1125         if ((vsd.significand & 0x3f) == 0)
1126                 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1127
1128         return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
1129
1130  vsn_nan:
1131         exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1132  pack:
1133         vfp_put_float(vfp_single_pack(&vsd), sd);
1134         return exceptions;
1135
1136  vsm_nan:
1137         exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1138         goto pack;
1139
1140  zero:
1141         vsd.exponent = 0;
1142         vsd.significand = 0;
1143         goto pack;
1144
1145  divzero:
1146         exceptions = FPSCR_DZC;
1147  infinity:
1148         vsd.exponent = 255;
1149         vsd.significand = 0;
1150         goto pack;
1151
1152  invalid:
1153         vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
1154         return FPSCR_IOC;
1155 }
1156
1157 static struct op fops[16] = {
1158         [FOP_TO_IDX(FOP_FMAC)]  = { vfp_single_fmac,  0 },
1159         [FOP_TO_IDX(FOP_FNMAC)] = { vfp_single_fnmac, 0 },
1160         [FOP_TO_IDX(FOP_FMSC)]  = { vfp_single_fmsc,  0 },
1161         [FOP_TO_IDX(FOP_FNMSC)] = { vfp_single_fnmsc, 0 },
1162         [FOP_TO_IDX(FOP_FMUL)]  = { vfp_single_fmul,  0 },
1163         [FOP_TO_IDX(FOP_FNMUL)] = { vfp_single_fnmul, 0 },
1164         [FOP_TO_IDX(FOP_FADD)]  = { vfp_single_fadd,  0 },
1165         [FOP_TO_IDX(FOP_FSUB)]  = { vfp_single_fsub,  0 },
1166         [FOP_TO_IDX(FOP_FDIV)]  = { vfp_single_fdiv,  0 },
1167 };
1168
1169 #define FREG_BANK(x)    ((x) & 0x18)
1170 #define FREG_IDX(x)     ((x) & 7)
1171
1172 u32 vfp_single_cpdo(u32 inst, u32 fpscr)
1173 {
1174         u32 op = inst & FOP_MASK;
1175         u32 exceptions = 0;
1176         unsigned int dest;
1177         unsigned int sn = vfp_get_sn(inst);
1178         unsigned int sm = vfp_get_sm(inst);
1179         unsigned int vecitr, veclen, vecstride;
1180         struct op *fop;
1181
1182         vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
1183
1184         fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1185
1186         /*
1187          * fcvtsd takes a dN register number as destination, not sN.
1188          * Technically, if bit 0 of dd is set, this is an invalid
1189          * instruction.  However, we ignore this for efficiency.
1190          * It also only operates on scalars.
1191          */
1192         if (fop->flags & OP_DD)
1193                 dest = vfp_get_dd(inst);
1194         else
1195                 dest = vfp_get_sd(inst);
1196
1197         /*
1198          * If destination bank is zero, vector length is always '1'.
1199          * ARM DDI0100F C5.1.3, C5.3.2.
1200          */
1201         if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
1202                 veclen = 0;
1203         else
1204                 veclen = fpscr & FPSCR_LENGTH_MASK;
1205
1206         pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1207                  (veclen >> FPSCR_LENGTH_BIT) + 1);
1208
1209         if (!fop->fn)
1210                 goto invalid;
1211
1212         for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1213                 s32 m = vfp_get_float(sm);
1214                 u32 except;
1215                 char type;
1216
1217                 type = fop->flags & OP_DD ? 'd' : 's';
1218                 if (op == FOP_EXT)
1219                         pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
1220                                  vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1221                                  sm, m);
1222                 else
1223                         pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
1224                                  vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1225                                  FOP_TO_IDX(op), sm, m);
1226
1227                 except = fop->fn(dest, sn, m, fpscr);
1228                 pr_debug("VFP: itr%d: exceptions=%08x\n",
1229                          vecitr >> FPSCR_LENGTH_BIT, except);
1230
1231                 exceptions |= except;
1232
1233                 /*
1234                  * CHECK: It appears to be undefined whether we stop when
1235                  * we encounter an exception.  We continue.
1236                  */
1237                 dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
1238                 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
1239                 if (FREG_BANK(sm) != 0)
1240                         sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
1241         }
1242         return exceptions;
1243
1244  invalid:
1245         return (u32)-1;
1246 }