1 /* IEEE-754 double-precision functions for Xtensa
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
38 /* Warning! The branch displacements for some Xtensa branch instructions
39 are quite small, and this code has been carefully laid out to keep
40 branch targets in range. If you change anything, be sure to check that
41 the assembler is not relaxing anything to branch over a jump. */
47 .type __negdf2, @function
62 /* Handle NaNs and Infinities. (This code is placed before the
63 start of the function just to keep it in range of the limited
64 branch displacements.) */
67 /* If y is neither Infinity nor NaN, return x. */
68 bnall yh, a6, .Ladd_return_nan_or_inf
69 /* If x is a NaN, return it. Otherwise, return y. */
72 bnez a7, .Ladd_return_nan
79 .Ladd_return_nan_or_inf:
82 bnez a7, .Ladd_return_nan
86 movi a4, 0x80000 /* make it a quiet NaN */
91 /* Operand signs differ. Do a subtraction. */
98 .type __adddf3, @function
103 /* Check if the two operands have the same sign. */
105 bltz a7, .Ladd_opposite_signs
108 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
109 ball xh, a6, .Ladd_xnan_or_inf
110 ball yh, a6, .Ladd_ynan_or_inf
112 /* Compare the exponents. The smaller operand will be shifted
113 right by the exponent difference and added to the larger
117 bltu a7, a8, .Ladd_shiftx
120 /* Check if the smaller (or equal) exponent is zero. */
121 bnone yh, a6, .Ladd_yexpzero
123 /* Replace yh sign/exponent with 0x001. */
129 /* Compute the exponent difference. Optimize for difference < 32. */
131 bgeui a10, 32, .Ladd_bigshifty
133 /* Shift yh/yl right by the exponent difference. Any bits that are
134 shifted out of yl are saved in a9 for rounding the result. */
142 /* Do the 64-bit addition. */
148 /* Check if the add overflowed into the exponent. */
149 extui a10, xh, 20, 12
150 beq a10, a7, .Ladd_round
155 /* y is a subnormal value. Replace its sign/exponent with zero,
156 i.e., no implicit "1.0", and increment the apparent exponent
157 because subnormals behave as if they had the minimum (nonzero)
158 exponent. Test for the case when both exponents are zero. */
161 bnone xh, a6, .Ladd_bothexpzero
166 /* Both exponents are zero. Handle this as a special case. There
167 is no need to shift or round, and the normal code for handling
168 a carry into the exponent field will not work because it
169 assumes there is an implicit "1.0" that needs to be added. */
177 /* Exponent difference > 64 -- just return the bigger value. */
180 /* Shift yh/yl right by the exponent difference. Any bits that are
181 shifted out are saved in a9 for rounding the result. */
183 sll a11, yl /* lost bits shifted out of yl */
188 or a9, a9, a10 /* any positive, nonzero value will work */
192 /* Same as "yexpzero" except skip handling the case when both
193 exponents are zero. */
200 /* Same thing as the "shifty" code, but with x and y swapped. Also,
201 because the exponent difference is always nonzero in this version,
202 the shift sequence can use SLL and skip loading a constant zero. */
203 bnone xh, a6, .Ladd_xexpzero
211 bgeui a10, 32, .Ladd_bigshiftx
224 /* Check if the add overflowed into the exponent. */
225 extui a10, xh, 20, 12
226 bne a10, a8, .Ladd_carry
229 /* Round up if the leftover fraction is >= 1/2. */
232 beqz xl, .Ladd_roundcarry
234 /* Check if the leftover fraction is exactly 1/2. */
236 beqz a9, .Ladd_exactlyhalf
240 /* Mostly the same thing as "bigshifty".... */
241 bgeui a10, 64, .Ladd_returny
258 /* The addition has overflowed into the exponent field, so the
259 value needs to be renormalized. The mantissa of the result
260 can be recovered by subtracting the original exponent and
261 adding 0x100000 (which is the explicit "1.0" for the
262 mantissa of the non-shifted operand -- the "1.0" for the
263 shifted operand was already added). The mantissa can then
264 be shifted right by one bit. The explicit "1.0" of the
265 shifted mantissa then needs to be replaced by the exponent,
266 incremented by one to account for the normalizing shift.
267 It is faster to combine these operations: do the shift first
268 and combine the additions and subtractions. If x is the
269 original exponent, the result is:
270 shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
272 shifted mantissa + ((x + 1) << 19)
273 Note that the exponent is incremented here by leaving the
274 explicit "1.0" of the mantissa in the exponent field. */
276 /* Shift xh/xl right by one bit. Save the lsb of xl. */
282 /* See explanation above. The original exponent is in a8. */
287 /* Return an Infinity if the exponent overflowed. */
288 ball xh, a6, .Ladd_infinity
290 /* Same thing as the "round" code except the msb of the leftover
291 fraction is bit 0 of a10, with the rest of the fraction in a9. */
294 beqz xl, .Ladd_roundcarry
295 beqz a9, .Ladd_exactlyhalf
299 /* Clear the mantissa. */
304 /* The sign bit may have been lost in a carry-out. Put it back. */
310 /* Round down to the nearest even value. */
316 /* xl is always zero when the rounding increment overflows, so
317 there's no need to round it to an even value. */
319 /* Overflow to the exponent is OK. */
326 /* Handle NaNs and Infinities. (This code is placed before the
327 start of the function just to keep it in range of the limited
328 branch displacements.) */
331 /* If y is neither Infinity nor NaN, return x. */
332 bnall yh, a6, .Lsub_return_nan_or_inf
335 /* Both x and y are either NaN or Inf, so the result is NaN. */
336 movi a4, 0x80000 /* make it a quiet NaN */
341 /* Negate y and return it. */
346 .Lsub_return_nan_or_inf:
349 bnez a7, .Lsub_return_nan
352 .Lsub_opposite_signs:
353 /* Operand signs differ. Do an addition. */
360 .type __subdf3, @function
365 /* Check if the two operands have the same sign. */
367 bltz a7, .Lsub_opposite_signs
370 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
371 ball xh, a6, .Lsub_xnan_or_inf
372 ball yh, a6, .Lsub_ynan_or_inf
374 /* Compare the operands. In contrast to addition, the entire
375 value matters here. */
378 bltu xh, yh, .Lsub_xsmaller
379 beq xh, yh, .Lsub_compare_low
382 /* Check if the smaller (or equal) exponent is zero. */
383 bnone yh, a6, .Lsub_yexpzero
385 /* Replace yh sign/exponent with 0x001. */
391 /* Compute the exponent difference. Optimize for difference < 32. */
393 bgeui a10, 32, .Lsub_bigshifty
395 /* Shift yh/yl right by the exponent difference. Any bits that are
396 shifted out of yl are saved in a9 for rounding the result. */
404 /* Do the 64-bit subtraction. */
410 /* Subtract the leftover bits in a9 from zero and propagate any
411 borrow from xh/xl. */
418 /* Check if the subtract underflowed into the exponent. */
419 extui a10, xh, 20, 11
420 beq a10, a7, .Lsub_round
424 /* The high words are equal. Compare the low words. */
425 bltu xl, yl, .Lsub_xsmaller
426 bltu yl, xl, .Lsub_ysmaller
427 /* The operands are equal. Return 0.0. */
433 /* y is a subnormal value. Replace its sign/exponent with zero,
434 i.e., no implicit "1.0". Unless x is also a subnormal, increment
435 y's apparent exponent because subnormals behave as if they had
436 the minimum (nonzero) exponent. */
439 bnone xh, a6, .Lsub_yexpdiff
444 /* Exponent difference > 64 -- just return the bigger value. */
447 /* Shift yh/yl right by the exponent difference. Any bits that are
448 shifted out are saved in a9 for rounding the result. */
450 sll a11, yl /* lost bits shifted out of yl */
455 or a9, a9, a10 /* any positive, nonzero value will work */
459 /* Same thing as the "ysmaller" code, but with x and y swapped and
461 bnone xh, a6, .Lsub_xexpzero
469 bgeui a10, 32, .Lsub_bigshiftx
487 /* Subtract the leftover bits in a9 from zero and propagate any
488 borrow from xh/xl. */
495 /* Check if the subtract underflowed into the exponent. */
496 extui a10, xh, 20, 11
497 bne a10, a8, .Lsub_borrow
500 /* Round up if the leftover fraction is >= 1/2. */
503 beqz xl, .Lsub_roundcarry
505 /* Check if the leftover fraction is exactly 1/2. */
507 beqz a9, .Lsub_exactlyhalf
511 /* Same as "yexpzero". */
514 bnone yh, a6, .Lsub_xexpdiff
519 /* Mostly the same thing as "bigshifty", but with the sign bit of the
520 shifted value set so that the subsequent subtraction flips the
522 bgeui a10, 64, .Lsub_returny
528 slli xh, a6, 11 /* set sign bit of xh */
534 /* Negate and return y. */
541 /* The subtraction has underflowed into the exponent field, so the
542 value needs to be renormalized. Shift the mantissa left as
543 needed to remove any leading zeros and adjust the exponent
544 accordingly. If the exponent is not large enough to remove
545 all the leading zeros, the result will be a subnormal value. */
548 beqz a8, .Lsub_xhzero
549 do_nsau a6, a8, a7, a11
551 bge a6, a10, .Lsub_subnormal
555 /* Shift the mantissa (a8/xl/a9) left by a6. */
561 /* Combine the shifted mantissa with the sign and exponent,
562 decrementing the exponent by a6. (The exponent has already
563 been decremented by one due to the borrow from the subtraction,
564 but adding the mantissa will increment the exponent by one.) */
572 /* Round down to the nearest even value. */
578 /* xl is always zero when the rounding increment overflows, so
579 there's no need to round it to an even value. */
581 /* Overflow to the exponent is OK. */
585 /* When normalizing the result, all the mantissa bits in the high
586 word are zero. Shift by "20 + (leading zero count of xl) + 1". */
587 do_nsau a6, xl, a7, a11
589 blt a10, a6, .Lsub_subnormal
591 .Lsub_normalize_shift:
592 bltui a6, 32, .Lsub_shift_lt32
606 /* The exponent is too small to shift away all the leading zeros.
607 Set a6 to the current exponent (which has already been
608 decremented by the borrow) so that the exponent of the result
609 will be zero. Do not add 1 to a6 in this case, because: (1)
610 adding the mantissa will not increment the exponent, so there is
611 no need to subtract anything extra from the exponent to
612 compensate, and (2) the effective exponent of a subnormal is 1
613 not 0 so the shift amount must be 1 smaller than normal. */
615 j .Lsub_normalize_shift
617 #endif /* L_addsubdf3 */
622 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
623 #define XCHAL_NO_MUL 1
629 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
630 (This code is placed before the start of the function just to
631 keep it in range of the limited branch displacements.) */
634 /* Clear the sign bit of x. */
638 /* If x is zero, return zero. */
640 beqz a10, .Lmul_return_zero
642 /* Normalize x. Adjust the exponent in a8. */
643 beqz xh, .Lmul_xh_zero
644 do_nsau a10, xh, a11, a12
653 do_nsau a10, xl, a11, a12
658 bltz a10, .Lmul_xl_srl
668 /* Clear the sign bit of y. */
672 /* If y is zero, return zero. */
674 beqz a10, .Lmul_return_zero
676 /* Normalize y. Adjust the exponent in a9. */
677 beqz yh, .Lmul_yh_zero
678 do_nsau a10, yh, a11, a12
687 do_nsau a10, yl, a11, a12
692 bltz a10, .Lmul_yl_srl
702 /* Return zero with the appropriate sign bit. */
709 /* If y is zero, return NaN. */
712 beqz a8, .Lmul_return_nan
714 /* If y is NaN, return y. */
715 bnall yh, a6, .Lmul_returnx
718 beqz a8, .Lmul_returnx
727 bnez a8, .Lmul_return_nan
728 /* Set the sign bit and return. */
736 /* If x is zero, return NaN. */
737 bnez xl, .Lmul_returny
739 bnez a8, .Lmul_returny
743 movi a4, 0x80000 /* make it a quiet NaN */
749 .type __muldf3, @function
751 #if __XTENSA_CALL0_ABI__
759 /* This is not really a leaf function; allocate enough stack space
760 to allow CALL12s to a helper function. */
767 /* Get the sign of the result. */
770 /* Check for NaN and infinity. */
771 ball xh, a6, .Lmul_xnan_or_inf
772 ball yh, a6, .Lmul_ynan_or_inf
774 /* Extract the exponents. */
778 beqz a8, .Lmul_xexpzero
780 beqz a9, .Lmul_yexpzero
783 /* Add the exponents. */
786 /* Replace sign/exponent fields with explicit "1.0". */
793 /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
794 The least-significant word of the result is thrown away except
795 that if it is nonzero, the lsb of a6 is set to 1. */
796 #if XCHAL_HAVE_MUL32_HIGH
798 /* Compute a6 with any carry-outs in a10. */
811 /* If the low word of the result is nonzero, set the lsb of a6. */
817 /* Compute xl with any carry-outs in a9. */
838 #else /* ! XCHAL_HAVE_MUL32_HIGH */
840 /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
841 products. These partial products are:
866 where the input chunks are (hh, hl, lh, ll). If using the Mul16
867 or Mul32 multiplier options, these input chunks must be stored in
868 separate registers. For Mac16, the UMUL.AA.* opcodes can specify
869 that the inputs come from either half of the registers, so there
870 is no need to shift them out ahead of time. If there is no
871 multiply hardware, the 16-bit chunks can be extracted when setting
872 up the arguments to the separate multiply function. */
874 /* Save a7 since it is needed to hold a temporary value. */
876 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
877 /* Calling a separate multiply function will clobber a0 and requires
878 use of a8 as a temporary, so save those values now. (The function
879 uses a custom ABI so nothing else needs to be saved.) */
884 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
891 /* Get the high halves of the inputs into registers. */
902 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
903 /* Clear the high halves of the inputs. This does not matter
904 for MUL16 because the high bits are ignored. */
910 #endif /* MUL16 || MUL32 */
915 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
916 mul16u dst, xreg ## xhalf, yreg ## yhalf
918 #elif XCHAL_HAVE_MUL32
920 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
921 mull dst, xreg ## xhalf, yreg ## yhalf
923 #elif XCHAL_HAVE_MAC16
925 /* The preprocessor insists on inserting a space when concatenating after
926 a period in the definition of do_mul below. These macros are a workaround
927 using underscores instead of periods when doing the concatenation. */
928 #define umul_aa_ll umul.aa.ll
929 #define umul_aa_lh umul.aa.lh
930 #define umul_aa_hl umul.aa.hl
931 #define umul_aa_hh umul.aa.hh
933 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
934 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
937 #else /* no multiply hardware */
939 #define set_arg_l(dst, src) \
940 extui dst, src, 0, 16
941 #define set_arg_h(dst, src) \
944 #if __XTENSA_CALL0_ABI__
945 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
946 set_arg_ ## xhalf (a13, xreg); \
947 set_arg_ ## yhalf (a14, yreg); \
948 call0 .Lmul_mulsi3; \
951 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
952 set_arg_ ## xhalf (a14, xreg); \
953 set_arg_ ## yhalf (a15, yreg); \
954 call12 .Lmul_mulsi3; \
956 #endif /* __XTENSA_CALL0_ABI__ */
958 #endif /* no multiply hardware */
960 /* Add pp1 and pp2 into a10 with carry-out in a9. */
961 do_mul(a10, xl, l, yl, h) /* pp 1 */
962 do_mul(a11, xl, h, yl, l) /* pp 2 */
968 /* Initialize a6 with a9/a10 shifted into position. Note that
969 this value can be safely incremented without any carry-outs. */
973 /* Compute the low word into a10. */
974 do_mul(a11, xl, l, yl, l) /* pp 0 */
980 /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
981 This is good enough to determine the low half of a6, so that any
982 nonzero bits from the low word of the result can be collapsed
983 into a6, freeing up a register. */
985 do_mul(a11, xl, l, yh, l) /* pp 3 */
990 do_mul(a11, xl, h, yl, h) /* pp 4 */
995 do_mul(a11, xh, l, yl, l) /* pp 5 */
1000 /* Collapse any nonzero bits from the low word into a6. */
1005 /* Add pp6-9 into a11 with carry-outs in a10. */
1006 do_mul(a7, xl, l, yh, h) /* pp 6 */
1007 do_mul(a11, xh, h, yl, l) /* pp 9 */
1013 do_mul(a7, xl, h, yh, l) /* pp 7 */
1018 do_mul(a7, xh, l, yl, h) /* pp 8 */
1023 /* Shift a10/a11 into position, and add low half of a11 to a6. */
1031 /* Add pp10-12 into xl with carry-outs in a9. */
1033 do_mul(xl, xl, h, yh, h) /* pp 10 */
1038 do_mul(a10, xh, l, yh, l) /* pp 11 */
1043 do_mul(a10, xh, h, yl, h) /* pp 12 */
1048 /* Add pp13-14 into a11 with carry-outs in a10. */
1049 do_mul(a11, xh, l, yh, h) /* pp 13 */
1050 do_mul(a7, xh, h, yh, l) /* pp 14 */
1056 /* Shift a10/a11 into position, and add low half of a11 to a6. */
1065 do_mul(xh, xh, h, yh, h) /* pp 15 */
1068 /* Restore values saved on the stack during the multiplication. */
1070 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1074 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
1076 /* Shift left by 12 bits, unless there was a carry-out from the
1077 multiply, in which case, shift by 11 bits and increment the
1078 exponent. Note: It is convenient to use the constant 0x3ff
1079 instead of 0x400 when removing the extra exponent bias (so that
1080 it is easy to construct 0x7fe for the overflow check). Reverse
1081 the logic here to decrement the exponent sum by one unless there
1084 srli a5, xh, 21 - 12
1093 /* Subtract the extra bias from the exponent sum (plus one to account
1094 for the explicit "1.0" of the mantissa that will be added to the
1095 exponent in the final result). */
1099 /* Check for over/underflow. The value in a8 is one less than the
1100 final exponent, so values in the range 0..7fd are OK here. */
1101 slli a4, a4, 1 /* 0x7fe */
1102 bgeu a8, a4, .Lmul_overflow
1106 bgez a6, .Lmul_rounded
1108 beqz xl, .Lmul_roundcarry
1110 beqz a6, .Lmul_exactlyhalf
1113 /* Add the exponent to the mantissa. */
1118 /* Add the sign bit. */
1124 #if __XTENSA_CALL0_ABI__
1134 /* Round down to the nearest even value. */
1140 /* xl is always zero when the rounding increment overflows, so
1141 there's no need to round it to an even value. */
1143 /* Overflow is OK -- it will be added to the exponent. */
1147 bltz a8, .Lmul_underflow
1148 /* Return +/- Infinity. */
1149 addi a8, a4, 1 /* 0x7ff */
1155 /* Create a subnormal value, where the exponent field contains zero,
1156 but the effective exponent is 1. The value of a8 is one less than
1157 the actual exponent, so just negate it to get the shift amount. */
1161 bgeui a8, 32, .Lmul_bigshift
1163 /* Shift xh/xl right. Any bits that are shifted out of xl are saved
1164 in a6 (combined with the shifted-out bits currently in a6) for
1165 rounding the result. */
1172 bgeui a8, 64, .Lmul_flush_to_zero
1173 sll a10, xl /* lost bits shifted out of xl */
1179 /* Set the exponent to zero. */
1182 /* Pack any nonzero bits shifted out into a6. */
1183 beqz a9, .Lmul_round
1188 .Lmul_flush_to_zero:
1189 /* Return zero with the appropriate sign bit. */
1197 /* For Xtensa processors with no multiply hardware, this simplified
1198 version of _mulsi3 is used for multiplying 16-bit chunks of
1199 the floating-point mantissas. When using CALL0, this function
1200 uses a custom ABI: the inputs are passed in a13 and a14, the
1201 result is returned in a12, and a8 and a15 are clobbered. */
1205 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1207 1: add \tmp1, \src2, \dst
1208 extui \tmp2, \src1, 0, 1
1209 movnez \dst, \tmp1, \tmp2
1211 do_addx2 \tmp1, \src2, \dst, \tmp1
1212 extui \tmp2, \src1, 1, 1
1213 movnez \dst, \tmp1, \tmp2
1215 do_addx4 \tmp1, \src2, \dst, \tmp1
1216 extui \tmp2, \src1, 2, 1
1217 movnez \dst, \tmp1, \tmp2
1219 do_addx8 \tmp1, \src2, \dst, \tmp1
1220 extui \tmp2, \src1, 3, 1
1221 movnez \dst, \tmp1, \tmp2
1223 srli \src1, \src1, 4
1224 slli \src2, \src2, 4
1227 #if __XTENSA_CALL0_ABI__
1228 mul_mulsi3_body a12, a13, a14, a15, a8
1230 /* The result will be written into a2, so save that argument in a4. */
1232 mul_mulsi3_body a2, a4, a3, a5, a6
1235 #endif /* XCHAL_NO_MUL */
1236 #endif /* L_muldf3 */
1242 #if XCHAL_HAVE_DFP_DIV
1247 .type __divdf3, @function
1294 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1295 (This code is placed before the start of the function just to
1296 keep it in range of the limited branch displacements.) */
1299 /* Clear the sign bit of y. */
1303 /* Check for division by zero. */
1305 beqz a10, .Ldiv_yzero
1307 /* Normalize y. Adjust the exponent in a9. */
1308 beqz yh, .Ldiv_yh_zero
1309 do_nsau a10, yh, a11, a9
1318 do_nsau a10, yl, a11, a9
1323 bltz a10, .Ldiv_yl_srl
1333 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
1341 movi a4, 0x80000 /* make it a quiet NaN */
1347 /* Clear the sign bit of x. */
1351 /* If x is zero, return zero. */
1353 beqz a10, .Ldiv_return_zero
1355 /* Normalize x. Adjust the exponent in a8. */
1356 beqz xh, .Ldiv_xh_zero
1357 do_nsau a10, xh, a11, a8
1366 do_nsau a10, xl, a11, a8
1371 bltz a10, .Ldiv_xl_srl
1381 /* Return zero with the appropriate sign bit. */
1388 /* Set the sign bit of the result. */
1392 /* If y is NaN or Inf, return NaN. */
1393 ball yh, a6, .Ldiv_return_nan
1396 bnez a8, .Ldiv_return_nan
1400 /* If y is Infinity, return zero. */
1403 beqz a8, .Ldiv_return_zero
1404 /* y is NaN; return it. */
1409 movi a4, 0x80000 /* make it a quiet NaN */
1419 .type __divdf3, @function
1424 /* Get the sign of the result. */
1427 /* Check for NaN and infinity. */
1428 ball xh, a6, .Ldiv_xnan_or_inf
1429 ball yh, a6, .Ldiv_ynan_or_inf
1431 /* Extract the exponents. */
1432 extui a8, xh, 20, 11
1433 extui a9, yh, 20, 11
1435 beqz a9, .Ldiv_yexpzero
1437 beqz a8, .Ldiv_xexpzero
1440 /* Subtract the exponents. */
1443 /* Replace sign/exponent fields with explicit "1.0". */
1450 /* Set SAR for left shift by one. */
1453 /* The first digit of the mantissa division must be a one.
1454 Shift x (and adjust the exponent) as needed to make this true. */
1456 beq yh, xh, .Ldiv_highequal1
1461 /* Do the first subtraction and shift. */
1469 /* Put the quotient into a10/a11. */
1473 /* Divide one bit at a time for 52 bits. */
1475 #if XCHAL_HAVE_LOOPS
1476 loop a9, .Ldiv_loopend
1479 /* Shift the quotient << 1. */
1483 /* Is this digit a 0 or 1? */
1485 beq xh, yh, .Ldiv_highequal2
1487 /* Output a 1 and subtract. */
1494 /* Shift the dividend << 1. */
1498 #if !XCHAL_HAVE_LOOPS
1504 /* Add the exponent bias (less one to account for the explicit "1.0"
1505 of the mantissa that will be added to the exponent in the final
1510 /* Check for over/underflow. The value in a8 is one less than the
1511 final exponent, so values in the range 0..7fd are OK here. */
1512 addmi a9, a9, 0x400 /* 0x7fe */
1513 bgeu a8, a9, .Ldiv_overflow
1516 /* Round. The remainder (<< 1) is in xh/xl. */
1517 bltu xh, yh, .Ldiv_rounded
1518 beq xh, yh, .Ldiv_highequal3
1521 beqz a11, .Ldiv_roundcarry
1525 /* Add the exponent to the mantissa. */
1530 /* Add the sign bit. */
1541 bltu xl, yl, .Ldiv_rounded
1542 bne xl, yl, .Ldiv_roundup
1544 /* Remainder is exactly half the divisor. Round even. */
1546 beqz a11, .Ldiv_roundcarry
1552 bltz a8, .Ldiv_underflow
1553 /* Return +/- Infinity. */
1554 addi a8, a9, 1 /* 0x7ff */
1560 /* Create a subnormal value, where the exponent field contains zero,
1561 but the effective exponent is 1. The value of a8 is one less than
1562 the actual exponent, so just negate it to get the shift amount. */
1565 bgeui a8, 32, .Ldiv_bigshift
1567 /* Shift a10/a11 right. Any bits that are shifted out of a11 are
1568 saved in a6 for rounding the result. */
1575 bgeui a8, 64, .Ldiv_flush_to_zero
1576 sll a9, a11 /* lost bits shifted out of a11 */
1582 /* Set the exponent to zero. */
1585 /* Pack any nonzero remainder (in xh/xl) into a6. */
1591 /* Round a10/a11 based on the bits shifted out into a6. */
1592 1: bgez a6, .Ldiv_rounded
1594 beqz a11, .Ldiv_roundcarry
1596 bnez a6, .Ldiv_rounded
1602 /* a11 is always zero when the rounding increment overflows, so
1603 there's no need to round it to an even value. */
1605 /* Overflow to the exponent field is OK. */
1608 .Ldiv_flush_to_zero:
1609 /* Return zero with the appropriate sign bit. */
1615 #endif /* XCHAL_HAVE_DFP_DIV */
1617 #endif /* L_divdf3 */
1621 /* Equal and Not Equal */
1626 .set __nedf2, __eqdf2
1627 .type __eqdf2, @function
1633 /* The values are equal but NaN != NaN. Check the exponent. */
1645 /* Check if the mantissas are nonzero. */
1650 /* Check if x and y are zero with different signs. */
1653 or a7, a7, xl /* xl == yl here */
1655 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1656 of x when exponent(x) = 0x7ff and x == y. */
1667 .type __gtdf2, @function
1672 1: bnall yh, a6, .Lle_cmp
1674 /* Check if y is a NaN. */
1681 /* Check if x is a NaN. */
1689 /* Less Than or Equal */
1693 .type __ledf2, @function
1698 1: bnall yh, a6, .Lle_cmp
1700 /* Check if y is a NaN. */
1707 /* Check if x is a NaN. */
1715 /* Check if x and y have different signs. */
1717 bltz a7, .Lle_diff_signs
1719 /* Check if x is negative. */
1722 /* Check if x <= y. */
1730 /* Check if y <= x. */
1740 /* Check if both x and y are zero. */
1751 /* Greater Than or Equal */
1755 .type __gedf2, @function
1760 1: bnall yh, a6, .Llt_cmp
1762 /* Check if y is a NaN. */
1769 /* Check if x is a NaN. */
1781 .type __ltdf2, @function
1786 1: bnall yh, a6, .Llt_cmp
1788 /* Check if y is a NaN. */
1795 /* Check if x is a NaN. */
1803 /* Check if x and y have different signs. */
1805 bltz a7, .Llt_diff_signs
1807 /* Check if x is negative. */
1810 /* Check if x < y. */
1818 /* Check if y < x. */
1828 /* Check if both x and y are nonzero. */
1843 .type __unorddf2, @function
1864 #endif /* L_cmpdf2 */
1870 .type __fixdfsi, @function
1874 /* Check for NaN and Infinity. */
1876 ball xh, a6, .Lfixdfsi_nan_or_inf
1878 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
1879 extui a4, xh, 20, 11
1880 extui a5, a6, 19, 10 /* 0x3fe */
1882 bgei a4, 32, .Lfixdfsi_maxint
1883 blti a4, 1, .Lfixdfsi_zero
1885 /* Add explicit "1.0" and shift << 11. */
1890 /* Shift back to the right, based on the exponent. */
1891 ssl a4 /* shift by 32 - a4 */
1894 /* Negate the result if sign != 0. */
1899 .Lfixdfsi_nan_or_inf:
1900 /* Handle Infinity and NaN. */
1903 beqz a4, .Lfixdfsi_maxint
1905 /* Translate NaN to +maxint. */
1909 slli a4, a6, 11 /* 0x80000000 */
1910 addi a5, a4, -1 /* 0x7fffffff */
1919 #endif /* L_fixdfsi */
1925 .type __fixdfdi, @function
1929 /* Check for NaN and Infinity. */
1931 ball xh, a6, .Lfixdfdi_nan_or_inf
1933 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
1934 extui a4, xh, 20, 11
1935 extui a5, a6, 19, 10 /* 0x3fe */
1937 bgei a4, 64, .Lfixdfdi_maxint
1938 blti a4, 1, .Lfixdfdi_zero
1940 /* Add explicit "1.0" and shift << 11. */
1946 /* Shift back to the right, based on the exponent. */
1947 ssl a4 /* shift by 64 - a4 */
1948 bgei a4, 32, .Lfixdfdi_smallshift
1953 /* Negate the result if sign != 0. */
1961 .Lfixdfdi_smallshift:
1966 .Lfixdfdi_nan_or_inf:
1967 /* Handle Infinity and NaN. */
1970 beqz a4, .Lfixdfdi_maxint
1972 /* Translate NaN to +maxint. */
1976 slli a7, a6, 11 /* 0x80000000 */
1982 1: addi xh, a7, -1 /* 0x7fffffff */
1991 #endif /* L_fixdfdi */
1996 .global __fixunsdfsi
1997 .type __fixunsdfsi, @function
2001 /* Check for NaN and Infinity. */
2003 ball xh, a6, .Lfixunsdfsi_nan_or_inf
2005 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
2006 extui a4, xh, 20, 11
2007 extui a5, a6, 20, 10 /* 0x3ff */
2009 bgei a4, 32, .Lfixunsdfsi_maxint
2010 bltz a4, .Lfixunsdfsi_zero
2012 /* Add explicit "1.0" and shift << 11. */
2017 /* Shift back to the right, based on the exponent. */
2019 beqi a4, 32, .Lfixunsdfsi_bigexp
2020 ssl a4 /* shift by 32 - a4 */
2023 /* Negate the result if sign != 0. */
2028 .Lfixunsdfsi_nan_or_inf:
2029 /* Handle Infinity and NaN. */
2032 beqz a4, .Lfixunsdfsi_maxint
2034 /* Translate NaN to 0xffffffff. */
2038 .Lfixunsdfsi_maxint:
2039 slli a4, a6, 11 /* 0x80000000 */
2040 movi a5, -1 /* 0xffffffff */
2049 .Lfixunsdfsi_bigexp:
2050 /* Handle unsigned maximum exponent case. */
2052 mov a2, a5 /* no shift needed */
2055 /* Return 0x80000000 if negative. */
2059 #endif /* L_fixunsdfsi */
2064 .global __fixunsdfdi
2065 .type __fixunsdfdi, @function
2069 /* Check for NaN and Infinity. */
2071 ball xh, a6, .Lfixunsdfdi_nan_or_inf
2073 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
2074 extui a4, xh, 20, 11
2075 extui a5, a6, 20, 10 /* 0x3ff */
2077 bgei a4, 64, .Lfixunsdfdi_maxint
2078 bltz a4, .Lfixunsdfdi_zero
2080 /* Add explicit "1.0" and shift << 11. */
2086 /* Shift back to the right, based on the exponent. */
2088 beqi a4, 64, .Lfixunsdfdi_bigexp
2089 ssl a4 /* shift by 64 - a4 */
2090 bgei a4, 32, .Lfixunsdfdi_smallshift
2094 .Lfixunsdfdi_shifted:
2095 /* Negate the result if sign != 0. */
2103 .Lfixunsdfdi_smallshift:
2106 j .Lfixunsdfdi_shifted
2108 .Lfixunsdfdi_nan_or_inf:
2109 /* Handle Infinity and NaN. */
2112 beqz a4, .Lfixunsdfdi_maxint
2114 /* Translate NaN to 0xffffffff.... */
2119 .Lfixunsdfdi_maxint:
2121 2: slli xh, a6, 11 /* 0x80000000 */
2130 .Lfixunsdfdi_bigexp:
2131 /* Handle unsigned maximum exponent case. */
2133 leaf_return /* no shift needed */
2135 #endif /* L_fixunsdfdi */
2140 .global __floatunsidf
2141 .type __floatunsidf, @function
2144 beqz a2, .Lfloatsidf_return_zero
2146 /* Set the sign to zero and jump to the floatsidf code. */
2148 j .Lfloatsidf_normalize
2152 .type __floatsidf, @function
2156 /* Check for zero. */
2157 beqz a2, .Lfloatsidf_return_zero
2159 /* Save the sign. */
2162 /* Get the absolute value. */
2170 .Lfloatsidf_normalize:
2171 /* Normalize with the first 1 bit in the msb. */
2172 do_nsau a4, a2, a5, a6
2176 /* Shift the mantissa into position. */
2178 slli xl, a5, (32 - 11)
2180 /* Set the exponent. */
2181 movi a5, 0x41d /* 0x3fe + 31 */
2186 /* Add the sign and return. */
2191 .Lfloatsidf_return_zero:
2195 #endif /* L_floatsidf */
2200 .global __floatundidf
2201 .type __floatundidf, @function
2205 /* Check for zero. */
2209 /* Set the sign to zero and jump to the floatdidf code. */
2211 j .Lfloatdidf_normalize
2215 .type __floatdidf, @function
2219 /* Check for zero. */
2223 /* Save the sign. */
2226 /* Get the absolute value. */
2227 bgez xh, .Lfloatdidf_normalize
2230 beqz xl, .Lfloatdidf_normalize
2233 .Lfloatdidf_normalize:
2234 /* Normalize with the first 1 bit in the msb of xh. */
2235 beqz xh, .Lfloatdidf_bigshift
2236 do_nsau a4, xh, a5, a6
2241 .Lfloatdidf_shifted:
2242 /* Shift the mantissa into position, with rounding bits in a6. */
2248 /* Set the exponent. */
2249 movi a5, 0x43d /* 0x3fe + 63 */
2258 /* Round up if the leftover fraction is >= 1/2. */
2261 beqz xl, .Lfloatdidf_roundcarry
2263 /* Check if the leftover fraction is exactly 1/2. */
2265 beqz a6, .Lfloatdidf_exactlyhalf
2268 .Lfloatdidf_bigshift:
2269 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
2270 do_nsau a4, xl, a5, a6
2275 j .Lfloatdidf_shifted
2277 .Lfloatdidf_exactlyhalf:
2278 /* Round down to the nearest even value. */
2283 .Lfloatdidf_roundcarry:
2284 /* xl is always zero when the rounding increment overflows, so
2285 there's no need to round it to an even value. */
2287 /* Overflow to the exponent is OK. */
2290 #endif /* L_floatdidf */
2295 .global __truncdfsf2
2296 .type __truncdfsf2, @function
2300 /* Adjust the exponent bias. */
2301 movi a4, (0x3ff - 0x7f) << 20
2304 /* Check for underflow. */
2306 bltz a6, .Ltrunc_underflow
2307 extui a6, a5, 20, 11
2308 beqz a6, .Ltrunc_underflow
2310 /* Check for overflow. */
2312 bge a6, a4, .Ltrunc_overflow
2314 /* Shift a5/xl << 3 into a5/a4. */
2320 /* Add the sign bit. */
2325 /* Round up if the leftover fraction is >= 1/2. */
2328 /* Overflow to the exponent is OK. The answer will be correct. */
2330 /* Check if the leftover fraction is exactly 1/2. */
2332 beqz a4, .Ltrunc_exactlyhalf
2335 .Ltrunc_exactlyhalf:
2336 /* Round down to the nearest even value. */
2342 /* Check if exponent == 0x7ff. */
2346 /* Check if mantissa is nonzero. */
2351 /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
2354 1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
2355 /* Add the sign bit. */
2362 /* Find shift count for a subnormal. Flush to zero if >= 32. */
2363 extui a6, xh, 20, 11
2364 movi a5, 0x3ff - 0x7f
2369 /* Replace the exponent with an explicit "1.0". */
2370 slli a5, a5, 13 /* 0x700000 */
2375 /* Shift the mantissa left by 3 bits (into a5/a4). */
2380 /* Shift right by a6. */
2385 beqz a7, .Ltrunc_addsign
2386 or a4, a4, a6 /* any positive, nonzero value will work */
2389 /* Return +/- zero. */
2390 1: extui a2, xh, 31, 1
2394 #endif /* L_truncdfsf2 */
2396 #ifdef L_extendsfdf2
2399 .global __extendsfdf2
2400 .type __extendsfdf2, @function
2404 /* Save the sign bit and then shift it off. */
2409 /* Extract and check the exponent. */
2411 beqz a6, .Lextend_expzero
2413 beqi a6, 256, .Lextend_nan_or_inf
2415 /* Shift >> 3 into a4/xl. */
2417 slli xl, a2, (32 - 3)
2419 /* Adjust the exponent bias. */
2420 movi a6, (0x3ff - 0x7f) << 20
2423 /* Add the sign bit. */
2427 .Lextend_nan_or_inf:
2430 /* Check for NaN. */
2434 slli a6, a6, 11 /* 0x80000 */
2437 /* Add the sign and return. */
2445 /* Normalize it to have 8 zero bits before the first 1 bit. */
2446 do_nsau a7, a4, a2, a3
2451 /* Shift >> 3 into a4/xl. */
2452 slli xl, a4, (32 - 3)
2455 /* Set the exponent. */
2456 movi a6, 0x3fe - 0x7f
2461 /* Add the sign and return. */
2465 #endif /* L_extendsfdf2 */
2468 #if XCHAL_HAVE_DFP_SQRT
2473 .global __ieee754_sqrt
2474 .type __ieee754_sqrt, @function
2521 #endif /* XCHAL_HAVE_DFP_SQRT */
2523 #if XCHAL_HAVE_DFP_RECIP
2529 .type __recipdf2, @function
2552 #endif /* L_recipdf2 */
2553 #endif /* XCHAL_HAVE_DFP_RECIP */
2555 #if XCHAL_HAVE_DFP_RSQRT
2557 /* Reciprocal square root */
2561 .type __rsqrtdf2, @function
2590 #endif /* L_rsqrtdf2 */
2591 #endif /* XCHAL_HAVE_DFP_RSQRT */