1 /* IEEE-754 double-precision functions for Xtensa
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
38 /* Warning! The branch displacements for some Xtensa branch instructions
39 are quite small, and this code has been carefully laid out to keep
40 branch targets in range. If you change anything, be sure to check that
41 the assembler is not relaxing anything to branch over a jump. */
47 .type __negdf2, @function
62 /* Handle NaNs and Infinities. (This code is placed before the
63 start of the function just to keep it in range of the limited
64 branch displacements.) */
67 /* If y is neither Infinity nor NaN, return x. */
68 bnall yh, a6, .Ladd_return_nan_or_inf
69 /* If x is a NaN, return it. Otherwise, return y. */
72 bnez a7, .Ladd_return_nan
79 .Ladd_return_nan_or_inf:
82 bnez a7, .Ladd_return_nan
86 movi a4, 0x80000 /* make it a quiet NaN */
91 /* Operand signs differ. Do a subtraction. */
98 .type __adddf3, @function
103 /* Check if the two operands have the same sign. */
105 bltz a7, .Ladd_opposite_signs
108 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
109 ball xh, a6, .Ladd_xnan_or_inf
110 ball yh, a6, .Ladd_ynan_or_inf
112 /* Compare the exponents. The smaller operand will be shifted
113 right by the exponent difference and added to the larger
117 bltu a7, a8, .Ladd_shiftx
120 /* Check if the smaller (or equal) exponent is zero. */
121 bnone yh, a6, .Ladd_yexpzero
123 /* Replace yh sign/exponent with 0x001. */
129 /* Compute the exponent difference. Optimize for difference < 32. */
131 bgeui a10, 32, .Ladd_bigshifty
133 /* Shift yh/yl right by the exponent difference. Any bits that are
134 shifted out of yl are saved in a9 for rounding the result. */
142 /* Do the 64-bit addition. */
148 /* Check if the add overflowed into the exponent. */
149 extui a10, xh, 20, 12
150 beq a10, a7, .Ladd_round
155 /* y is a subnormal value. Replace its sign/exponent with zero,
156 i.e., no implicit "1.0", and increment the apparent exponent
157 because subnormals behave as if they had the minimum (nonzero)
158 exponent. Test for the case when both exponents are zero. */
161 bnone xh, a6, .Ladd_bothexpzero
166 /* Both exponents are zero. Handle this as a special case. There
167 is no need to shift or round, and the normal code for handling
168 a carry into the exponent field will not work because it
169 assumes there is an implicit "1.0" that needs to be added. */
177 /* Exponent difference > 64 -- just return the bigger value. */
180 /* Shift yh/yl right by the exponent difference. Any bits that are
181 shifted out are saved in a9 for rounding the result. */
183 sll a11, yl /* lost bits shifted out of yl */
188 or a9, a9, a10 /* any positive, nonzero value will work */
192 /* Same as "yexpzero" except skip handling the case when both
193 exponents are zero. */
200 /* Same thing as the "shifty" code, but with x and y swapped. Also,
201 because the exponent difference is always nonzero in this version,
202 the shift sequence can use SLL and skip loading a constant zero. */
203 bnone xh, a6, .Ladd_xexpzero
211 bgeui a10, 32, .Ladd_bigshiftx
224 /* Check if the add overflowed into the exponent. */
225 extui a10, xh, 20, 12
226 bne a10, a8, .Ladd_carry
229 /* Round up if the leftover fraction is >= 1/2. */
232 beqz xl, .Ladd_roundcarry
234 /* Check if the leftover fraction is exactly 1/2. */
236 beqz a9, .Ladd_exactlyhalf
240 /* Mostly the same thing as "bigshifty".... */
241 bgeui a10, 64, .Ladd_returny
258 /* The addition has overflowed into the exponent field, so the
259 value needs to be renormalized. The mantissa of the result
260 can be recovered by subtracting the original exponent and
261 adding 0x100000 (which is the explicit "1.0" for the
262 mantissa of the non-shifted operand -- the "1.0" for the
263 shifted operand was already added). The mantissa can then
264 be shifted right by one bit. The explicit "1.0" of the
265 shifted mantissa then needs to be replaced by the exponent,
266 incremented by one to account for the normalizing shift.
267 It is faster to combine these operations: do the shift first
268 and combine the additions and subtractions. If x is the
269 original exponent, the result is:
270 shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
272 shifted mantissa + ((x + 1) << 19)
273 Note that the exponent is incremented here by leaving the
274 explicit "1.0" of the mantissa in the exponent field. */
276 /* Shift xh/xl right by one bit. Save the lsb of xl. */
282 /* See explanation above. The original exponent is in a8. */
287 /* Return an Infinity if the exponent overflowed. */
288 ball xh, a6, .Ladd_infinity
290 /* Same thing as the "round" code except the msb of the leftover
291 fraction is bit 0 of a10, with the rest of the fraction in a9. */
294 beqz xl, .Ladd_roundcarry
295 beqz a9, .Ladd_exactlyhalf
299 /* Clear the mantissa. */
304 /* The sign bit may have been lost in a carry-out. Put it back. */
310 /* Round down to the nearest even value. */
316 /* xl is always zero when the rounding increment overflows, so
317 there's no need to round it to an even value. */
319 /* Overflow to the exponent is OK. */
326 /* Handle NaNs and Infinities. (This code is placed before the
327 start of the function just to keep it in range of the limited
328 branch displacements.) */
331 /* If y is neither Infinity nor NaN, return x. */
332 bnall yh, a6, .Lsub_return_nan_or_inf
335 /* Both x and y are either NaN or Inf, so the result is NaN. */
336 movi a4, 0x80000 /* make it a quiet NaN */
341 /* Negate y and return it. */
346 .Lsub_return_nan_or_inf:
349 bnez a7, .Lsub_return_nan
352 .Lsub_opposite_signs:
353 /* Operand signs differ. Do an addition. */
360 .type __subdf3, @function
365 /* Check if the two operands have the same sign. */
367 bltz a7, .Lsub_opposite_signs
370 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
371 ball xh, a6, .Lsub_xnan_or_inf
372 ball yh, a6, .Lsub_ynan_or_inf
374 /* Compare the operands. In contrast to addition, the entire
375 value matters here. */
378 bltu xh, yh, .Lsub_xsmaller
379 beq xh, yh, .Lsub_compare_low
382 /* Check if the smaller (or equal) exponent is zero. */
383 bnone yh, a6, .Lsub_yexpzero
385 /* Replace yh sign/exponent with 0x001. */
391 /* Compute the exponent difference. Optimize for difference < 32. */
393 bgeui a10, 32, .Lsub_bigshifty
395 /* Shift yh/yl right by the exponent difference. Any bits that are
396 shifted out of yl are saved in a9 for rounding the result. */
404 /* Do the 64-bit subtraction. */
410 /* Subtract the leftover bits in a9 from zero and propagate any
411 borrow from xh/xl. */
418 /* Check if the subtract underflowed into the exponent. */
419 extui a10, xh, 20, 11
420 beq a10, a7, .Lsub_round
424 /* The high words are equal. Compare the low words. */
425 bltu xl, yl, .Lsub_xsmaller
426 bltu yl, xl, .Lsub_ysmaller
427 /* The operands are equal. Return 0.0. */
433 /* y is a subnormal value. Replace its sign/exponent with zero,
434 i.e., no implicit "1.0". Unless x is also a subnormal, increment
435 y's apparent exponent because subnormals behave as if they had
436 the minimum (nonzero) exponent. */
439 bnone xh, a6, .Lsub_yexpdiff
444 /* Exponent difference > 64 -- just return the bigger value. */
447 /* Shift yh/yl right by the exponent difference. Any bits that are
448 shifted out are saved in a9 for rounding the result. */
450 sll a11, yl /* lost bits shifted out of yl */
455 or a9, a9, a10 /* any positive, nonzero value will work */
459 /* Same thing as the "ysmaller" code, but with x and y swapped and
461 bnone xh, a6, .Lsub_xexpzero
469 bgeui a10, 32, .Lsub_bigshiftx
487 /* Subtract the leftover bits in a9 from zero and propagate any
488 borrow from xh/xl. */
495 /* Check if the subtract underflowed into the exponent. */
496 extui a10, xh, 20, 11
497 bne a10, a8, .Lsub_borrow
500 /* Round up if the leftover fraction is >= 1/2. */
503 beqz xl, .Lsub_roundcarry
505 /* Check if the leftover fraction is exactly 1/2. */
507 beqz a9, .Lsub_exactlyhalf
511 /* Same as "yexpzero". */
514 bnone yh, a6, .Lsub_xexpdiff
519 /* Mostly the same thing as "bigshifty", but with the sign bit of the
520 shifted value set so that the subsequent subtraction flips the
522 bgeui a10, 64, .Lsub_returny
528 slli xh, a6, 11 /* set sign bit of xh */
534 /* Negate and return y. */
541 /* The subtraction has underflowed into the exponent field, so the
542 value needs to be renormalized. Shift the mantissa left as
543 needed to remove any leading zeros and adjust the exponent
544 accordingly. If the exponent is not large enough to remove
545 all the leading zeros, the result will be a subnormal value. */
548 beqz a8, .Lsub_xhzero
549 do_nsau a6, a8, a7, a11
551 bge a6, a10, .Lsub_subnormal
555 /* Shift the mantissa (a8/xl/a9) left by a6. */
561 /* Combine the shifted mantissa with the sign and exponent,
562 decrementing the exponent by a6. (The exponent has already
563 been decremented by one due to the borrow from the subtraction,
564 but adding the mantissa will increment the exponent by one.) */
572 /* Round down to the nearest even value. */
578 /* xl is always zero when the rounding increment overflows, so
579 there's no need to round it to an even value. */
581 /* Overflow to the exponent is OK. */
585 /* When normalizing the result, all the mantissa bits in the high
586 word are zero. Shift by "20 + (leading zero count of xl) + 1". */
587 do_nsau a6, xl, a7, a11
589 blt a10, a6, .Lsub_subnormal
591 .Lsub_normalize_shift:
592 bltui a6, 32, .Lsub_shift_lt32
606 /* The exponent is too small to shift away all the leading zeros.
607 Set a6 to the current exponent (which has already been
608 decremented by the borrow) so that the exponent of the result
609 will be zero. Do not add 1 to a6 in this case, because: (1)
610 adding the mantissa will not increment the exponent, so there is
611 no need to subtract anything extra from the exponent to
612 compensate, and (2) the effective exponent of a subnormal is 1
613 not 0 so the shift amount must be 1 smaller than normal. */
615 j .Lsub_normalize_shift
617 #endif /* L_addsubdf3 */
622 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
623 #define XCHAL_NO_MUL 1
629 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
630 (This code is placed before the start of the function just to
631 keep it in range of the limited branch displacements.) */
634 /* Clear the sign bit of x. */
638 /* If x is zero, return zero. */
640 beqz a10, .Lmul_return_zero
642 /* Normalize x. Adjust the exponent in a8. */
643 beqz xh, .Lmul_xh_zero
644 do_nsau a10, xh, a11, a12
653 do_nsau a10, xl, a11, a12
658 bltz a10, .Lmul_xl_srl
668 /* Clear the sign bit of y. */
672 /* If y is zero, return zero. */
674 beqz a10, .Lmul_return_zero
676 /* Normalize y. Adjust the exponent in a9. */
677 beqz yh, .Lmul_yh_zero
678 do_nsau a10, yh, a11, a12
687 do_nsau a10, yl, a11, a12
692 bltz a10, .Lmul_yl_srl
702 /* Return zero with the appropriate sign bit. */
709 /* If y is zero, return NaN. */
712 beqz a8, .Lmul_return_nan
714 /* If y is NaN, return y. */
715 bnall yh, a6, .Lmul_returnx
718 beqz a8, .Lmul_returnx
727 bnez a8, .Lmul_return_nan
728 /* Set the sign bit and return. */
736 /* If x is zero, return NaN. */
737 bnez xl, .Lmul_returny
739 bnez a8, .Lmul_returny
743 movi a4, 0x80000 /* make it a quiet NaN */
749 .type __muldf3, @function
751 #if __XTENSA_CALL0_ABI__
759 /* This is not really a leaf function; allocate enough stack space
760 to allow CALL12s to a helper function. */
767 /* Get the sign of the result. */
770 /* Check for NaN and infinity. */
771 ball xh, a6, .Lmul_xnan_or_inf
772 ball yh, a6, .Lmul_ynan_or_inf
774 /* Extract the exponents. */
778 beqz a8, .Lmul_xexpzero
780 beqz a9, .Lmul_yexpzero
783 /* Add the exponents. */
786 /* Replace sign/exponent fields with explicit "1.0". */
793 /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
794 The least-significant word of the result is thrown away except
795 that if it is nonzero, the lsb of a6 is set to 1. */
796 #if XCHAL_HAVE_MUL32_HIGH
798 /* Compute a6 with any carry-outs in a10. */
811 /* If the low word of the result is nonzero, set the lsb of a6. */
817 /* Compute xl with any carry-outs in a9. */
838 #else /* ! XCHAL_HAVE_MUL32_HIGH */
840 /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
841 products. These partial products are:
866 where the input chunks are (hh, hl, lh, ll). If using the Mul16
867 or Mul32 multiplier options, these input chunks must be stored in
868 separate registers. For Mac16, the UMUL.AA.* opcodes can specify
869 that the inputs come from either half of the registers, so there
870 is no need to shift them out ahead of time. If there is no
871 multiply hardware, the 16-bit chunks can be extracted when setting
872 up the arguments to the separate multiply function. */
874 /* Save a7 since it is needed to hold a temporary value. */
876 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
877 /* Calling a separate multiply function will clobber a0 and requires
878 use of a8 as a temporary, so save those values now. (The function
879 uses a custom ABI so nothing else needs to be saved.) */
884 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
891 /* Get the high halves of the inputs into registers. */
902 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
903 /* Clear the high halves of the inputs. This does not matter
904 for MUL16 because the high bits are ignored. */
910 #endif /* MUL16 || MUL32 */
915 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
916 mul16u dst, xreg ## xhalf, yreg ## yhalf
918 #elif XCHAL_HAVE_MUL32
920 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
921 mull dst, xreg ## xhalf, yreg ## yhalf
923 #elif XCHAL_HAVE_MAC16
925 /* The preprocessor insists on inserting a space when concatenating after
926 a period in the definition of do_mul below. These macros are a workaround
927 using underscores instead of periods when doing the concatenation. */
928 #define umul_aa_ll umul.aa.ll
929 #define umul_aa_lh umul.aa.lh
930 #define umul_aa_hl umul.aa.hl
931 #define umul_aa_hh umul.aa.hh
933 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
934 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
937 #else /* no multiply hardware */
939 #define set_arg_l(dst, src) \
940 extui dst, src, 0, 16
941 #define set_arg_h(dst, src) \
944 #if __XTENSA_CALL0_ABI__
945 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
946 set_arg_ ## xhalf (a13, xreg); \
947 set_arg_ ## yhalf (a14, yreg); \
948 call0 .Lmul_mulsi3; \
951 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
952 set_arg_ ## xhalf (a14, xreg); \
953 set_arg_ ## yhalf (a15, yreg); \
954 call12 .Lmul_mulsi3; \
956 #endif /* __XTENSA_CALL0_ABI__ */
958 #endif /* no multiply hardware */
960 /* Add pp1 and pp2 into a10 with carry-out in a9. */
961 do_mul(a10, xl, l, yl, h) /* pp 1 */
962 do_mul(a11, xl, h, yl, l) /* pp 2 */
968 /* Initialize a6 with a9/a10 shifted into position. Note that
969 this value can be safely incremented without any carry-outs. */
973 /* Compute the low word into a10. */
974 do_mul(a11, xl, l, yl, l) /* pp 0 */
980 /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
981 This is good enough to determine the low half of a6, so that any
982 nonzero bits from the low word of the result can be collapsed
983 into a6, freeing up a register. */
985 do_mul(a11, xl, l, yh, l) /* pp 3 */
990 do_mul(a11, xl, h, yl, h) /* pp 4 */
995 do_mul(a11, xh, l, yl, l) /* pp 5 */
1000 /* Collapse any nonzero bits from the low word into a6. */
1005 /* Add pp6-9 into a11 with carry-outs in a10. */
1006 do_mul(a7, xl, l, yh, h) /* pp 6 */
1007 do_mul(a11, xh, h, yl, l) /* pp 9 */
1013 do_mul(a7, xl, h, yh, l) /* pp 7 */
1018 do_mul(a7, xh, l, yl, h) /* pp 8 */
1023 /* Shift a10/a11 into position, and add low half of a11 to a6. */
1031 /* Add pp10-12 into xl with carry-outs in a9. */
1033 do_mul(xl, xl, h, yh, h) /* pp 10 */
1038 do_mul(a10, xh, l, yh, l) /* pp 11 */
1043 do_mul(a10, xh, h, yl, h) /* pp 12 */
1048 /* Add pp13-14 into a11 with carry-outs in a10. */
1049 do_mul(a11, xh, l, yh, h) /* pp 13 */
1050 do_mul(a7, xh, h, yh, l) /* pp 14 */
1056 /* Shift a10/a11 into position, and add low half of a11 to a6. */
1065 do_mul(xh, xh, h, yh, h) /* pp 15 */
1068 /* Restore values saved on the stack during the multiplication. */
1070 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1074 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
1076 /* Shift left by 12 bits, unless there was a carry-out from the
1077 multiply, in which case, shift by 11 bits and increment the
1078 exponent. Note: It is convenient to use the constant 0x3ff
1079 instead of 0x400 when removing the extra exponent bias (so that
1080 it is easy to construct 0x7fe for the overflow check). Reverse
1081 the logic here to decrement the exponent sum by one unless there
1084 srli a5, xh, 21 - 12
1093 /* Subtract the extra bias from the exponent sum (plus one to account
1094 for the explicit "1.0" of the mantissa that will be added to the
1095 exponent in the final result). */
1099 /* Check for over/underflow. The value in a8 is one less than the
1100 final exponent, so values in the range 0..7fd are OK here. */
1101 slli a4, a4, 1 /* 0x7fe */
1102 bgeu a8, a4, .Lmul_overflow
1106 bgez a6, .Lmul_rounded
1108 beqz xl, .Lmul_roundcarry
1110 beqz a6, .Lmul_exactlyhalf
1113 /* Add the exponent to the mantissa. */
1118 /* Add the sign bit. */
1124 #if __XTENSA_CALL0_ABI__
1134 /* Round down to the nearest even value. */
1140 /* xl is always zero when the rounding increment overflows, so
1141 there's no need to round it to an even value. */
1143 /* Overflow is OK -- it will be added to the exponent. */
1147 bltz a8, .Lmul_underflow
1148 /* Return +/- Infinity. */
1149 addi a8, a4, 1 /* 0x7ff */
1155 /* Create a subnormal value, where the exponent field contains zero,
1156 but the effective exponent is 1. The value of a8 is one less than
1157 the actual exponent, so just negate it to get the shift amount. */
1161 bgeui a8, 32, .Lmul_bigshift
1163 /* Shift xh/xl right. Any bits that are shifted out of xl are saved
1164 in a6 (combined with the shifted-out bits currently in a6) for
1165 rounding the result. */
1172 bgeui a8, 64, .Lmul_flush_to_zero
1173 sll a10, xl /* lost bits shifted out of xl */
1179 /* Set the exponent to zero. */
1182 /* Pack any nonzero bits shifted out into a6. */
1183 beqz a9, .Lmul_round
1188 .Lmul_flush_to_zero:
1189 /* Return zero with the appropriate sign bit. */
1197 /* For Xtensa processors with no multiply hardware, this simplified
1198 version of _mulsi3 is used for multiplying 16-bit chunks of
1199 the floating-point mantissas. When using CALL0, this function
1200 uses a custom ABI: the inputs are passed in a13 and a14, the
1201 result is returned in a12, and a8 and a15 are clobbered. */
1205 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1207 1: add \tmp1, \src2, \dst
1208 extui \tmp2, \src1, 0, 1
1209 movnez \dst, \tmp1, \tmp2
1211 do_addx2 \tmp1, \src2, \dst, \tmp1
1212 extui \tmp2, \src1, 1, 1
1213 movnez \dst, \tmp1, \tmp2
1215 do_addx4 \tmp1, \src2, \dst, \tmp1
1216 extui \tmp2, \src1, 2, 1
1217 movnez \dst, \tmp1, \tmp2
1219 do_addx8 \tmp1, \src2, \dst, \tmp1
1220 extui \tmp2, \src1, 3, 1
1221 movnez \dst, \tmp1, \tmp2
1223 srli \src1, \src1, 4
1224 slli \src2, \src2, 4
1227 #if __XTENSA_CALL0_ABI__
1228 mul_mulsi3_body a12, a13, a14, a15, a8
1230 /* The result will be written into a2, so save that argument in a4. */
1232 mul_mulsi3_body a2, a4, a3, a5, a6
1235 #endif /* XCHAL_NO_MUL */
1236 #endif /* L_muldf3 */
1242 #if XCHAL_HAVE_DFP_DIV
1247 .type __divdf3, @function
1294 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1295 (This code is placed before the start of the function just to
1296 keep it in range of the limited branch displacements.) */
1299 /* Clear the sign bit of y. */
1303 /* Check for division by zero. */
1305 beqz a10, .Ldiv_yzero
1307 /* Normalize y. Adjust the exponent in a9. */
1308 beqz yh, .Ldiv_yh_zero
1309 do_nsau a10, yh, a11, a9
1318 do_nsau a10, yl, a11, a9
1323 bltz a10, .Ldiv_yl_srl
1333 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
1341 movi a4, 0x80000 /* make it a quiet NaN */
1347 /* Clear the sign bit of x. */
1351 /* If x is zero, return zero. */
1353 beqz a10, .Ldiv_return_zero
1355 /* Normalize x. Adjust the exponent in a8. */
1356 beqz xh, .Ldiv_xh_zero
1357 do_nsau a10, xh, a11, a8
1366 do_nsau a10, xl, a11, a8
1371 bltz a10, .Ldiv_xl_srl
1381 /* Return zero with the appropriate sign bit. */
1388 /* Set the sign bit of the result. */
1392 /* If y is NaN or Inf, return NaN. */
1393 ball yh, a6, .Ldiv_return_nan
1396 bnez a8, .Ldiv_return_nan
1400 /* If y is Infinity, return zero. */
1403 beqz a8, .Ldiv_return_zero
1404 /* y is NaN; return it. */
1409 movi a4, 0x80000 /* make it a quiet NaN */
1419 .type __divdf3, @function
1424 /* Get the sign of the result. */
1427 /* Check for NaN and infinity. */
1428 ball xh, a6, .Ldiv_xnan_or_inf
1429 ball yh, a6, .Ldiv_ynan_or_inf
1431 /* Extract the exponents. */
1432 extui a8, xh, 20, 11
1433 extui a9, yh, 20, 11
1435 beqz a9, .Ldiv_yexpzero
1437 beqz a8, .Ldiv_xexpzero
1440 /* Subtract the exponents. */
1443 /* Replace sign/exponent fields with explicit "1.0". */
1450 /* Set SAR for left shift by one. */
1453 /* The first digit of the mantissa division must be a one.
1454 Shift x (and adjust the exponent) as needed to make this true. */
1456 beq yh, xh, .Ldiv_highequal1
1461 /* Do the first subtraction and shift. */
1469 /* Put the quotient into a10/a11. */
1473 /* Divide one bit at a time for 52 bits. */
1475 #if XCHAL_HAVE_LOOPS
1476 loop a9, .Ldiv_loopend
1479 /* Shift the quotient << 1. */
1483 /* Is this digit a 0 or 1? */
1485 beq xh, yh, .Ldiv_highequal2
1487 /* Output a 1 and subtract. */
1494 /* Shift the dividend << 1. */
1498 #if !XCHAL_HAVE_LOOPS
1504 /* Add the exponent bias (less one to account for the explicit "1.0"
1505 of the mantissa that will be added to the exponent in the final
1510 /* Check for over/underflow. The value in a8 is one less than the
1511 final exponent, so values in the range 0..7fd are OK here. */
1512 addmi a9, a9, 0x400 /* 0x7fe */
1513 bgeu a8, a9, .Ldiv_overflow
1516 /* Round. The remainder (<< 1) is in xh/xl. */
1517 bltu xh, yh, .Ldiv_rounded
1518 beq xh, yh, .Ldiv_highequal3
1521 beqz a11, .Ldiv_roundcarry
1525 /* Add the exponent to the mantissa. */
1530 /* Add the sign bit. */
1541 bltu xl, yl, .Ldiv_rounded
1542 bne xl, yl, .Ldiv_roundup
1544 /* Remainder is exactly half the divisor. Round even. */
1546 beqz a11, .Ldiv_roundcarry
1552 bltz a8, .Ldiv_underflow
1553 /* Return +/- Infinity. */
1554 addi a8, a9, 1 /* 0x7ff */
1560 /* Create a subnormal value, where the exponent field contains zero,
1561 but the effective exponent is 1. The value of a8 is one less than
1562 the actual exponent, so just negate it to get the shift amount. */
1565 bgeui a8, 32, .Ldiv_bigshift
1567 /* Shift a10/a11 right. Any bits that are shifted out of a11 are
1568 saved in a6 for rounding the result. */
1575 bgeui a8, 64, .Ldiv_flush_to_zero
1576 sll a9, a11 /* lost bits shifted out of a11 */
1582 /* Set the exponent to zero. */
1585 /* Pack any nonzero remainder (in xh/xl) into a6. */
1591 /* Round a10/a11 based on the bits shifted out into a6. */
1592 1: bgez a6, .Ldiv_rounded
1594 beqz a11, .Ldiv_roundcarry
1596 bnez a6, .Ldiv_rounded
1602 /* a11 is always zero when the rounding increment overflows, so
1603 there's no need to round it to an even value. */
1605 /* Overflow to the exponent field is OK. */
1608 .Ldiv_flush_to_zero:
1609 /* Return zero with the appropriate sign bit. */
1615 #endif /* XCHAL_HAVE_DFP_DIV */
1617 #endif /* L_divdf3 */
1621 /* Equal and Not Equal */
1626 .set __nedf2, __eqdf2
1627 .type __eqdf2, @function
1633 /* The values are equal but NaN != NaN. Check the exponent. */
1645 /* Check if the mantissas are nonzero. */
1650 /* Check if x and y are zero with different signs. */
1653 or a7, a7, xl /* xl == yl here */
1655 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1656 of x when exponent(x) = 0x7ff and x == y. */
1667 .type __gtdf2, @function
1672 1: bnall yh, a6, .Lle_cmp
1674 /* Check if y is a NaN. */
1681 /* Check if x is a NaN. */
1689 /* Less Than or Equal */
1693 .type __ledf2, @function
1698 1: bnall yh, a6, .Lle_cmp
1700 /* Check if y is a NaN. */
1707 /* Check if x is a NaN. */
1715 /* Check if x and y have different signs. */
1717 bltz a7, .Lle_diff_signs
1719 /* Check if x is negative. */
1722 /* Check if x <= y. */
1730 /* Check if y <= x. */
1740 /* Check if both x and y are zero. */
1751 /* Greater Than or Equal */
1755 .type __gedf2, @function
1760 1: bnall yh, a6, .Llt_cmp
1762 /* Check if y is a NaN. */
1769 /* Check if x is a NaN. */
1781 .type __ltdf2, @function
1786 1: bnall yh, a6, .Llt_cmp
1788 /* Check if y is a NaN. */
1795 /* Check if x is a NaN. */
1803 /* Check if x and y have different signs. */
1805 bltz a7, .Llt_diff_signs
1807 /* Check if x is negative. */
1810 /* Check if x < y. */
1818 /* Check if y < x. */
1828 /* Check if both x and y are nonzero. */
1843 .type __unorddf2, @function
1864 #endif /* L_cmpdf2 */
1870 .type __fixdfsi, @function
1874 /* Check for NaN and Infinity. */
1876 ball xh, a6, .Lfixdfsi_nan_or_inf
1878 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
1879 extui a4, xh, 20, 11
1880 extui a5, a6, 19, 10 /* 0x3fe */
1882 bgei a4, 32, .Lfixdfsi_maxint
1883 blti a4, 1, .Lfixdfsi_zero
1885 /* Add explicit "1.0" and shift << 11. */
1890 /* Shift back to the right, based on the exponent. */
1891 ssl a4 /* shift by 32 - a4 */
1894 /* Negate the result if sign != 0. */
1899 .Lfixdfsi_nan_or_inf:
1900 /* Handle Infinity and NaN. */
1903 beqz a4, .Lfixdfsi_maxint
1905 /* Translate NaN to +maxint. */
1909 slli a4, a6, 11 /* 0x80000000 */
1910 addi a5, a4, -1 /* 0x7fffffff */
1919 #endif /* L_fixdfsi */
1925 .type __fixdfdi, @function
1929 /* Check for NaN and Infinity. */
1931 ball xh, a6, .Lfixdfdi_nan_or_inf
1933 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
1934 extui a4, xh, 20, 11
1935 extui a5, a6, 19, 10 /* 0x3fe */
1937 bgei a4, 64, .Lfixdfdi_maxint
1938 blti a4, 1, .Lfixdfdi_zero
1940 /* Add explicit "1.0" and shift << 11. */
1946 /* Shift back to the right, based on the exponent. */
1947 ssl a4 /* shift by 64 - a4 */
1948 bgei a4, 32, .Lfixdfdi_smallshift
1953 /* Negate the result if sign != 0. */
1961 .Lfixdfdi_smallshift:
1966 .Lfixdfdi_nan_or_inf:
1967 /* Handle Infinity and NaN. */
1970 beqz a4, .Lfixdfdi_maxint
1972 /* Translate NaN to +maxint. */
1976 slli a7, a6, 11 /* 0x80000000 */
1982 1: addi xh, a7, -1 /* 0x7fffffff */
1991 #endif /* L_fixdfdi */
1996 .global __fixunsdfsi
1997 .type __fixunsdfsi, @function
2001 /* Check for NaN and Infinity. */
2003 ball xh, a6, .Lfixunsdfsi_nan_or_inf
2005 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
2006 extui a4, xh, 20, 11
2007 extui a5, a6, 20, 10 /* 0x3ff */
2009 bgei a4, 32, .Lfixunsdfsi_maxint
2010 bltz a4, .Lfixunsdfsi_zero
2012 /* Add explicit "1.0" and shift << 11. */
2017 /* Shift back to the right, based on the exponent. */
2019 beqi a4, 32, .Lfixunsdfsi_bigexp
2020 ssl a4 /* shift by 32 - a4 */
2023 /* Negate the result if sign != 0. */
2028 .Lfixunsdfsi_nan_or_inf:
2029 /* Handle Infinity and NaN. */
2032 beqz a4, .Lfixunsdfsi_maxint
2034 /* Translate NaN to 0xffffffff. */
2038 .Lfixunsdfsi_maxint:
2039 slli a4, a6, 11 /* 0x80000000 */
2040 movi a5, -1 /* 0xffffffff */
2049 .Lfixunsdfsi_bigexp:
2050 /* Handle unsigned maximum exponent case. */
2052 mov a2, a5 /* no shift needed */
2055 /* Return 0x80000000 if negative. */
2059 #endif /* L_fixunsdfsi */
2064 .global __fixunsdfdi
2065 .type __fixunsdfdi, @function
2069 /* Check for NaN and Infinity. */
2071 ball xh, a6, .Lfixunsdfdi_nan_or_inf
2073 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
2074 extui a4, xh, 20, 11
2075 extui a5, a6, 20, 10 /* 0x3ff */
2077 bgei a4, 64, .Lfixunsdfdi_maxint
2078 bltz a4, .Lfixunsdfdi_zero
2080 /* Add explicit "1.0" and shift << 11. */
2086 /* Shift back to the right, based on the exponent. */
2088 beqi a4, 64, .Lfixunsdfdi_bigexp
2089 ssl a4 /* shift by 64 - a4 */
2090 bgei a4, 32, .Lfixunsdfdi_smallshift
2094 .Lfixunsdfdi_shifted:
2095 /* Negate the result if sign != 0. */
2103 .Lfixunsdfdi_smallshift:
2106 j .Lfixunsdfdi_shifted
2108 .Lfixunsdfdi_nan_or_inf:
2109 /* Handle Infinity and NaN. */
2112 beqz a4, .Lfixunsdfdi_maxint
2114 /* Translate NaN to 0xffffffff.... */
2119 .Lfixunsdfdi_maxint:
2121 2: slli xh, a6, 11 /* 0x80000000 */
2130 .Lfixunsdfdi_bigexp:
2131 /* Handle unsigned maximum exponent case. */
2133 leaf_return /* no shift needed */
2135 #endif /* L_fixunsdfdi */
2140 .global __floatunsidf
2141 .type __floatunsidf, @function
2144 beqz a2, .Lfloatsidf_return_zero
2146 /* Set the sign to zero and jump to the floatsidf code. */
2148 j .Lfloatsidf_normalize
2152 .type __floatsidf, @function
2156 /* Check for zero. */
2157 beqz a2, .Lfloatsidf_return_zero
2159 /* Save the sign. */
2162 /* Get the absolute value. */
2170 .Lfloatsidf_normalize:
2171 /* Normalize with the first 1 bit in the msb. */
2172 do_nsau a4, a2, a5, a6
2176 /* Shift the mantissa into position. */
2178 slli xl, a5, (32 - 11)
2180 /* Set the exponent. */
2181 movi a5, 0x41d /* 0x3fe + 31 */
2186 /* Add the sign and return. */
2191 .Lfloatsidf_return_zero:
2195 #endif /* L_floatsidf */
2200 .global __floatundidf
2201 .type __floatundidf, @function
2205 /* Check for zero. */
2209 /* Set the sign to zero and jump to the floatdidf code. */
2211 j .Lfloatdidf_normalize
2215 .type __floatdidf, @function
2219 /* Check for zero. */
2223 /* Save the sign. */
2226 /* Get the absolute value. */
2227 bgez xh, .Lfloatdidf_normalize
2230 beqz xl, .Lfloatdidf_normalize
2233 .Lfloatdidf_normalize:
2234 /* Normalize with the first 1 bit in the msb of xh. */
2235 beqz xh, .Lfloatdidf_bigshift
2236 do_nsau a4, xh, a5, a6
2241 .Lfloatdidf_shifted:
2242 /* Shift the mantissa into position, with rounding bits in a6. */
2248 /* Set the exponent. */
2249 movi a5, 0x43d /* 0x3fe + 63 */
2258 /* Round up if the leftover fraction is >= 1/2. */
2261 beqz xl, .Lfloatdidf_roundcarry
2263 /* Check if the leftover fraction is exactly 1/2. */
2265 beqz a6, .Lfloatdidf_exactlyhalf
2268 .Lfloatdidf_bigshift:
2269 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
2270 do_nsau a4, xl, a5, a6
2275 j .Lfloatdidf_shifted
2277 .Lfloatdidf_exactlyhalf:
2278 /* Round down to the nearest even value. */
2283 .Lfloatdidf_roundcarry:
2284 /* xl is always zero when the rounding increment overflows, so
2285 there's no need to round it to an even value. */
2287 /* Overflow to the exponent is OK. */
2290 #endif /* L_floatdidf */
2295 .global __truncdfsf2
2296 .type __truncdfsf2, @function
2300 /* Adjust the exponent bias. */
2301 movi a4, (0x3ff - 0x7f) << 20
2304 /* Check for underflow. */
2306 bltz a6, .Ltrunc_underflow
2307 extui a6, a5, 20, 11
2308 beqz a6, .Ltrunc_underflow
2310 /* Check for overflow. */
2312 bge a6, a4, .Ltrunc_overflow
2314 /* Shift a5/xl << 3 into a5/a4. */
2320 /* Add the sign bit. */
2325 /* Round up if the leftover fraction is >= 1/2. */
2328 /* Overflow to the exponent is OK. The answer will be correct. */
2330 /* Check if the leftover fraction is exactly 1/2. */
2332 beqz a4, .Ltrunc_exactlyhalf
2335 .Ltrunc_exactlyhalf:
2336 /* Round down to the nearest even value. */
2342 /* Check if exponent == 0x7ff. */
2346 /* Check if mantissa is nonzero. */
2351 /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
2354 1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
2355 /* Add the sign bit. */
2362 /* Find shift count for a subnormal. Flush to zero if >= 32. */
2363 extui a6, xh, 20, 11
2364 movi a5, 0x3ff - 0x7f
2369 /* Replace the exponent with an explicit "1.0". */
2370 slli a5, a5, 13 /* 0x700000 */
2375 /* Shift the mantissa left by 3 bits (into a5/a4). */
2380 /* Shift right by a6. */
2385 beqz a7, .Ltrunc_addsign
2386 or a4, a4, a6 /* any positive, nonzero value will work */
2389 /* Return +/- zero. */
2390 1: extui a2, xh, 31, 1
2394 #endif /* L_truncdfsf2 */
2396 #ifdef L_extendsfdf2
2399 .global __extendsfdf2
2400 .type __extendsfdf2, @function
2404 /* Save the sign bit and then shift it off. */
2409 /* Extract and check the exponent. */
2411 beqz a6, .Lextend_expzero
2413 beqi a6, 256, .Lextend_nan_or_inf
2415 /* Shift >> 3 into a4/xl. */
2417 slli xl, a2, (32 - 3)
2419 /* Adjust the exponent bias. */
2420 movi a6, (0x3ff - 0x7f) << 20
2423 /* Add the sign bit. */
2427 .Lextend_nan_or_inf:
2430 /* Check for NaN. */
2434 slli a6, a6, 11 /* 0x80000 */
2437 /* Add the sign and return. */
2445 /* Normalize it to have 8 zero bits before the first 1 bit. */
2446 do_nsau a7, a4, a2, a3
2451 /* Shift >> 3 into a4/xl. */
2452 slli xl, a4, (32 - 3)
2455 /* Set the exponent. */
2456 movi a6, 0x3fe - 0x7f
2461 /* Add the sign and return. */
2465 #endif /* L_extendsfdf2 */
2468 #if XCHAL_HAVE_DFP_SQRT
2473 .global __ieee754_sqrt
2474 .type __ieee754_sqrt, @function
2521 #endif /* XCHAL_HAVE_DFP_SQRT */
2523 #if XCHAL_HAVE_DFP_RECIP
2529 .type __recipdf2, @function
2552 #endif /* L_recipdf2 */
2553 #endif /* XCHAL_HAVE_DFP_RECIP */
2555 #if XCHAL_HAVE_DFP_RSQRT
2557 /* Reciprocal square root */
2561 .type __rsqrtdf2, @function
2590 #endif /* L_rsqrtdf2 */
2591 #endif /* XCHAL_HAVE_DFP_RSQRT */