/* ieee754-sf.S single-precision floating point support for ARM

   Copyright (C) 2003, 2004, 2005  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combined
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */
/* Notes:

 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 *
 * Only the default rounding mode is supported, for best performance.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performance.
 */
ARM_FUNC_START negsf2
ARM_FUNC_ALIAS aeabi_fneg negsf2

	eor	r0, r0, #0x80000000	@ flip sign bit
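/* A rough C model of the operation above (illustrative sketch only, not
   part of the build; the helper name is ours).  Negation just flips the
   sign bit of the raw IEEE 754 pattern, so it works for 0, INF and NAN:

	#include <stdint.h>
	#include <string.h>

	static float negsf (float x)
	{
	  uint32_t u;
	  memcpy (&u, &x, sizeof u);	// type-pun safely
	  u ^= 0x80000000u;		// flip only the sign bit
	  memcpy (&x, &u, sizeof u);
	  return x;
	}
*/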
ARM_FUNC_START aeabi_frsub

	eor	r0, r0, #0x80000000	@ flip sign bit of first arg
ARM_FUNC_START subsf3
ARM_FUNC_ALIAS aeabi_fsub subsf3

	eor	r1, r1, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
ARM_FUNC_START addsf3
ARM_FUNC_ALIAS aeabi_fadd addsf3

1:	@ Look for zeroes, equal values, INF, or NAN.
	mvnnes	ip, r2, asr #24
	mvnnes	ip, r3, asr #24

	@ Compute exponent difference.  Make largest exponent in r2,
	@ corresponding arg in r0, and positive exponent difference in r3.
	rsbs	r3, r2, r3, lsr #24
	@ If exponent difference is too large, return largest argument
	@ already in r0.  We need up to 25 bits to handle proper rounding
	@ of 0x1p25 - 1.1.
	@ Convert mantissa to signed integer.
	orr	r0, r0, #0x00800000
	bic	r0, r0, #0xff000000

	orr	r1, r1, #0x00800000
	bic	r1, r1, #0xff000000
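/* Illustrative sketch (ours, not assembled): what the orr/bic pairs above
   compute on a normalized float's bit pattern -- clear sign and exponent,
   then make the implicit leading 1 explicit in bit 23:

	#include <stdint.h>

	static uint32_t significand (uint32_t bits)
	{
	  return (bits & 0x007fffffu) | 0x00800000u;
	}
*/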
	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not a common case, rescale them off line.

	@ Compensate for the exponent overlapping the mantissa MSB added later

	@ Shift and add second arg to first arg in r0.
	@ Keep leftover bits in r1.
	adds	r0, r0, r1, asr r3

	@ Keep absolute value in r0-r1, sign in r3 (the N bit was set above)
	and	r3, r0, #0x80000000
	@ Determine how to normalize the result.

	@ Result needs to be shifted right.

	@ Make sure we did not bust our exponent.
	@ Our result is now properly aligned into r0, remaining bits in r1.
	@ Pack final result together.
	@ Round with MSB of r1.  If halfway between two numbers, round
	@ towards the even value.
	adc	r0, r0, r2, lsl #23
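/* The cmp/adc pair sets the carry flag to the MSB of the leftover bits,
   then folds that carry and the exponent field into the result in one
   instruction.  The rounding rule alone, as a C sketch (ours; the
   round-to-even tie-break is done with a biceq elsewhere in this file):

	#include <stdint.h>

	static uint32_t round_even (uint32_t m, uint32_t rem)
	{
	  m += rem >> 31;		// add the rounding bit
	  if (rem == 0x80000000u)	// exact halfway case:
	    m &= ~1u;			//   force the result even
	  return m;
	}
*/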
	@ Result must be shifted left and exponent adjusted.

	@ No rounding necessary since r1 will always be 0 at this point.

	moveq	r0, r0, lsl #12
	@ Final result with sign.
	@ If exponent negative, denormalize result.
	addge	r0, r0, r2, lsl #23
	orrlt	r0, r3, r0, lsr r2

	@ Fixup and adjust bit position for denormalized arguments.
	@ Note that r2 must not remain equal to 0.
	eor	r1, r1, #0x00800000
	eoreq	r0, r0, #0x00800000
	mvnnes	ip, r3, asr #24

	@ Result is x + 0.0 = x or 0.0 + y = y.

	@ Result is x - x = 0.

	@ Result is x + x = 2x.
	orrcs	r0, r0, #0x80000000

2:	adds	r2, r2, #(2 << 24)
	addcc	r0, r0, #(1 << 23)
	and	r3, r0, #0x80000000

	@ Overflow: return INF.
	orr	r0, r3, #0x7f000000
	orr	r0, r0, #0x00800000
	@ At least one of r0/r1 is INF/NAN.
	@   if r0 != INF/NAN: return r1 (which is INF/NAN)
	@   if r1 != INF/NAN: return r0 (which is INF/NAN)
	@   if r0 or r1 is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return r0 (which is INF or -INF)
	mvneqs	r3, r3, asr #24
	moveqs	r3, r1, lsl #9
	orrne	r0, r0, #0x00400000	@ quiet NAN
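/* The decision table above, as a C sketch (ours; simplified -- the real
   code also quiets signaling NANs on every NAN path):

	#include <stdint.h>

	// Exponent all ones <=> INF or NAN; nonzero mantissa too <=> NAN.
	#define IS_SPECIAL(u) (((u) << 1) >= 0xff000000u)
	#define IS_NAN(u)     (((u) << 1) >  0xff000000u)

	static uint32_t add_special (uint32_t x, uint32_t y)
	{
	  if (!IS_SPECIAL (x)) return y;	// x finite: INF/NAN wins
	  if (!IS_SPECIAL (y)) return x;
	  if (IS_NAN (x) || IS_NAN (y) || x != y)
	    return x | 0x00400000u;		// NAN in, or INF + -INF
	  return x;				// INF + INF of same sign
	}
*/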
ARM_FUNC_START floatunsisf
ARM_FUNC_ALIAS aeabi_ui2f floatunsisf

ARM_FUNC_START floatsisf
ARM_FUNC_ALIAS aeabi_i2f floatsisf

	ands	r3, r0, #0x80000000
	@ Add initial exponent to sign.
	orr	r3, r3, #((127 + 23) << 23)
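/* The trick above in C (our sketch, truncating instead of rounding for
   values wider than 24 bits): with a biased exponent of 127 + 23, a
   24-bit mantissa read as an integer is its own value, so the integer
   only needs to be normalized into the mantissa field:

	#include <stdint.h>
	#include <string.h>

	static float ui2f (uint32_t n)
	{
	  uint32_t exp = 127 + 23;
	  float f;
	  if (n == 0) return 0.0f;
	  while (n < 0x00800000u)  { n <<= 1; exp--; }	// normalize up
	  while (n >= 0x01000000u) { n >>= 1; exp++; }	// truncates!
	  n = (exp << 23) | (n & 0x007fffffu);
	  memcpy (&f, &n, sizeof f);
	  return f;
	}
*/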
ARM_FUNC_START floatundisf
ARM_FUNC_ALIAS aeabi_ul2f floatundisf

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

ARM_FUNC_START floatdisf
ARM_FUNC_ALIAS aeabi_l2f floatdisf

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

	ands	r3, ah, #0x80000000	@ sign bit in r3
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0 for backwards
	@ compatibility.
	@ Add initial exponent to sign.
	orr	r3, r3, #((127 + 23 + 32) << 23)
	subeq	r3, r3, #(32 << 23)
2:	sub	r3, r3, #(1 << 23)

	movhs	ip, ip, lsr #16

	sublo	r2, r2, ip, lsr #1
	subs	r2, r2, ip, lsr #3
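/* The shift ladder above counts leading zeros without a clz instruction,
   by binary search.  An equivalent C sketch (ours):

	#include <stdint.h>

	static int clz32 (uint32_t x)	// precondition: x != 0
	{
	  int n = 0;
	  if (x < (1u << 16)) { n += 16; x <<= 16; }
	  if (x < (1u << 24)) { n += 8;  x <<= 8;  }
	  if (x < (1u << 28)) { n += 4;  x <<= 4;  }
	  if (x < (1u << 30)) { n += 2;  x <<= 2;  }
	  if (x < (1u << 31)) { n += 1; }
	  return n;
	}
*/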
	sub	r3, r3, r2, lsl #23

	add	r3, r3, ah, lsl r2

	adc	r0, r3, al, lsr r2

	orrs	al, al, ip, lsl #1
	adc	r0, r3, ah, lsr r2
	biceq	r0, r0, ip, lsr #31
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

#endif /* L_addsubsf3 */
ARM_FUNC_START mulsf3
ARM_FUNC_ALIAS aeabi_fmul mulsf3

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	ands	r2, ip, r0, lsr #23
	andnes	r3, ip, r1, lsr #23
	@ Add exponents together.

	@ Determine final sign.

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	@ Make up for final alignment.
	movnes	r1, r1, lsl #9

	orr	r0, r3, r0, lsr #5
	orr	r1, r3, r1, lsr #5
	@ Put sign bit in r3, which will be restored into r0 later.
	and	r3, ip, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	stmfd	sp!, {r3, r4, r5}

	bic	r0, r0, r4, lsl #16
	bic	r1, r1, r5, lsl #16

	adds	r3, r3, r0, lsl #16
	adc	r1, ip, r0, lsr #16
	ldmfd	sp!, {r0, r4, r5}
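/* What the 16-bit split above achieves, in C (our sketch): a 32x32->64
   multiply built from four 16x16->32 partial products, for cores
   without the umull instruction:

	#include <stdint.h>

	static void mul32x32 (uint32_t a, uint32_t b,
			      uint32_t *hi, uint32_t *lo)
	{
	  uint32_t al = a & 0xffffu, ah = a >> 16;
	  uint32_t bl = b & 0xffffu, bh = b >> 16;
	  uint32_t p0 = al * bl, p1 = al * bh;
	  uint32_t p2 = ah * bl, p3 = ah * bh;
	  uint32_t mid = p1 + p2;		// may wrap: keep the carry
	  uint32_t mc  = (mid < p1) ? 0x10000u : 0;
	  uint32_t l   = p0 + (mid << 16);
	  *hi = p3 + (mid >> 16) + mc + (l < p0);
	  *lo = l;
	}
*/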
	@ The actual multiplication.

	@ Put final sign in r0.
	and	r0, ip, #0x80000000

	@ Adjust result according to the MSB position.
	orrcc	r1, r1, r3, lsr #31
	@ Add sign to result.

	@ Apply exponent bias, check for under/overflow.

	@ Round the result, merge final exponent.
	adc	r0, r0, r2, lsl #23
	@ Multiplication by 0x1p*: let's shortcut a lot of code.
	and	ip, ip, #0x80000000

	orr	r0, ip, r0, lsr #9
	orr	r0, r0, r1, lsr #9

	orrgt	r0, r0, r2, lsl #23

	@ Under/overflow: fix things up for the code below.
	orr	r0, r0, #0x00800000

	@ Check if denormalized result is possible, otherwise return signed 0.
	bicle	r0, r0, #0x7fffffff

	@ Shift value right, round, etc.

	orrs	r3, r3, ip, lsl #1
	biceq	r0, r0, ip, lsr #31

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.

	and	ip, r0, #0x80000000
1:	moveq	r0, r0, lsl #1
	tsteq	r0, #0x00800000

	and	ip, r1, #0x80000000
2:	moveq	r1, r1, lsl #1
	tsteq	r1, #0x00800000
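/* The renormalization loops above, modeled in C (our sketch): shift the
   denormal mantissa left until the implicit-1 position (bit 23) is
   occupied, adjusting the exponent on the way:

	#include <stdint.h>

	static uint32_t normalize (uint32_t mant, int *exp)
	{
	  while (!(mant & 0x00800000u))
	    {
	      mant <<= 1;
	      (*exp)--;
	    }
	  return mant;
	}
*/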
	@ Isolate the INF and NAN cases.
	and	r3, ip, r1, lsr #23
	@ Here, one or more arguments are either denormalized or zero.
	bics	ip, r0, #0x80000000
	bicnes	ip, r1, #0x80000000

	@ Result is 0, but determine sign anyway.
	bic	r0, r0, #0x7fffffff

1:	@ One or both args are INF or NAN.
	teqne	r0, #0x80000000

	teqne	r1, #0x80000000
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN

	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN

	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.

	@ Overflow: return INF (sign already in r0).
	and	r0, r0, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000

	@ Return a quiet NAN.
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00c00000
ARM_FUNC_START divsf3
ARM_FUNC_ALIAS aeabi_fdiv divsf3

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	ands	r2, ip, r0, lsr #23
	andnes	r3, ip, r1, lsr #23
	@ Subtract the divisor exponent from the dividend's.

	@ Preserve final sign in ip.
	@ Convert mantissa to unsigned integer.
	@ Dividend -> r3, divisor -> r1.

	orr	r1, r3, r1, lsr #4
	orr	r3, r3, r0, lsr #4

	@ Initialize r0 (result) with final sign bit.
	and	r0, ip, #0x80000000
	@ Ensure the result will land at a known bit position.
	@ Apply exponent bias accordingly.
	adc	r2, r2, #(127 - 2)
	@ The actual division loop.

	subcs	r3, r3, r1, lsr #1
	orrcs	r0, r0, ip, lsr #1

	subcs	r3, r3, r1, lsr #2
	orrcs	r0, r0, ip, lsr #2

	subcs	r3, r3, r1, lsr #3
	orrcs	r0, r0, ip, lsr #3

	movnes	ip, ip, lsr #4
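/* The loop above is restoring division, producing four quotient bits per
   pass with ip as a moving bit mask.  One bit at a time in C (our
   sketch), for mantissas in [2^23, 2^24):

	#include <stdint.h>

	static uint32_t div_mant (uint32_t num, uint32_t den)
	{
	  uint32_t q = 0, bit;
	  for (bit = 1u << 23; bit != 0; bit >>= 1)
	    {
	      if (num >= den)
		{
		  num -= den;
		  q |= bit;
		}
	      den >>= 1;
	    }
	  return q;	// a nonzero 'num' left over is the sticky bit
	}
*/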
	@ Check exponent for under/overflow.

	@ Round the result, merge final exponent.
	adc	r0, r0, r2, lsl #23

	@ Division by 0x1p*: let's shortcut a lot of code.
	and	ip, ip, #0x80000000
	orr	r0, ip, r0, lsr #9

	orrgt	r0, r0, r2, lsl #23

	orr	r0, r0, #0x00800000
	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.

	and	ip, r0, #0x80000000
1:	moveq	r0, r0, lsl #1
	tsteq	r0, #0x00800000

	and	ip, r1, #0x80000000
2:	moveq	r1, r1, lsl #1
	tsteq	r1, #0x00800000
	@ One or both arguments are either INF, NAN, zero or denormalized.
	and	r3, ip, r1, lsr #23

	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN

	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN

	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN

2:	@ If both are nonzero, we need to normalize and resume above.
	bics	ip, r0, #0x80000000
	bicnes	ip, r1, #0x80000000

	@ One or both arguments are zero.
	bics	r2, r0, #0x80000000
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	bics	r3, r1, #0x80000000
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

#endif /* L_muldivsf3 */
	@ The return value in r0 is
	@
	@   0  if the operands are equal
	@   1  if the first operand is greater than the second, or
	@      the operands are unordered and the operation is
	@      CMP, LT, LE, NE, or EQ.
	@   -1 if the first operand is less than the second, or
	@      the operands are unordered and the operation is GT
	@      or GE.
	@
	@ The Z flag will be set iff the operands are equal.
	@
	@ The following registers are clobbered by this function:
	@   ip, r0, r1, r2, r3
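/* The contract above, restated as a C sketch (ours; 'unord' stands for
   the value each entry point below seeds into ip):

	static int cmpsf (float a, float b, int unord)
	{
	  if (a != a || b != b) return unord;	// NAN compares unordered
	  if (a < b) return -1;
	  if (a > b) return 1;
	  return 0;				// note: 0.0 == -0.0
	}
*/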
ARM_FUNC_START gtsf2
ARM_FUNC_ALIAS gesf2 gtsf2

ARM_FUNC_START ltsf2
ARM_FUNC_ALIAS lesf2 ltsf2

ARM_FUNC_START cmpsf2
ARM_FUNC_ALIAS nesf2 cmpsf2
ARM_FUNC_ALIAS eqsf2 cmpsf2
	mov	ip, #1			@ how should we specify unordered here?
	@ Trap any INF/NAN first.
	mvnnes	ip, r3, asr #24

	@ Note that 0.0 is equal to -0.0.
2:	orrs	ip, r2, r3, lsr #1	@ test if both are 0, clear C flag
	teqne	r0, r1			@ if not 0 compare sign
	subpls	r0, r2, r3		@ if same sign compare values, set r0

	movhi	r0, r1, asr #31
	mvnlo	r0, r1, asr #31

3:	mvns	ip, r2, asr #24

4:	mvns	ip, r3, asr #24

	beq	2b			@ r1 is not NAN

5:	ldr	r0, [sp, #-4]		@ return unordered code.
ARM_FUNC_START aeabi_cfrcmple

ARM_FUNC_START aeabi_cfcmpeq
ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	stmfd	sp!, {r0, r1, r2, r3, lr}

	@ Set the Z flag correctly, and the C flag unconditionally.

	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.

	RETLDM	"r0, r1, r2, r3"

	FUNC_END aeabi_cfcmple
	FUNC_END aeabi_cfcmpeq
	FUNC_END aeabi_cfrcmple
ARM_FUNC_START aeabi_fcmpeq

	ARM_CALL aeabi_cfcmple
	moveq	r0, #1			@ Equal to.
	movne	r0, #0			@ Less than, greater than, or unordered.

	FUNC_END aeabi_fcmpeq

ARM_FUNC_START aeabi_fcmplt

	ARM_CALL aeabi_cfcmple
	movcc	r0, #1			@ Less than.
	movcs	r0, #0			@ Equal to, greater than, or unordered.

	FUNC_END aeabi_fcmplt

ARM_FUNC_START aeabi_fcmple

	ARM_CALL aeabi_cfcmple
	movls	r0, #1			@ Less than or equal to.
	movhi	r0, #0			@ Greater than or unordered.

	FUNC_END aeabi_fcmple

ARM_FUNC_START aeabi_fcmpge

	ARM_CALL aeabi_cfrcmple
	movls	r0, #1			@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0			@ Operand 2 greater than operand 1, or unordered.

	FUNC_END aeabi_fcmpge

ARM_FUNC_START aeabi_fcmpgt

	ARM_CALL aeabi_cfrcmple
	movcc	r0, #1			@ Operand 2 is less than operand 1.
	movcs	r0, #0			@ Operand 2 is greater than or equal to operand 1,
					@ or they are unordered.

	FUNC_END aeabi_fcmpgt
#endif /* L_cmpsf2 */

ARM_FUNC_START unordsf2
ARM_FUNC_ALIAS aeabi_fcmpun unordsf2

1:	mvns	ip, r3, asr #24

2:	mov	r0, #0			@ arguments are ordered.

3:	mov	r0, #1			@ arguments are unordered.

	FUNC_END aeabi_fcmpun
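/* Unordered simply means at least one operand is a NAN: exponent all
   ones with a nonzero mantissa.  On raw bit patterns (our sketch):

	#include <stdint.h>

	static int unordsf (uint32_t x, uint32_t y)
	{
	  return ((x << 1) > 0xff000000u) || ((y << 1) > 0xff000000u);
	}
*/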
#endif /* L_unordsf2 */
ARM_FUNC_START fixsfsi
ARM_FUNC_ALIAS aeabi_f2iz fixsfsi

	@ Check exponent range.
	bcc	1f			@ value is too small

	subs	r2, r3, r2, lsr #24
	bls	2f			@ value is too large

	orr	r3, r3, #0x80000000
	tst	r0, #0x80000000		@ the sign bit
2:	cmp	r2, #(127 + 31 - 0xff)

3:	ands	r0, r0, #0x80000000	@ the sign bit
	moveq	r0, #0x7fffffff		@ the maximum signed positive si

4:	mov	r0, #0			@ What should we convert NAN to?
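/* The whole conversion, modeled in C (our sketch; the saturation and the
   NAN -> 0 choice mirror the code above):

	#include <stdint.h>

	static int32_t f2iz (uint32_t bits)
	{
	  int exp = (int) ((bits >> 23) & 0xff) - 127;
	  uint32_t mag, mant = (bits & 0x007fffffu) | 0x00800000u;
	  if (exp < 0) return 0;			// |x| < 1 truncates to 0
	  if (exp > 30)
	    {
	      if ((bits << 1) > 0xff000000u) return 0;	// NAN -> 0
	      return (bits & 0x80000000u) ? INT32_MIN : INT32_MAX;
	    }
	  mag = exp >= 23 ? mant << (exp - 23) : mant >> (23 - exp);
	  return (bits & 0x80000000u) ? -(int32_t) mag : (int32_t) mag;
	}
*/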
#endif /* L_fixsfsi */

ARM_FUNC_START fixunssfsi
ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi

	@ Check exponent range.
	bcs	1f			@ value is negative

	bcc	1f			@ value is too small

	subs	r2, r3, r2, lsr #24
	bmi	2f			@ value is too large

	orr	r3, r3, #0x80000000

2:	cmp	r2, #(127 + 31 - 0xff)

3:	mov	r0, #0xffffffff		@ maximum unsigned si

4:	mov	r0, #0			@ What should we convert NAN to?

#endif /* L_fixunssfsi */