Fixed compatibility of output.
[AROS.git] / arch / m68k-all / libgcc1 / _mulsf3.s
blob1ff94d8e73f5063634e79e1024dc2dc0530c71d1
1 | single floating point multiplication routine
3 | written by Kai-Uwe Bloem (I5110401@dbstu1.bitnet).
4 | Based on a 80x86 floating point packet from comp.os.minix, written by P.Housel
7 | Revision 1.2, kub 01-90 :
8 | added support for denormalized numbers
10 | Revision 1.1, kub 12-89 :
11 | Created single float version for 68000. Code could be speed up by having
12 | the accumulator in the 68000 register set ...
14 | Revision 1.0:
15 | original 8088 code from P.S.Housel for double floats
17 BIAS4 = 0x7F-1
19 .text
20 .even
21 .globl __mulsf3
23 __mulsf3:
24 lea %sp@(4),%a0
25 moveml %d2-%d5,%sp@-
26 moveml %a0@,%d4/%d5 | %d4 = v, %d5 = u
27 subw #8,%sp | multiplication accumulator
29 movel %d5,%d0 | %d0 = u.exp
30 swap %d0
31 movew %d0,%d2 | %d2 = u.sign
32 lsrw #7,%d0
33 andw #0xff,%d0 | kill sign bit
35 movel %d4,%d1 | %d1 = v.exp
36 swap %d1
37 eorw %d1,%d2 | %d2 = u.sign ^ v.sign (in bit 31)
38 lsrw #7,%d1
39 andw #0xff,%d1 | kill sign bit
41 andl #0x7fffff,%d5 | remove exponent from u.mantissa
42 tstw %d0 | check for zero exponent - no leading "1"
43 beq L_00
44 orl #0x800000,%d5 | restore implied leading "1"
45 bra L_10
46 L_00: addw #1,%d0 | "normalize" exponent
47 L_10: tstl %d5
48 beq retz | multiplying zero
50 andl #0x7fffff,%d4 | remove exponent from v.mantissa
51 tstw %d1 | check for zero exponent - no leading "1"
52 beq L_01
53 orl #0x800000,%d4 | restore implied leading "1"
54 bra L_11
55 L_01: addw #1,%d1 | "normalize" exponent
56 L_11: tstl %d4
57 beq retz | multiply by zero
59 addw %d1,%d0 | add exponents,
60 subw #BIAS4+16-8,%d0 | remove excess bias, acnt for repositioning
62 clrl %sp@ | initialize 64-bit product to zero
63 clrl %sp@(4)
65 | see Knuth, Seminumerical Algorithms, section 4.3. algorithm M
67 movew %d4,%d3
68 mulu %d5,%d3 | mulitply with bigit from multiplier
69 movel %d3,%sp@(4) | store into result
71 movel %d4,%d3
72 swap %d3
73 mulu %d5,%d3
74 addl %d3,%sp@(2) | add to result
76 swap %d5 | [TOP 8 BITS SHOULD BE ZERO !]
78 movew %d4,%d3
79 mulu %d5,%d3 | mulitply with bigit from multiplier
80 addl %d3,%sp@(2) | store into result (no carry can occur here)
82 movel %d4,%d3
83 swap %d3
84 mulu %d5,%d3
85 addl %d3,%sp@ | add to result
86 | [TOP 16 BITS SHOULD BE ZERO !]
87 moveml %sp@(2),%d4-%d5 | get the 48 valid mantissa bits
88 clrw %d5 | (pad to 64)
89 L_2:
90 cmpl #0x0000ffff,%d4 | multiply (shift) until
91 bhi L_3 | 1 in upper 16 result bits
92 cmpw #9,%d0 | give up for denormalized numbers
93 ble L_3
94 swap %d4 | (we're getting here only when multiplying
95 swap %d5 | with a denormalized number; there's an
96 movew %d5,%d4 | eventual loss of 4 bits in the rounding
97 clrw %d5 | byte -- what a pity 8-)
98 subw #16,%d0 | decrement exponent
99 bra L_2
100 L_3:
101 movel %d5,%d1 | get rounding bits
102 roll #8,%d1
103 movel %d1,%d3 | see if sticky bit should be set
104 andl #0xffffff00,%d3
105 beq L_4
106 orb #1,%d1 | set "sticky bit" if any low-order set
107 L_4: addw #8,%sp | remove accumulator from stack
108 jmp norm_sf | (result in %d4)
110 retz: clrl %d0 | save zero as result
111 addw #8,%sp
112 moveml %sp@+,%d2-%d5
113 rts | no normalizing neccessary