arch/m68k-mac/libgcc1/_muldf3.s

   1 | double floating point multiplication routine
   2 |
   3 | written by Kai-Uwe Bloem (I5110401@dbstu1.bitnet).
   4 | Based on a 80x86 floating point packet from comp.os.minix, written by P.Housel
   5 |
   6 |
   7 | Revision 1.2, kub 01-90 :
   8 | added support for denormalized numbers
   9 |
  10 | Revision 1.1, kub 12-89 :
  11 | Ported over to 68k assembler
  12 |
  13 | Revision 1.0:
  14 | original 8088 code from P.S.Housel
  15
  16 BIAS8   =       0x3FF-1
  17
  18         .text
  19         .even
  20         .globl  __muldf3
  21
  22 __muldf3:
  23         lea     %sp@(4),%a0
  24         moveml  %d2-%d7,%sp@-
  25         moveml  %a0@,%d4-%d5/%d6-%d7 | %d4-%d5 = v, %d6-%d7 = u
  26         subw    #16,%sp         | multiplication accumulator
  27
  28         movel   %d6,%d0         | %d0 = u.exp
  29         swap    %d0
  30         movew   %d0,%d2         | %d2 = u.sign
  31         lsrw    #4,%d0
  32         andw    #0x07ff,%d0     | kill sign bit
  33
  34         movel   %d4,%d1         | %d1 = v.exp
  35         swap    %d1
  36         eorw    %d1,%d2         | %d2 = u.sign ^ v.sign (in bit 31)
  37         lsrw    #4,%d1
  38         andw    #0x07ff,%d1     | kill sign bit
  39
  40         andl    #0x0fffff,%d6   | remove exponent from u.mantissa
  41         tstw    %d0             | check for zero exponent - no leading "1"
  42         beq     L_00
  43         orl     #0x100000,%d6   | restore implied leading "1"
  44         bra     L_10
  45 L_00:   addw    #1,%d0          | "normalize" exponent
  46 L_10:   movel   %d6,%d3
  47         orl     %d7,%d3
  48         beq     retz            | multiplying by zero
  49
  50         andl    #0x0fffff,%d4   | remove exponent from v.mantissa
  51         tstw    %d1             | check for zero exponent - no leading "1"
  52         beq     L_01
  53         orl     #0x100000,%d4   | restore implied leading "1"
  54         bra     L_11
  55 L_01:   addw    #1,%d1          | "normalize" exponent
  56 L_11:   movel   %d4,%d3
  57         orl     %d5,%d3
  58         beq     retz            | multiplying by zero
  59
  60         addw    %d1,%d0         | add exponents,
  61         subw    #BIAS8+16-11,%d0        | remove excess bias, acnt for repositioning
  62
  63         clrl    %sp@            | initialize 128-bit product to zero
  64         clrl    %sp@(4)
  65         clrl    %sp@(8)
  66         clrl    %sp@(12)
  67         lea     %sp@(8),%a1     | accumulator pointer
  68
  69 | see Knuth, Seminumerical Algorithms, section 4.3. algorithm M
  70
  71         swap    %d2
  72         movew   #4-1,%d2
  73 L_12:
  74         movew   %d5,%d3
  75         mulu    %d7,%d3         | mulitply with bigit from multiplier
  76         addl    %d3,%a1@(4)     | store into result
  77         movew   %d4,%d3
  78         mulu    %d7,%d3
  79         movel   %a1@,%d1                | add to result
  80         addxl   %d3,%d1
  81         movel   %d1,%a1@
  82         roxlw   %a1@-           | rotate carry in
  83
  84         movel   %d5,%d3
  85         swap    %d3
  86         mulu    %d7,%d3
  87         addl    %d3,%a1@(4)     | add to result
  88         movel   %d4,%d3
  89         swap    %d3
  90         mulu    %d7,%d3
  91         movel   %a1@,%d1                | add to result
  92         addxl   %d3,%d1
  93         movel   %d1,%a1@
  94
  95         movew   %d6,%d7
  96         swap    %d6
  97         swap    %d7
  98         dbra    %d2,L_12
  99
 100         swap    %d2             | [TOP 16 BITS SHOULD BE ZERO !]
 101
 102         moveml  %sp@(2),%d4-%d7 | get the 112 valid bits
 103         clrw    %d7             | (pad to 128)
 104 L_2:
 105         cmpl    #0x0000ffff,%d4 | multiply (shift) until
 106         bhi     L_3             |  1 in upper 16 result bits
 107         cmpw    #9,%d0          | give up for denormalized numbers
 108         ble     L_3
 109         swap    %d4             | (we're getting here only when multiplying
 110         swap    %d5             |  with a denormalized number; there's an
 111         swap    %d6             |  eventual loss of 4 bits in the rounding
 112         swap    %d7             |  byte -- what a pity 8-)
 113         movew   %d5,%d4
 114         movew   %d6,%d5
 115         movew   %d7,%d6
 116         clrw    %d7
 117         subw    #16,%d0         | decrement exponent
 118         bra     L_2
 119 L_3:
 120         movel   %d6,%d1         | get rounding bits
 121         roll    #8,%d1
 122         movel   %d1,%d3         | see if sticky bit should be set
 123         orl     %d7,%d3         | (lower 16 bits of %d7 are guaranteed to be 0)
 124         andl    #0xffffff00,%d3
 125         beq     L_4
 126         orb     #1,%d1          | set "sticky bit" if any low-order set
 127 L_4:    addw    #16,%sp         | remove accumulator from stack
 128         jmp     norm_df         | (result in %d4/%d5)
 129
 130 retz:   clrl    %d0             | save zero as result
 131         clrl    %d1
 132         addw    #16,%sp
 133         moveml  %sp@+,%d2-%d7
 134         rts                     | no normalizing neccessary