sdcc/device/lib/_fsmul.c

   1 /*-------------------------------------------------------------------------
   2    _fsmul.c - Floating point library in optimized assembly for 8051
   3
   4    Copyright (c) 2004, Paul Stoffregen, paul@pjrc.com
   5
   6    This library is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by the
   8    Free Software Foundation; either version 2, or (at your option) any
   9    later version.
  10
  11    This library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this library; see the file COPYING. If not, write to the
  18    Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  19    MA 02110-1301, USA.
  20
  21    As a special exception, if you link this library with other files,
  22    some of which are compiled with SDCC, to produce an executable,
  23    this library does not by itself cause the resulting executable to
  24    be covered by the GNU General Public License. This exception does
  25    not however invalidate any other reasons why the executable file
  26    might be covered by the GNU General Public License.
  27 -------------------------------------------------------------------------*/
  28
  29
  30 #define __SDCC_FLOAT_LIB
  31 #include <float.h>
  32
  33
  34 #ifdef FLOAT_ASM_MCS51
  35
  36 // float __fsmul (float a, float b) __reentrant -- 8051 assembly implementation of float multiplication
  37 static void dummy(void) __naked         // container only; callers reach the code through the ___fsmul label
  38 {
  39         __asm
  40         .globl  ___fsmul
  41 ___fsmul:
  42         // extract the two inputs, placing them into:
  43         //      sign     exponent   mantissa
  44         //      ----     --------   --------
  45         //  a:  sign_a   exp_a      r4/r3/r2
  46         //  b:  sign_b   exp_b      r7/r6/r5
  47         // (fsgetargs and the fs_* routines used below are not defined in this file)
  48         lcall   fsgetargs
  49
  50         // first check if either input is zero
  51         cjne    r4, #0, 00002$          // a counts as nonzero when its top mantissa byte is nonzero
  52 00001$:
  53         ljmp    fs_return_zero
  54
  55 00002$:
  56         mov     a, r7                   // same test for b, using its top mantissa byte
  57         jz      00001$
  58
  59         // compute final sign bit
  60         jnb     sign_b, 00003$
  61         cpl     sign_a                  // net effect: sign_a = sign_a XOR sign_b
  62 00003$:
  63
  64         // check if either input is infinity (only the exponent is compared with 0xFF, the mantissa is not examined)
  65         mov     a, exp_b
  66         cjne    a, #0xFF, 00004$
  67         ljmp    fs_return_inf
  68 00004$:
  69         mov     a, exp_a
  70         cjne    a, #0xFF, 00005$
  71         ljmp    fs_return_inf
  72 00005$:
  73         // A still holds exp_a here (cjne does not modify its operands)
  74         // add the exponents
  75         add     a, exp_b
  76         // if carry then no underflow
  77         jc      00006$
  78         add     a, #130                 // sum < 256: carry only if sum >= 126, and then A = sum - 126
  79         jc      00007$
  80         ljmp    fs_return_zero          // sum < 126: the result exponent would be negative
  81
  82 00006$:
  83         add     a, #131                 // sum >= 256: carry if sum >= 381, i.e. sum - 126 >= 255
  84         dec     a                       // A = sum - 126; dec leaves the carry flag unchanged
  85         jnc     00007$
  86         ljmp    fs_return_inf
  87
  88 00007$:
  89         mov     exp_a, a                // exp_a = exp_a + exp_b - 126, before any normalization
  90
  91         // now we need to multiply r4/r3/r2 * r7/r6/r5
  92         // ------------------------------------------
  93         //                              r2 * r5         << 0
  94         //                  r3 * r5  +  r2 * r6         << 8
  95         //      r4 * r5  +  r3 * r6  +  r2 * r7         << 16
  96         //      r4 * r6  +  r3 * r7                     << 24
  97         //      r4 * r7                                 << 32
  98         //
  99         // This adds quite a bit of code, but it is a LOT faster
 100         // than three calls to __mululong...
 101
 102         // output goes into r4/r3/r2/r1/r0/xx
 103         // (each "range" note below bounds the running sum after that step)
 104         mov     a, r2
 105         mov     b, r5
 106         mul     ab                      // r2 * r5
 107         // discard lowest 8 bits
 108         mov     r0, b
 109         // range 0-FE
 110
 111         mov     a, r2
 112         mov     b, r6
 113         mul     ab                      // r2 * r6
 114         add     a, r0
 115         mov     r0, a
 116         clr     a
 117         addc    a, b
 118         mov     r1, a
 119         // range 0-FEFF
 120
 121         mov     a, r3
 122         mov     b, r5
 123         mul     ab                      // r3 * r5
 124         add     a, r0                   // performed only for the carry it produces
 125         // discard lowest 8 bits
 126         mov     a, r1
 127         addc    a, b
 128         mov     r1, a
 129         clr     a
 130         rlc     a                       // A = carry out of the addc above
 131         xch     a, r2                   // r2 = that carry; A = low mantissa byte of a for the next mul
 132         // range 0-1FD
 133
 134         mov     b, r7
 135         mul     ab                      // r2 * r7
 136         add     a, r1
 137         mov     r1, a
 138         mov     a, r2
 139         addc    a, b
 140         mov     r2, a
 141         // range 0-FFFE
 142
 143         mov     a, r3
 144         mov     r0, a                   // keep the middle mantissa byte of a in r0; r3 is reused for the sum below
 145         mov     b, r6
 146         mul     ab                      // r3 * r6
 147         add     a, r1
 148         mov     r1, a
 149         mov     a, r2
 150         addc    a, b
 151         mov     r2, a
 152         clr     a
 153         rlc     a
 154         mov     r3, a                   // r3 = carry out, now the top byte of the running sum
 155         // range 0-1FDFF
 156
 157         mov     a, r4
 158         mov     b, r5
 159         mul     ab                      // r4 * r5
 160         add     a, r1
 161         mov     r1, a
 162         mov     a, r2
 163         addc    a, b
 164         mov     r2, a
 165         clr     a
 166         addc    a, r3
 167         mov     r3, a
 168         // range 0-2FC00
 169
 170         mov     a, r0 // r3
 171         mov     b, r7
 172         mul     ab                      // r3 * r7
 173         add     a, r2
 174         mov     r2, a
 175         mov     a, r3
 176         addc    a, b
 177         mov     r3, a
 178         clr     a
 179         rlc     a
 180         xch     a, r4                   // r4 = carry out; A = high mantissa byte of a
 181         // range 0-100FD00
 182
 183         mov     r5, a                   // park the high byte of a in r5 (r5 was last needed for r4 * r5)
 184         mov     b, r6
 185         mul     ab                      // r4 * r6
 186         add     a, r2
 187         mov     r2, a
 188         mov     a, r3
 189         addc    a, b
 190         mov     r3, a
 191         clr     a
 192         addc    a, r4
 193         mov     r4, a
 194         // range 0-1FEFE00
 195
 196         mov     a, r5 // r4
 197         mov     b, r7
 198         mul     ab                      // r4 * r7
 199         add     a, r3
 200         mov     r3, a
 201         mov     a, r4
 202         addc    a, b
 203         mov     r4, a
 204         // range 40000000-FFFFFE00
 205         // A still equals the new r4, so acc.7 is the top bit of the product
 206         jb      acc.7, 00010$           // top bit already set: skip the normalization call
 207         lcall   fs_normalize_a          // NOTE(review): presumably shifts the mantissa up and adjusts exp_a -- confirm in its definition
 208
 209 00010$:
 210         ljmp    fs_round_and_return     // NOTE(review): presumably rounds, packs sign_a/exp_a/mantissa and returns to the caller -- confirm
 211         __endasm;
 212 }
 213
 214 #else
 215
 216 /*
 217 ** libgcc support for software floating point.
 218 ** Copyright (C) 1991 by Pipeline Associates, Inc.  All rights reserved.
 219 ** Permission is granted to do *anything* you want with this file,
 220 ** commercial or otherwise, provided this message remains intact.  So there!
 221 ** I would appreciate receiving any updates/patches/changes that anyone
 222 ** makes, and am willing to be the repository for said changes (am I
 223 ** making a big mistake?).
 224 **
 225 ** Pat Wood
 226 ** Pipeline Associates, Inc.
 227 ** pipeline!phw@motown.com or
 228 ** sun!pipeline!phw or
 229 ** uunet!motown!pipeline!phw
 230 */
 231
 232 /* (c)2000/2001: hacked a little by johan.knol@iduna.nl for sdcc */
 233
 234 union float_long
 235   {
 236     float f;
 237     unsigned long l;
 238   };
 239
 240 /* multiply two floats */
 241 float __fsmul (float a1, float a2) __SDCC_FLOAT_NONBANKED
 242 {
 243   volatile union float_long fl1, fl2;
 244   unsigned long result;
 245   int exp;
 246   char sign;
 247
 248   fl1.f = a1;
 249   fl2.f = a2;
 250
 251   if (!fl1.l || !fl2.l)
 252     return (0);
 253
 254   /* compute sign and exponent */
 255   sign = SIGN (fl1.l) ^ SIGN (fl2.l);
 256   exp = EXP (fl1.l) - EXCESS;
 257   exp += EXP (fl2.l);
 258
 259   fl1.l = MANT (fl1.l);
 260   fl2.l = MANT (fl2.l);
 261
 262   /* the multiply is done as one 16x16 multiply and two 16x8 multiplies */
 263   result = (unsigned long)((unsigned short)(fl1.l >> 8)) * (unsigned short)(fl2.l >> 8);
 264   result += ((unsigned long)((unsigned short)(fl1.l & 0xff)) * (unsigned short)(fl2.l >> 8)) >> 8;
 265   result += ((unsigned long)((unsigned short)(fl2.l & 0xff)) * (unsigned short)(fl1.l >> 8)) >> 8;
 266
 267   /* round, phase 1 */
 268   result += 0x40;
 269
 270   if (result & SIGNBIT)
 271     {
 272       /* round, phase 2 */
 273       result += 0x40;
 274       result >>= 8;
 275     }
 276   else
 277     {
 278       result >>= 7;
 279       exp--;
 280     }
 281
 282   result &= ~HIDDEN;
 283
 284   /* pack up and go home */
 285   if (exp >= 0x100)
 286     fl1.l = (sign ? SIGNBIT : 0) | __INFINITY;
 287   else if (exp < 0)
 288     fl1.l = 0;
 289   else
 290     fl1.l = PACK (sign ? SIGNBIT : 0 , exp, result);
 291   return (fl1.f);
 292 }
 293
 294 #endif