sdcc/device/lib/_fsdiv.c

   1 /*-------------------------------------------------------------------------
   2    _fsdiv.c - Floating point library in optimized assembly for 8051
   3
   4    Copyright (c) 2004, Paul Stoffregen, paul@pjrc.com
   5
   6    This library is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by the
   8    Free Software Foundation; either version 2, or (at your option) any
   9    later version.
  10
  11    This library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this library; see the file COPYING. If not, write to the
  18    Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  19    MA 02110-1301, USA.
  20
  21    As a special exception, if you link this library with other files,
  22    some of which are compiled with SDCC, to produce an executable,
  23    this library does not by itself cause the resulting executable to
  24    be covered by the GNU General Public License. This exception does
  25    not however invalidate any other reasons why the executable file
  26    might be covered by the GNU General Public License.
  27 -------------------------------------------------------------------------*/
  28
  29
  30 #define __SDCC_FLOAT_LIB
  31 #include <float.h>
  32 #include <sdcc-lib.h>
  33
  34 #ifdef FLOAT_ASM_MCS51
  35
  36 // float __fsdiv (float a, float b) __reentrant
  37 static void dummy(void) __naked
  38 {
  39         __asm
  40         .globl  ___fsdiv
  41 ___fsdiv:
  42         // extract the two inputs, placing them into:
  43         //      sign     exponent   mantissa
  44         //      ----     --------   --------
  45         //  a:  sign_a   exp_a      r4/r3/r2
  46         //  b:  sign_b   exp_b      r7/r6/r5
  47
  48         lcall   fsgetargs
  49
  50         // compute final sign bit
  51         jnb     sign_b, 00001$
  52         cpl     sign_a
  53 00001$:
  54
  55         // if divisor is zero, ...
  56         cjne    r7, #0, 00003$
  57         // if dividend is also zero, return NaN
  58         cjne    r4, #0, 00002$
  59         ljmp    fs_return_nan
  60 00002$:
  61         // but dividend is non-zero, return infinity
  62         ljmp    fs_return_inf
  63 00003$:
  64         // if dividend is zero, return zero
  65         cjne    r4, #0, 00004$
  66         ljmp    fs_return_zero
  67 00004$:
  68         // if divisor is infinity, ...
  69         mov     a, exp_b
  70         cjne    a, #0xFF, 00006$
  71         // and dividend is also infinity, return NaN
  72         mov     a, exp_a
  73         cjne    a, #0xFF, 00005$
  74         ljmp    fs_return_nan
  75 00005$:
  76         // but dividend is not infinity, return zero
  77         ljmp    fs_return_zero
  78 00006$:
  79         // if dividend is infinity, return infinity
  80         mov     a, exp_a
  81         cjne    a, #0xFF, 00007$
  82         ljmp    fs_return_inf
  83 00007$:
  84
  85         // subtract exponents
  86         clr     c
  87         subb    a, exp_b
  88         // if no carry then no underflow
  89         jnc     00008$
  90         add     a, #127
  91         jc      00009$
  92         ljmp    fs_return_zero
  93
  94 00008$:
  95         add     a, #128
  96         dec     a
  97         jnc     00009$
  98         ljmp    fs_return_inf
  99
 100 00009$:
 101         mov     exp_a, a
 102
 103         // need extra bits on a's mantissa
 104 #ifdef FLOAT_FULL_ACCURACY
 105         clr     c
 106         mov     a, r5
 107         subb    a, r2
 108         mov     a, r6
 109         subb    a, r3
 110         mov     a, r7
 111         subb    a, r4
 112         jc      00010$
 113         dec     exp_a
 114         clr     c
 115         mov     a, r2
 116         rlc     a
 117         mov     r1, a
 118         mov     a, r3
 119         rlc     a
 120         mov     r2, a
 121         mov     a, r4
 122         rlc     a
 123         mov     r3, a
 124         clr     a
 125         rlc     a
 126         mov     r4, a
 127         sjmp    00011$
 128 00010$:
 129 #endif
 130         clr     a
 131         xch     a, r4
 132         xch     a, r3
 133         xch     a, r2
 134         mov     r1, a
 135 00011$:
 136
 137         // begin long division
 138         push    exp_a
 139 #ifdef FLOAT_FULL_ACCURACY
 140         mov     b, #25
 141 #else
 142         mov     b, #24
 143 #endif
 144 00012$:
 145         // compare
 146         clr     c
 147         mov     a, r1
 148         subb    a, r5
 149         mov     a, r2
 150         subb    a, r6
 151         mov     a, r3
 152         subb    a, r7
 153         mov     a, r4
 154         subb    a, #0           // carry==0 if mant1 >= mant2
 155
 156 #ifdef FLOAT_FULL_ACCURACY
 157         djnz    b, 00013$
 158         sjmp    00015$
 159 00013$:
 160 #endif
 161         jc      00014$
 162         // subtract
 163         mov     a, r1
 164         subb    a, r5
 165         mov     r1, a
 166         mov     a, r2
 167         subb    a, r6
 168         mov     r2, a
 169         mov     a, r3
 170         subb    a, r7
 171         mov     r3, a
 172         mov     a, r4
 173         subb    a, #0
 174         mov     r4, a
 175         clr     c
 176
 177 00014$:
 178         // shift result
 179         cpl     c
 180         mov     a, r0
 181         rlc     a
 182         mov     r0, a
 183         mov     a, dpl
 184         rlc     a
 185         mov     dpl, a
 186         mov     a, dph
 187         rlc     a
 188         mov     dph, a
 189
 190         // shift partial remainder
 191         clr     c
 192         mov     a, r1
 193         rlc     a
 194         mov     r1, a
 195         mov     a, r2
 196         rlc     a
 197         mov     r2, a
 198         mov     a, r3
 199         rlc     a
 200         mov     r3, a
 201         mov     a, r4
 202         rlc     a
 203         mov     r4, a
 204
 205 #ifdef FLOAT_FULL_ACCURACY
 206         sjmp    00012$
 207 00015$:
 208 #else
 209         djnz    b, 00012$
 210 #endif
 211
 212         // now we've got a division result, so all we need to do
 213         // is round off properly, normalize and output a float
 214
 215 #ifdef FLOAT_FULL_ACCURACY
 216         cpl     c
 217         clr     a
 218         mov     r1, a
 219         addc    a, r0
 220         mov     r2, a
 221         clr     a
 222         addc    a, dpl
 223         mov     r3, a
 224         clr     a
 225         addc    a, dph
 226         mov     r4, a
 227         pop     exp_a
 228         jnc     00016$
 229         inc     exp_a
 230         // incrementing exp_a without checking carry is dangerous
 231         mov     r4, #0x80
 232 00016$:
 233 #else
 234         mov     r1, #0
 235         mov     a, r0
 236         mov     r2, a
 237         mov     r3, dpl
 238         mov     r4, dph
 239         pop     exp_a
 240 #endif
 241
 242         lcall   fs_normalize_a
 243         ljmp    fs_zerocheck_return
 244         __endasm;
 245 }
 246
 247 #else
 248
 249 /*
 250 ** libgcc support for software floating point.
 251 ** Copyright (C) 1991 by Pipeline Associates, Inc.  All rights reserved.
 252 ** Permission is granted to do *anything* you want with this file,
 253 ** commercial or otherwise, provided this message remains intact.  So there!
 254 ** I would appreciate receiving any updates/patches/changes that anyone
 255 ** makes, and am willing to be the repository for said changes (am I
 256 ** making a big mistake?).
 257 **
 258 ** Pat Wood
 259 ** Pipeline Associates, Inc.
 260 ** pipeline!phw@motown.com or
 261 ** sun!pipeline!phw or
 262 ** uunet!motown!pipeline!phw
 263 */
 264
 265 /* (c)2000/2001: hacked a little by johan.knol@iduna.nl for sdcc */
 266
 267 union float_long
 268   {
 269     float f;
 270     long l;
 271   };
 272
 273 /* divide two floats */
 274 static float __fsdiv_org (float a1, float a2) __SDCC_FLOAT_NONBANKED
 275 {
 276   volatile union float_long fl1, fl2;
 277   long result;
 278   unsigned long mask;
 279   unsigned long mant1, mant2;
 280   int exp;
 281   char sign;
 282
 283   fl1.f = a1;
 284
 285   exp = EXP (fl1.l);
 286   /* numerator denormal??? */
 287   if (!exp)
 288     return (0);
 289
 290   fl2.f = a2;
 291   /* subtract exponents */
 292   exp -= EXP (fl2.l);
 293   exp += EXCESS;
 294
 295   /* compute sign */
 296   sign = SIGN (fl1.l) ^ SIGN (fl2.l);
 297
 298   /* now get mantissas */
 299   mant1 = MANT (fl1.l);
 300   mant2 = MANT (fl2.l);
 301
 302   /* this assures we have 24 bits of precision in the end */
 303   if (mant1 < mant2)
 304     {
 305       mask = 0x1000000;
 306     }
 307   else
 308     {
 309       mask = 0x0800000;
 310       exp++;
 311     }
 312
 313   if (exp < 1) /* denormal */
 314     return (0);
 315
 316   if (exp >= 255)
 317     {
 318       fl1.l = sign ? SIGNBIT | __INFINITY : __INFINITY;
 319     }
 320   else
 321     {
 322       /* now we perform repeated subtraction of fl2.l from fl1.l */
 323       result = 0;
 324       do
 325         {
 326           long diff = mant1 - mant2;
 327           if (diff >= 0)
 328             {
 329               mant1 = diff;
 330               result |= mask;
 331             }
 332           mant1 <<= 1;
 333           mask >>= 1;
 334         }
 335       while (mask);
 336
 337       /* round */
 338       if (mant1 >= mant2)
 339         result += 1;
 340
 341       result &= ~HIDDEN;
 342
 343       /* pack up and go home */
 344       fl1.l = PACK (sign ? SIGNBIT : 0 , exp, result);
 345     }
 346   return (fl1.f);
 347 }
 348
 349 float __fsdiv (float a1, float a2) __SDCC_FLOAT_NONBANKED
 350 {
 351   unsigned long _AUTOMEM *p2 = (unsigned long *) &a2;
 352
 353   if (EXP (*p2) == 0) // a2 is denormal or zero, treat as zero
 354     {
 355       float f;
 356       unsigned long _AUTOMEM *p = (unsigned long *) &f;
 357       if (a1 > 0.0f)
 358         *p = __INFINITY;           // +inf
 359       else if (a1 < 0.0f)
 360         *p = SIGNBIT | __INFINITY; // -inf
 361       else // a1 is denormal, zero or nan
 362         *p = __NAN;                // nan
 363       return f;
 364     }
 365   return __fsdiv_org (a1, a2);
 366 }
 367
 368 #endif