sdcc/device/lib/expf.c

   1 /*-------------------------------------------------------------------------
   2    expf.c - Computes e**x of a 32-bit float as outlined in [1]
   3
   4    Copyright (C) 2001, 2002, Jesus Calvino-Fraga, jesusc@ieee.org
   5
   6    This library is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by the
   8    Free Software Foundation; either version 2, or (at your option) any
   9    later version.
  10
  11    This library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this library; see the file COPYING. If not, write to the
  18    Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  19    MA 02110-1301, USA.
  20
  21    As a special exception, if you link this library with other files,
  22    some of which are compiled with SDCC, to produce an executable,
  23    this library does not by itself cause the resulting executable to
  24    be covered by the GNU General Public License. This exception does
  25    not however invalidate any other reasons why the executable file
  26    might be covered by the GNU General Public License.
  27 -------------------------------------------------------------------------*/
  28
  29 /* [1] William James Cody and W.  M.  Waite.  _Software manual for the
  30    elementary functions_, Englewood Cliffs, N.J.:Prentice-Hall, 1980. */
  31
  32 /* Version 1.0 - Initial release */
  33
  34 #define __SDCC_MATH_LIB
  35 #include <math.h>
  36 #include <errno.h>
  37 #include <stdbool.h>
  38
  39 #ifdef MATH_ASM_MCS51
  40
  41 #define __SDCC_FLOAT_LIB
  42 #include <float.h>
  43
  44 // TODO: share with other temps
  45 static __bit sign_bit;
  46 static __data unsigned char expf_y[4];
  47 static __data unsigned char n;
  48
  49 float expf(float x)
  50 {
  51         x;
  52         __asm
  53         mov     c, acc.7
  54         mov     _sign_bit, c    // remember sign
  55         clr     acc.7           // and make input positive
  56         mov     r0, a
  57         mov     c, b.7
  58         rlc     a               // extract exponent
  59         add     a, #153
  60         jc      expf_not_zero
  61         // input is a very small number, so e^x is 1.000000
  62         mov     dptr, #0
  63         mov     b, #0x80
  64         mov     a, #0x3F
  65         ljmp    expf_exit
  66 expf_not_zero:
  67         // TODO: check exponent for very small values, and return zero
  68         mov     _n, #0
  69         mov     a, dpl
  70         add     a, #0xE8        // is x >= 0.69314718
  71         mov     a, dph
  72         addc    a, #0x8D
  73         mov     a, b
  74         addc    a, #0xCE
  75         mov     a, r0
  76         addc    a, #0xC0
  77         mov     a, r0
  78         jnc     expf_no_range_reduction
  79 expf_range_reduction:
  80         mov     (_expf_y + 0), dpl      // keep copy of x in "exp_y"
  81         mov     (_expf_y + 1), dph
  82         mov     (_expf_y + 2), b
  83         mov     (_expf_y + 3), a
  84         mov     r0, #0x3B
  85         push    ar0
  86         mov     r0, #0xAA
  87         push    ar0
  88         mov     r0, #0xB8
  89         push    ar0
  90         mov     r0, #0x3F
  91         push    ar0
  92         lcall   ___fsmul        // x * 1.442695041 = x / ln(2)
  93         dec     sp
  94         dec     sp
  95         dec     sp
  96         dec     sp
  97         lcall   ___fs2uchar     // n = int(x * 1.442695041)
  98         mov     a, dpl
  99         mov     _n, a
 100         add     a, #128
 101         jnc     expf_range_ok
 102         lcall   fs_return_inf   // exponent overflow
 103         ljmp    expf_exit
 104 expf_range_ok:
 105         mov     r0,#0x00
 106         mov     r1,#0x80
 107         mov     r2,#0x31
 108         mov     r3,#0xBF
 109         lcall   expf_scale_and_add
 110         mov     (_expf_y + 0), dpl
 111         mov     (_expf_y + 1), dph
 112         mov     (_expf_y + 2), b
 113         mov     (_expf_y + 3), a
 114         mov     r0,#0x83
 115         mov     r1,#0x80
 116         mov     r2,#0x5E
 117         mov     r3,#0x39
 118         lcall   expf_scale_and_add
 119 expf_no_range_reduction:
 120
 121
 122 // Compute e^x using the cordic algorithm.  This works over an
 123 // input range of 0 to 0.69314712.  Can be extended to work from
 124 // 0 to 1.0 if the results are normalized, but over the range
 125 // we use, the result is always from 1.0 to 1.99999988 (fixed
 126 // exponent of 127)
 127
 128 expf_cordic_begin:
 129         mov     c, b.7
 130         rlc     a               // extract exponent to acc
 131         setb    b.7
 132         mov     r1, dpl         // mantissa to r4/r3/r2/r1
 133         mov     r2, dph
 134         mov     r3, b
 135         mov     r4, #0
 136
 137         // first, align the input into a 32 bit long
 138         cjne    a, #121, exp_lshift
 139         sjmp    exp_noshift
 140 exp_lshift:
 141         jc      exp_rshift
 142         // exp_a is greater than 121, so left shift
 143         add     a, #135
 144         lcall   fs_lshift_a
 145         sjmp    exp_noshift
 146 exp_rshift:
 147         // exp_a is less than 121, so right shift
 148         cpl     a
 149         add     a, #122
 150         lcall   fs_rshift_a
 151 exp_noshift:                            // r4/r3/r2/r1 = x
 152         clr     a
 153         mov     (_expf_y + 0), a        // y = 1.0;
 154         mov     (_expf_y + 1), a
 155         mov     (_expf_y + 2), a
 156         mov     (_expf_y + 3), #0x20
 157         mov     dptr, #__fs_natural_log_table
 158         mov     r0, a                   // r0 = i
 159 exp_cordic_loop:
 160         clr     a
 161         movc    a, @a+dptr
 162         mov     b, a
 163         inc     dptr
 164         clr     a
 165         movc    a, @a+dptr              // r7/r6/r5/b = table[i]
 166         mov     r5, a
 167         inc     dptr
 168         clr     a
 169         movc    a, @a+dptr
 170         mov     r6, a
 171         inc     dptr
 172         clr     a
 173         movc    a, @a+dptr
 174         mov     r7, a
 175         inc     dptr
 176         clr     c
 177         mov     a, r1
 178         subb    a, b                    // compare x to table[i]
 179         mov     a, r2
 180         subb    a, r5
 181         mov     a, r3
 182         subb    a, r6
 183         mov     a, r4
 184         subb    a, r7
 185         jc      exp_cordic_skip         // if table[i] < x
 186         clr     c
 187         mov     a, r1
 188         subb    a, b
 189         mov     r1, a                   // x -= table[i]
 190         mov     a, r2
 191         subb    a, r5
 192         mov     r2, a
 193         mov     a, r3
 194         subb    a, r6
 195         mov     r3, a
 196         mov     a, r4
 197         subb    a, r7
 198         mov     r4, a
 199         mov     b,  (_expf_y + 0)
 200         mov     r5, (_expf_y + 1)       // r7/r6/r5/b = y >> i
 201         mov     r6, (_expf_y + 2)
 202         mov     r7, (_expf_y + 3)
 203         mov     a, r0
 204         lcall   __fs_cordic_rshift_r765_unsigned
 205         mov     a, (_expf_y + 0)
 206         add     a, b
 207         mov     (_expf_y + 0), a
 208         mov     a, (_expf_y + 1)
 209         addc    a, r5
 210         mov     (_expf_y + 1), a        // y += (y >> i)
 211         mov     a, (_expf_y + 2)
 212         addc    a, r6
 213         mov     (_expf_y + 2), a
 214         mov     a, (_expf_y + 3)
 215         addc    a, r7
 216         mov     (_expf_y + 3), a
 217 exp_cordic_skip:
 218         inc     r0
 219         cjne    r0, #27, exp_cordic_loop
 220         mov     r4, (_expf_y + 3)
 221         mov     r3, (_expf_y + 2)
 222         mov     r2, (_expf_y + 1)
 223         mov     r1, (_expf_y + 0)
 224         mov     exp_a, #121
 225         lcall   fs_normalize_a          // end of cordic
 226
 227         mov     a, #127
 228         add     a, _n                   // ldexpf(x, n)
 229         mov     exp_a, a
 230         lcall   fs_round_and_return
 231
 232         jnb     _sign_bit, expf_done
 233         push    dpl
 234         push    dph
 235         push    b
 236         push    acc
 237         mov     dptr, #0
 238         mov     b, #0x80
 239         mov     a, #0x3F
 240         lcall   ___fsdiv                // 1.0 / x
 241         dec     sp
 242         dec     sp
 243         dec     sp
 244         dec     sp
 245 expf_done:
 246         clr     acc.7           // Result is always positive!
 247 expf_exit:
 248         __endasm;
 249 #pragma less_pedantic
 250 }
 251
 252 static void dummy1(void) __naked
 253 {
 254         __asm
 255         .globl  fs_lshift_a
 256 expf_scale_and_add:
 257         push    ar0
 258         push    ar1
 259         push    ar2
 260         push    ar3
 261         mov     dpl, _n
 262         lcall   ___uchar2fs     // turn n into float
 263         lcall   ___fsmul        // n * scale_factor
 264         dec     sp
 265         dec     sp
 266         dec     sp
 267         dec     sp
 268         push    dpl
 269         push    dph
 270         push    b
 271         push    acc
 272         mov     dpl, (_expf_y + 0)
 273         mov     dph, (_expf_y + 1)
 274         mov     b,   (_expf_y + 2)
 275         mov     a,   (_expf_y + 3)
 276         lcall   ___fsadd        // x += (n * scale_factor)
 277         dec     sp
 278         dec     sp
 279         dec     sp
 280         dec     sp
 281         ret
 282         __endasm;
 283 }
 284
 285 static void dummy(void) __naked
 286 {
 287         __asm
 288         .globl  fs_lshift_a
 289 fs_lshift_a:
 290         jz      fs_lshift_done
 291         push    ar0
 292         mov     r0, a
 293 fs_lshift_loop:
 294         clr     c
 295         mov     a, r1
 296         rlc     a
 297         mov     r1, a
 298         mov     a, r2
 299         rlc     a
 300         mov     r2, a
 301         mov     a, r3
 302         rlc     a
 303         mov     r3, a
 304         mov     a, r4
 305         rlc     a
 306         mov     r4, a
 307         djnz    r0, fs_lshift_loop
 308         pop     ar0
 309 fs_lshift_done:
 310         ret
 311         __endasm;
 312 }
 313
 314 #else // not MATH_ASM_MCS51
 315
 316 #define P0      0.2499999995E+0
 317 #define P1      0.4160288626E-2
 318 #define Q0      0.5000000000E+0
 319 #define Q1      0.4998717877E-1
 320
 321 #define P(z) ((P1*z)+P0)
 322 #define Q(z) ((Q1*z)+Q0)
 323
 324 #define C1       0.693359375
 325 #define C2      -2.1219444005469058277e-4
 326
 327 #define BIGX    88.72283911  /* ln(HUGE_VALF) */
 328 #define EXPEPS  1.0E-7       /* exp(1.0E-7)=0.0000001 */
 329 #define K1      1.4426950409 /* 1/ln(2) */
 330
 331 float expf(float x) _FLOAT_FUNC_REENTRANT
 332 {
 333     int n;
 334     float xn, g, r, z, y;
 335     bool sign;
 336
 337     if(x>=0.0)
 338         { y=x;  sign=0; }
 339     else
 340         { y=-x; sign=1; }
 341
 342     if(y<EXPEPS) return 1.0;
 343
 344     if(y>BIGX)
 345     {
 346         if(sign)
 347         {
 348             errno=ERANGE;
 349             return HUGE_VALF
 350             ;
 351         }
 352         else
 353         {
 354             return 0.0;
 355         }
 356     }
 357
 358     z=y*K1;
 359     n=z;
 360
 361     if(n<0) --n;
 362     if(z-n>=0.5) ++n;
 363     xn=n;
 364     g=((y-xn*C1))-xn*C2;
 365     z=g*g;
 366     r=P(z)*g;
 367     r=0.5+(r/(Q(z)-r));
 368
 369     n++;
 370     z=ldexpf(r, n);
 371     if(sign)
 372         return 1.0/z;
 373     else
 374         return z;
 375 }
 376
 377 #endif