lib/libm/arch/i387/s_log1pf.S

   1 /*
   2  * Written by J.T. Conklin <jtc@NetBSD.org>.
   3  * Public domain.
   4  */
   5
   6 /*
   7  * Modified by Lex Wennmacher <wennmach@NetBSD.org>
   8  * Still public domain.
   9  */
  10
  11 #include <machine/asm.h>
  12
  13 #include "abi.h"
  14
  15 RCSID("$NetBSD: s_log1pf.S,v 1.10 2003/09/16 18:17:11 wennmach Exp $")
  16
  17 /*
  18  * The log1pf() function is provided to compute an accurate value of
  19  * log(1 + x), even for tiny values of x. The i387 FPU provides the
  20  * fyl2xp1 instruction for this purpose. However, the range of this
  21  * instruction is limited to:
  22  *              -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
  23  *                         -0.292893 <= x <= 0.414214
  24  * at least on older processor versions.
  25  *
  26  * log1pf() is implemented by testing the range of the argument.
  27  * If it is appropriate for fyl2xp1, this instruction is used.
  28  * Else, we compute log1pf(x) = ln(2)*ld(1 + x) the traditional way
  29  * (using fyl2x).
  30  *
  31  * The range testing costs speed, but as the rationale for the very
  32  * existence of this function is accuracy, we accept that.
  33  *
  34  * In order to reduce the cost for testing the range, we check if
  35  * the argument is in the range
  36  *                             -0.25 <= x <= 0.25
  37  * which can be done with just one conditional branch. If x is
  38  * inside this range, we use fyl2xp1. Outside of this range,
  39  * the use of fyl2x is accurate enough.
  40  *
  41  */
  42
  43 .text
  44         .align  4
  45 ENTRY(log1pf)
  46         XMM_ONE_ARG_FLOAT_PROLOGUE
  47         flds    ARG_FLOAT_ONE
  48         fabs
  49         fld1                            /* ... x 1 */
  50         fadd    %st(0)                  /* ... x 2 */
  51         fadd    %st(0)                  /* ... x 4 */
  52         fld1                            /* ... 4 1 */
  53         fdivp                           /* ... x 0.25 */
  54         fcompp
  55         fnstsw  %ax
  56         andb    $69,%ah
  57         jne     use_fyl2x
  58         jmp     use_fyl2xp1
  59
  60         .align  4
  61 use_fyl2x:
  62         fldln2
  63         flds    ARG_FLOAT_ONE
  64         fld1
  65         faddp
  66         fyl2x
  67         XMM_FLOAT_EPILOGUE
  68         ret
  69
  70         .align  4
  71 use_fyl2xp1:
  72         fldln2
  73         flds    ARG_FLOAT_ONE
  74         fyl2xp1
  75         XMM_FLOAT_EPILOGUE
  76         ret