sysdeps/libm-i387/s_cbrtf.S

   1 /* Compute cubic root of float value.
   2    Copyright (C) 1997 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
   5    Ulrich Drepper <drepper@cygnus.com>, 1997.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Library General Public License as
   9    published by the Free Software Foundation; either version 2 of the
  10    License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Library General Public License for more details.
  16
  17    You should have received a copy of the GNU Library General Public
  18    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  19    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22 #include <machine/asm.h>
  23
  24 #ifdef __ELF__
  25         .section .rodata
  26 #else
  27         .text
  28 #endif
  29
     /* Read-only constants used by __cbrtf.  They are placed in .rodata
        when __ELF__ is defined and in .text otherwise.

        f1, f2 and f3 are the coefficients of the quadratic
            u = f1 + (f2 - f3*xm)*xm
        which __cbrtf evaluates on the reduced argument xm as its first
        estimate u of the cube root, before refining it.  They are loaded
        with double-precision memory operands (fmull/fsubrl/faddl).  */
  30         .align ALIGNARG(4)
  31         ASM_TYPE_DIRECTIVE(f1,@object)
  32 f1:     .double 0.492659620528969547    /* constant term */
  33         ASM_SIZE_DIRECTIVE(f1)
  34         ASM_TYPE_DIRECTIVE(f2,@object)
  35 f2:     .double 0.697570460207922770    /* linear coefficient */
  36         ASM_SIZE_DIRECTIVE(f2)
  37         ASM_TYPE_DIRECTIVE(f3,@object)
  38 f3:     .double 0.191502161678719066    /* quadratic coefficient, subtracted */
  39         ASM_SIZE_DIRECTIVE(f3)
  40
  41 #define CBRT2 1.2599210498948731648
  42 #define SQR_CBRT2 1.5874010519681994748
     /* CBRT2 is 2^(1/3) and SQR_CBRT2 is 2^(2/3).  Their reciprocals are
        spelled out as literals as well: the assembler's .double directive
        accepts floating-point literals only, not arithmetic expressions,
        so a quotient such as "1.0 / CBRT2" cannot be left for it to
        evaluate.  */
     #define ONE_CBRT2 0.79370052598409973738
     #define ONE_SQR_CBRT2 0.62996052494743658238
  43
     /* factor[k+2] = 2^(k/3) for k = -2 .. 2.  __cbrtf addresses the table
        as factor+16 indexed by k*8 with k = xe%3 (remainder with the sign
        of xe), so the 1.0 entry sits at byte offset 16.  */
  44         ASM_TYPE_DIRECTIVE(factor,@object)
  45 factor: .double ONE_SQR_CBRT2           /* k = -2: 2^(-2/3) */
  46         .double ONE_CBRT2               /* k = -1: 2^(-1/3) */
  47         .double 1.0                     /* k =  0 */
  48         .double CBRT2                   /* k =  1: 2^(1/3) */
  49         .double SQR_CBRT2               /* k =  2: 2^(2/3) */
  50         ASM_SIZE_DIRECTIVE(factor)
  51
  52         ASM_TYPE_DIRECTIVE(two25,@object)
     /* 2^25 as an IEEE single-precision bit pattern: biased exponent
        0x98 = 127 + 25, zero mantissa.  Emitted as one little-endian
        32-bit word, i.e. the bytes 00 00 00 4c.  __cbrtf multiplies
        denormal inputs by this value (fmuls) to make them normal.  */
  53 two25:  .long 0x4c000000
  54         ASM_SIZE_DIRECTIVE(two25)
  55
  56 #ifndef PIC
     /* MO(sym) builds the memory operand for data symbol SYM;
        MOX(sym,idx,scale) does the same with index register IDX
        multiplied by SCALE.  Without PIC the symbol is addressed
        absolutely.  */
  57 #define MO(sym) sym
  58 #define MOX(sym,idx,scale) sym(,idx,scale)
  59 #else
     /* With PIC the symbol is addressed as an @GOTOFF offset from %ebx,
        which __cbrtf loads with the address of the global offset table
        before using either macro.  */
  60 #define MO(sym) sym##@GOTOFF(%ebx)
  61 #define MOX(sym,idx,scale) sym##@GOTOFF(%ebx,idx,scale)
  62 #endif
  63
  64         .text
     /* float __cbrtf (float x)

        Cube root of a single-precision value.  x is read from 4(%esp) on
        entry and the result is left in %st(0).  +-0, infinities and NaNs
        are returned unchanged (label 1).

        Outline: split x = xm * 2^xe with |xm| in [0.5,1), approximate
        cbrt(|xm|), multiply by 2^((xe%3)/3) taken from `factor', scale by
        2^(xe/3) with fscale and finally restore the sign of x.

        %eax, %ecx and %edx are used as scratch; %ebx is saved and
        restored when PIC is defined.  The stack slot holding x is
        overwritten with intermediate values.  */
  65 ENTRY(__cbrtf)
  66         movl    4(%esp), %eax           /* eax = bit pattern of x */
  67         xorl    %ecx, %ecx              /* ecx = exponent adjustment, 0 unless x is denormal */
  68         movl    %eax, %edx              /* edx = copy that keeps the sign bit */
  69         andl    $0x7fffffff, %eax       /* eax = bits of |x| */
  70         jz      1f                      /* x is +-0 */
  71         cmpl    $0x7f800000, %eax
  72         jae     1f                      /* x is Inf or NaN */
  73
     /* PIC only: load the GOT address into %ebx for the @GOTOFF operands
        built by MO/MOX.  The push moves x from 4(%esp) to 8(%esp).  */
  74 #ifdef PIC
  75         pushl   %ebx
  76         call    3f
  77 3:      popl    %ebx                    /* ebx = address of label 3 */
  78         addl    $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx
  79 #endif
  80
  81         cmpl    $0x00800000, %eax
  82         jae     2f                      /* exponent field nonzero: normal number */
  83
     /* Denormal x: multiply by 2^25 so that it becomes normal, write the
        product back over the argument and remember the -25 in ecx.  */
  84 #ifdef PIC
  85         flds    8(%esp)
  86 #else
  87         flds    4(%esp)
  88 #endif
  89         fmuls   MO(two25)
  90         movl    $-25, %ecx              /* compensates the 2^25 scaling in xe */
  91 #ifdef PIC
  92         fstps   8(%esp)
  93         movl    8(%esp), %eax
  94 #else
  95         fstps   4(%esp)
  96         movl    4(%esp), %eax
  97 #endif
  98         movl    %eax, %edx              /* edx = bits of the scaled x */
  99         andl    $0x7fffffff, %eax       /* eax = bits of |scaled x| */
 100
 101 2:      shrl    $23, %eax               /* eax = biased exponent */
 102         andl    $0x807fffff, %edx       /* keep sign and mantissa bits only */
 103         subl    $126, %eax              /* exponent relative to a mantissa in [0.5,1) */
 104         orl     $0x3f000000, %edx       /* exponent field := 126, so |xm| is in [0.5,1) */
 105         addl    %eax, %ecx              /* ecx = xe, with |x| = |xm| * 2^xe */
 106 #ifdef PIC
 107         movl    %edx, 8(%esp)           /* store xm, sign of x included */
 108
 109         flds    8(%esp)                 /* xm */
 110 #else
 111         movl    %edx, 4(%esp)           /* store xm, sign of x included */
 112
 113         flds    4(%esp)                 /* xm */
 114 #endif
 115         fabs                            /* continue with |xm|; the sign is reapplied before returning */
 116
 117         /* The following code has two tracks:
 118             a) compute the normalized cbrt value
 119             b) compute xe/3 and xe%3
 120            The right track computes the value for b) and this is done
 121            in an optimized way by avoiding division.  */
     /* Track a) forms the estimate u = f1+(f2-f3*xm)*xm and refines it to
        u*(t2+2*xm)/(2*t2+xm) with t2 = u*u*u.  Track b) obtains xe/3 from
        the high half of xe * 0x55555556 (0x55555556 = ceil(2^32/3)),
        corrected by one for negative xe so that it rounds toward zero.  */
 122
 123         fld     %st(0)                  /* xm : xm */
 124         fmull   MO(f3)                  /* f3*xm : xm */
 125                         movl    $1431655766, %eax       /* 0x55555556 */
 126         fsubrl  MO(f2)                  /* f2-f3*xm : xm */
 127                         imull   %ecx                    /* edx:eax = xe * 0x55555556 */
 128         fmul    %st(1)                  /* (f2-f3*xm)*xm : xm */
 129                         movl    %ecx, %eax
 130         faddl   MO(f1)                  /* u:=f1+(f2-f3*xm)*xm : xm */
 131                         sarl    $31, %eax               /* eax = -1 if xe < 0, else 0 */
 132         fld     %st                     /* u : u : xm */
 133                         subl    %eax, %edx              /* edx = xe/3, rounded toward zero */
 134         fmul    %st(1)                  /* u*u : u : xm */
 135         fld     %st(2)                  /* xm : u*u : u : xm */
 136         fadd    %st                     /* 2*xm : u*u : u : xm */
 137         fxch    %st(1)                  /* u*u : 2*xm : u : xm */
 138         fmul    %st(2)                  /* t2:=u*u*u : 2*xm : u : xm */
 139                         movl    %edx, %eax              /* eax = xe/3 */
 140         fadd    %st, %st(1)             /* t2 : t2+2*xm : u : xm */
 141                         leal    (%edx,%edx,2),%edx      /* edx = 3*(xe/3) */
 142         fadd    %st(0)                  /* 2*t2 : t2+2*xm : u : xm */
 143                         subl    %edx, %ecx              /* ecx = xe%3, in -2..2 */
 144         faddp   %st, %st(3)             /* t2+2*xm : u : 2*t2+xm */
 145         fmulp                           /* u*(t2+2*xm) : 2*t2+xm */
 146         fdivp   %st, %st(1)             /* u*(t2+2*xm)/(2*t2+xm) */
     /* FACT is the `factor' entry selected by xe%3; factor+16 is the entry
        for remainder 0.  */
 147         fmull   MOX(16+factor,%ecx,8)   /* u*(t2+2*xm)/(2*t2+xm)*FACT */
 148         pushl   %eax                    /* temporary stack slot holding xe/3 for fildl */
 149         fildl   (%esp)                  /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
 150         fxch                            /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
 151         popl    %eax                    /* release the temporary slot */
 152         fscale                          /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
 153         fstp    %st(1)                  /* drop xe/3; only the result remains in st(0) */
 154 #ifdef PIC
 155         popl    %ebx
 156 #endif
 157         testl   $0x80000000, 4(%esp)    /* the argument slot still carries the sign bit of x */
 158         jz      4f
 159         fchs                            /* x was negative: negate the result */
 160 4:      ret
 161
 162         /* Return the argument.  */
 163 1:      flds    4(%esp)
 164         ret
 165 END(__cbrtf)
     /* weak_alias is defined outside this file; presumably it makes cbrtf
        a weak alias of __cbrtf.  */
 166 weak_alias (__cbrtf, cbrtf)