sys/external/bsd/compiler_rt/dist/lib/builtins/i386/floatundisf.S

   1 // This file is dual licensed under the MIT and the University of Illinois Open
   2 // Source Licenses. See LICENSE.TXT for details.
   3
   4 #include "../assembly.h"
   5
   6 // float __floatundisf(du_int a);
   7
   8 // Note that there is a hardware instruction, fildll, that does most of what
   9 // this function needs to do.  However, because of our ia32 ABI, it will take
  10 // a write-small read-large stall, so the software implementation here is
  11 // actually several cycles faster.
  12
  13 // This is a branch-free implementation.  A branchy implementation might be
  14 // faster for the common case if you know something a priori about the input
  15 // distribution.
  16
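  17 /* branch-free x87 implementation - one cycle slower than without x87.  This variant is disabled: the comment opened here stays open past its #endif, so none of it is assembled.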
  18
  19 #ifdef __i386__
  20
  21 .const
  22 .balign 3
  23
  24                 .quad   0x43f0000000000000
  25 twop64: .quad   0x0000000000000000
  26
  27 #define                 TWOp64                  twop64-0b(%ecx,%eax,8)
  28
  29 .text
  30 .balign 4
  31 DEFINE_COMPILERRT_FUNCTION(__floatundisf)
  32         movl            8(%esp),                %eax
  33         movd            8(%esp),                %xmm1
  34         movd            4(%esp),                %xmm0
  35         punpckldq       %xmm1,                  %xmm0
  36         calll           0f
  37 0:      popl            %ecx
  38         sarl            $31,                    %eax
  39         movq            %xmm0,                  4(%esp)
  40         fildll          4(%esp)
  41         faddl           TWOp64
  42         fstps           4(%esp)
  43         flds            4(%esp)
  44         ret
  45 END_COMPILERRT_FUNCTION(__floatundisf)
  46
  47 #endif // __i386__
  48
  49 */
  50
  51 /* branch-free, x87-free implementation - faster at the expense of code size */
  52
  53 #ifdef __i386__
  54 // Read-only constants for the implementation below; the section directive is chosen per object format.
  55 #if defined(__APPLE__)
  56         .const
  57 #elif defined(__ELF__)
  58         .section .rodata
  59 #else
  60         .section .rdata,"rd"
  61 #endif
  62
  63         .balign 16
  64 twop52:
  65         .quad 0x4330000000000000        // bit pattern of the double 0x1.0p52, loaded through TWOp52
  66         .quad 0x0000000000000fff        // low-12-bit mask; this quad sits at sticky-8 and is what STICKY reads when %eax is -1
  67
  68         .balign 16                      // twop52 is 16 bytes long, so this adds no padding and sticky directly follows the mask above
  69 sticky:
  70         .quad 0x0000000000000000        // all-zero mask; this is what STICKY reads when %eax is 0
  71         .long 0x00000012                // NOTE(review): no visible code reads this word (STICKY loads 8 bytes at sticky or sticky-8) - confirm it is vestigial
  72
  73         .balign 16
  74 twelve:                                 // NOTE(review): not referenced in the visible code (the shift count 12 is an immediate) - confirm it is unused
  75         .long 0x00000000
  76 // Both macros address the constants relative to local label 0: %ecx must hold the address of that label. STICKY also adds %eax*8, and the function uses it with %eax equal to 0 or -1.
  77 #define                 TWOp52                  twop52-0b(%ecx)
  78 #define                 STICKY                  sticky-0b(%ecx,%eax,8)
  79 // float __floatundisf(du_int a): branch-free SSE2 conversion of the 64-bit argument (low half at 4(%esp), high half at 8(%esp)), treated as unsigned, to float; the result is left on the x87 stack.
  80 .text
  81 .balign 4
  82 DEFINE_COMPILERRT_FUNCTION(__floatundisf)
  83         movl            8(%esp),                %eax    // high 32 bits of input, used below for the big-input test
  84         movd            8(%esp),                %xmm1   // high 32 bits of input
  85         movd            4(%esp),                %xmm0   // low 32 bits of input
  86         punpckldq       %xmm1,                  %xmm0   // low 64 bits of xmm0 = the whole 64-bit input
  87 // Inputs below 0x1.0p52 are converted through double as they are; larger ones are first shifted right by 12 with the dropped bits ORed back in as sticky bits, and 12 is added to the float exponent at the end.
  88         calll           0f                              // pushes the address of label 0 as the return address
  89 0:      popl            %ecx                            // ecx = address of label 0, the base used by TWOp52 and STICKY
  90         shrl            %eax                                    // high 31 bits of input as sint32
  91         addl            $0x7ff80000,    %eax            // sign bit ends up set exactly when input >= 0x1.0p52 (big input)
  92         sarl            $31,                    %eax    // (big input) ? -1 : 0
  93         movsd           STICKY,                 %xmm1   // (big input) ? 0xfff : 0
  94         movl            $12,                    %edx    // shift count used for big inputs
  95         andl            %eax,                   %edx    // (big input) ? 12 : 0
  96         movd            %edx,                   %xmm3   // xmm3 = shift count (12 or 0)
  97         andpd           %xmm0,                  %xmm1   // (big input) ? input & 0xfff : 0
  98         movsd           TWOp52,                 %xmm2   // 0x1.0p52
  99         psrlq           %xmm3,                  %xmm0   // (big input) ? input >> 12 : input
 100         orpd            %xmm2,                  %xmm1   // 0x1.0p52 + ((big input) ? input & 0xfff : 0)
 101         orpd            %xmm1,                  %xmm0   // 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input)
 102         subsd           %xmm2,                  %xmm0   // (double)((big input) ? (input >> 12 | input & 0xfff) : input)
 103         cvtsd2ss        %xmm0,                  %xmm0   // (float)((big input) ? (input >> 12 | input & 0xfff) : input)
 104         pslld           $23,                    %xmm3   // (big input) ? 12 << 23 : 0, i.e. 12 positioned at the float exponent field
 105         paddd           %xmm3,                  %xmm0   // add 12 to the exponent when the input was shifted: (float)input
 106         movd            %xmm0,                  4(%esp) // store the float bits over the argument slot
 107         flds            4(%esp)                         // load them onto the x87 stack, where the result is left for the caller
 108         ret
 109 END_COMPILERRT_FUNCTION(__floatundisf)
 110
 111 #endif // __i386__