winsup/cygwin/math/ceilf.S

   1 /**
   2  * This file has no copyright assigned and is placed in the Public Domain.
   3  * This file is part of the mingw-w64 runtime package.
   4  * No warranty is given; refer to the file DISCLAIMER.PD within this package.
   5  */
   6 #include <_mingw_mac.h>
   7
   8         .file   "ceilf.S"
   9         .text
  10         .align 4
  11         .globl __MINGW_USYMBOL(ceilf)
             /* COFF symbol record: storage class 2 (external), type 32 (function). */
  12         .def    __MINGW_USYMBOL(ceilf); .scl    2;      .type   32;     .endef
             /*
              * Open the SEH unwind region under exactly the same condition that
              * selects the x86-64 body after the entry label.  That body emits
              * .seh_stackalloc / .seh_endprologue / .seh_endproc, which are only
              * valid inside a .seh_proc region, so the two guards must agree.
              */
  13 #if defined(_AMD64_) || defined(__x86_64__)
  14         .seh_proc       __MINGW_USYMBOL(ceilf)
  15 #endif
  16
             /*
              * ceilf entry point: takes one single-precision value and returns it
              * rounded towards plus infinity to an integral value.  Exactly one of
              * the architecture-specific bodies that follow (x86-64, ARM, i386) is
              * selected by the preprocessor.
              */
  17 __MINGW_USYMBOL(ceilf):
  18 #if defined(_AMD64_) || defined(__x86_64__)
  19         subq    $24, %rsp               /* reserve 24 bytes of stack for the transfer slots */
  20         .seh_stackalloc 24              /* unwind info for the allocation above */
  21         .seh_endprologue
             /*
              * x86-64 body: operates on the raw IEEE-754 single-precision bit
              * pattern of the argument rather than on a rounding instruction.
              *   in:   %xmm0 = x
              *   out:  %xmm0 = result
              *   uses: %eax  = bits of x, later the result bits
              *         %edx  = working copy of the bits
              *         %ecx  = unbiased exponent e
              *         %r8d  = mask of the mantissa bits that hold the fraction
              *         12(%rsp), 8(%rsp) = float <-> integer transfer slots
              */
  22         movd    %xmm0, 12(%rsp)         /* spill x so its bits can be read as an integer */
  23         movl    12(%rsp), %eax          /* eax = bits of x */
  24         movl    %eax, %ecx
  25         movl    %eax, %edx              /* edx = copy that receives the adjustment */
  26         sarl    $23, %ecx               /* move the exponent field down to bit 0 */
  27         andl    $255, %ecx              /* keep the 8 exponent bits (drops the sign) */
  28         subl    $127, %ecx              /* remove the bias: ecx = e */
  29         cmpl    $22, %ecx
  30         jg      .l4                     /* e > 22: no fraction bits exist (or exponent is all ones) */
  31         testl   %ecx, %ecx
  32         js      .l5                     /* e < 0: |x| < 1 */
  33         movl    $8388607, %r8d          /* 0x007fffff: all 23 mantissa bits */
  34         sarl    %cl, %r8d               /* r8d = mantissa bits below the binary point */
  35         testl   %eax, %r8d
  36         je      .l3                     /* no fraction bits set: return x unchanged */
  37         addss   .hugeval(%rip), %xmm0   /* xmm0 = x + 1.0e30; NOTE(review): presumably kept to raise the inexact flag - confirm */
  38         ucomiss .zeroval(%rip), %xmm0
  39         jbe     .l2                     /* sum <= 0 or unordered: return the unmodified bits in eax */
  40         testl   %eax, %eax
  41         jle     .l1                     /* sign bit set: dropping the fraction already rounds up */
  42         movl    $8388608, %eax          /* 0x00800000 */
  43         sarl    %cl, %eax               /* eax = one unit at the integer position for this exponent */
  44         addl    %eax, %edx              /* positive x: step up by 1.0 (a carry may bump the exponent) */
  45 .l1:
  46         movl    %r8d, %eax
  47         notl    %eax
  48         andl    %edx, %eax              /* eax = edx with the fraction bits cleared */
  49 .l2:
  50         movl    %eax, 8(%rsp)           /* move the result bits back into xmm0 via memory */
  51         movss   8(%rsp), %xmm0
  52 .l3:
  53         addq    $24, %rsp
  54         ret
  55         .p2align 4,,10                  /* align to 16 bytes if that needs at most 10 bytes of padding */
  56 .l4:
             /* e > 22: only e == 128 (exponent field all ones) needs work. */
  57         addl    $-128, %ecx
  58         jne     .l3                     /* e != 128: value is already integral, return x unchanged */
  59         addss   %xmm0, %xmm0            /* exponent field all ones (Inf or NaN): return x + x */
  60         addq    $24, %rsp
  61         ret
  62         .p2align 4,,10
  63 .l5:
             /* |x| < 1: the result is -0.0, +0.0 or 1.0 depending on sign and zero-ness. */
  64         addss   .hugeval(%rip), %xmm0   /* same x + 1.0e30 step as on the main path */
  65         ucomiss .zeroval(%rip), %xmm0
  66         jbe     .islesseqzero           /* sum <= 0 or unordered: return the unmodified bits in eax */
  67         testl   %eax, %eax
  68         js      .l6                     /* sign bit set: result is -0.0 */
  69         movl    $1065353216, %edx       /* 0x3f800000 = 1.0f; movl leaves the flags from testl intact */
  70         cmovne  %edx, %eax              /* bits != 0 (x > 0): result 1.0f; bits == 0: keep +0.0 */
  71 .islesseqzero:
  72         movl    %eax, 8(%rsp)
  73         movss   8(%rsp), %xmm0
  74         addq    $24, %rsp
  75         ret
  76         .p2align 4,,10
  77 .l6:
  78         movl    $-2147483648, 8(%rsp)   /* 0x80000000 = -0.0f */
  79         movss   8(%rsp), %xmm0
  80         addq    $24, %rsp
  81         ret
  82         .seh_endproc
  83         .section .rdata,"dr"            /* read-only data section for the two constants */
  84         .align 4
  85 .hugeval:
  86         .long   1900671690              /* 0x7149f2ca = 1.0e30f */
  87         .align 4
  88 .zeroval:
  89         .long   0                       /* 0.0f */
  90 #elif defined(_ARM_) || defined(__arm__)
  91         vmrs    r1, fpscr               /* r1 = caller's FPSCR, restored before returning */
             /*
              * ARM body: temporarily switch the VFP rounding mode to "round
              * towards plus infinity", convert the value in s0 to a signed 32-bit
              * integer and back, then restore the caller's FPSCR.
              *   in:   s0 = x
              *   out:  s0 = result
              *   uses: r0 (modified FPSCR), r1 (saved FPSCR)
              *
              * NOTE(review): the result goes through a signed 32-bit integer, so
              * inputs whose ceiling does not fit in int32, infinities, NaNs and
              * the sign of a zero result are not preserved - confirm whether
              * callers depend on those cases.
              */
  92         bic             r0, r1, #0x00c00000     /* clear the rounding-mode field, bits [23:22] */
  93         orr             r0, r0, #0x00400000 /* Round towards Plus Infinity */
  94         vmsr    fpscr, r0
             /*
              * The float -> integer step must be VCVTR: plain VCVT always rounds
              * towards zero and ignores the FPSCR rounding mode selected above,
              * which would turn this routine into a truncation.
              */
  95         vcvtr.s32.f32   s0, s0
  96         vcvt.f32.s32    s0, s0          /* back to single precision */
  97         vmsr    fpscr, r1               /* restore the caller's rounding mode */
  98         bx      lr
  99 #elif defined(_X86_) || defined(__i386__)
 100         flds    4(%esp)                 /* st(0) = x, read from the stack argument slot */
 101         subl    $8,%esp                 /* two 4-byte scratch slots: (%esp) and 4(%esp) */
 102
 103         fstcw   4(%esp)                 /* store fpu control word */
 104
 105         /* We use %edx here although only the low 16 bits are defined
 106            (fstcw stores a 16-bit control word).  None of the operations
 107            should care, and they are faster than the 16 bit operations.  */
 108         movl    $0x0800,%edx            /* round towards +oo */
 109         orl     4(%esp),%edx            /* merge in the saved control word (sets bit 11) */
 110         andl    $0xfbff,%edx            /* clear bit 10 and the upper 16 bits: rounding field = 10b */
 111         movl    %edx,(%esp)
 112         fldcw   (%esp)                  /* load modified control word */
 113
 114         frndint                         /* round */
 115
 116         fldcw   4(%esp)                 /* restore original control word */
 117
 118         addl    $8,%esp                 /* release the scratch slots */
 119         ret                             /* result is left in st(0) */
 120 #endif