1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; copysignf as a plain libcall (declared below, not the intrinsic), with the
; sign/magnitude operands swapped relative to the IR arguments. X86 must spill
; both floats to the stack and emit a real call; X64 just rotates the xmm
; argument registers through xmm2 and tail-calls copysignf.
9 define float @tst1(float %a, float %b) nounwind {
12 ; X86-NEXT: subl $8, %esp
13 ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
14 ; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
15 ; X86-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
16 ; X86-NEXT: movss %xmm0, (%esp)
17 ; X86-NEXT: calll copysignf
18 ; X86-NEXT: addl $8, %esp
23 ; X64-NEXT: movaps %xmm0, %xmm2
24 ; X64-NEXT: movaps %xmm1, %xmm0
25 ; X64-NEXT: movaps %xmm2, %xmm1
26 ; X64-NEXT: jmp copysignf # TAILCALL
27 %tmp = tail call float @copysignf( float %b, float %a )
; double copysign libcall whose sign operand is computed (fadd then fpext), so
; the add/convert must be scheduled before the call. X86 stores both doubles to
; the outgoing-arg area and calls copysign; X64 folds the whole thing into a
; tail call after the addss/cvtss2sd.
31 define double @tst2(double %a, float %b, float %c) nounwind {
34 ; X86-NEXT: subl $16, %esp
35 ; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
36 ; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
37 ; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm1
38 ; X86-NEXT: cvtss2sd %xmm1, %xmm1
39 ; X86-NEXT: movsd %xmm0, (%esp)
40 ; X86-NEXT: movsd %xmm1, {{[0-9]+}}(%esp)
41 ; X86-NEXT: calll copysign
42 ; X86-NEXT: addl $16, %esp
47 ; X64-NEXT: addss %xmm2, %xmm1
48 ; X64-NEXT: cvtss2sd %xmm1, %xmm1
49 ; X64-NEXT: jmp copysign # TAILCALL
50 %tmp1 = fadd float %b, %c
51 %tmp2 = fpext float %tmp1 to double
52 %tmp = tail call double @copysign( double %a, double %tmp2 )
; x86_fp80 copysignl libcall: both targets pass the 80-bit values in memory,
; so the arguments are reloaded with fldt, stored into the outgoing-arg slots
; with fstpt, and copysignl is called (no tail call — stack args are needed on
; X64 as well).
56 define x86_fp80 @tst3(x86_fp80 %a, x86_fp80 %b) nounwind {
59 ; X86-NEXT: subl $24, %esp
60 ; X86-NEXT: fldt {{[0-9]+}}(%esp)
61 ; X86-NEXT: fldt {{[0-9]+}}(%esp)
62 ; X86-NEXT: fstpt {{[0-9]+}}(%esp)
63 ; X86-NEXT: fstpt (%esp)
64 ; X86-NEXT: calll copysignl
65 ; X86-NEXT: addl $24, %esp
70 ; X64-NEXT: subq $40, %rsp
71 ; X64-NEXT: fldt {{[0-9]+}}(%rsp)
72 ; X64-NEXT: fldt {{[0-9]+}}(%rsp)
73 ; X64-NEXT: fstpt {{[0-9]+}}(%rsp)
74 ; X64-NEXT: fstpt (%rsp)
75 ; X64-NEXT: callq copysignl
76 ; X64-NEXT: addq $40, %rsp
78 %tmp = tail call x86_fp80 @copysignl( x86_fp80 %b, x86_fp80 %a )
; External libm-style copysign declarations used by tst1/tst2/tst3 above.
; Because these are ordinary calls (not llvm.copysign.* intrinsics), the
; backend may not lower them inline and must emit real calls/tail calls.
82 declare dso_local float @copysignf(float, float)
83 declare dso_local double @copysign(double, double)
84 declare dso_local x86_fp80 @copysignl(x86_fp80, x86_fp80)
; llvm.copysign.f32 intrinsic: lowered inline with SSE bit masks instead of a
; call — andps clears the sign bit of one operand and isolates it in the
; other (masks come from the constant pool, RIP-relative on X64), then orps
; merges them. X86 round-trips the result through memory to return it in st(0).
90 define float @int1(float %a, float %b) nounwind {
93 ; X86-NEXT: pushl %eax
94 ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
95 ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
96 ; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
97 ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
98 ; X86-NEXT: orps %xmm0, %xmm1
99 ; X86-NEXT: movss %xmm1, (%esp)
100 ; X86-NEXT: flds (%esp)
101 ; X86-NEXT: popl %eax
106 ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
107 ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
108 ; X64-NEXT: orps %xmm1, %xmm0
110 %tmp = tail call float @llvm.copysign.f32( float %b, float %a )
; llvm.copysign.f64 intrinsic with a computed sign operand (fadd + fpext):
; still lowered inline via andps/orps bit masks. X86 additionally realigns the
; stack with a frame pointer (andl $-8) so the double can be stored/reloaded
; through an 8-byte-aligned slot and returned via fldl.
114 define double @int2(double %a, float %b, float %c) nounwind {
117 ; X86-NEXT: pushl %ebp
118 ; X86-NEXT: movl %esp, %ebp
119 ; X86-NEXT: andl $-8, %esp
120 ; X86-NEXT: subl $8, %esp
121 ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
122 ; X86-NEXT: addss 20(%ebp), %xmm0
123 ; X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
124 ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
125 ; X86-NEXT: cvtss2sd %xmm0, %xmm0
126 ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
127 ; X86-NEXT: orps %xmm1, %xmm0
128 ; X86-NEXT: movlps %xmm0, (%esp)
129 ; X86-NEXT: fldl (%esp)
130 ; X86-NEXT: movl %ebp, %esp
131 ; X86-NEXT: popl %ebp
136 ; X64-NEXT: addss %xmm2, %xmm1
137 ; X64-NEXT: cvtss2sd %xmm1, %xmm1
138 ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
139 ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
140 ; X64-NEXT: orps %xmm1, %xmm0
142 %tmp1 = fadd float %b, %c
143 %tmp2 = fpext float %tmp1 to double
144 %tmp = tail call double @llvm.copysign.f64( double %a, double %tmp2 )
; llvm.copysign.f80 intrinsic: no SSE mask lowering exists for x86_fp80, so it
; goes through the x87 stack. The sign operand is spilled with fstpt, its sign
; bit is tested directly in memory (testb $-128 against the byte holding the
; sign), and fcmovne selects the correctly-signed copy of the magnitude.
; (Some autogenerated CHECK lines of this function are not visible in this
; excerpt — presumably including the fchs that produces the negated copy.)
148 define x86_fp80 @int3(x86_fp80 %a, x86_fp80 %b) nounwind {
151 ; X86-NEXT: subl $12, %esp
152 ; X86-NEXT: fldt {{[0-9]+}}(%esp)
153 ; X86-NEXT: fldt {{[0-9]+}}(%esp)
154 ; X86-NEXT: fstpt (%esp)
156 ; X86-NEXT: fld %st(0)
158 ; X86-NEXT: testb $-128, {{[0-9]+}}(%esp)
159 ; X86-NEXT: fxch %st(1)
160 ; X86-NEXT: fcmovne %st(1), %st
161 ; X86-NEXT: fstp %st(1)
162 ; X86-NEXT: addl $12, %esp
167 ; X64-NEXT: fldt {{[0-9]+}}(%rsp)
168 ; X64-NEXT: fldt {{[0-9]+}}(%rsp)
169 ; X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
171 ; X64-NEXT: fld %st(0)
173 ; X64-NEXT: testb $-128, -{{[0-9]+}}(%rsp)
174 ; X64-NEXT: fxch %st(1)
175 ; X64-NEXT: fcmovne %st(1), %st
176 ; X64-NEXT: fstp %st(1)
178 %tmp = tail call x86_fp80 @llvm.copysign.f80( x86_fp80 %b, x86_fp80 %a )
; Constant folding: copysign(1.0, -2.0) folds at compile time to -1.0, so the
; X64 output is a single constant load (no masks, no call).
182 define float @cst1() nounwind {
191 ; X64-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
193 %tmp = tail call float @llvm.copysign.f32( float 1.0, float -2.0 )
; Constant folding through an fadd/fpext chain: (-1.0 + -1.0) = -2.0 supplies
; the sign, so copysign(0.0, -2.0) folds to -0.0 and X64 loads it directly.
197 define double @cst2() nounwind {
206 ; X64-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
208 %tmp1 = fadd float -1.0, -1.0
209 %tmp2 = fpext float %tmp1 to double
210 %tmp = tail call double @llvm.copysign.f64( double 0.0, double %tmp2 )
; Same constant-folding pattern as cst2 but at x86_fp80: a negative extended
; sign applied to a zero magnitude. (The autogenerated CHECK lines for this
; function are not visible in this excerpt.)
214 define x86_fp80 @cst3() nounwind {
226 %tmp1 = fadd float -1.0, -1.0
227 %tmp2 = fpext float %tmp1 to x86_fp80
228 %tmp = tail call x86_fp80 @llvm.copysign.f80( x86_fp80 zeroinitializer, x86_fp80 %tmp2 )
; Regression test for PR41749: llvm.copysign.f80 with a zero magnitude and an
; undef sign operand (result stored through an undef pointer) must compile
; without crashing. Lowering matches int3: spill, testb $-128 on the sign
; byte, fcmovne to select, then fstpt to store the 80-bit result.
232 define void @PR41749() {
233 ; X86-LABEL: PR41749:
235 ; X86-NEXT: subl $12, %esp
236 ; X86-NEXT: .cfi_def_cfa_offset 16
238 ; X86-NEXT: fld %st(0)
239 ; X86-NEXT: fstpt (%esp)
240 ; X86-NEXT: testb $-128, {{[0-9]+}}(%esp)
241 ; X86-NEXT: fld %st(0)
243 ; X86-NEXT: fxch %st(1)
244 ; X86-NEXT: fcmovne %st(1), %st
245 ; X86-NEXT: fstp %st(1)
246 ; X86-NEXT: fstpt (%eax)
247 ; X86-NEXT: addl $12, %esp
248 ; X86-NEXT: .cfi_def_cfa_offset 4
251 ; X64-LABEL: PR41749:
254 ; X64-NEXT: fld %st(0)
255 ; X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
256 ; X64-NEXT: testb $-128, -{{[0-9]+}}(%rsp)
257 ; X64-NEXT: fld %st(0)
259 ; X64-NEXT: fxch %st(1)
260 ; X64-NEXT: fcmovne %st(1), %st
261 ; X64-NEXT: fstp %st(1)
262 ; X64-NEXT: fstpt (%rax)
264 %1 = call x86_fp80 @llvm.copysign.f80(x86_fp80 0xK00000000000000000000, x86_fp80 undef)
265 store x86_fp80 %1, ptr undef, align 16
; Declarations of the copysign intrinsics used by int1/int2/int3, cst1-cst3
; and PR41749. The %Mag/%Sgn parameter names are documentation only: the
; result has %Mag's magnitude and %Sgn's sign bit.
269 declare dso_local float @llvm.copysign.f32(float %Mag, float %Sgn)
270 declare dso_local double @llvm.copysign.f64(double %Mag, double %Sgn)
271 declare dso_local x86_fp80 @llvm.copysign.f80(x86_fp80 %Mag, x86_fp80 %Sgn)