llvm/test/CodeGen/X86/combine-fabs.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
   4
   5 ;
   6 ; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
   7 ; so we need to edit it to remove the NAN constant comments
   8 ;
   9
  10 ; fabs(c1) -> c2
     ; Scalar case. llvm.fabs.f32 is applied to the literal -2.0, and the checks
     ; for both run lines expect nothing but a scalar load of the constant 2.0
     ; into xmm0 followed by the return, i.e. no sign-clearing instruction.
  11 define float @combine_fabs_constant() {
  12 ; SSE-LABEL: combine_fabs_constant:
  13 ; SSE:       # %bb.0:
  14 ; SSE-NEXT:    movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
  15 ; SSE-NEXT:    retq
  16 ;
  17 ; AVX-LABEL: combine_fabs_constant:
  18 ; AVX:       # %bb.0:
  19 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
  20 ; AVX-NEXT:    retq
  21   %1 = call float @llvm.fabs.f32(float -2.0)
  22   ret float %1
  23 }
  24
     ; Vector form of the constant fold. The input <0.0, -0.0, 2.0, -2.0> covers
     ; both signs of zero and of a non-zero value, and the checks expect a single
     ; aligned vector load of the constant [0.0, 0.0, 2.0, 2.0] and the return.
  25 define <4 x float> @combine_vec_fabs_constant() {
  26 ; SSE-LABEL: combine_vec_fabs_constant:
  27 ; SSE:       # %bb.0:
  28 ; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
  29 ; SSE-NEXT:    retq
  30 ;
  31 ; AVX-LABEL: combine_vec_fabs_constant:
  32 ; AVX:       # %bb.0:
  33 ; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
  34 ; AVX-NEXT:    retq
  35   %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)
  36   ret <4 x float> %1
  37 }
  38
  39 ; fabs(fabs(x)) -> fabs(x)
     ; Scalar case. Two chained llvm.fabs.f32 calls on %a must produce only one
     ; AND. The SSE4.1 run expects one andps with a constant-pool operand. The
     ; AVX2 run expects one vbroadcastss of a constant (printed as NaN in the
     ; asm comment) followed by one vandps.
  40 define float @combine_fabs_fabs(float %a) {
  41 ; SSE-LABEL: combine_fabs_fabs:
  42 ; SSE:       # %bb.0:
  43 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  44 ; SSE-NEXT:    retq
  45 ;
  46 ; AVX-LABEL: combine_fabs_fabs:
  47 ; AVX:       # %bb.0:
  48 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
  49 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
  50 ; AVX-NEXT:    retq
  51   %1 = call float @llvm.fabs.f32(float %a)
  52   %2 = call float @llvm.fabs.f32(float %1)
  53   ret float %2
  54 }
  55
     ; Vector form of fabs(fabs(x)). Two chained llvm.fabs.v4f32 calls are
     ; expected to leave a single AND on xmm0, with the same instruction
     ; sequence that the scalar test combine_fabs_fabs checks for.
  56 define <4 x float> @combine_vec_fabs_fabs(<4 x float> %a) {
  57 ; SSE-LABEL: combine_vec_fabs_fabs:
  58 ; SSE:       # %bb.0:
  59 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  60 ; SSE-NEXT:    retq
  61 ;
  62 ; AVX-LABEL: combine_vec_fabs_fabs:
  63 ; AVX:       # %bb.0:
  64 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
  65 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
  66 ; AVX-NEXT:    retq
  67   %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
  68   %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
  69   ret <4 x float> %2
  70 }
  71
  72 ; fabs(fneg(x)) -> fabs(x)
     ; Scalar case. The negation is written as an fsub of %a from -0.0 rather
     ; than as an fneg instruction. The checks expect only the single AND of a
     ; plain fabs, so the subtraction must not appear in the generated code.
  73 define float @combine_fabs_fneg(float %a) {
  74 ; SSE-LABEL: combine_fabs_fneg:
  75 ; SSE:       # %bb.0:
  76 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  77 ; SSE-NEXT:    retq
  78 ;
  79 ; AVX-LABEL: combine_fabs_fneg:
  80 ; AVX:       # %bb.0:
  81 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
  82 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
  83 ; AVX-NEXT:    retq
  84   %1 = fsub float -0.0, %a
  85   %2 = call float @llvm.fabs.f32(float %1)
  86   ret float %2
  87 }
  88
     ; Vector form of fabs(fneg(x)). The negation is an fsub of %a from a splat
     ; of -0.0, and the checks again expect only one AND on xmm0 with no
     ; subtraction left in the output.
  89 define <4 x float> @combine_vec_fabs_fneg(<4 x float> %a) {
  90 ; SSE-LABEL: combine_vec_fabs_fneg:
  91 ; SSE:       # %bb.0:
  92 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  93 ; SSE-NEXT:    retq
  94 ;
  95 ; AVX-LABEL: combine_vec_fabs_fneg:
  96 ; AVX:       # %bb.0:
  97 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
  98 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
  99 ; AVX-NEXT:    retq
 100   %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %a
 101   %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
 102   ret <4 x float> %2
 103 }
 104
 105 ; fabs(fcopysign(x, y)) -> fabs(x)
     ; Scalar case. llvm.copysign.f32(%a, %b) feeds llvm.fabs.f32, and the checks
     ; expect the same single AND on xmm0 as a plain fabs. No instruction that
     ; merges a sign from the second argument is expected in the output.
 106 define float @combine_fabs_fcopysign(float %a, float %b) {
 107 ; SSE-LABEL: combine_fabs_fcopysign:
 108 ; SSE:       # %bb.0:
 109 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 110 ; SSE-NEXT:    retq
 111 ;
 112 ; AVX-LABEL: combine_fabs_fcopysign:
 113 ; AVX:       # %bb.0:
 114 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
 115 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
 116 ; AVX-NEXT:    retq
 117   %1 = call float @llvm.copysign.f32(float %a, float %b)
 118   %2 = call float @llvm.fabs.f32(float %1)
 119   ret float %2
 120 }
 121
     ; Vector form of fabs(fcopysign(x, y)). llvm.copysign.v4f32(%a, %b) feeds
     ; llvm.fabs.v4f32, and the checks expect only one AND on xmm0. In the AVX2
     ; sequence xmm1 is used as the destination of the constant broadcast.
 122 define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {
 123 ; SSE-LABEL: combine_vec_fabs_fcopysign:
 124 ; SSE:       # %bb.0:
 125 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 126 ; SSE-NEXT:    retq
 127 ;
 128 ; AVX-LABEL: combine_vec_fabs_fcopysign:
 129 ; AVX:       # %bb.0:
 130 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
 131 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
 132 ; AVX-NEXT:    retq
 133   %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
 134   %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
 135   ret <4 x float> %2
 136 }
 137
 138 ; store(fabs(load())) - convert scalar to integer
     ; Read-modify-write of a double through the same pointer. The checks expect
     ; the whole load/fabs/store sequence to become one byte-wide AND with 127
     ; (0x7F) applied in memory to the byte at offset 7 of the value, with no
     ; XMM register involved.
 139 define void @combine_fabs_int_rmw_f64(ptr %ptr) {
 140 ; SSE-LABEL: combine_fabs_int_rmw_f64:
 141 ; SSE:       # %bb.0:
 142 ; SSE-NEXT:    andb $127, 7(%rdi)
 143 ; SSE-NEXT:    retq
 144 ;
 145 ; AVX-LABEL: combine_fabs_int_rmw_f64:
 146 ; AVX:       # %bb.0:
 147 ; AVX-NEXT:    andb $127, 7(%rdi)
 148 ; AVX-NEXT:    retq
 149   %1 = load double, ptr %ptr
 150   %2 = call double @llvm.fabs.f64(double %1)
 151   store double %2, ptr %ptr
 152   ret void
 153 }
 154
     ; Float loaded from %src, fabs applied, result stored to a different pointer
     ; %dst. The checks expect the work to be done in a general-purpose register.
     ; The mask 0x7FFFFFFF is materialised in eax, ANDed with the 32-bit value
     ; read from memory, and written out with a 32-bit integer store.
 155 define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
 156 ; SSE-LABEL: combine_fabs_int_f32:
 157 ; SSE:       # %bb.0:
 158 ; SSE-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
 159 ; SSE-NEXT:    andl (%rdi), %eax
 160 ; SSE-NEXT:    movl %eax, (%rsi)
 161 ; SSE-NEXT:    retq
 162 ;
 163 ; AVX-LABEL: combine_fabs_int_f32:
 164 ; AVX:       # %bb.0:
 165 ; AVX-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
 166 ; AVX-NEXT:    andl (%rdi), %eax
 167 ; AVX-NEXT:    movl %eax, (%rsi)
 168 ; AVX-NEXT:    retq
 169   %1 = load float, ptr %src
 170   %2 = call float @llvm.fabs.f32(float %1)
 171   store float %2, ptr %dst
 172   ret void
 173 }
 174
     ; Read-modify-write of a bfloat through the same pointer. As in the f64
     ; variant, the checks expect one in-memory byte AND with 127 (0x7F), here
     ; on the byte at offset 1 of the 2-byte value.
 175 define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
 176 ; SSE-LABEL: combine_fabs_int_rmw_bfloat:
 177 ; SSE:       # %bb.0:
 178 ; SSE-NEXT:    andb $127, 1(%rdi)
 179 ; SSE-NEXT:    retq
 180 ;
 181 ; AVX-LABEL: combine_fabs_int_rmw_bfloat:
 182 ; AVX:       # %bb.0:
 183 ; AVX-NEXT:    andb $127, 1(%rdi)
 184 ; AVX-NEXT:    retq
 185   %1 = load bfloat, ptr %ptr
 186   %2 = call bfloat @llvm.fabs.bf16(bfloat %1)
 187   store bfloat %2, ptr %ptr
 188   ret void
 189 }
 190
     ; Half loaded from %src, fabs applied, result stored to a different pointer
     ; %dst. The checks expect a zero-extending 16-bit load into eax, an AND with
     ; 0x7FFF, and a 16-bit integer store, with no XMM register involved.
 191 define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
 192 ; SSE-LABEL: combine_fabs_int_half:
 193 ; SSE:       # %bb.0:
 194 ; SSE-NEXT:    movzwl (%rdi), %eax
 195 ; SSE-NEXT:    andl $32767, %eax # imm = 0x7FFF
 196 ; SSE-NEXT:    movw %ax, (%rsi)
 197 ; SSE-NEXT:    retq
 198 ;
 199 ; AVX-LABEL: combine_fabs_int_half:
 200 ; AVX:       # %bb.0:
 201 ; AVX-NEXT:    movzwl (%rdi), %eax
 202 ; AVX-NEXT:    andl $32767, %eax # imm = 0x7FFF
 203 ; AVX-NEXT:    movw %ax, (%rsi)
 204 ; AVX-NEXT:    retq
 205   %1 = load half, ptr %src
 206   %2 = call half @llvm.fabs.f16(half %1)
 207   store half %2, ptr %dst
 208   ret void
 209 }
 210
 211 ; don't convert vector to scalar
     ; Negative counterpart of the scalar load/fabs/store tests. For a
     ; <4 x float> value the checks expect the operation to stay in XMM
     ; registers. The SSE4.1 run expects an aligned vector load, andps with a
     ; constant-pool operand and an aligned vector store. The AVX2 run expects a
     ; broadcast constant, vandps with the source as memory operand and an
     ; aligned vector store. No general-purpose integer AND is expected.
 212 define void @combine_fabs_vec_int_v4f32(ptr %src, ptr %dst) {
 213 ; SSE-LABEL: combine_fabs_vec_int_v4f32:
 214 ; SSE:       # %bb.0:
 215 ; SSE-NEXT:    movaps (%rdi), %xmm0
 216 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 217 ; SSE-NEXT:    movaps %xmm0, (%rsi)
 218 ; SSE-NEXT:    retq
 219 ;
 220 ; AVX-LABEL: combine_fabs_vec_int_v4f32:
 221 ; AVX:       # %bb.0:
 222 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
 223 ; AVX-NEXT:    vandps (%rdi), %xmm0, %xmm0
 224 ; AVX-NEXT:    vmovaps %xmm0, (%rsi)
 225 ; AVX-NEXT:    retq
 226   %1 = load <4 x float>, ptr %src
 227   %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)
 228   store <4 x float> %2, ptr %dst
 229   ret void
 230 }
 231
     ; Declarations of the intrinsics used by the tests above, grouped by type,
     ; with the scalar f32 overloads first and the <4 x float> overloads second.
     ; NOTE(review) - the f64, bf16 and f16 overloads of llvm.fabs called by the
     ; combine_fabs_int_* tests have no declaration in this part of the file, so
     ; confirm that the IR parser in use accepts undeclared intrinsics.
 232 declare float @llvm.fabs.f32(float %p)
 233 declare float @llvm.copysign.f32(float %Mag, float %Sgn)
 234
 235 declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
 236 declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)