llvm/test/CodeGen/X86/combine-testps.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
   4
   5 ;
   6 ; testz(~X,Y) -> testc(X,Y)
   7 ;
   8
   9 define i32 @testpsz_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) { ; vtestz.ps whose first operand is bitwise-NOT %c; the expected asm is a lone vtestps with no separate NOT instruction
  10 ; CHECK-LABEL: testpsz_128_invert0:
  11 ; CHECK:       # %bb.0:
  12 ; CHECK-NEXT:    movl %edi, %eax
  13 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
  14 ; CHECK-NEXT:    cmovael %esi, %eax
  15 ; CHECK-NEXT:    retq
  16   %t0 = bitcast <4 x float> %c to <2 x i64>
  17   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1> ; xor with all-ones == bitwise NOT of %c
  18   %t2 = bitcast <2 x i64> %t1 to <4 x float>
  19   %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %t2, <4 x float> %d)
  20   %t4 = icmp ne i32 %t3, 0
  21   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
  22   ret i32 %t5
  23 }
  24
  25 define i32 @testpsz_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) { ; 256-bit form: vtestz.ps.256 on NOT %c and %d; the expected asm is a lone ymm vtestps followed by vzeroupper
  26 ; CHECK-LABEL: testpsz_256_invert0:
  27 ; CHECK:       # %bb.0:
  28 ; CHECK-NEXT:    movl %edi, %eax
  29 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
  30 ; CHECK-NEXT:    cmovael %esi, %eax
  31 ; CHECK-NEXT:    vzeroupper
  32 ; CHECK-NEXT:    retq
  33   %t0 = bitcast <8 x float> %c to <4 x i64>
  34   %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise NOT of %c
  35   %t2 = bitcast <4 x i64> %t1 to <8 x float>
  36   %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t2, <8 x float> %d)
  37   %t4 = icmp ne i32 %t3, 0
  38   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
  39   ret i32 %t5
  40 }
  41
  42 ;
  43 ; testz(X,~Y) -> testc(Y,X)
  44 ;
  45
  46 define i32 @testpsz_128_invert1(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) { ; vtestz.ps whose second operand is bitwise-NOT %d; the expected vtestps has its register operands swapped relative to the invert0 test
  47 ; CHECK-LABEL: testpsz_128_invert1:
  48 ; CHECK:       # %bb.0:
  49 ; CHECK-NEXT:    movl %edi, %eax
  50 ; CHECK-NEXT:    vtestps %xmm0, %xmm1
  51 ; CHECK-NEXT:    cmovael %esi, %eax
  52 ; CHECK-NEXT:    retq
  53   %t0 = bitcast <4 x float> %d to <2 x i64>
  54   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1> ; bitwise NOT of %d (the second intrinsic operand)
  55   %t2 = bitcast <2 x i64> %t1 to <4 x float>
  56   %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %c, <4 x float> %t2)
  57   %t4 = icmp ne i32 %t3, 0
  58   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
  59   ret i32 %t5
  60 }
  61
  62 define i32 @testpsz_256_invert1(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) { ; 256-bit form: vtestz.ps.256 on %c and NOT %d; the expected ymm vtestps has swapped register operands and no separate NOT
  63 ; CHECK-LABEL: testpsz_256_invert1:
  64 ; CHECK:       # %bb.0:
  65 ; CHECK-NEXT:    movl %edi, %eax
  66 ; CHECK-NEXT:    vtestps %ymm0, %ymm1
  67 ; CHECK-NEXT:    cmovael %esi, %eax
  68 ; CHECK-NEXT:    vzeroupper
  69 ; CHECK-NEXT:    retq
  70   %t0 = bitcast <8 x float> %d to <4 x i64>
  71   %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise NOT of %d (the second intrinsic operand)
  72   %t2 = bitcast <4 x i64> %t1 to <8 x float>
  73   %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %c, <8 x float> %t2)
  74   %t4 = icmp ne i32 %t3, 0
  75   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
  76   ret i32 %t5
  77 }
  78
  79 ;
  80 ; testc(~X,Y) -> testz(X,Y)
  81 ;
  82
  83 define i32 @testpsc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) { ; vtestc.ps whose first operand is bitwise-NOT %c; the expected asm is a lone vtestps selected on with cmovne
  84 ; CHECK-LABEL: testpsc_128_invert0:
  85 ; CHECK:       # %bb.0:
  86 ; CHECK-NEXT:    movl %edi, %eax
  87 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
  88 ; CHECK-NEXT:    cmovnel %esi, %eax
  89 ; CHECK-NEXT:    retq
  90   %t0 = bitcast <4 x float> %c to <2 x i64>
  91   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1> ; bitwise NOT of %c
  92   %t2 = bitcast <2 x i64> %t1 to <4 x float>
  93   %t3 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %t2, <4 x float> %d)
  94   %t4 = icmp ne i32 %t3, 0
  95   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
  96   ret i32 %t5
  97 }
  98
  99 define i32 @testpsc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) { ; 256-bit form: vtestc.ps.256 on NOT %c and %d; the expected asm is a lone ymm vtestps selected on with cmovne
 100 ; CHECK-LABEL: testpsc_256_invert0:
 101 ; CHECK:       # %bb.0:
 102 ; CHECK-NEXT:    movl %edi, %eax
 103 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
 104 ; CHECK-NEXT:    cmovnel %esi, %eax
 105 ; CHECK-NEXT:    vzeroupper
 106 ; CHECK-NEXT:    retq
 107   %t0 = bitcast <8 x float> %c to <4 x i64>
 108   %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise NOT of %c
 109   %t2 = bitcast <4 x i64> %t1 to <8 x float>
 110   %t3 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %t2, <8 x float> %d)
 111   %t4 = icmp ne i32 %t3, 0
 112   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
 113   ret i32 %t5
 114 }
 115
 116 ;
 117 ; testnzc(~X,Y) -> testnzc(X,Y)
 118 ;
 119
 120 define i32 @testpsnzc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) { ; vtestnzc.ps whose first operand is bitwise-NOT %c; the expected asm is a lone vtestps selected on with cmovbe
 121 ; CHECK-LABEL: testpsnzc_128_invert0:
 122 ; CHECK:       # %bb.0:
 123 ; CHECK-NEXT:    movl %edi, %eax
 124 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
 125 ; CHECK-NEXT:    cmovbel %esi, %eax
 126 ; CHECK-NEXT:    retq
 127   %t0 = bitcast <4 x float> %c to <2 x i64>
 128   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1> ; bitwise NOT of %c
 129   %t2 = bitcast <2 x i64> %t1 to <4 x float>
 130   %t3 = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %t2, <4 x float> %d)
 131   %t4 = icmp ne i32 %t3, 0
 132   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
 133   ret i32 %t5
 134 }
 135
 136 define i32 @testpsnzc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) { ; 256-bit form: vtestnzc.ps.256 on NOT %c and %d; the expected asm is a lone ymm vtestps selected on with cmovbe
 137 ; CHECK-LABEL: testpsnzc_256_invert0:
 138 ; CHECK:       # %bb.0:
 139 ; CHECK-NEXT:    movl %edi, %eax
 140 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
 141 ; CHECK-NEXT:    cmovbel %esi, %eax
 142 ; CHECK-NEXT:    vzeroupper
 143 ; CHECK-NEXT:    retq
 144   %t0 = bitcast <8 x float> %c to <4 x i64>
 145   %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise NOT of %c
 146   %t2 = bitcast <4 x i64> %t1 to <8 x float>
 147   %t3 = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %t2, <8 x float> %d)
 148   %t4 = icmp ne i32 %t3, 0
 149   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
 150   ret i32 %t5
 151 }
 152
 153 ;
 154 ; SimplifyDemandedBits - only the sign bit is required
 155 ;
 156
 157 define i32 @testpsz_128_signbit(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) { ; vtestz.ps fed a per-lane sign splat of %c; the expected asm tests %xmm0 directly with no shift instruction
 158 ; CHECK-LABEL: testpsz_128_signbit:
 159 ; CHECK:       # %bb.0:
 160 ; CHECK-NEXT:    movl %edi, %eax
 161 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
 162 ; CHECK-NEXT:    cmovnel %esi, %eax
 163 ; CHECK-NEXT:    retq
 164   %t0 = bitcast <4 x float> %c to <4 x i32>
 165   %t1 = ashr <4 x i32> %t0, <i32 31, i32 31, i32 31, i32 31> ; arithmetic shift by 31 copies each lane's sign bit into all 32 bits
 166   %t2 = bitcast <4 x i32> %t1 to <4 x float>
 167   %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %t2, <4 x float> %d)
 168   %t4 = icmp ne i32 %t3, 0
 169   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
 170   ret i32 %t5
 171 }
 172
 173 define i32 @testpsnzc_256_signbit(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) { ; sign mask of %c built via compare+sext and fed to a 256-bit test intrinsic; the expected asm tests %ymm0 directly with no compare instruction
 174 ; CHECK-LABEL: testpsnzc_256_signbit:
 175 ; CHECK:       # %bb.0:
 176 ; CHECK-NEXT:    movl %edi, %eax
 177 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
 178 ; CHECK-NEXT:    cmovnel %esi, %eax
 179 ; CHECK-NEXT:    vzeroupper
 180 ; CHECK-NEXT:    retq
 181   %t0 = bitcast <8 x float> %c to <8 x i32>
 182   %t1 = icmp sgt <8 x i32> zeroinitializer, %t0 ; true in lanes where 0 > %t0, i.e. the lane's sign bit is set
 183   %t2 = sext <8 x i1> %t1 to <8 x i32> ; all-ones in lanes with the sign bit set, zero elsewhere
 184   %t3 = bitcast <8 x i32> %t2 to <8 x float>
 185   %t4 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t3, <8 x float> %d) ; NOTE(review): function name says "nzc" but this calls vtestz.ps.256 - confirm intended; changing it requires regenerating the assertions
 186   %t5 = icmp ne i32 %t4, 0
 187   %t6 = select i1 %t5, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
 188   ret i32 %t6
 189 }
 190
 191 define i32 @testpsc_256_signbit_multiuse(<8 x float> %c, i32 %a, i32 %b) { ; sign splat of %c used as BOTH operands of the test intrinsic; the expected asm is vtestps %ymm0, %ymm0 with no shift instruction
 192 ; CHECK-LABEL: testpsc_256_signbit_multiuse:
 193 ; CHECK:       # %bb.0:
 194 ; CHECK-NEXT:    movl %edi, %eax
 195 ; CHECK-NEXT:    vtestps %ymm0, %ymm0
 196 ; CHECK-NEXT:    cmovnel %esi, %eax
 197 ; CHECK-NEXT:    vzeroupper
 198 ; CHECK-NEXT:    retq
 199   %t0 = bitcast <8 x float> %c to <8 x i32>
 200   %t1 = ashr <8 x i32> %t0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> ; arithmetic shift by 31 copies each lane's sign bit into all 32 bits
 201   %t2 = bitcast <8 x i32> %t1 to <8 x float>
 202   %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t2, <8 x float> %t2) ; %t2 has two uses here; NOTE(review): function name says "testpsc" but this calls vtestz.ps.256 - confirm intended
 203   %t4 = icmp ne i32 %t3, 0
 204   %t5 = select i1 %t4, i32 %a, i32 %b ; %a when the intrinsic returned nonzero, otherwise %b
 205   ret i32 %t5
 206 }
 207
 208 define i1 @PR62171(<8 x float> %a0, <8 x float> %a1) { ; returns true when no lane of %a0 compares ordered-equal to %a1, via a split/or/byte-mask sequence; the expected asm is just vcmpeqps + vtestps + sete
 209 ; CHECK-LABEL: PR62171:
 210 ; CHECK:       # %bb.0:
 211 ; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm0
 212 ; CHECK-NEXT:    vtestps %ymm0, %ymm0
 213 ; CHECK-NEXT:    sete %al
 214 ; CHECK-NEXT:    vzeroupper
 215 ; CHECK-NEXT:    retq
 216   %cmp = fcmp oeq <8 x float> %a0, %a1
 217   %sext = sext <8 x i1> %cmp to <8 x i32> ; all-ones per lane where the compare was true
 218   %extract = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes 0-3 (low half)
 219   %extract1 = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; lanes 4-7 (high half)
 220   %or = or <4 x i32> %extract, %extract1 ; merge the two halves
 221   %or1 = bitcast <4 x i32> %or to <16 x i8>
 222   %msk = icmp slt <16 x i8> %or1, zeroinitializer ; per-byte sign-bit test
 223   %msk1 = bitcast <16 x i1> %msk to i16 ; pack the 16 byte results into a bitmask
 224   %not = icmp eq i16 %msk1, 0 ; true when no byte had its sign bit set
 225   ret i1 %not
 226 }
 227
 228 define void @combine_testp_v8f32(<8 x i32> %x){ ; vtestz.ps.256 with NOT %x as both operands, feeding a branch whose two successors both just return; separate expectations for the AVX and AVX2 runs
 229 ; AVX-LABEL: combine_testp_v8f32:
 230 ; AVX:       # %bb.0: # %entry
 231 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 232 ; AVX-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
 233 ; AVX-NEXT:    vtestps %ymm1, %ymm0
 234 ; AVX-NEXT:    vzeroupper
 235 ; AVX-NEXT:    retq
 236 ;
 237 ; AVX2-LABEL: combine_testp_v8f32:
 238 ; AVX2:       # %bb.0: # %entry
 239 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 240 ; AVX2-NEXT:    vtestps %ymm1, %ymm0
 241 ; AVX2-NEXT:    vzeroupper
 242 ; AVX2-NEXT:    retq
 243 entry:
 244   %xor.i.i.i.i.i.i.i.i.i = xor <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> ; bitwise NOT of %x
 245   %.cast.i.i.i.i.i.i = bitcast <8 x i32> %xor.i.i.i.i.i.i.i.i.i to <8 x float>
 246   %0 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %.cast.i.i.i.i.i.i, <8 x float> %.cast.i.i.i.i.i.i) ; same inverted value passed as both operands
 247   %cmp.i.not.i.i.i.i.i.i = icmp eq i32 %0, 0
 248   br i1 %cmp.i.not.i.i.i.i.i.i, label %if.end3.i.i.i.i.i.i, label %end ; both targets only execute ret void; the expected asm contains no conditional jump
 249
 250 if.end3.i.i.i.i.i.i:                              ; preds = %entry
 251   ret void
 252
 253 end: ; preds = %entry
 254   ret void
 255 }
 256
 257 define i32 @PR88958_1(ptr %0, <4 x float> %1) { ; vtestz.ps with a loaded first operand; the expected asm uses the load directly as vtestps's memory operand and materializes the result with sete
 265 ; CHECK-LABEL: PR88958_1:
 266 ; CHECK:       # %bb.0:
 267 ; CHECK-NEXT:    xorl %eax, %eax
 268 ; CHECK-NEXT:    vtestps (%rdi), %xmm0
 269 ; CHECK-NEXT:    sete %al
 270 ; CHECK-NEXT:    retq
 271   %3 = load <4 x float>, ptr %0 ; first intrinsic operand comes from memory
 272   %4 = tail call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %3, <4 x float> %1)
 273   ret i32 %4
 274 }
 275
 276 define i32 @PR88958_2(ptr %0, <4 x float> %1) { ; vtestc.ps with a loaded first operand; the expected asm loads into %xmm1 with vmovaps before a register-register vtestps and materializes the result with setb
 285 ; CHECK-LABEL: PR88958_2:
 286 ; CHECK:       # %bb.0:
 287 ; CHECK-NEXT:    vmovaps (%rdi), %xmm1
 288 ; CHECK-NEXT:    xorl %eax, %eax
 289 ; CHECK-NEXT:    vtestps %xmm0, %xmm1
 290 ; CHECK-NEXT:    setb %al
 291 ; CHECK-NEXT:    retq
 292   %3 = load <4 x float>, ptr %0 ; first intrinsic operand comes from memory
 293   %4 = tail call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %3, <4 x float> %1)
 294   ret i32 %4
 295 }
 296
 297 define i32 @PR88958_3(ptr %0, <8 x float> %1) { ; 256-bit form: vtestz.ps.256 with a loaded first operand; the expected asm uses the load directly as vtestps's memory operand and materializes the result with sete
 305 ; CHECK-LABEL: PR88958_3:
 306 ; CHECK:       # %bb.0:
 307 ; CHECK-NEXT:    xorl %eax, %eax
 308 ; CHECK-NEXT:    vtestps (%rdi), %ymm0
 309 ; CHECK-NEXT:    sete %al
 310 ; CHECK-NEXT:    vzeroupper
 311 ; CHECK-NEXT:    retq
 312   %3 = load <8 x float>, ptr %0 ; first intrinsic operand comes from memory
 313   %4 = tail call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %3, <8 x float> %1)
 314   ret i32 %4
 315 }
 316
 317 define i32 @PR88958_4(ptr %0, <8 x float> %1) { ; 256-bit form: vtestc.ps.256 with a loaded first operand; the expected asm loads into %ymm1 with vmovaps before a register-register vtestps and materializes the result with setb
 326 ; CHECK-LABEL: PR88958_4:
 327 ; CHECK:       # %bb.0:
 328 ; CHECK-NEXT:    vmovaps (%rdi), %ymm1
 329 ; CHECK-NEXT:    xorl %eax, %eax
 330 ; CHECK-NEXT:    vtestps %ymm0, %ymm1
 331 ; CHECK-NEXT:    setb %al
 332 ; CHECK-NEXT:    vzeroupper
 333 ; CHECK-NEXT:    retq
 334   %3 = load <8 x float>, ptr %0 ; first intrinsic operand comes from memory
 335   %4 = tail call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %3, <8 x float> %1)
 336   ret i32 %4
 337 }
 338
 339 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone ; 128-bit (<4 x float>) test intrinsics called by the functions in this file
 340 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
 341 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
 342
 343 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone ; 256-bit (<8 x float>) counterparts
 344 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
 345 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone