test/CodeGen/X86/vec_setcc.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
   4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
   5
   6 define <16 x i8> @v16i8_icmp_uge(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
   7 ; SSE-LABEL: v16i8_icmp_uge:
   8 ; SSE:       # %bb.0:
   9 ; SSE-NEXT:    pmaxub %xmm0, %xmm1
  10 ; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
  11 ; SSE-NEXT:    retq
  12 ;
  13 ; AVX-LABEL: v16i8_icmp_uge:
  14 ; AVX:       # %bb.0:
  15 ; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
  16 ; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
  17 ; AVX-NEXT:    retq
     ;
     ; IR under test: per-lane unsigned %a >= %b on <16 x i8>, with each i1 result
     ; sign-extended to a full i8 lane (true becomes all ones, false becomes zero).
     ; The checks above expect an unsigned byte max followed by a byte equality
     ; compare against %a in every run configuration (a == max(a, b) iff a >= b).
  18   %1 = icmp uge <16 x i8> %a, %b
  19   %2 = sext <16 x i1> %1 to <16 x i8>
  20   ret <16 x i8> %2
  21 }
  22
  23 define <16 x i8> @v16i8_icmp_ule(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
  24 ; SSE-LABEL: v16i8_icmp_ule:
  25 ; SSE:       # %bb.0:
  26 ; SSE-NEXT:    pminub %xmm0, %xmm1
  27 ; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
  28 ; SSE-NEXT:    retq
  29 ;
  30 ; AVX-LABEL: v16i8_icmp_ule:
  31 ; AVX:       # %bb.0:
  32 ; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm1
  33 ; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
  34 ; AVX-NEXT:    retq
     ;
     ; IR under test: per-lane unsigned %a <= %b on <16 x i8>, with each i1 result
     ; sign-extended to a full i8 lane (true becomes all ones, false becomes zero).
     ; The checks above expect an unsigned byte min followed by a byte equality
     ; compare against %a in every run configuration (a == min(a, b) iff a <= b).
  35   %1 = icmp ule <16 x i8> %a, %b
  36   %2 = sext <16 x i1> %1 to <16 x i8>
  37   ret <16 x i8> %2
  38 }
  39
  40 define <8 x i16> @v8i16_icmp_uge(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
  41 ; SSE2-LABEL: v8i16_icmp_uge:
  42 ; SSE2:       # %bb.0:
  43 ; SSE2-NEXT:    psubusw %xmm0, %xmm1
  44 ; SSE2-NEXT:    pxor %xmm0, %xmm0
  45 ; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
  46 ; SSE2-NEXT:    retq
  47 ;
  48 ; SSE41-LABEL: v8i16_icmp_uge:
  49 ; SSE41:       # %bb.0:
  50 ; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
  51 ; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
  52 ; SSE41-NEXT:    retq
  53 ;
  54 ; AVX-LABEL: v8i16_icmp_uge:
  55 ; AVX:       # %bb.0:
  56 ; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
  57 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
  58 ; AVX-NEXT:    retq
     ;
     ; IR under test: per-lane unsigned %a >= %b on <8 x i16>, with each i1 result
     ; sign-extended to a full i16 lane (true becomes all ones, false becomes zero).
     ; Expected lowering differs per run configuration:
     ;  * the SSE2 run expects an unsigned saturating subtract (b - a) compared
     ;    for equality with a zeroed register (the difference saturates to 0
     ;    exactly when a >= b);
     ;  * the SSE4.1 and AVX runs expect an unsigned word max followed by a word
     ;    equality compare against %a.
  59   %1 = icmp uge <8 x i16> %a, %b
  60   %2 = sext <8 x i1> %1 to <8 x i16>
  61   ret <8 x i16> %2
  62 }
  63
  64 define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
  65 ; SSE2-LABEL: v8i16_icmp_ule:
  66 ; SSE2:       # %bb.0:
  67 ; SSE2-NEXT:    psubusw %xmm1, %xmm0
  68 ; SSE2-NEXT:    pxor %xmm1, %xmm1
  69 ; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
  70 ; SSE2-NEXT:    retq
  71 ;
  72 ; SSE41-LABEL: v8i16_icmp_ule:
  73 ; SSE41:       # %bb.0:
  74 ; SSE41-NEXT:    pminuw %xmm0, %xmm1
  75 ; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
  76 ; SSE41-NEXT:    retq
  77 ;
  78 ; AVX-LABEL: v8i16_icmp_ule:
  79 ; AVX:       # %bb.0:
  80 ; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
  81 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
  82 ; AVX-NEXT:    retq
     ;
     ; IR under test: per-lane unsigned %a <= %b on <8 x i16>, with each i1 result
     ; sign-extended to a full i16 lane (true becomes all ones, false becomes zero).
     ; Expected lowering differs per run configuration:
     ;  * the SSE2 run expects an unsigned saturating subtract (a - b) compared
     ;    for equality with a zeroed register (the difference saturates to 0
     ;    exactly when a <= b);
     ;  * the SSE4.1 and AVX runs expect an unsigned word min followed by a word
     ;    equality compare against %a.
  83   %1 = icmp ule <8 x i16> %a, %b
  84   %2 = sext <8 x i1> %1 to <8 x i16>
  85   ret <8 x i16> %2
  86 }
  87
  88 define <4 x i32> @v4i32_icmp_uge(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
  89 ; SSE2-LABEL: v4i32_icmp_uge:
  90 ; SSE2:       # %bb.0:
  91 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
  92 ; SSE2-NEXT:    pxor %xmm2, %xmm0
  93 ; SSE2-NEXT:    pxor %xmm1, %xmm2
  94 ; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
  95 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
  96 ; SSE2-NEXT:    pxor %xmm2, %xmm0
  97 ; SSE2-NEXT:    retq
  98 ;
  99 ; SSE41-LABEL: v4i32_icmp_uge:
 100 ; SSE41:       # %bb.0:
 101 ; SSE41-NEXT:    pmaxud %xmm0, %xmm1
 102 ; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
 103 ; SSE41-NEXT:    retq
 104 ;
 105 ; AVX-LABEL: v4i32_icmp_uge:
 106 ; AVX:       # %bb.0:
 107 ; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm1
 108 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 109 ; AVX-NEXT:    retq
     ;
     ; IR under test: per-lane unsigned %a >= %b on <4 x i32>, with each i1 result
     ; sign-extended to a full i32 lane (true becomes all ones, false becomes zero).
     ; Expected lowering differs per run configuration:
     ;  * the SSE2 run expects both operands to be XORed with 2147483648
     ;    (0x80000000, the i32 sign bit) so that a signed greater-than compare
     ;    orders them as unsigned values; it computes b > a and then inverts the
     ;    mask by XORing with an all-ones register (pcmpeqd of a register with
     ;    itself);
     ;  * the SSE4.1 and AVX runs expect an unsigned dword max followed by a
     ;    dword equality compare against %a.
 110   %1 = icmp uge <4 x i32> %a, %b
 111   %2 = sext <4 x i1> %1 to <4 x i32>
 112   ret <4 x i32> %2
 113 }
 114
 115 define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
 116 ; SSE2-LABEL: v4i32_icmp_ule:
 117 ; SSE2:       # %bb.0:
 118 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
 119 ; SSE2-NEXT:    pxor %xmm2, %xmm1
 120 ; SSE2-NEXT:    pxor %xmm2, %xmm0
 121 ; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
 122 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 123 ; SSE2-NEXT:    pxor %xmm1, %xmm0
 124 ; SSE2-NEXT:    retq
 125 ;
 126 ; SSE41-LABEL: v4i32_icmp_ule:
 127 ; SSE41:       # %bb.0:
 128 ; SSE41-NEXT:    pminud %xmm0, %xmm1
 129 ; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
 130 ; SSE41-NEXT:    retq
 131 ;
 132 ; AVX-LABEL: v4i32_icmp_ule:
 133 ; AVX:       # %bb.0:
 134 ; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm1
 135 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 136 ; AVX-NEXT:    retq
     ;
     ; IR under test: per-lane unsigned %a <= %b on <4 x i32>, with each i1 result
     ; sign-extended to a full i32 lane (true becomes all ones, false becomes zero).
     ; Expected lowering differs per run configuration:
     ;  * the SSE2 run expects both operands to be XORed with 2147483648
     ;    (0x80000000, the i32 sign bit) so that a signed greater-than compare
     ;    orders them as unsigned values; it computes a > b and then inverts the
     ;    mask by XORing with an all-ones register (pcmpeqd of a register with
     ;    itself);
     ;  * the SSE4.1 and AVX runs expect an unsigned dword min followed by a
     ;    dword equality compare against %a.
 137   %1 = icmp ule <4 x i32> %a, %b
 138   %2 = sext <4 x i1> %1 to <4 x i32>
 139   ret <4 x i32> %2
 140 }
 141
 142 ; At one point we were incorrectly constant-folding a setcc to 0x1 instead of
 143 ; 0xff, leading to a constpool load. The instruction doesn't matter here, but it
 144 ; should set all bits to 1.
 145 define <16 x i8> @test_setcc_constfold_vi8(<16 x i8> %l, <16 x i8> %r) {
 146 ; SSE-LABEL: test_setcc_constfold_vi8:
 147 ; SSE:       # %bb.0:
 148 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 149 ; SSE-NEXT:    retq
 150 ;
 151 ; AVX-LABEL: test_setcc_constfold_vi8:
 152 ; AVX:       # %bb.0:
 153 ; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 154 ; AVX-NEXT:    retq
     ;
     ; IR under test: the eq and ne compares of the same operands are each
     ; sign-extended to i8 lanes and then ORed. Exactly one of the two is true in
     ; every lane, so the result is all ones regardless of %l and %r. The checks
     ; above expect it to be materialised by comparing a register with itself and
     ; nothing else (no other instruction before the return).
 155   %test1 = icmp eq <16 x i8> %l, %r
 156   %mask1 = sext <16 x i1> %test1 to <16 x i8>
 157   %test2 = icmp ne <16 x i8> %l, %r
 158   %mask2 = sext <16 x i1> %test2 to <16 x i8>
 159   %res = or <16 x i8> %mask1, %mask2
 160   ret <16 x i8> %res
 161 }
 162
 163 ; Make sure the result is still sensible when the extension is done afterwards.
 164 define <16 x i8> @test_setcc_constfold_vi1(<16 x i8> %l, <16 x i8> %r) {
 165 ; SSE-LABEL: test_setcc_constfold_vi1:
 166 ; SSE:       # %bb.0:
 167 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 168 ; SSE-NEXT:    retq
 169 ;
 170 ; AVX-LABEL: test_setcc_constfold_vi1:
 171 ; AVX:       # %bb.0:
 172 ; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 173 ; AVX-NEXT:    retq
     ;
     ; IR under test: the eq and ne compares of the same operands are ORed while
     ; still <16 x i1>, and only the combined value is sign-extended to i8 lanes.
     ; Exactly one of the two compares is true in every lane, so the result is all
     ; ones regardless of %l and %r. The checks above expect it to be materialised
     ; by comparing a register with itself and nothing else.
 174   %test1 = icmp eq <16 x i8> %l, %r
 175   %test2 = icmp ne <16 x i8> %l, %r
 176   %res = or <16 x i1> %test1, %test2
 177   %mask = sext <16 x i1> %res to <16 x i8>
 178   ret <16 x i8> %mask
 179 }
 180
 181 ; The 64-bit case is also particularly important, as the constant "-1" is probably
 182 ; just 32 bits wide.
 183 define <2 x i64> @test_setcc_constfold_vi64(<2 x i64> %l, <2 x i64> %r) {
 184 ; SSE-LABEL: test_setcc_constfold_vi64:
 185 ; SSE:       # %bb.0:
 186 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 187 ; SSE-NEXT:    retq
 188 ;
 189 ; AVX-LABEL: test_setcc_constfold_vi64:
 190 ; AVX:       # %bb.0:
 191 ; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 192 ; AVX-NEXT:    retq
     ;
     ; IR under test: the eq and ne compares of the same <2 x i64> operands are
     ; each sign-extended to i64 lanes and then ORed. Exactly one of the two is
     ; true in every lane, so the result is all ones across the full 64 bits of
     ; each lane regardless of %l and %r. The checks above expect it to be
     ; materialised by comparing a register with itself and nothing else.
 193   %test1 = icmp eq <2 x i64> %l, %r
 194   %mask1 = sext <2 x i1> %test1 to <2 x i64>
 195   %test2 = icmp ne <2 x i64> %l, %r
 196   %mask2 = sext <2 x i1> %test2 to <2 x i64>
 197   %res = or <2 x i64> %mask1, %mask2
 198   ret <2 x i64> %res
 199 }