test/Transforms/InstCombine/X86/blend_x86.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s
   3
   4 define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
   5 ; CHECK-LABEL: @constant_blendvpd(
   6 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[AB:%.*]], <2 x double> [[XY:%.*]], <2 x i32> <i32 0, i32 3>
   7 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
   8 ; Constant selector <0xFFFFFFFFE0000000, 0.0>: only lane 0 is nonzero, so the blend is expected to become a shufflevector taking lane 0 from %ab and lane 1 from %xy.
   9   %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
  10   ret <2 x double> %1
  11 }
  12
  13 define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
  14 ; CHECK-LABEL: @constant_blendvpd_zero(
  15 ; CHECK-NEXT:    ret <2 x double> [[XY:%.*]]
  16 ; All-zero selector: the blend is expected to fold away and return its first operand, %xy.
  17   %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
  18   ret <2 x double> %1
  19 }
  20
  21 define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
  22 ; CHECK-LABEL: @constant_blendvpd_dup(
  23 ; CHECK-NEXT:    ret <2 x double> [[XY:%.*]]
  24 ; Both data operands are %xy, so the result is expected to be %xy regardless of the non-constant selector %sel.
  25   %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
  26   ret <2 x double> %1
  27 }
  28
  29 define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
  30 ; CHECK-LABEL: @constant_blendvps(
  31 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[XYZW:%.*]], <4 x float> [[ABCD:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  32 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
  33 ; Only lane 3 of the constant selector is nonzero (0xFFFFFFFFE0000000): expect a shufflevector taking lanes 0-2 from %xyzw and lane 3 from %abcd.
  34   %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
  35   ret <4 x float> %1
  36 }
  37
  38 define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
  39 ; CHECK-LABEL: @constant_blendvps_zero(
  40 ; CHECK-NEXT:    ret <4 x float> [[XYZW:%.*]]
  41 ; All-zero selector: the blend is expected to fold away and return its first operand, %xyzw.
  42   %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
  43   ret <4 x float> %1
  44 }
  45
  46 define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
  47 ; CHECK-LABEL: @constant_blendvps_dup(
  48 ; CHECK-NEXT:    ret <4 x float> [[XYZW:%.*]]
  49 ; Both data operands are %xyzw, so the result is expected to be %xyzw regardless of the non-constant selector %sel.
  50   %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
  51   ret <4 x float> %1
  52 }
  53
  54 define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
  55 ; CHECK-LABEL: @constant_pblendvb(
  56 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[XYZW:%.*]], <16 x i8> [[ABCD:%.*]], <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 21, i32 22, i32 7, i32 8, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 15>
  57 ; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
  58 ; Selector bytes are either 0 or 255: expect a shufflevector taking the 255 lanes (2, 4-6, 10, 12-14) from %abcd and every other lane from %xyzw.
  59   %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
  60   ret <16 x i8> %1
  61 }
  62
  63 define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
  64 ; CHECK-LABEL: @constant_pblendvb_zero(
  65 ; CHECK-NEXT:    ret <16 x i8> [[XYZW:%.*]]
  66 ; All-zero selector: the byte blend is expected to fold away and return its first operand, %xyzw.
  67   %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
  68   ret <16 x i8> %1
  69 }
  70
  71 define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
  72 ; CHECK-LABEL: @constant_pblendvb_dup(
  73 ; CHECK-NEXT:    ret <16 x i8> [[XYZW:%.*]]
  74 ; Both data operands are %xyzw, so the result is expected to be %xyzw regardless of the non-constant selector %sel.
  75   %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
  76   ret <16 x i8> %1
  77 }
  78
  79 define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
  80 ; CHECK-LABEL: @constant_blendvpd_avx(
  81 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[AB:%.*]], <4 x double> [[XY:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  82 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
  83 ; 256-bit variant: lanes 0 and 2 of the constant selector are nonzero, so expect a shufflevector taking lanes 0 and 2 from %ab and lanes 1 and 3 from %xy.
  84   %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
  85   ret <4 x double> %1
  86 }
  87
  88 define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
  89 ; CHECK-LABEL: @constant_blendvpd_avx_zero(
  90 ; CHECK-NEXT:    ret <4 x double> [[XY:%.*]]
  91 ; 256-bit variant with an all-zero selector: the blend is expected to fold away and return its first operand, %xy.
  92   %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
  93   ret <4 x double> %1
  94 }
  95
  96 define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
  97 ; CHECK-LABEL: @constant_blendvpd_avx_dup(
  98 ; CHECK-NEXT:    ret <4 x double> [[XY:%.*]]
  99 ; 256-bit variant with both data operands equal to %xy: the result is expected to be %xy regardless of %sel.
 100   %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
 101   ret <4 x double> %1
 102 }
 103
 104 define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
 105 ; CHECK-LABEL: @constant_blendvps_avx(
 106 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[XYZW:%.*]], <8 x float> [[ABCD:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 4, i32 5, i32 6, i32 15>
 107 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 108 ; 256-bit variant: lanes 3 and 7 of the constant selector are nonzero, so expect a shufflevector taking lanes 3 and 7 from %abcd and the remaining lanes from %xyzw.
 109   %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
 110   ret <8 x float> %1
 111 }
 112
 113 define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
 114 ; CHECK-LABEL: @constant_blendvps_avx_zero(
 115 ; CHECK-NEXT:    ret <8 x float> [[XYZW:%.*]]
 116 ; 256-bit variant with an all-zero selector: the blend is expected to fold away and return its first operand, %xyzw.
 117   %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
 118   ret <8 x float> %1
 119 }
 120
 121 define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
 122 ; CHECK-LABEL: @constant_blendvps_avx_dup(
 123 ; CHECK-NEXT:    ret <8 x float> [[XYZW:%.*]]
 124 ; 256-bit variant with both data operands equal to %xyzw: the result is expected to be %xyzw regardless of %sel.
 125   %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
 126   ret <8 x float> %1
 127 }
 128
 129 define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
 130 ; CHECK-LABEL: @constant_pblendvb_avx2(
 131 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[XYZW:%.*]], <32 x i8> [[ABCD:%.*]], <32 x i32> <i32 0, i32 1, i32 34, i32 3, i32 36, i32 37, i32 38, i32 7, i32 8, i32 9, i32 42, i32 11, i32 44, i32 45, i32 46, i32 15, i32 16, i32 17, i32 50, i32 19, i32 52, i32 53, i32 54, i32 23, i32 24, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 31>
 132 ; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
 133 ; 256-bit byte blend whose selector repeats the same 8-byte 0/255 pattern four times: expect a shufflevector taking the 255 lanes (2, 4-6 within each group of 8) from %abcd and every other lane from %xyzw.
 134   %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
 135   <32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
 136   i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
 137   i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
 138   i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
 139   ret <32 x i8> %1
 140 }
 141
 142 define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
 143 ; CHECK-LABEL: @constant_pblendvb_avx2_zero(
 144 ; CHECK-NEXT:    ret <32 x i8> [[XYZW:%.*]]
 145 ; 256-bit byte blend with an all-zero selector: expected to fold away and return its first operand, %xyzw.
 146   %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
 147   ret <32 x i8> %1
 148 }
 149
 150 define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
 151 ; CHECK-LABEL: @constant_pblendvb_avx2_dup(
 152 ; CHECK-NEXT:    ret <32 x i8> [[XYZW:%.*]]
 153 ; 256-bit byte blend with both data operands equal to %xyzw: the result is expected to be %xyzw regardless of %sel.
 154   %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
 155   ret <32 x i8> %1
 156 }
 157
 158 define <4 x float> @sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i1> %cond) {
 159 ; CHECK-LABEL: @sel_v4f32(
 160 ; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[COND:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]]
 161 ; CHECK-NEXT:    ret <4 x float> [[R]]
 162 ; The selector is a sign-extended <4 x i1> bitcast to <4 x float>: the blend is expected to become a plain select on %cond (true lanes take %y, false lanes take %x), with the sext and bitcast gone.
 163   %s = sext <4 x i1> %cond to <4 x i32>
 164   %b = bitcast <4 x i32> %s to <4 x float>
 165   %r = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %x, <4 x float> %y, <4 x float> %b)
 166   ret <4 x float> %r
 167 }
 168
 169 define <2 x double> @sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i1> %cond) {
 170 ; CHECK-LABEL: @sel_v2f64(
 171 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[COND:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[X:%.*]]
 172 ; CHECK-NEXT:    ret <2 x double> [[R]]
 173 ; The selector is a sign-extended <2 x i1> bitcast to <2 x double>: the blend is expected to become a plain select on %cond (true lanes take %y, false lanes take %x), with the sext and bitcast gone.
 174   %s = sext <2 x i1> %cond to <2 x i64>
 175   %b = bitcast <2 x i64> %s to <2 x double>
 176   %r = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %x, <2 x double> %y, <2 x double> %b)
 177   ret <2 x double> %r
 178 }
 179
 180 ; Bitcast X and Y to <4 x i32>, select between them, bitcast the result back to <16 x i8>, and remove the intrinsic.
 181
 182 define <16 x i8> @sel_v4i32(<16 x i8> %x, <16 x i8> %y, <4 x i1> %cond) {
 183 ; CHECK-LABEL: @sel_v4i32(
 184 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32>
 185 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[Y:%.*]] to <4 x i32>
 186 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP1]]
 187 ; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
 188 ; CHECK-NEXT:    ret <16 x i8> [[R]]
 189 ; The byte-blend selector is a sext of <4 x i1> viewed as 16 bytes, so the condition has 4 lanes while the data has 16: the select is expected to happen at <4 x i32> width with bitcasts around it.
 190   %s = sext <4 x i1> %cond to <4 x i32>
 191   %b = bitcast <4 x i32> %s to <16 x i8>
 192   %r = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b)
 193   ret <16 x i8> %r
 194 }
 195
 196 define <16 x i8> @sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i1> %cond) {
 197 ; CHECK-LABEL: @sel_v16i8(
 198 ; CHECK-NEXT:    [[R:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i8> [[Y:%.*]], <16 x i8> [[X:%.*]]
 199 ; CHECK-NEXT:    ret <16 x i8> [[R]]
 200 ; The selector is a sext of <16 x i1> with the same lane count as the data: the byte blend is expected to become a select on %cond with no bitcasts.
 201   %s = sext <16 x i1> %cond to <16 x i8>
 202   %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %x, <16 x i8> %y, <16 x i8> %s)
 203   ret <16 x i8> %r
 204 }
 205
 206 ; PR38814: https://bugs.llvm.org/show_bug.cgi?id=38814
 207 ; Repeat the tests above using the minimal form that we expect when using C intrinsics in code.
 208 ; This verifies that nothing is interfering with the blend transform. This also tests the
 209 ; expected IR when one of the blend operands is a constant 0 vector. Potentially, this could
 210 ; be transformed to bitwise logic in IR, but currently that transform is left to the backend.
 211
 212 define <4 x float> @sel_v4f32_sse_reality(<4 x float>* %x, <4 x float> %y, <4 x float> %z) {
 213 ; CHECK-LABEL: @sel_v4f32_sse_reality(
 214 ; CHECK-NEXT:    [[LD:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
 215 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x float> [[Z:%.*]], [[Y:%.*]]
 216 ; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[CMP]], <4 x float> zeroinitializer, <4 x float> [[LD]]
 217 ; CHECK-NEXT:    ret <4 x float> [[R]]
 218 ; The selector is built from fcmp olt + sext + bitcast and the second blend operand is zero: expect a select on the compare that yields zero in true lanes and the loaded value in false lanes.
 219   %ld = load <4 x float>, <4 x float>* %x, align 16
 220   %cmp = fcmp olt <4 x float> %z, %y
 221   %sext = sext <4 x i1> %cmp to <4 x i32>
 222   %cond = bitcast <4 x i32> %sext to <4 x float>
 223   %r = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %ld, <4 x float> zeroinitializer, <4 x float> %cond)
 224   ret <4 x float> %r
 225 }
 226
 227 define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x, <2 x double> %y, <2 x double> %z) {
 228 ; CHECK-LABEL: @sel_v2f64_sse_reality(
 229 ; CHECK-NEXT:    [[LD:%.*]] = load <2 x double>, <2 x double>* [[X:%.*]], align 16
 230 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <2 x double> [[Z:%.*]], [[Y:%.*]]
 231 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP]], <2 x double> zeroinitializer, <2 x double> [[LD]]
 232 ; CHECK-NEXT:    ret <2 x double> [[R]]
 233 ; The selector is built from fcmp olt + sext + bitcast and the second blend operand is zero: expect a select on the compare that yields zero in true lanes and the loaded value in false lanes.
 234   %ld = load <2 x double>, <2 x double>* %x, align 16
 235   %cmp = fcmp olt <2 x double> %z, %y
 236   %sext = sext <2 x i1> %cmp to <2 x i64>
 237   %cond = bitcast <2 x i64> %sext to <2 x double>
 238   %r = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %ld, <2 x double> zeroinitializer, <2 x double> %cond)
 239   ret <2 x double> %r
 240 }
 241
 242 ; Bitcast the inputs and the result and remove the intrinsic.
 243
 244 define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
 245 ; CHECK-LABEL: @sel_v4i32_sse_reality(
 246 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <4 x i32>*
 247 ; CHECK-NEXT:    [[LD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
 248 ; CHECK-NEXT:    [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
 249 ; CHECK-NEXT:    [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <4 x i32>
 250 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i32> [[YCAST]], [[ZCAST]]
 251 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> [[LD1]]
 252 ; CHECK-NEXT:    [[RCAST:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
 253 ; CHECK-NEXT:    ret <2 x i64> [[RCAST]]
 254 ; A 4-lane icmp sgt result drives a 16-byte blend against zero: expect the load to be retyped to <4 x i32>, a <4 x i32> select on the compare, and a single bitcast of the result to <2 x i64>.
 255   %xcast = bitcast <2 x i64>* %x to <16 x i8>*
 256   %ld = load <16 x i8>, <16 x i8>* %xcast, align 16
 257   %ycast = bitcast <2 x i64> %y to <4 x i32>
 258   %zcast = bitcast <2 x i64> %z to <4 x i32>
 259   %cmp = icmp sgt <4 x i32> %ycast, %zcast
 260   %sext = sext <4 x i1> %cmp to <4 x i32>
 261   %cond = bitcast <4 x i32> %sext to <16 x i8>
 262   %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %ld, <16 x i8> zeroinitializer, <16 x i8> %cond)
 263   %rcast = bitcast <16 x i8> %r to <2 x i64>
 264   ret <2 x i64> %rcast
 265 }
 266
 267 define <2 x i64> @sel_v16i8_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
 268 ; CHECK-LABEL: @sel_v16i8_sse_reality(
 269 ; CHECK-NEXT:    [[XCAST:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <16 x i8>*
 270 ; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[XCAST]], align 16
 271 ; CHECK-NEXT:    [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <16 x i8>
 272 ; CHECK-NEXT:    [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <16 x i8>
 273 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <16 x i8> [[YCAST]], [[ZCAST]]
 274 ; CHECK-NEXT:    [[R:%.*]] = select <16 x i1> [[CMP]], <16 x i8> zeroinitializer, <16 x i8> [[LD]]
 275 ; CHECK-NEXT:    [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
 276 ; CHECK-NEXT:    ret <2 x i64> [[RCAST]]
 277 ; A 16-lane icmp sgt result drives the byte blend against zero directly: expect a <16 x i8> select on the compare (zero in true lanes, the load in false lanes) with the surrounding casts left as written.
 278   %xcast = bitcast <2 x i64>* %x to <16 x i8>*
 279   %ld = load <16 x i8>, <16 x i8>* %xcast, align 16
 280   %ycast = bitcast <2 x i64> %y to <16 x i8>
 281   %zcast = bitcast <2 x i64> %z to <16 x i8>
 282   %cmp = icmp sgt <16 x i8> %ycast, %zcast
 283   %sext = sext <16 x i1> %cmp to <16 x i8>
 284   %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %ld, <16 x i8> zeroinitializer, <16 x i8> %sext)
 285   %rcast = bitcast <16 x i8> %r to <2 x i64>
 286   ret <2 x i64> %rcast
 287 }
 288
 289 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) ; 128-bit byte blend; the tests above pass (first source, second source, selector)
 290 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) ; 128-bit blend of four float lanes, same operand order
 291 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) ; 128-bit blend of two double lanes, same operand order
 292
 293 declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) ; 256-bit byte blend, same operand order
 294 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) ; 256-bit blend of eight float lanes, same operand order
 295 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) ; 256-bit blend of four double lanes, same operand order
 296