test/Transforms/InstCombine/X86/x86-insertps.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -S | FileCheck %s
   3
   4 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
     ; Intrinsic under test: two <4 x float> operands plus an i8 control byte.
     ; Every call visible in this test passes the control byte as a constant.
   5
   6 ; If all zero mask bits are set, return a zero vector regardless of the other control bits.
   7
   8 define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
   9 ; CHECK-LABEL: @insertps_0x0f(
  10 ; CHECK-NEXT:    ret <4 x float> zeroinitializer
  11 ;
     ; i8 15 is 0x0f (0b00001111): only the low four bits of the control byte are set.
     ; Expected: the call disappears and the function returns zeroinitializer.
  12   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  13   ret <4 x float> %res
  14 }
  15
  16 define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
  17 ; CHECK-LABEL: @insertps_0xff(
  18 ; CHECK-NEXT:    ret <4 x float> zeroinitializer
  19 ;
     ; i8 255 is 0xff: every bit of the control byte is set.
     ; Expected: same fold as the 0x0f case, the function returns zeroinitializer.
  20   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  21   ret <4 x float> %res
  22 }
  23
  24 ; If some zero mask bits are set that do not override the insertion, we do not change anything.
  25
  26 define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
  27 ; CHECK-LABEL: @insertps_0x0c(
  28 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], i8 12)
  29 ; CHECK-NEXT:    ret <4 x float> [[RES]]
  30 ;
     ; i8 12 is 0x0c (0b00001100): bits 2 and 3 are set, all others clear.
     ; Expected: no fold; the CHECK lines match the same intrinsic call with i8 12.
  31   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  32   ret <4 x float> %res
  33 }
  34
  35 ; ...unless both input vectors are the same operand.
  36
  37 define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
  38 ; CHECK-LABEL: @insertps_0x15_single_input(
  39 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
  40 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
  41 ;
     ; i8 21 is 0x15 (0b00010101); both vector operands of the call are %v1.
     ; Expected: one shufflevector of %v1 with a constant vector. Mask <4,0,6,3> yields
     ; 0.0 in lanes 0 and 2, %v1[0] in lane 1, and %v1[3] in lane 3.
  42   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  43   ret <4 x float> %res
  44 }
  45
  46 ; The zero mask overrides the insertion lane.
  47
  48 define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
  49 ; CHECK-LABEL: @insertps_0x1a_single_input(
  50 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  51 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
  52 ;
     ; i8 26 is 0x1a (0b00011010); both vector operands of the call are %v1.
     ; Expected: mask <0,5,2,7> keeps %v1[0] and %v1[2] in place and puts 0.0 in
     ; lanes 1 and 3, so no element of %v1 changes lane.
  53   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  54   ret <4 x float> %res
  55 }
  56
  57 ; The zero mask overrides the insertion lane, so the second input vector is not used.
  58
  59 define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
  60 ; CHECK-LABEL: @insertps_0xc1(
  61 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[V1:%.*]], float 0.000000e+00, i32 0
  62 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
  63 ;
     ; i8 193 is 0xc1 (0b11000001).
     ; Expected: an insertelement writing 0.0 into lane 0 of %v1; %v2 does not appear
     ; in the expected output.
  64   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  65   ret <4 x float> %res
  66 }
  67
  68 ; If no zero mask bits are set, convert to a shuffle.
  69
  70 define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
  71 ; CHECK-LABEL: @insertps_0x00(
  72 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V2:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  73 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
  74 ;
     ; i8 0: every control bit is clear.
     ; Expected: a shuffle producing <%v2[0], %v1[1], %v1[2], %v1[3]>.
     ; NOTE(review): this CHECK line lists %v2 as the first shuffle operand (mask <0,5,6,7>),
     ; while the other two-input shuffle tests here list %v1 first - confirm against opt output.
  75   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  76   ret <4 x float> %res
  77 }
  78
  79 define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
  80 ; CHECK-LABEL: @insertps_0x10(
  81 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  82 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
  83 ;
     ; i8 16 is 0x10 (0b00010000).
     ; Expected: mask <0,4,2,3> places %v2[0] in lane 1 and keeps the other lanes of %v1.
  84   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  85   ret <4 x float> %res
  86 }
  87
  88 define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
  89 ; CHECK-LABEL: @insertps_0x20(
  90 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  91 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
  92 ;
     ; i8 32 is 0x20 (0b00100000).
     ; Expected: mask <0,1,4,3> places %v2[0] in lane 2 and keeps the other lanes of %v1.
  93   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  94   ret <4 x float> %res
  95 }
  96
  97 define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
  98 ; CHECK-LABEL: @insertps_0x30(
  99 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
 100 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 101 ;
     ; i8 48 is 0x30 (0b00110000).
     ; Expected: mask <0,1,2,4> places %v2[0] in lane 3 and keeps the other lanes of %v1.
 102   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
 103   ret <4 x float> %res
 104 }
 105
 106 define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
 107 ; CHECK-LABEL: @insertps_0xc0(
 108 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 7, i32 1, i32 2, i32 3>
 109 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 110 ;
     ; i8 192 is 0xc0 (0b11000000).
     ; Expected: mask <7,1,2,3> places %v2[3] in lane 0 and keeps the other lanes of %v1.
 111   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
 112   ret <4 x float> %res
 113 }
 114
 115 define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
 116 ; CHECK-LABEL: @insertps_0xd0(
 117 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 7, i32 2, i32 3>
 118 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 119 ;
     ; i8 208 is 0xd0 (0b11010000).
     ; Expected: mask <0,7,2,3> places %v2[3] in lane 1 and keeps the other lanes of %v1.
 120   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
 121   ret <4 x float> %res
 122 }
 123
 124 define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
 125 ; CHECK-LABEL: @insertps_0xe0(
 126 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 7, i32 3>
 127 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 128 ;
     ; i8 224 is 0xe0 (0b11100000).
     ; Expected: mask <0,1,7,3> places %v2[3] in lane 2 and keeps the other lanes of %v1.
 129   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
 130   ret <4 x float> %res
 131 }
 132
 133 define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
 134 ; CHECK-LABEL: @insertps_0xf0(
 135 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
 136 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 137 ;
     ; i8 240 is 0xf0 (0b11110000): the upper four bits are set and the low four are clear.
     ; Expected: mask <0,1,2,7> places %v2[3] in lane 3 and keeps the other lanes of %v1.
 138   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
 139   ret <4 x float> %res
 140 }