llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
   3 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
   4
   5 declare void @use(<4 x i32>)
   6 declare void @usef(<4 x float>)
   7
    8 ; Eliminating an insert is profitable: both operands insert a scalar into lane 0 of poison, so the add is expected to be scalarized and followed by a single insert.
    9
   10 define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
   11 ; CHECK-LABEL: @ins0_ins0_add(
   12 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
   13 ; CHECK-NEXT:    [[R:%.*]] = insertelement <16 x i8> poison, i8 [[R_SCALAR]], i64 0
   14 ; CHECK-NEXT:    ret <16 x i8> [[R]]
   15 ;
   16   %i0 = insertelement <16 x i8> poison, i8 %x, i32 0
   17   %i1 = insertelement <16 x i8> poison, i8 %y, i32 0
   18   %r = add <16 x i8> %i0, %i1
   19   ret <16 x i8> %r
   20 }
  21
   22 ; Eliminating an insert is still profitable. Flags propagate. Mismatched index types are ok (i8 and i32 here; the new insert is expected to use an i64 index).
   23
   24 define <8 x i16> @ins0_ins0_sub_flags(i16 %x, i16 %y) {
   25 ; CHECK-LABEL: @ins0_ins0_sub_flags(
   26 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = sub nuw nsw i16 [[X:%.*]], [[Y:%.*]]
   27 ; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[R_SCALAR]], i64 5
   28 ; CHECK-NEXT:    ret <8 x i16> [[R]]
   29 ;
   30   %i0 = insertelement <8 x i16> poison, i16 %x, i8 5
   31   %i1 = insertelement <8 x i16> poison, i16 %y, i32 5
   32   %r = sub nsw nuw <8 x i16> %i0, %i1
   33   ret <8 x i16> %r
   34 }
  35
   36 ; The new base vector is calculated by constant folding the binop on the original base vectors.
   37 ; Both bases are poison here, so the CHECK lines expect a poison base for 'poison ^ poison'.
   38
   39 define <2 x i64> @ins1_ins1_xor(i64 %x, i64 %y) {
   40 ; CHECK-LABEL: @ins1_ins1_xor(
   41 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = xor i64 [[X:%.*]], [[Y:%.*]]
   42 ; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> poison, i64 [[R_SCALAR]], i64 1
   43 ; CHECK-NEXT:    ret <2 x i64> [[R]]
   44 ;
   45   %i0 = insertelement <2 x i64> poison, i64 %x, i64 1
   46   %i1 = insertelement <2 x i64> poison, i64 %y, i32 1
   47   %r = xor <2 x i64> %i0, %i1
   48   ret <2 x i64> %r
   49 }
   50 ; Scalarization applies repeatedly: a chain of three binops fed by lane-1 inserts is expected to become three scalar ops and a single insert.
   51 define <2 x i64> @ins1_ins1_iterate(i64 %w, i64 %x, i64 %y, i64 %z) {
   52 ; CHECK-LABEL: @ins1_ins1_iterate(
   53 ; CHECK-NEXT:    [[S0_SCALAR:%.*]] = sub i64 [[W:%.*]], [[X:%.*]]
   54 ; CHECK-NEXT:    [[S1_SCALAR:%.*]] = or i64 [[S0_SCALAR]], [[Y:%.*]]
   55 ; CHECK-NEXT:    [[S2_SCALAR:%.*]] = shl i64 [[Z:%.*]], [[S1_SCALAR]]
   56 ; CHECK-NEXT:    [[S2:%.*]] = insertelement <2 x i64> poison, i64 [[S2_SCALAR]], i64 1
   57 ; CHECK-NEXT:    ret <2 x i64> [[S2]]
   58 ;
   59   %i0 = insertelement <2 x i64> poison, i64 %w, i64 1
   60   %i1 = insertelement <2 x i64> poison, i64 %x, i32 1
   61   %s0 = sub <2 x i64> %i0, %i1
   62   %i2 = insertelement <2 x i64> poison, i64 %y, i32 1
   63   %s1 = or <2 x i64> %s0, %i2
   64   %i3 = insertelement <2 x i64> poison, i64 %z, i32 1
   65   %s2 = shl <2 x i64> %i3, %s1
   66   ret <2 x i64> %s2
   67 }
  68
   69 ; The inserts are free, but it's still better to scalarize. The fast-math flags (reassoc nsz) are expected on the scalar fadd.
   70
   71 define <2 x double> @ins0_ins0_fadd(double %x, double %y) {
   72 ; CHECK-LABEL: @ins0_ins0_fadd(
   73 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = fadd reassoc nsz double [[X:%.*]], [[Y:%.*]]
   74 ; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x double> poison, double [[R_SCALAR]], i64 0
   75 ; CHECK-NEXT:    ret <2 x double> [[R]]
   76 ;
   77   %i0 = insertelement <2 x double> poison, double %x, i32 0
   78   %i1 = insertelement <2 x double> poison, double %y, i32 0
   79   %r = fadd reassoc nsz <2 x double> %i0, %i1
   80   ret <2 x double> %r
   81 }
  82
   83 ; Negative test - mismatched indexes (lane 1 vs. lane 0), so the IR is expected to be unchanged (but could fold this).
   84
   85 define <16 x i8> @ins1_ins0_add(i8 %x, i8 %y) {
   86 ; CHECK-LABEL: @ins1_ins0_add(
   87 ; CHECK-NEXT:    [[I0:%.*]] = insertelement <16 x i8> poison, i8 [[X:%.*]], i32 1
   88 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <16 x i8> poison, i8 [[Y:%.*]], i32 0
   89 ; CHECK-NEXT:    [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
   90 ; CHECK-NEXT:    ret <16 x i8> [[R]]
   91 ;
   92   %i0 = insertelement <16 x i8> poison, i8 %x, i32 1
   93   %i1 = insertelement <16 x i8> poison, i8 %y, i32 0
   94   %r = add <16 x i8> %i0, %i1
   95   ret <16 x i8> %r
   96 }
  97
   98 ; Base vector does not have to be undef/poison: one base is zeroinitializer, and the CHECK lines expect the new base for 'zeroinitializer * poison' to be poison.
   99
  100 define <4 x i32> @ins0_ins0_mul(i32 %x, i32 %y) {
  101 ; CHECK-LABEL: @ins0_ins0_mul(
  102 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]]
  103 ; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i32> poison, i32 [[R_SCALAR]], i64 0
  104 ; CHECK-NEXT:    ret <4 x i32> [[R]]
  105 ;
  106   %i0 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  107   %i1 = insertelement <4 x i32> poison, i32 %y, i32 0
  108   %r = mul <4 x i32> %i0, %i1
  109   ret <4 x i32> %r
  110 }
 111
  112 ; It is safe to scalarize any binop (no extra UB/poison danger). The constant bases are folded: <42, -42> sdiv <-7, 128> is expected as <-6, 0>.
  113
  114 define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) {
  115 ; CHECK-LABEL: @ins1_ins1_sdiv(
  116 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = sdiv i64 [[X:%.*]], [[Y:%.*]]
  117 ; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 -6, i64 0>, i64 [[R_SCALAR]], i64 1
  118 ; CHECK-NEXT:    ret <2 x i64> [[R]]
  119 ;
  120   %i0 = insertelement <2 x i64> <i64 42, i64 -42>, i64 %x, i64 1
  121   %i1 = insertelement <2 x i64> <i64 -7, i64 128>, i64 %y, i32 1
  122   %r = sdiv <2 x i64> %i0, %i1
  123   ret <2 x i64> %r
  124 }
 125
  126 ; Constant folding deals with undef per element - the entire value does not become undef (lane 0 folds to 6; lane 1, 'undef udiv undef', is expected as poison).
  127
  128 define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) {
  129 ; CHECK-LABEL: @ins1_ins1_udiv(
  130 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = udiv i64 [[X:%.*]], [[Y:%.*]]
  131 ; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 6, i64 poison>, i64 [[R_SCALAR]], i64 1
  132 ; CHECK-NEXT:    ret <2 x i64> [[R]]
  133 ;
  134   %i0 = insertelement <2 x i64> <i64 42, i64 undef>, i64 %x, i32 1
  135   %i1 = insertelement <2 x i64> <i64 7, i64 undef>, i64 %y, i32 1
  136   %r = udiv <2 x i64> %i0, %i1
  137   ret <2 x i64> %r
  138 }
 139
  140 ; This could be simplified -- it creates immediate UB without the transform because the
  141 ; divisor has an undef element (lane 0) -- but that is hidden after the transform.
  142
  143 define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
  144 ; CHECK-LABEL: @ins1_ins1_urem(
  145 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = urem i64 [[X:%.*]], [[Y:%.*]]
  146 ; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[R_SCALAR]], i64 1
  147 ; CHECK-NEXT:    ret <2 x i64> [[R]]
  148 ;
  149   %i0 = insertelement <2 x i64> <i64 42, i64 undef>, i64 %x, i64 1
  150   %i1 = insertelement <2 x i64> <i64 undef, i64 128>, i64 %y, i32 1
  151   %r = urem <2 x i64> %i0, %i1
  152   ret <2 x i64> %r
  153 }
 154
  155 ; Extra use is accounted for in cost calculation: %i0 is kept for the call to @use, and the xor is still expected to be scalarized.
  156
  157 define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
  158 ; CHECK-LABEL: @ins0_ins0_xor(
  159 ; CHECK-NEXT:    [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
  160 ; CHECK-NEXT:    call void @use(<4 x i32> [[I0]])
  161 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
  162 ; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i32> poison, i32 [[R_SCALAR]], i64 0
  163 ; CHECK-NEXT:    ret <4 x i32> [[R]]
  164 ;
  165   %i0 = insertelement <4 x i32> poison, i32 %x, i32 0
  166   call void @use(<4 x i32> %i0)
  167   %i1 = insertelement <4 x i32> poison, i32 %y, i32 0
  168   %r = xor <4 x i32> %i0, %i1
  169   ret <4 x i32> %r
  170 }
 171
  172 ; Extra use is accounted for in cost calculation: %i1 is kept for the call to @usef, and the fmul is still expected to be scalarized.
  173
  174 define <4 x float> @ins1_ins1_fmul(float %x, float %y) {
  175 ; CHECK-LABEL: @ins1_ins1_fmul(
  176 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 1
  177 ; CHECK-NEXT:    call void @usef(<4 x float> [[I1]])
  178 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = fmul float [[X:%.*]], [[Y]]
  179 ; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> poison, float [[R_SCALAR]], i64 1
  180 ; CHECK-NEXT:    ret <4 x float> [[R]]
  181 ;
  182   %i0 = insertelement <4 x float> poison, float %x, i32 1
  183   %i1 = insertelement <4 x float> poison, float %y, i32 1
  184   call void @usef(<4 x float> %i1)
  185   %r = fmul <4 x float> %i0, %i1
  186   ret <4 x float> %r
  187 }
 188
  189 ; If the scalar binop is not cheaper than the vector binop, extra uses can prevent the transform (both inserts have extra uses here; the IR is expected to be unchanged).
  190
  191 define <4 x float> @ins2_ins2_fsub(float %x, float %y) {
  192 ; CHECK-LABEL: @ins2_ins2_fsub(
  193 ; CHECK-NEXT:    [[I0:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 2
  194 ; CHECK-NEXT:    call void @usef(<4 x float> [[I0]])
  195 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 2
  196 ; CHECK-NEXT:    call void @usef(<4 x float> [[I1]])
  197 ; CHECK-NEXT:    [[R:%.*]] = fsub <4 x float> [[I0]], [[I1]]
  198 ; CHECK-NEXT:    ret <4 x float> [[R]]
  199 ;
  200   %i0 = insertelement <4 x float> poison, float %x, i32 2
  201   call void @usef(<4 x float> %i0)
  202   %i1 = insertelement <4 x float> poison, float %y, i32 2
  203   call void @usef(<4 x float> %i1)
  204   %r = fsub <4 x float> %i0, %i1
  205   ret <4 x float> %r
  206 }
 207
  208 ; It may be worth scalarizing an expensive binop even if both inserts have extra uses (expected to scalarize with SSE2 but not with AVX2).
  209
  210 define <4 x float> @ins3_ins3_fdiv(float %x, float %y) {
  211 ; SSE-LABEL: @ins3_ins3_fdiv(
  212 ; SSE-NEXT:    [[I0:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 3
  213 ; SSE-NEXT:    call void @usef(<4 x float> [[I0]])
  214 ; SSE-NEXT:    [[I1:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 3
  215 ; SSE-NEXT:    call void @usef(<4 x float> [[I1]])
  216 ; SSE-NEXT:    [[R_SCALAR:%.*]] = fdiv float [[X]], [[Y]]
  217 ; SSE-NEXT:    [[R:%.*]] = insertelement <4 x float> poison, float [[R_SCALAR]], i64 3
  218 ; SSE-NEXT:    ret <4 x float> [[R]]
  219 ;
  220 ; AVX-LABEL: @ins3_ins3_fdiv(
  221 ; AVX-NEXT:    [[I0:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 3
  222 ; AVX-NEXT:    call void @usef(<4 x float> [[I0]])
  223 ; AVX-NEXT:    [[I1:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i32 3
  224 ; AVX-NEXT:    call void @usef(<4 x float> [[I1]])
  225 ; AVX-NEXT:    [[R:%.*]] = fdiv <4 x float> [[I0]], [[I1]]
  226 ; AVX-NEXT:    ret <4 x float> [[R]]
  227 ;
  228   %i0 = insertelement <4 x float> poison, float %x, i32 3
  229   call void @usef(<4 x float> %i0)
  230   %i1 = insertelement <4 x float> poison, float %y, i32 3
  231   call void @usef(<4 x float> %i1)
  232   %r = fdiv <4 x float> %i0, %i1
  233   ret <4 x float> %r
  234 }