llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
   3
   4 define void @julia_2xdouble(ptr sret([2 x double]), ptr, ptr, ptr) {
     ; Scalar two-element multiply-add: for each index i in {0, 1} the body
     ; computes %2[i] * %3[i] + %1[i], gathers both sums into a [2 x double]
     ; with an insertvalue chain, and stores that aggregate through the sret
     ; pointer %0.
     ; The autogenerated assertions expect <2 x double> loads, one vector fmul
     ; and one vector fadd, with extractelement instructions feeding the
     ; original insertvalue chain.
   5 ; CHECK-LABEL: @julia_2xdouble(
   6 ; CHECK-NEXT:  top:
   7 ; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr [[TMP2:%.*]], align 4
   8 ; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr [[TMP3:%.*]], align 4
   9 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
  10 ; CHECK-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr [[TMP1:%.*]], align 4
  11 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
  12 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
  13 ; CHECK-NEXT:    [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
  14 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
  15 ; CHECK-NEXT:    [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
  16 ; CHECK-NEXT:    store [2 x double] [[I1]], ptr [[TMP0:%.*]], align 4
  17 ; CHECK-NEXT:    ret void
  18 ;
  19 top:
     ; Element 0: product of the first doubles behind %2 and %3.
  20   %x0 = load double, ptr %2, align 4
  21   %y0 = load double, ptr %3, align 4
  22   %m0 = fmul double %x0, %y0
     ; Element 1: same product, addressed through GEPs with index 1.
  23   %px1 = getelementptr inbounds [2 x double], ptr %2, i64 0, i64 1
  24   %x1 = load double, ptr %px1, align 4
  25   %py1 = getelementptr inbounds [2 x double], ptr %3, i64 0, i64 1
  26   %y1 = load double, ptr %py1, align 4
  27   %m1 = fmul double %x1, %y1
     ; Add the matching element of %1 to each product and insert the sum into
     ; the result aggregate, starting from undef.
  28   %z0 = load double, ptr %1, align 4
  29   %a0 = fadd double %m0, %z0
  30   %i0 = insertvalue [2 x double] undef, double %a0, 0
  31   %pz1 = getelementptr inbounds [2 x double], ptr %1, i64 0, i64 1
  32   %z1 = load double, ptr %pz1, align 4
  33   %a1 = fadd double %m1, %z1
  34   %i1 = insertvalue [2 x double] %i0, double %a1, 1
     ; The completed aggregate is written to the sret slot in a single store.
  35   store [2 x double] %i1, ptr %0, align 4
  36   ret void
  37 }
  38
  39 define void @julia_4xfloat(ptr sret([4 x float]), ptr, ptr, ptr) {
     ; Four-element float variant of the multiply-add pattern: for each index
     ; i in 0..3 the body computes %2[i] * %3[i] + %1[i], collects the sums
     ; into a [4 x float] with an insertvalue chain, and stores the aggregate
     ; through the sret pointer %0.
     ; The autogenerated assertions expect <4 x float> loads, one vector fmul
     ; and one vector fadd, with four extractelement instructions feeding the
     ; original insertvalue chain.
  40 ; CHECK-LABEL: @julia_4xfloat(
  41 ; CHECK-NEXT:  top:
  42 ; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr [[TMP2:%.*]], align 4
  43 ; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[TMP3:%.*]], align 4
  44 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
  45 ; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x float>, ptr [[TMP1:%.*]], align 4
  46 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
  47 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
  48 ; CHECK-NEXT:    [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
  49 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
  50 ; CHECK-NEXT:    [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
  51 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
  52 ; CHECK-NEXT:    [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
  53 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
  54 ; CHECK-NEXT:    [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
  55 ; CHECK-NEXT:    store [4 x float] [[I3]], ptr [[TMP0:%.*]], align 4
  56 ; CHECK-NEXT:    ret void
  57 ;
  58 top:
     ; Products %m0..%m3: element i of %2 times element i of %3. Element 0 is
     ; loaded directly from the base pointers; elements 1..3 go through GEPs.
  59   %x0 = load float, ptr %2, align 4
  60   %y0 = load float, ptr %3, align 4
  61   %m0 = fmul float %x0, %y0
  62   %px1 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 1
  63   %x1 = load float, ptr %px1, align 4
  64   %py1 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 1
  65   %y1 = load float, ptr %py1, align 4
  66   %m1 = fmul float %x1, %y1
  67   %px2 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 2
  68   %x2 = load float, ptr %px2, align 4
  69   %py2 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 2
  70   %y2 = load float, ptr %py2, align 4
  71   %m2 = fmul float %x2, %y2
  72   %px3 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 3
  73   %x3 = load float, ptr %px3, align 4
  74   %py3 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 3
  75   %y3 = load float, ptr %py3, align 4
  76   %m3 = fmul float %x3, %y3
     ; Sums %a0..%a3: add element i of %1 to product i, inserting each sum
     ; into the result aggregate as soon as it is available.
  77   %z0 = load float, ptr %1, align 4
  78   %a0 = fadd float %m0, %z0
  79   %i0 = insertvalue [4 x float] undef, float %a0, 0
  80   %pz1 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 1
  81   %z1 = load float, ptr %pz1, align 4
  82   %a1 = fadd float %m1, %z1
  83   %i1 = insertvalue [4 x float] %i0, float %a1, 1
  84   %pz2 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 2
  85   %z2 = load float, ptr %pz2, align 4
  86   %a2 = fadd float %m2, %z2
  87   %i2 = insertvalue [4 x float] %i1, float %a2, 2
  88   %pz3 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 3
  89   %z3 = load float, ptr %pz3, align 4
  90   %a3 = fadd float %m3, %z3
  91   %i3 = insertvalue [4 x float] %i2, float %a3, 3
     ; The completed aggregate is written to the sret slot in a single store.
  92   store [4 x float] %i3, ptr %0, align 4
  93   ret void
  94 }
  95
  96 define void @julia_load_array_of_float(ptr %a, ptr %b, ptr %c) {
     ; Loads two whole [4 x float] aggregates from %a and %b, subtracts them
     ; element by element, rebuilds a [4 x float] with an insertvalue chain and
     ; stores it to %c. The extractvalue and fsub instructions below are not
     ; listed in element order.
     ; The autogenerated assertions expect both aggregate loads to become
     ; <4 x float> loads feeding a single vector fsub, with extractelement
     ; instructions supplying the insertvalue chain.
  97 ; CHECK-LABEL: @julia_load_array_of_float(
  98 ; CHECK-NEXT:  top:
  99 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
 100 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 101 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
 102 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
 103 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
 104 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
 105 ; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
 106 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
 107 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
 108 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 109 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
 110 ; CHECK-NEXT:    store [4 x float] [[C_ARR3]], ptr [[C:%.*]], align 4
 111 ; CHECK-NEXT:    ret void
 112 ;
 113 top:
     ; Aggregate loads followed by per-element extraction (order 0, 2, 1, then
     ; 3 further down).
 114   %a_arr = load [4 x float], ptr %a, align 4
 115   %a0 = extractvalue [4 x float] %a_arr, 0
 116   %a2 = extractvalue [4 x float] %a_arr, 2
 117   %a1 = extractvalue [4 x float] %a_arr, 1
 118   %b_arr = load [4 x float], ptr %b, align 4
 119   %b0 = extractvalue [4 x float] %b_arr, 0
 120   %b2 = extractvalue [4 x float] %b_arr, 2
 121   %b1 = extractvalue [4 x float] %b_arr, 1
 122   %a3 = extractvalue [4 x float] %a_arr, 3
     ; Element-wise differences %c0..%c3, interleaved with the remaining
     ; extraction and with the insertvalue chain that assembles the result.
 123   %c1 = fsub float %a1, %b1
 124   %b3 = extractvalue [4 x float] %b_arr, 3
 125   %c0 = fsub float %a0, %b0
 126   %c2 = fsub float %a2, %b2
 127   %c_arr0 = insertvalue [4 x float] undef, float %c0, 0
 128   %c_arr1 = insertvalue [4 x float] %c_arr0, float %c1, 1
 129   %c3 = fsub float %a3, %b3
 130   %c_arr2 = insertvalue [4 x float] %c_arr1, float %c2, 2
 131   %c_arr3 = insertvalue [4 x float] %c_arr2, float %c3, 3
 132   store [4 x float] %c_arr3, ptr %c, align 4
 133   ret void
 134 }
 135
 136 define void @julia_load_array_of_i32(ptr %a, ptr %b, ptr %c) {
     ; Integer counterpart of the [4 x float] aggregate test: loads two whole
     ; [4 x i32] aggregates from %a and %b, subtracts them element by element,
     ; rebuilds a [4 x i32] with an insertvalue chain and stores it to %c. The
     ; extractvalue and sub instructions below are not listed in element order.
     ; The autogenerated assertions expect both aggregate loads to become
     ; <4 x i32> loads feeding a single vector sub, with extractelement
     ; instructions supplying the insertvalue chain.
 137 ; CHECK-LABEL: @julia_load_array_of_i32(
 138 ; CHECK-NEXT:  top:
 139 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
 140 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 141 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
 142 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
 143 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
 144 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
 145 ; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
 146 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
 147 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
 148 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
 149 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
 150 ; CHECK-NEXT:    store [4 x i32] [[C_ARR3]], ptr [[C:%.*]], align 4
 151 ; CHECK-NEXT:    ret void
 152 ;
 153 top:
     ; Aggregate loads followed by per-element extraction (order 0, 2, 1, then
     ; 3 further down).
 154   %a_arr = load [4 x i32], ptr %a, align 4
 155   %a0 = extractvalue [4 x i32] %a_arr, 0
 156   %a2 = extractvalue [4 x i32] %a_arr, 2
 157   %a1 = extractvalue [4 x i32] %a_arr, 1
 158   %b_arr = load [4 x i32], ptr %b, align 4
 159   %b0 = extractvalue [4 x i32] %b_arr, 0
 160   %b2 = extractvalue [4 x i32] %b_arr, 2
 161   %b1 = extractvalue [4 x i32] %b_arr, 1
 162   %a3 = extractvalue [4 x i32] %a_arr, 3
     ; Element-wise differences %c0..%c3, interleaved with the remaining
     ; extraction and with the insertvalue chain that assembles the result.
 163   %c1 = sub i32 %a1, %b1
 164   %b3 = extractvalue [4 x i32] %b_arr, 3
 165   %c0 = sub i32 %a0, %b0
 166   %c2 = sub i32 %a2, %b2
 167   %c_arr0 = insertvalue [4 x i32] undef, i32 %c0, 0
 168   %c_arr1 = insertvalue [4 x i32] %c_arr0, i32 %c1, 1
 169   %c3 = sub i32 %a3, %b3
 170   %c_arr2 = insertvalue [4 x i32] %c_arr1, i32 %c2, 2
 171   %c_arr3 = insertvalue [4 x i32] %c_arr2, i32 %c3, 3
 172   store [4 x i32] %c_arr3, ptr %c, align 4
 173   ret void
 174 }
 175
 176 ; Almost identical to the previous test, but for a type that should NOT be vectorized.
     ; The body is the same load / extractvalue / sub / insertvalue / store
     ; sequence, written with [4 x i16] aggregates. The autogenerated assertions
     ; expect every scalar instruction to appear in the output unchanged and in
     ; its original order.
 177 ;
 178 define void @julia_load_array_of_i16(ptr %a, ptr %b, ptr %c) {
 179 ; CHECK-LABEL: @julia_load_array_of_i16(
 180 ; CHECK-NEXT:  top:
 181 ; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x i16], ptr [[A:%.*]], align 4
 182 ; CHECK-NEXT:    [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
 183 ; CHECK-NEXT:    [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
 184 ; CHECK-NEXT:    [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
 185 ; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i16], ptr [[B:%.*]], align 4
 186 ; CHECK-NEXT:    [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
 187 ; CHECK-NEXT:    [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
 188 ; CHECK-NEXT:    [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
 189 ; CHECK-NEXT:    [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3
 190 ; CHECK-NEXT:    [[C1:%.*]] = sub i16 [[A1]], [[B1]]
 191 ; CHECK-NEXT:    [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3
 192 ; CHECK-NEXT:    [[C0:%.*]] = sub i16 [[A0]], [[B0]]
 193 ; CHECK-NEXT:    [[C2:%.*]] = sub i16 [[A2]], [[B2]]
 194 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0
 195 ; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1
 196 ; CHECK-NEXT:    [[C3:%.*]] = sub i16 [[A3]], [[B3]]
 197 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
 198 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
 199 ; CHECK-NEXT:    store [4 x i16] [[C_ARR3]], ptr [[C:%.*]], align 4
 200 ; CHECK-NEXT:    ret void
 201 ;
 202 top:
     ; Aggregate loads followed by per-element extraction (order 0, 2, 1, then
     ; 3 further down).
 203   %a_arr = load [4 x i16], ptr %a, align 4
 204   %a0 = extractvalue [4 x i16] %a_arr, 0
 205   %a2 = extractvalue [4 x i16] %a_arr, 2
 206   %a1 = extractvalue [4 x i16] %a_arr, 1
 207   %b_arr = load [4 x i16], ptr %b, align 4
 208   %b0 = extractvalue [4 x i16] %b_arr, 0
 209   %b2 = extractvalue [4 x i16] %b_arr, 2
 210   %b1 = extractvalue [4 x i16] %b_arr, 1
 211   %a3 = extractvalue [4 x i16] %a_arr, 3
     ; Element-wise differences %c0..%c3, interleaved with the remaining
     ; extraction and with the insertvalue chain that assembles the result.
 212   %c1 = sub i16 %a1, %b1
 213   %b3 = extractvalue [4 x i16] %b_arr, 3
 214   %c0 = sub i16 %a0, %b0
 215   %c2 = sub i16 %a2, %b2
 216   %c_arr0 = insertvalue [4 x i16] undef, i16 %c0, 0
 217   %c_arr1 = insertvalue [4 x i16] %c_arr0, i16 %c1, 1
 218   %c3 = sub i16 %a3, %b3
 219   %c_arr2 = insertvalue [4 x i16] %c_arr1, i16 %c2, 2
 220   %c_arr3 = insertvalue [4 x i16] %c_arr2, i16 %c3, 3
 221   store [4 x i16] %c_arr3, ptr %c, align 4
 222   ret void
 223 }
 224
 225 %pseudovec = type { float, float, float, float }
     ; Named struct of four floats, used as the aggregate type in the test below
     ; in place of a [4 x float] array.
 226
     ; Loads two whole %pseudovec structs from %a and %b, subtracts them field by
     ; field, rebuilds a %pseudovec with an insertvalue chain and stores it to
     ; %c. The extractvalue and fsub instructions are interleaved and not listed
     ; in field order (%c3 is computed before %c2).
     ; The autogenerated assertions expect both struct loads to become
     ; <4 x float> loads feeding a single vector fsub, with extractelement
     ; instructions supplying the insertvalue chain.
 227 define void @julia_load_struct_of_float(ptr %a, ptr %b, ptr %c) {
 228 ; CHECK-LABEL: @julia_load_struct_of_float(
 229 ; CHECK-NEXT:  top:
 230 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
 231 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 232 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
 233 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
 234 ; CHECK-NEXT:    [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC:%.*]] undef, float [[TMP5]], 0
 235 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
 236 ; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT0]], float [[TMP6]], 1
 237 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
 238 ; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT1]], float [[TMP7]], 2
 239 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 240 ; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT2]], float [[TMP8]], 3
 241 ; CHECK-NEXT:    store [[PSEUDOVEC]] [[C_STRUCT3]], ptr [[C:%.*]], align 4
 242 ; CHECK-NEXT:    ret void
 243 ;
 244 top:
     ; Struct loads and field extraction, mixed with the per-field fsub
     ; instructions and the insertvalue chain that assembles the result.
 245   %a_struct = load %pseudovec, ptr %a, align 4
 246   %a0 = extractvalue %pseudovec %a_struct, 0
 247   %a1 = extractvalue %pseudovec %a_struct, 1
 248   %b_struct = load %pseudovec, ptr %b, align 4
 249   %a2 = extractvalue %pseudovec %a_struct, 2
 250   %b0 = extractvalue %pseudovec %b_struct, 0
 251   %a3 = extractvalue %pseudovec %a_struct, 3
 252   %c0 = fsub float %a0, %b0
 253   %b1 = extractvalue %pseudovec %b_struct, 1
 254   %b2 = extractvalue %pseudovec %b_struct, 2
 255   %c1 = fsub float %a1, %b1
 256   %c_struct0 = insertvalue %pseudovec undef, float %c0, 0
 257   %b3 = extractvalue %pseudovec %b_struct, 3
 258   %c3 = fsub float %a3, %b3
 259   %c_struct1 = insertvalue %pseudovec %c_struct0, float %c1, 1
 260   %c2 = fsub float %a2, %b2
 261   %c_struct2 = insertvalue %pseudovec %c_struct1, float %c2, 2
 262   %c_struct3 = insertvalue %pseudovec %c_struct2, float %c3, 3
     ; The completed struct is written to %c in a single store.
 263   store %pseudovec %c_struct3, ptr %c, align 4
 264   ret void
 265 }