test/Transforms/SLPVectorizer/X86/insertvalue.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
   3 ; Scalar x[i] * y[i] + z[i] for i = 0..1 on [2 x double] operands, rebuilt with an insertvalue chain; the assertions expect <2 x double> load/fmul/fadd.
   4 define void @julia_2xdouble([2 x double]* sret, [2 x double]*, [2 x double]*, [2 x double]*) {
   5 ; CHECK-LABEL: @julia_2xdouble(
   6 ; CHECK-NEXT:  top:
   7 ; CHECK-NEXT:    [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0
   8 ; CHECK-NEXT:    [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0
   9 ; CHECK-NEXT:    [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1
  10 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>*
  11 ; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
  12 ; CHECK-NEXT:    [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1
  13 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>*
  14 ; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4
  15 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
  16 ; CHECK-NEXT:    [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0
  17 ; CHECK-NEXT:    [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1
  18 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[PZ0]] to <2 x double>*
  19 ; CHECK-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4
  20 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
  21 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
  22 ; CHECK-NEXT:    [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
  23 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
  24 ; CHECK-NEXT:    [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
  25 ; CHECK-NEXT:    store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4
  26 ; CHECK-NEXT:    ret void
  27 ;
  28 top:
  29   %px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0
  30   %x0 = load double, double* %px0, align 4
  31   %py0 = getelementptr inbounds [2 x double], [2 x double]* %3, i64 0, i64 0
  32   %y0 = load double, double* %py0, align 4
  33   %m0 = fmul double %x0, %y0
  34   %px1 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 1
  35   %x1 = load double, double* %px1, align 4
  36   %py1 = getelementptr inbounds [2 x double], [2 x double]* %3, i64 0, i64 1
  37   %y1 = load double, double* %py1, align 4
  38   %m1 = fmul double %x1, %y1
  39   %pz0 = getelementptr inbounds [2 x double], [2 x double]* %1, i64 0, i64 0
  40   %z0 = load double, double* %pz0, align 4
  41   %a0 = fadd double %m0, %z0
  42   %i0 = insertvalue [2 x double] undef, double %a0, 0 ; lane 0 starts the aggregate from undef
  43   %pz1 = getelementptr inbounds [2 x double], [2 x double]* %1, i64 0, i64 1
  44   %z1 = load double, double* %pz1, align 4
  45   %a1 = fadd double %m1, %z1
  46   %i1 = insertvalue [2 x double] %i0, double %a1, 1 ; lane 1 completes the aggregate
  47   store [2 x double] %i1, [2 x double]* %0, align 4 ; result is written through the sret argument %0
  48   ret void
  49 }
  50 ; Four-lane float version of the x[i] * y[i] + z[i] pattern, rebuilt with an insertvalue chain; the assertions expect <4 x float> load/fmul/fadd.
  51 define void @julia_4xfloat([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) {
  52 ; CHECK-LABEL: @julia_4xfloat(
  53 ; CHECK-NEXT:  top:
  54 ; CHECK-NEXT:    [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0
  55 ; CHECK-NEXT:    [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0
  56 ; CHECK-NEXT:    [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1
  57 ; CHECK-NEXT:    [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1
  58 ; CHECK-NEXT:    [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2
  59 ; CHECK-NEXT:    [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2
  60 ; CHECK-NEXT:    [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3
  61 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>*
  62 ; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
  63 ; CHECK-NEXT:    [[PY3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 3
  64 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>*
  65 ; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
  66 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
  67 ; CHECK-NEXT:    [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0
  68 ; CHECK-NEXT:    [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1
  69 ; CHECK-NEXT:    [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2
  70 ; CHECK-NEXT:    [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3
  71 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[PZ0]] to <4 x float>*
  72 ; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4
  73 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
  74 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
  75 ; CHECK-NEXT:    [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
  76 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
  77 ; CHECK-NEXT:    [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
  78 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
  79 ; CHECK-NEXT:    [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
  80 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
  81 ; CHECK-NEXT:    [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
  82 ; CHECK-NEXT:    store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4
  83 ; CHECK-NEXT:    ret void
  84 ;
  85 top:
  86   %px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
  87   %x0 = load float, float* %px0, align 4
  88   %py0 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0
  89   %y0 = load float, float* %py0, align 4
  90   %m0 = fmul float %x0, %y0
  91   %px1 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1
  92   %x1 = load float, float* %px1, align 4
  93   %py1 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1
  94   %y1 = load float, float* %py1, align 4
  95   %m1 = fmul float %x1, %y1
  96   %px2 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2
  97   %x2 = load float, float* %px2, align 4
  98   %py2 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2
  99   %y2 = load float, float* %py2, align 4
 100   %m2 = fmul float %x2, %y2
 101   %px3 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3
 102   %x3 = load float, float* %px3, align 4
 103   %py3 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3
 104   %y3 = load float, float* %py3, align 4
 105   %m3 = fmul float %x3, %y3
 106   %pz0 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0
 107   %z0 = load float, float* %pz0, align 4
 108   %a0 = fadd float %m0, %z0
 109   %i0 = insertvalue [4 x float] undef, float %a0, 0 ; lane 0 starts the aggregate from undef
 110   %pz1 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1
 111   %z1 = load float, float* %pz1, align 4
 112   %a1 = fadd float %m1, %z1
 113   %i1 = insertvalue [4 x float] %i0, float %a1, 1
 114   %pz2 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2
 115   %z2 = load float, float* %pz2, align 4
 116   %a2 = fadd float %m2, %z2
 117   %i2 = insertvalue [4 x float] %i1, float %a2, 2
 118   %pz3 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3
 119   %z3 = load float, float* %pz3, align 4
 120   %a3 = fadd float %m3, %z3
 121   %i3 = insertvalue [4 x float] %i2, float %a3, 3 ; lane 3 completes the aggregate
 122   store [4 x float] %i3, [4 x float]* %0, align 4 ; result is written through the sret argument %0
 123   ret void
 124 }
 125 ; Whole-aggregate loads of [4 x float] split by extractvalue, subtracted per lane and repacked; the assertions expect <4 x float> loads via a pointer bitcast and a vector fsub.
 126 define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
 127 ; CHECK-LABEL: @julia_load_array_of_float(
 128 ; CHECK-NEXT:  top:
 129 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>*
 130 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
 131 ; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4
 132 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>*
 133 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
 134 ; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4
 135 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
 136 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
 137 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
 138 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
 139 ; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
 140 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
 141 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
 142 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 143 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
 144 ; CHECK-NEXT:    store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4
 145 ; CHECK-NEXT:    ret void
 146 ;
 147 top:
 148   %a_arr = load [4 x float], [4 x float]* %a, align 4
 149   %a0 = extractvalue [4 x float] %a_arr, 0
 150   %a2 = extractvalue [4 x float] %a_arr, 2 ; note: index 2 is extracted before index 1
 151   %a1 = extractvalue [4 x float] %a_arr, 1
 152   %b_arr = load [4 x float], [4 x float]* %b, align 4
 153   %b0 = extractvalue [4 x float] %b_arr, 0
 154   %b2 = extractvalue [4 x float] %b_arr, 2
 155   %b1 = extractvalue [4 x float] %b_arr, 1
 156   %a3 = extractvalue [4 x float] %a_arr, 3
 157   %c1 = fsub float %a1, %b1 ; lane 1 is computed before lanes 0 and 2
 158   %b3 = extractvalue [4 x float] %b_arr, 3
 159   %c0 = fsub float %a0, %b0
 160   %c2 = fsub float %a2, %b2
 161   %c_arr0 = insertvalue [4 x float] undef, float %c0, 0 ; lane 0 starts the result aggregate from undef
 162   %c_arr1 = insertvalue [4 x float] %c_arr0, float %c1, 1
 163   %c3 = fsub float %a3, %b3 ; lane 3 is computed in the middle of the insertvalue chain
 164   %c_arr2 = insertvalue [4 x float] %c_arr1, float %c2, 2
 165   %c_arr3 = insertvalue [4 x float] %c_arr2, float %c3, 3
 166   store [4 x float] %c_arr3, [4 x float]* %c, align 4
 167   ret void
 168 }
 169 ; Integer counterpart of the [4 x float] aggregate-load test: per-lane i32 sub on extractvalue results; the assertions expect <4 x i32> loads and a vector sub.
 170 define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
 171 ; CHECK-LABEL: @julia_load_array_of_i32(
 172 ; CHECK-NEXT:  top:
 173 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>*
 174 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
 175 ; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4
 176 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>*
 177 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
 178 ; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4
 179 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
 180 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
 181 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
 182 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
 183 ; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
 184 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
 185 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
 186 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
 187 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
 188 ; CHECK-NEXT:    store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4
 189 ; CHECK-NEXT:    ret void
 190 ;
 191 top:
 192   %a_arr = load [4 x i32], [4 x i32]* %a, align 4
 193   %a0 = extractvalue [4 x i32] %a_arr, 0
 194   %a2 = extractvalue [4 x i32] %a_arr, 2 ; note: index 2 is extracted before index 1
 195   %a1 = extractvalue [4 x i32] %a_arr, 1
 196   %b_arr = load [4 x i32], [4 x i32]* %b, align 4
 197   %b0 = extractvalue [4 x i32] %b_arr, 0
 198   %b2 = extractvalue [4 x i32] %b_arr, 2
 199   %b1 = extractvalue [4 x i32] %b_arr, 1
 200   %a3 = extractvalue [4 x i32] %a_arr, 3
 201   %c1 = sub i32 %a1, %b1 ; lane 1 is computed before lanes 0 and 2
 202   %b3 = extractvalue [4 x i32] %b_arr, 3
 203   %c0 = sub i32 %a0, %b0
 204   %c2 = sub i32 %a2, %b2
 205   %c_arr0 = insertvalue [4 x i32] undef, i32 %c0, 0 ; lane 0 starts the result aggregate from undef
 206   %c_arr1 = insertvalue [4 x i32] %c_arr0, i32 %c1, 1
 207   %c3 = sub i32 %a3, %b3 ; lane 3 is computed in the middle of the insertvalue chain
 208   %c_arr2 = insertvalue [4 x i32] %c_arr1, i32 %c2, 2
 209   %c_arr3 = insertvalue [4 x i32] %c_arr2, i32 %c3, 3
 210   store [4 x i32] %c_arr3, [4 x i32]* %c, align 4
 211   ret void
 212 }
 213
 214 ; Almost identical to the previous test, but with an element type (i16) that should NOT be vectorized:
 215 ; the assertions below expect the scalar extractvalue/sub/insertvalue sequence to be left unchanged.
 216 define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) {
 217 ; CHECK-LABEL: @julia_load_array_of_i16(
 218 ; CHECK-NEXT:  top:
 219 ; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x i16], [4 x i16]* [[A:%.*]], align 4
 220 ; CHECK-NEXT:    [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
 221 ; CHECK-NEXT:    [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
 222 ; CHECK-NEXT:    [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
 223 ; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i16], [4 x i16]* [[B:%.*]], align 4
 224 ; CHECK-NEXT:    [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
 225 ; CHECK-NEXT:    [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
 226 ; CHECK-NEXT:    [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
 227 ; CHECK-NEXT:    [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3
 228 ; CHECK-NEXT:    [[C1:%.*]] = sub i16 [[A1]], [[B1]]
 229 ; CHECK-NEXT:    [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3
 230 ; CHECK-NEXT:    [[C0:%.*]] = sub i16 [[A0]], [[B0]]
 231 ; CHECK-NEXT:    [[C2:%.*]] = sub i16 [[A2]], [[B2]]
 232 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0
 233 ; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1
 234 ; CHECK-NEXT:    [[C3:%.*]] = sub i16 [[A3]], [[B3]]
 235 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
 236 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
 237 ; CHECK-NEXT:    store [4 x i16] [[C_ARR3]], [4 x i16]* [[C:%.*]], align 4
 238 ; CHECK-NEXT:    ret void
 239 ;
 240 top:
 241   %a_arr = load [4 x i16], [4 x i16]* %a, align 4
 242   %a0 = extractvalue [4 x i16] %a_arr, 0
 243   %a2 = extractvalue [4 x i16] %a_arr, 2 ; same interleaved extract order as the i32 variant
 244   %a1 = extractvalue [4 x i16] %a_arr, 1
 245   %b_arr = load [4 x i16], [4 x i16]* %b, align 4
 246   %b0 = extractvalue [4 x i16] %b_arr, 0
 247   %b2 = extractvalue [4 x i16] %b_arr, 2
 248   %b1 = extractvalue [4 x i16] %b_arr, 1
 249   %a3 = extractvalue [4 x i16] %a_arr, 3
 250   %c1 = sub i16 %a1, %b1
 251   %b3 = extractvalue [4 x i16] %b_arr, 3
 252   %c0 = sub i16 %a0, %b0
 253   %c2 = sub i16 %a2, %b2
 254   %c_arr0 = insertvalue [4 x i16] undef, i16 %c0, 0 ; lane 0 starts the result aggregate from undef
 255   %c_arr1 = insertvalue [4 x i16] %c_arr0, i16 %c1, 1
 256   %c3 = sub i16 %a3, %b3
 257   %c_arr2 = insertvalue [4 x i16] %c_arr1, i16 %c2, 2
 258   %c_arr3 = insertvalue [4 x i16] %c_arr2, i16 %c3, 3
 259   store [4 x i16] %c_arr3, [4 x i16]* %c, align 4
 260   ret void
 261 }
 262
 263 %pseudovec = type { float, float, float, float } ; named struct of four floats, used below in place of [4 x float]
 264 ; Per-lane fsub on fields extracted from whole-struct loads, repacked with insertvalue; the assertions expect <4 x float> loads via a pointer bitcast and a vector fsub.
 265 define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) {
 266 ; CHECK-LABEL: @julia_load_struct_of_float(
 267 ; CHECK-NEXT:  top:
 268 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>*
 269 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
 270 ; CHECK-NEXT:    [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4
 271 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>*
 272 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
 273 ; CHECK-NEXT:    [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4
 274 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
 275 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
 276 ; CHECK-NEXT:    [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
 277 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
 278 ; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT0]], float [[TMP6]], 1
 279 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
 280 ; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT1]], float [[TMP7]], 2
 281 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 282 ; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT2]], float [[TMP8]], 3
 283 ; CHECK-NEXT:    store [[PSEUDOVEC]] [[C_STRUCT3]], %pseudovec* [[C:%.*]], align 4
 284 ; CHECK-NEXT:    ret void
 285 ;
 286 top:
 287   %a_struct = load %pseudovec, %pseudovec* %a, align 4
 288   %a0 = extractvalue %pseudovec %a_struct, 0
 289   %a1 = extractvalue %pseudovec %a_struct, 1
 290   %b_struct = load %pseudovec, %pseudovec* %b, align 4 ; second aggregate load sits between the extracts from %a_struct
 291   %a2 = extractvalue %pseudovec %a_struct, 2
 292   %b0 = extractvalue %pseudovec %b_struct, 0
 293   %a3 = extractvalue %pseudovec %a_struct, 3
 294   %c0 = fsub float %a0, %b0
 295   %b1 = extractvalue %pseudovec %b_struct, 1
 296   %b2 = extractvalue %pseudovec %b_struct, 2
 297   %c1 = fsub float %a1, %b1
 298   %c_struct0 = insertvalue %pseudovec undef, float %c0, 0 ; field 0 starts the result struct from undef
 299   %b3 = extractvalue %pseudovec %b_struct, 3
 300   %c3 = fsub float %a3, %b3 ; lane 3 is computed before lane 2
 301   %c_struct1 = insertvalue %pseudovec %c_struct0, float %c1, 1
 302   %c2 = fsub float %a2, %b2
 303   %c_struct2 = insertvalue %pseudovec %c_struct1, float %c2, 2
 304   %c_struct3 = insertvalue %pseudovec %c_struct2, float %c3, 3
 305   store %pseudovec %c_struct3, %pseudovec* %c, align 4
 306   ret void
 307 }