llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
   2 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
   3 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s
   4
   5 %struct.zot = type { i32, i32, i32 }
   6
   7 define i1 @reorder_results(ptr %arg, i1 %arg1, ptr %arg2, i64 %arg3, ptr %arg4) {
   8 ; CHECK-LABEL: define i1 @reorder_results(
   9 ; CHECK-SAME: ptr [[ARG:%.*]], i1 [[ARG1:%.*]], ptr [[ARG2:%.*]], i64 [[ARG3:%.*]], ptr [[ARG4:%.*]]) {
  10 ; CHECK-NEXT:  bb:
  11 ; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[ARG4]], align 8
  12 ; CHECK-NEXT:    [[LOAD4:%.*]] = load i32, ptr [[LOAD]], align 4
  13 ; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[LOAD]], i64 4
  14 ; CHECK-NEXT:    [[LOAD5:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4
  15 ; CHECK-NEXT:    [[GETELEMENTPTR6:%.*]] = getelementptr i8, ptr [[LOAD]], i64 8
  16 ; CHECK-NEXT:    [[LOAD7:%.*]] = load i32, ptr [[GETELEMENTPTR6]], align 4
  17 ; CHECK-NEXT:    br i1 [[ARG1]], label [[BB12:%.*]], label [[BB9:%.*]]
  18 ; CHECK:       bb8:
  19 ; CHECK-NEXT:    ret i1 false
  20 ; CHECK:       bb9:
  21 ; CHECK-NEXT:    [[FREEZE:%.*]] = freeze ptr [[ARG]]
  22 ; CHECK-NEXT:    store i32 [[LOAD4]], ptr [[FREEZE]], align 4
  23 ; CHECK-NEXT:    [[GETELEMENTPTR10:%.*]] = getelementptr i8, ptr [[FREEZE]], i64 4
  24 ; CHECK-NEXT:    store i32 [[LOAD7]], ptr [[GETELEMENTPTR10]], align 4
  25 ; CHECK-NEXT:    [[GETELEMENTPTR11:%.*]] = getelementptr i8, ptr [[FREEZE]], i64 8
  26 ; CHECK-NEXT:    store i32 [[LOAD5]], ptr [[GETELEMENTPTR11]], align 4
  27 ; CHECK-NEXT:    br label [[BB8:%.*]]
  28 ; CHECK:       bb12:
  29 ; CHECK-NEXT:    [[GETELEMENTPTR13:%.*]] = getelementptr [[STRUCT_ZOT:%.*]], ptr [[ARG2]], i64 [[ARG3]]
  30 ; CHECK-NEXT:    store i32 [[LOAD4]], ptr [[GETELEMENTPTR13]], align 4
  31 ; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR13]], i64 4
  32 ; CHECK-NEXT:    store i32 [[LOAD7]], ptr [[GETELEMENTPTR14]], align 4
  33 ; CHECK-NEXT:    [[GETELEMENTPTR15:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR13]], i64 8
  34 ; CHECK-NEXT:    store i32 [[LOAD5]], ptr [[GETELEMENTPTR15]], align 4
  35 ; CHECK-NEXT:    br label [[BB8]]
  36 ;
     ; Test input: three i32 values are loaded from byte offsets 0, 4 and 8 of the
     ; pointer read from %arg4, then stored in both successor blocks with the last
     ; two values swapped (%load4, %load7, %load5). The assertions use the shared
     ; CHECK prefix and expect the scalar IR to come out unchanged.
  37 bb:
  38   %load = load ptr, ptr %arg4, align 8
  39   %load4 = load i32, ptr %load, align 4
  40   %getelementptr = getelementptr i8, ptr %load, i64 4
  41   %load5 = load i32, ptr %getelementptr, align 4
  42   %getelementptr6 = getelementptr i8, ptr %load, i64 8
  43   %load7 = load i32, ptr %getelementptr6, align 4
  44   br i1 %arg1, label %bb12, label %bb9
  45
  46 bb8:                                              ; preds = %bb12, %bb9
  47   ret i1 false
  48
  49 bb9:                                              ; preds = %bb
       ; Store order differs from load order: %load7 (offset 8) lands at offset 4
       ; and %load5 (offset 4) lands at offset 8.
  50   %freeze = freeze ptr %arg
  51   store i32 %load4, ptr %freeze, align 4
  52   %getelementptr10 = getelementptr i8, ptr %freeze, i64 4
  53   store i32 %load7, ptr %getelementptr10, align 4
  54   %getelementptr11 = getelementptr i8, ptr %freeze, i64 8
  55   store i32 %load5, ptr %getelementptr11, align 4
  56   br label %bb8
  57
  58 bb12:                                             ; preds = %bb
       ; Same swapped store order as bb9, but based at element %arg3 of a
       ; %struct.zot array at %arg2.
  59   %getelementptr13 = getelementptr %struct.zot, ptr %arg2, i64 %arg3
  60   store i32 %load4, ptr %getelementptr13, align 4
  61   %getelementptr14 = getelementptr i8, ptr %getelementptr13, i64 4
  62   store i32 %load7, ptr %getelementptr14, align 4
  63   %getelementptr15 = getelementptr i8, ptr %getelementptr13, i64 8
  64   store i32 %load5, ptr %getelementptr15, align 4
  65   br label %bb8
  66 }
  67
  68 define void @extract_mask(ptr %object, double %conv503, double %conv520) {
  69 ; CHECK-LABEL: define void @extract_mask(
  70 ; CHECK-SAME: ptr [[OBJECT:%.*]], double [[CONV503:%.*]], double [[CONV520:%.*]]) {
  71 ; CHECK-NEXT:  entry:
  72 ; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[OBJECT]], align 8
  73 ; CHECK-NEXT:    [[BBOX483:%.*]] = getelementptr float, ptr [[TMP0]]
  74 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[BBOX483]], align 8
  75 ; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
  76 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
  77 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[CONV503]], i32 0
  78 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <2 x double> [[TMP4]], <double 0.000000e+00, double -2.000000e+10>
  79 ; CHECK-NEXT:    [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x double> [[TMP3]], <2 x double> <double 0.000000e+00, double -2.000000e+10>
  80 ; CHECK-NEXT:    [[TMP7:%.*]] = fsub <2 x double> zeroinitializer, [[TMP6]]
  81 ; CHECK-NEXT:    [[TMP8:%.*]] = fptrunc <2 x double> [[TMP7]] to <2 x float>
  82 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
  83 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
  84 ; CHECK-NEXT:    [[MUL646:%.*]] = fmul float [[TMP9]], [[TMP10]]
  85 ; CHECK-NEXT:    [[CMP663:%.*]] = fcmp olt float [[MUL646]], 0.000000e+00
  86 ; CHECK-NEXT:    br i1 [[CMP663]], label [[IF_THEN665:%.*]], label [[IF_END668:%.*]]
  87 ; CHECK:       if.then665:
  88 ; CHECK-NEXT:    [[ARRAYIDX656:%.*]] = getelementptr float, ptr [[OBJECT]], i64 10
  89 ; CHECK-NEXT:    [[BBOX651:%.*]] = getelementptr float, ptr [[OBJECT]]
  90 ; CHECK-NEXT:    [[CONV621:%.*]] = fptrunc double [[CONV520]] to float
  91 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
  92 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[CONV503]], i32 0
  93 ; CHECK-NEXT:    [[TMP13:%.*]] = fptrunc <2 x double> [[TMP12]] to <2 x float>
  94 ; CHECK-NEXT:    store <2 x float> [[TMP13]], ptr [[BBOX651]], align 8
  95 ; CHECK-NEXT:    [[BBOX_SROA_8_0_BBOX666_SROA_IDX:%.*]] = getelementptr float, ptr [[OBJECT]], i64 2
  96 ; CHECK-NEXT:    store float [[CONV621]], ptr [[BBOX_SROA_8_0_BBOX666_SROA_IDX]], align 8
  97 ; CHECK-NEXT:    store <2 x float> [[TMP8]], ptr [[ARRAYIDX656]], align 8
  98 ; CHECK-NEXT:    br label [[IF_END668]]
  99 ; CHECK:       if.end668:
 100 ; CHECK-NEXT:    ret void
 101 ;
     ; Test input: two adjacent floats are loaded through the pointer read from
     ; %object, widened to double, passed through compare/select, negated via
     ; fsub from 0.0 and truncated back to float; their product decides the branch.
     ; if.then665 then stores five floats into %object at element indices 0, 1, 2,
     ; 11 and 10. The shared CHECK lines expect <2 x float>/<2 x double> code for
     ; the pairs and a scalar store for %conv621.
 102 entry:
 103   %0 = load ptr, ptr %object, align 8
 104   %bbox483 = getelementptr float, ptr %0
 105   %1 = load float, ptr %bbox483, align 8
 106   %conv486 = fpext float %1 to double
 107   %cmp487 = fcmp ogt double %conv486, -2.000000e+10
 108   %conv486.2 = select i1 %cmp487, double %conv486, double -2.000000e+10
 109   %arrayidx502 = getelementptr float, ptr %0, i64 1
 110   %2 = load float, ptr %arrayidx502, align 4
 111   %conv5033 = fpext float %2 to double
       ; The compare tests the %conv503 argument, while the select below picks the
       ; loaded value %conv5033; the two chains therefore differ in which operand
       ; feeds the compare.
 112   %cmp504 = fcmp ogt double %conv503, 0.000000e+00
 113   %cond514 = select i1 %cmp504, double %conv5033, double 0.000000e+00
 114   %sub626 = fsub double 0.000000e+00, %conv486.2
 115   %conv627 = fptrunc double %sub626 to float
 116   %sub632 = fsub double 0.000000e+00, %cond514
 117   %conv633 = fptrunc double %sub632 to float
 118   %mul646 = fmul float %conv633, %conv627
 119   %cmp663 = fcmp olt float %mul646, 0.000000e+00
 120   br i1 %cmp663, label %if.then665, label %if.end668
 121
 122 if.then665:                                       ; preds = %entry
 123   %arrayidx656 = getelementptr float, ptr %object, i64 10
 124   %lengths652 = getelementptr float, ptr %object, i64 11
 125   %bbox651 = getelementptr float, ptr %object
 126   %conv621 = fptrunc double %conv520 to float
 127   %conv617 = fptrunc double %cond514 to float
 128   %conv613 = fptrunc double %conv503 to float
 129   store float %conv613, ptr %bbox651, align 8
 130   %bbox.sroa.6.0.bbox666.sroa_idx = getelementptr float, ptr %object, i64 1
 131   store float %conv617, ptr %bbox.sroa.6.0.bbox666.sroa_idx, align 4
 132   %bbox.sroa.8.0.bbox666.sroa_idx = getelementptr float, ptr %object, i64 2
 133   store float %conv621, ptr %bbox.sroa.8.0.bbox666.sroa_idx, align 8
       ; %conv627 goes to index 11 and %conv633 to index 10, i.e. the higher
       ; address is written first.
 134   store float %conv627, ptr %lengths652, align 4
 135   store float %conv633, ptr %arrayidx656, align 8
 136   br label %if.end668
 137
 138 if.end668:                                        ; preds = %if.then665, %entry
 139   ret void
 140 }
 141
 142 define void @gather_2(ptr %mat1, float %0, float %1) {
 143 ; NON-POW2-LABEL: define void @gather_2(
 144 ; NON-POW2-SAME: ptr [[MAT1:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
 145 ; NON-POW2-NEXT:  entry:
 146 ; NON-POW2-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0]], i32 0
 147 ; NON-POW2-NEXT:    [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <3 x i32> zeroinitializer
 148 ; NON-POW2-NEXT:    [[TMP4:%.*]] = insertelement <3 x float> <float 0.000000e+00, float poison, float poison>, float [[TMP1]], i32 1
 149 ; NON-POW2-NEXT:    [[TMP5:%.*]] = shufflevector <3 x float> [[TMP4]], <3 x float> poison, <3 x i32> <i32 0, i32 1, i32 1>
 150 ; NON-POW2-NEXT:    [[TMP6:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP3]], <3 x float> [[TMP5]], <3 x float> zeroinitializer)
 151 ; NON-POW2-NEXT:    [[ARRAYIDX163:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1
 152 ; NON-POW2-NEXT:    [[TMP7:%.*]] = fmul <3 x float> [[TMP6]], zeroinitializer
 153 ; NON-POW2-NEXT:    store <3 x float> [[TMP7]], ptr [[ARRAYIDX163]], align 4
 154 ; NON-POW2-NEXT:    ret void
 155 ;
 156 ; POW2-ONLY-LABEL: define void @gather_2(
 157 ; POW2-ONLY-SAME: ptr [[MAT1:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
 158 ; POW2-ONLY-NEXT:  entry:
 159 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
 160 ; POW2-ONLY-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
 161 ; POW2-ONLY-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP1]], i32 1
 162 ; POW2-ONLY-NEXT:    [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x float> zeroinitializer)
 163 ; POW2-ONLY-NEXT:    [[TMP6:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP1]], float 0.000000e+00)
 164 ; POW2-ONLY-NEXT:    [[TMP7:%.*]] = fmul float [[TMP6]], 0.000000e+00
 165 ; POW2-ONLY-NEXT:    [[ARRAYIDX163:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1
 166 ; POW2-ONLY-NEXT:    [[ARRAYIDX5_I_I_I280:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1, i64 2
 167 ; POW2-ONLY-NEXT:    [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
 168 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP8]], ptr [[ARRAYIDX163]], align 4
 169 ; POW2-ONLY-NEXT:    store float [[TMP7]], ptr [[ARRAYIDX5_I_I_I280]], align 4
 170 ; POW2-ONLY-NEXT:    ret void
 171 ;
     ; Test input: three scalar fmuladd results are each multiplied by 0.0 and
     ; stored to elements 0, 1 and 2 of row 1 of the [4 x [4 x float]] at %mat1.
     ; NON-POW2 expects a single <3 x float> fmuladd/fmul/store chain; POW2-ONLY
     ; expects a <2 x float> chain for elements 0-1 plus scalar code for element 2.
 172 entry:
       ; The three calls mix the operands differently: (%0, 0.0), (%1, %0) and
       ; (%0, %1), so the vector operands must be built from individual scalars.
 173   %2 = call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00)
 174   %3 = call float @llvm.fmuladd.f32(float %1, float %0, float 0.000000e+00)
 175   %4 = call float @llvm.fmuladd.f32(float %0, float %1, float 0.000000e+00)
 176   %5 = fmul float %2, 0.000000e+00
 177   %6 = fmul float %3, 0.000000e+00
 178   %7 = fmul float %4, 0.000000e+00
 179   %arrayidx163 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1
 180   %arrayidx2.i.i.i278 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1, i64 1
 181   %arrayidx5.i.i.i280 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1, i64 2
 182   store float %5, ptr %arrayidx163, align 4
 183   store float %6, ptr %arrayidx2.i.i.i278, align 4
 184   store float %7, ptr %arrayidx5.i.i.i280, align 4
 185   ret void
 186 }
 187
 188 define i32 @reorder_indices_1(float %0) {
 189 ; NON-POW2-LABEL: define i32 @reorder_indices_1(
 190 ; NON-POW2-SAME: float [[TMP0:%.*]]) {
 191 ; NON-POW2-NEXT:  entry:
 192 ; NON-POW2-NEXT:    [[NOR1:%.*]] = alloca [0 x [3 x float]], i32 0, align 4
 193 ; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x float>, ptr [[NOR1]], align 4
 194 ; NON-POW2-NEXT:    [[TMP3:%.*]] = fneg <3 x float> [[TMP1]]
 195 ; NON-POW2-NEXT:    [[TMP4:%.*]] = insertelement <3 x float> poison, float [[TMP0]], i32 0
 196 ; NON-POW2-NEXT:    [[TMP5:%.*]] = shufflevector <3 x float> [[TMP4]], <3 x float> poison, <3 x i32> zeroinitializer
 197 ; NON-POW2-NEXT:    [[TMP6:%.*]] = fmul <3 x float> [[TMP3]], [[TMP5]]
 198 ; NON-POW2-NEXT:    [[TMP10:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 199 ; NON-POW2-NEXT:    [[TMP7:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> zeroinitializer, <3 x float> [[TMP10]])
 200 ; NON-POW2-NEXT:    [[TMP8:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP5]], <3 x float> [[TMP7]], <3 x float> zeroinitializer)
 201 ; NON-POW2-NEXT:    [[TMP9:%.*]] = fmul <3 x float> [[TMP8]], zeroinitializer
 202 ; NON-POW2-NEXT:    store <3 x float> [[TMP9]], ptr [[NOR1]], align 4
 203 ; NON-POW2-NEXT:    ret i32 0
 204 ;
 205 ; POW2-ONLY-LABEL: define i32 @reorder_indices_1(
 206 ; POW2-ONLY-SAME: float [[TMP0:%.*]]) {
 207 ; POW2-ONLY-NEXT:  entry:
 208 ; POW2-ONLY-NEXT:    [[NOR1:%.*]] = alloca [0 x [3 x float]], i32 0, align 4
 209 ; POW2-ONLY-NEXT:    [[ARRAYIDX2_I265:%.*]] = getelementptr float, ptr [[NOR1]], i64 2
 210 ; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2_I265]], align 4
 211 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[NOR1]], align 4
 212 ; POW2-ONLY-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 213 ; POW2-ONLY-NEXT:    [[TMP4:%.*]] = fneg float [[TMP3]]
 214 ; POW2-ONLY-NEXT:    [[NEG11_I:%.*]] = fmul float [[TMP4]], [[TMP0]]
 215 ; POW2-ONLY-NEXT:    [[TMP5:%.*]] = call float @llvm.fmuladd.f32(float [[TMP1]], float 0.000000e+00, float [[NEG11_I]])
 216 ; POW2-ONLY-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 poison>
 217 ; POW2-ONLY-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1
 218 ; POW2-ONLY-NEXT:    [[TMP8:%.*]] = fneg <2 x float> [[TMP7]]
 219 ; POW2-ONLY-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
 220 ; POW2-ONLY-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <2 x i32> zeroinitializer
 221 ; POW2-ONLY-NEXT:    [[TMP11:%.*]] = fmul <2 x float> [[TMP8]], [[TMP10]]
 222 ; POW2-ONLY-NEXT:    [[TMP12:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> zeroinitializer, <2 x float> [[TMP11]])
 223 ; POW2-ONLY-NEXT:    [[TMP13:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP10]], <2 x float> [[TMP12]], <2 x float> zeroinitializer)
 224 ; POW2-ONLY-NEXT:    [[TMP14:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP5]], float 0.000000e+00)
 225 ; POW2-ONLY-NEXT:    [[TMP15:%.*]] = fmul <2 x float> [[TMP13]], zeroinitializer
 226 ; POW2-ONLY-NEXT:    [[MUL6_I_I_I:%.*]] = fmul float [[TMP14]], 0.000000e+00
 227 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP15]], ptr [[NOR1]], align 4
 228 ; POW2-ONLY-NEXT:    store float [[MUL6_I_I_I]], ptr [[ARRAYIDX2_I265]], align 4
 229 ; POW2-ONLY-NEXT:    ret i32 0
 230 ;
     ; Test input: the three floats at %nor1 are loaded (elements 1, 2, then 0).
     ; Each output element is fmuladd(a, 0.0, (-b) * %0) for a different pair
     ; (a, b) of loaded elements, fed through fmuladd(%0, ., 0.0) and a multiply
     ; by 0.0, then stored back to elements 0, 1 and 2 of %nor1.
     ; NON-POW2 expects <3 x float> code with one shufflevector (mask <1, 2, 0>);
     ; POW2-ONLY expects a <2 x float> part plus scalar code for element 2.
 231 entry:
 232   %nor1 = alloca [0 x [3 x float]], i32 0, align 4
 233   %arrayidx.i = getelementptr float, ptr %nor1, i64 1
 234   %1 = load float, ptr %arrayidx.i, align 4
 235   %arrayidx2.i265 = getelementptr float, ptr %nor1, i64 2
 236   %2 = load float, ptr %arrayidx2.i265, align 4
 237   %3 = fneg float %2
 238   %neg.i267 = fmul float %3, %0
 239   %4 = call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float %neg.i267)
 240   %5 = load float, ptr %nor1, align 4
 241   %6 = fneg float %5
 242   %neg11.i = fmul float %6, %0
 243   %7 = call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float %neg11.i)
 244   %8 = fneg float %1
 245   %neg18.i = fmul float %8, %0
 246   %9 = call float @llvm.fmuladd.f32(float %5, float 0.000000e+00, float %neg18.i)
       ; Results are consumed in the order %9, %4, %7, which is the order they are
       ; stored to elements 0, 1 and 2 below.
 247   %10 = call float @llvm.fmuladd.f32(float %0, float %9, float 0.000000e+00)
 248   %11 = call float @llvm.fmuladd.f32(float %0, float %4, float 0.000000e+00)
 249   %12 = call float @llvm.fmuladd.f32(float %0, float %7, float 0.000000e+00)
 250   %mul.i.i.i = fmul float %10, 0.000000e+00
 251   %mul3.i.i.i = fmul float %11, 0.000000e+00
 252   %mul6.i.i.i = fmul float %12, 0.000000e+00
 253   store float %mul.i.i.i, ptr %nor1, align 4
 254   store float %mul3.i.i.i, ptr %arrayidx.i, align 4
 255   store float %mul6.i.i.i, ptr %arrayidx2.i265, align 4
 256   ret i32 0
 257 }
 258
 259 define void @reorder_indices_2(ptr %spoint) {
 260 ; NON-POW2-LABEL: define void @reorder_indices_2(
 261 ; NON-POW2-SAME: ptr [[SPOINT:%.*]]) {
 262 ; NON-POW2-NEXT:  entry:
 263 ; NON-POW2-NEXT:    [[DSCO:%.*]] = getelementptr float, ptr [[SPOINT]], i64 0
 264 ; NON-POW2-NEXT:    [[TMP0:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> zeroinitializer, <3 x float> zeroinitializer, <3 x float> zeroinitializer)
 265 ; NON-POW2-NEXT:    [[TMP1:%.*]] = fmul <3 x float> [[TMP0]], zeroinitializer
 266 ; NON-POW2-NEXT:    [[TMP2:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 267 ; NON-POW2-NEXT:    store <3 x float> [[TMP2]], ptr [[DSCO]], align 4
 268 ; NON-POW2-NEXT:    ret void
 269 ;
 270 ; POW2-ONLY-LABEL: define void @reorder_indices_2(
 271 ; POW2-ONLY-SAME: ptr [[SPOINT:%.*]]) {
 272 ; POW2-ONLY-NEXT:  entry:
 273 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = extractelement <3 x float> zeroinitializer, i64 0
 274 ; POW2-ONLY-NEXT:    [[TMP1:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 0.000000e+00, float 0.000000e+00)
 275 ; POW2-ONLY-NEXT:    [[MUL4_I461:%.*]] = fmul float [[TMP1]], 0.000000e+00
 276 ; POW2-ONLY-NEXT:    [[DSCO:%.*]] = getelementptr float, ptr [[SPOINT]], i64 0
 277 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer)
 278 ; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], zeroinitializer
 279 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP3]], ptr [[DSCO]], align 4
 280 ; POW2-ONLY-NEXT:    [[ARRAYIDX5_I476:%.*]] = getelementptr float, ptr [[SPOINT]], i64 2
 281 ; POW2-ONLY-NEXT:    store float [[MUL4_I461]], ptr [[ARRAYIDX5_I476]], align 4
 282 ; POW2-ONLY-NEXT:    ret void
 283 ;
     ; Test input: lanes 1, 2 and 0 (in that order) are extracted from a constant
     ; zero <3 x float>, each is passed through fmuladd and a multiply by 0.0, and
     ; the results are stored to elements 0, 1 and 2 of %spoint.
     ; NON-POW2 expects a <3 x float> chain with a shufflevector (mask <1, 2, 0>)
     ; before the store; POW2-ONLY expects <2 x float> code plus a scalar tail.
 284 entry:
 285   %0 = extractelement <3 x float> zeroinitializer, i64 1
 286   %1 = extractelement <3 x float> zeroinitializer, i64 2
 287   %2 = extractelement <3 x float> zeroinitializer, i64 0
 288   %3 = tail call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00)
 289   %4 = tail call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float 0.000000e+00)
 290   %5 = tail call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float 0.000000e+00)
 291   %mul.i457 = fmul float %3, 0.000000e+00
 292   %mul2.i459 = fmul float %4, 0.000000e+00
 293   %mul4.i461 = fmul float %5, 0.000000e+00
 294   %dsco = getelementptr float, ptr %spoint, i64 0
 295   store float %mul.i457, ptr %dsco, align 4
 296   %arrayidx3.i474 = getelementptr float, ptr %spoint, i64 1
 297   store float %mul2.i459, ptr %arrayidx3.i474, align 4
 298   %arrayidx5.i476 = getelementptr float, ptr %spoint, i64 2
 299   store float %mul4.i461, ptr %arrayidx5.i476, align 4
 300   ret void
 301 }
 302
 303 define void @reorder_indices_2x_load(ptr %png_ptr, ptr %info_ptr) {
 304 ; CHECK-LABEL: define void @reorder_indices_2x_load(
 305 ; CHECK-SAME: ptr [[PNG_PTR:%.*]], ptr [[INFO_PTR:%.*]]) {
 306 ; CHECK-NEXT:  entry:
 307 ; CHECK-NEXT:    [[BIT_DEPTH:%.*]] = getelementptr i8, ptr [[INFO_PTR]], i64 0
 308 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[BIT_DEPTH]], align 4
 309 ; CHECK-NEXT:    [[COLOR_TYPE:%.*]] = getelementptr i8, ptr [[INFO_PTR]], i64 1
 310 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[COLOR_TYPE]], align 1
 311 ; CHECK-NEXT:    [[BIT_DEPTH37_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 11
 312 ; CHECK-NEXT:    store i8 [[TMP0]], ptr [[BIT_DEPTH37_I]], align 1
 313 ; CHECK-NEXT:    [[COLOR_TYPE39_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 10
 314 ; CHECK-NEXT:    store i8 [[TMP1]], ptr [[COLOR_TYPE39_I]], align 2
 315 ; CHECK-NEXT:    [[USR_BIT_DEPTH_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 12
 316 ; CHECK-NEXT:    store i8 [[TMP0]], ptr [[USR_BIT_DEPTH_I]], align 8
 317 ; CHECK-NEXT:    ret void
 318 ;
     ; Test input: two adjacent i8 values are loaded from %info_ptr (offsets 0 and
     ; 1) and written to three adjacent bytes of %png_ptr (offsets 10, 11, 12),
     ; with the first loaded value stored twice. The shared CHECK lines expect the
     ; scalar IR to come out unchanged.
 319 entry:
 320   %bit_depth = getelementptr i8, ptr %info_ptr, i64 0
 321   %0 = load i8, ptr %bit_depth, align 4
 322   %color_type = getelementptr i8, ptr %info_ptr, i64 1
 323   %1 = load i8, ptr %color_type, align 1
       ; Stores are issued in offset order 11, 10, 12 with values %0, %1, %0.
 324   %bit_depth37.i = getelementptr i8, ptr %png_ptr, i64 11
 325   store i8 %0, ptr %bit_depth37.i, align 1
 326   %color_type39.i = getelementptr i8, ptr %png_ptr, i64 10
 327   store i8 %1, ptr %color_type39.i, align 2
 328   %usr_bit_depth.i = getelementptr i8, ptr %png_ptr, i64 12
 329   store i8 %0, ptr %usr_bit_depth.i, align 8
 330   ret void
 331 }
 332
 333 define void @reuse_shuffle_indidces_1(ptr %col, float %0, float %1) {
 334 ; NON-POW2-LABEL: define void @reuse_shuffle_indidces_1(
 335 ; NON-POW2-SAME: ptr [[COL:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
 336 ; NON-POW2-NEXT:  entry:
 337 ; NON-POW2-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP1]], i32 0
 338 ; NON-POW2-NEXT:    [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[TMP0]], i32 1
 339 ; NON-POW2-NEXT:    [[TMP4:%.*]] = shufflevector <3 x float> [[TMP3]], <3 x float> poison, <3 x i32> <i32 0, i32 1, i32 1>
 340 ; NON-POW2-NEXT:    [[TMP5:%.*]] = fmul <3 x float> [[TMP4]], zeroinitializer
 341 ; NON-POW2-NEXT:    [[TMP6:%.*]] = fadd <3 x float> [[TMP5]], zeroinitializer
 342 ; NON-POW2-NEXT:    store <3 x float> [[TMP6]], ptr [[COL]], align 4
 343 ; NON-POW2-NEXT:    ret void
 344 ;
 345 ; POW2-ONLY-LABEL: define void @reuse_shuffle_indidces_1(
 346 ; POW2-ONLY-SAME: ptr [[COL:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
 347 ; POW2-ONLY-NEXT:  entry:
 348 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
 349 ; POW2-ONLY-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 1
 350 ; POW2-ONLY-NEXT:    [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
 351 ; POW2-ONLY-NEXT:    [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], zeroinitializer
 352 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP5]], ptr [[COL]], align 4
 353 ; POW2-ONLY-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr float, ptr [[COL]], i64 2
 354 ; POW2-ONLY-NEXT:    [[MUL38:%.*]] = fmul float [[TMP0]], 0.000000e+00
 355 ; POW2-ONLY-NEXT:    [[TMP6:%.*]] = fadd float [[MUL38]], 0.000000e+00
 356 ; POW2-ONLY-NEXT:    store float [[TMP6]], ptr [[ARRAYIDX33]], align 4
 357 ; POW2-ONLY-NEXT:    ret void
 358 ;
     ; Test input: three identical fmul-by-0.0 / fadd-0.0 chains on the scalars
     ; %1, %0 and %0 (the second argument is used twice) are stored to elements
     ; 0, 1 and 2 of %col.
     ; NON-POW2 expects one <3 x float> chain whose input is built with a
     ; shufflevector (mask <0, 1, 1>); POW2-ONLY expects <2 x float> code plus a
     ; scalar chain for element 2.
     ; NOTE(review): "indidces" in the function name looks like a typo for
     ; "indices"; it is left as is because the LABEL check lines match on it.
 359 entry:
 360   %mul24 = fmul float %1, 0.000000e+00
 361   %2 = fadd float %mul24, 0.000000e+00
 362   store float %2, ptr %col, align 4
 363   %arrayidx26 = getelementptr float, ptr %col, i64 1
 364   %mul31 = fmul float %0, 0.000000e+00
 365   %3 = fadd float %mul31, 0.000000e+00
 366   store float %3, ptr %arrayidx26, align 4
 367   %arrayidx33 = getelementptr float, ptr %col, i64 2
 368   %mul38 = fmul float %0, 0.000000e+00
 369   %4 = fadd float %mul38, 0.000000e+00
 370   store float %4, ptr %arrayidx33, align 4
 371   ret void
 372 }
 373
 374 define void @reuse_shuffle_indices_2(ptr %inertia, double %0) {
 375 ; CHECK-LABEL: define void @reuse_shuffle_indices_2(
 376 ; CHECK-SAME: ptr [[INERTIA:%.*]], double [[TMP0:%.*]]) {
 377 ; CHECK-NEXT:  entry:
 378 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
 379 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
 380 ; CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
 381 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
 382 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
 383 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
 384 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[TMP6]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
 385 ; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
 386 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 387 ; CHECK-NEXT:    store <3 x float> [[TMP9]], ptr [[INERTIA]], align 4
 388 ; CHECK-NEXT:    ret void
 389 ;
     ; Test input: already-vector IR. %0 is splatted into <2 x double>, truncated
     ; to <2 x float>, widened to <4 x float> (mask <0, 1, 1, poison>), run through
     ; fadd/fmul/fadd with an undef constant in lane 3, narrowed to <3 x float>
     ; and stored to %inertia. The shared CHECK lines expect the IR unchanged.
 390 entry:
 391   %1 = insertelement <2 x double> poison, double %0, i32 0
 392   %2 = shufflevector <2 x double> %1, <2 x double> poison, <2 x i32> zeroinitializer
 393   %3 = fptrunc <2 x double> %2 to <2 x float>
 394   %4 = fmul <2 x float> %3, zeroinitializer
 395   %5 = shufflevector <2 x float> %4, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
 396   %6 = fadd <4 x float> %5, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
 397   %7 = fmul <4 x float> %6, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
 398   %8 = fadd <4 x float> %7, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
       ; Only lanes 0-2 survive this shuffle; lane 3 (the undef/poison lane) is
       ; dropped before the store.
 399   %9 = shufflevector <4 x float> %8, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 400   store <3 x float> %9, ptr %inertia, align 4
 401   ret void
 402 }
 403
 404 define void @reuse_shuffle_indices_cost_crash_2(ptr %bezt, float %0) {
 405 ; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_2(
 406 ; CHECK-SAME: ptr [[BEZT:%.*]], float [[TMP0:%.*]]) {
 407 ; CHECK-NEXT:  entry:
 408 ; CHECK-NEXT:    [[FNEG:%.*]] = fmul float [[TMP0]], 0.000000e+00
 409 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[FNEG]], float 0.000000e+00)
 410 ; CHECK-NEXT:    store float [[TMP1]], ptr [[BEZT]], align 4
 411 ; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr float, ptr [[BEZT]], i64 1
 412 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
 413 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[FNEG]], i32 0
 414 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> zeroinitializer
 415 ; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> [[TMP4]], <2 x float> zeroinitializer)
 416 ; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[ARRAYIDX5_I]], align 4
 417 ; CHECK-NEXT:    ret void
 418 ;
     ; Test input: three fmuladd results are stored to elements 0, 1 and 2 of
     ; %bezt. The first two calls have identical operands (%0, %fneg, 0.0); the
     ; third uses (%fneg, 0.0, 0.0). The shared CHECK lines expect element 0 to
     ; stay scalar and elements 1-2 to become one <2 x float> fmuladd and store.
     ; NOTE(review): presumably a regression test for a cost-model crash, judging
     ; by the name - not confirmable from this file.
 419 entry:
 420   %fneg = fmul float %0, 0.000000e+00
 421   %1 = tail call float @llvm.fmuladd.f32(float %0, float %fneg, float 0.000000e+00)
 422   store float %1, ptr %bezt, align 4
 423   %2 = tail call float @llvm.fmuladd.f32(float %0, float %fneg, float 0.000000e+00)
 424   %arrayidx5.i = getelementptr float, ptr %bezt, i64 1
 425   store float %2, ptr %arrayidx5.i, align 4
 426   %3 = tail call float @llvm.fmuladd.f32(float %fneg, float 0.000000e+00, float 0.000000e+00)
 427   %arrayidx8.i831 = getelementptr float, ptr %bezt, i64 2
 428   store float %3, ptr %arrayidx8.i831, align 4
 429   ret void
 430 }
 431
 432 define void @reuse_shuffle_indices_cost_crash_3(ptr %m, double %conv, double %conv2) {
 433 ; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_3(
 434 ; CHECK-SAME: ptr [[M:%.*]], double [[CONV:%.*]], double [[CONV2:%.*]]) {
 435 ; CHECK-NEXT:  entry:
 436 ; CHECK-NEXT:    [[SUB19:%.*]] = fsub double 0.000000e+00, [[CONV2]]
 437 ; CHECK-NEXT:    [[CONV20:%.*]] = fptrunc double [[SUB19]] to float
 438 ; CHECK-NEXT:    store float [[CONV20]], ptr [[M]], align 4
 439 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 0.000000e+00
 440 ; CHECK-NEXT:    [[CONV239:%.*]] = fptrunc double [[ADD]] to float
 441 ; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 1
 442 ; CHECK-NEXT:    store float [[CONV239]], ptr [[ARRAYIDX25]], align 4
 443 ; CHECK-NEXT:    [[ADD26:%.*]] = fsub double [[CONV]], [[CONV]]
 444 ; CHECK-NEXT:    [[CONV27:%.*]] = fptrunc double [[ADD26]] to float
 445 ; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 2
 446 ; CHECK-NEXT:    store float [[CONV27]], ptr [[ARRAYIDX29]], align 4
 447 ; CHECK-NEXT:    ret void
 448 ;
     ; Test input: three doubles are computed with alternating opcodes
     ; (fsub 0.0 - %conv2, fadd %conv + 0.0, fsub %conv - %conv), truncated to
     ; float and stored to elements 0, 1 and 2 of the [4 x float] at %m.
     ; The shared CHECK lines expect the scalar IR to come out unchanged.
     ; NOTE(review): presumably a regression test for a cost-model crash, judging
     ; by the name - not confirmable from this file.
 449 entry:
 450   %sub19 = fsub double 0.000000e+00, %conv2
 451   %conv20 = fptrunc double %sub19 to float
 452   store float %conv20, ptr %m, align 4
 453   %add = fadd double %conv, 0.000000e+00
 454   %conv239 = fptrunc double %add to float
 455   %arrayidx25 = getelementptr [4 x float], ptr %m, i64 0, i64 1
 456   store float %conv239, ptr %arrayidx25, align 4
 457   %add26 = fsub double %conv, %conv
 458   %conv27 = fptrunc double %add26 to float
 459   %arrayidx29 = getelementptr [4 x float], ptr %m, i64 0, i64 2
 460   store float %conv27, ptr %arrayidx29, align 4
 461   ret void
 462 }
 463
 464 define void @reuse_shuffle_indices_cost_crash_4(double %conv7.i) {
 465 ; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_4(
 466 ; CHECK-SAME: double [[CONV7_I:%.*]]) {
 467 ; CHECK-NEXT:  entry:
 468 ; CHECK-NEXT:    [[DATA_I111:%.*]] = alloca [0 x [0 x [0 x [3 x float]]]], i32 0, align 4
 469 ; CHECK-NEXT:    [[ARRAYIDX_2_I:%.*]] = getelementptr [3 x float], ptr [[DATA_I111]], i64 0, i64 2
 470 ; CHECK-NEXT:    [[MUL17_I_US:%.*]] = fmul double [[CONV7_I]], 0.000000e+00
 471 ; CHECK-NEXT:    [[MUL_2_I_I_US:%.*]] = fmul double [[MUL17_I_US]], 0.000000e+00
 472 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV7_I]], i32 0
 473 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
 474 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], zeroinitializer
 475 ; CHECK-NEXT:    [[ADD_2_I_I_US:%.*]] = fadd double [[MUL_2_I_I_US]], 0.000000e+00
 476 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], [[TMP1]]
 477 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], zeroinitializer
 478 ; CHECK-NEXT:    [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
 479 ; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[DATA_I111]], align 4
 480 ; CHECK-NEXT:    [[CONV_2_I46_US:%.*]] = fptrunc double [[ADD_2_I_I_US]] to float
 481 ; CHECK-NEXT:    store float [[CONV_2_I46_US]], ptr [[ARRAYIDX_2_I]], align 4
 482 ; CHECK-NEXT:    [[CALL2_I_US:%.*]] = load volatile ptr, ptr [[DATA_I111]], align 8
 483 ; CHECK-NEXT:    ret void
 484 ;
     ; Test input: elements 0 and 1 of the alloca are produced by two identical
     ; fadd/fmul/fadd/fptrunc chains on %conv7.i; element 2 comes from a different
     ; chain (two fmuls by 0.0, then fadd). The shared CHECK lines expect the
     ; first two chains to become <2 x double>/<2 x float> code with one vector
     ; store, while element 2 stays scalar.
     ; NOTE(review): presumably a regression test for a cost-model crash, judging
     ; by the name - not confirmable from this file.
 485 entry:
 486   %data.i111 = alloca [0 x [0 x [0 x [3 x float]]]], i32 0, align 4
 487   %arrayidx.1.i = getelementptr [3 x float], ptr %data.i111, i64 0, i64 1
 488   %arrayidx.2.i = getelementptr [3 x float], ptr %data.i111, i64 0, i64 2
 489   %mul17.i.us = fmul double %conv7.i, 0.000000e+00
 490   %mul.2.i.i.us = fmul double %mul17.i.us, 0.000000e+00
 491   %add.i.i82.i.us = fadd double %conv7.i, 0.000000e+00
 492   %add.1.i.i84.i.us = fadd double %conv7.i, 0.000000e+00
 493   %mul.i.i91.i.us = fmul double %add.i.i82.i.us, %conv7.i
 494   %mul.1.i.i92.i.us = fmul double %add.1.i.i84.i.us, %conv7.i
 495   %add.i96.i.us = fadd double %mul.i.i91.i.us, 0.000000e+00
 496   %add.1.i.i.us = fadd double %mul.1.i.i92.i.us, 0.000000e+00
 497   %add.2.i.i.us = fadd double %mul.2.i.i.us, 0.000000e+00
 498   %conv.i42.us = fptrunc double %add.i96.i.us to float
 499   store float %conv.i42.us, ptr %data.i111, align 4
 500   %conv.1.i44.us = fptrunc double %add.1.i.i.us to float
 501   store float %conv.1.i44.us, ptr %arrayidx.1.i, align 4
 502   %conv.2.i46.us = fptrunc double %add.2.i.i.us to float
 503   store float %conv.2.i46.us, ptr %arrayidx.2.i, align 4
       ; Volatile load of the stored buffer; the CHECK lines expect it to remain
       ; after the stores.
 504   %call2.i.us = load volatile ptr, ptr %data.i111, align 8
 505   ret void
 506 }
 507
 508 define void @common_mask(ptr %m, double %conv, double %conv2) {
 509 ; CHECK-LABEL: define void @common_mask(
 510 ; CHECK-SAME: ptr [[M:%.*]], double [[CONV:%.*]], double [[CONV2:%.*]]) {
 511 ; CHECK-NEXT:  entry:
 512 ; CHECK-NEXT:    [[SUB19:%.*]] = fsub double [[CONV]], [[CONV]]
 513 ; CHECK-NEXT:    [[CONV20:%.*]] = fptrunc double [[SUB19]] to float
 514 ; CHECK-NEXT:    store float [[CONV20]], ptr [[M]], align 4
 515 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[CONV2]], 0.000000e+00
 516 ; CHECK-NEXT:    [[CONV239:%.*]] = fptrunc double [[ADD]] to float
 517 ; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 1
 518 ; CHECK-NEXT:    store float [[CONV239]], ptr [[ARRAYIDX25]], align 4
 519 ; CHECK-NEXT:    [[ADD26:%.*]] = fsub double 0.000000e+00, [[CONV]]
 520 ; CHECK-NEXT:    [[CONV27:%.*]] = fptrunc double [[ADD26]] to float
 521 ; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 2
 522 ; CHECK-NEXT:    store float [[CONV27]], ptr [[ARRAYIDX29]], align 4
 523 ; CHECK-NEXT:    ret void
 524 ;
     ; Test input: three double results are truncated to float and stored to
     ; elements 0, 1 and 2 of %m. The assertions above use only the CHECK prefix
     ; shared by both RUN lines and mirror this body instruction for instruction,
     ; i.e. the IR is expected to stay scalar in both configurations.
 525 entry:
       ; Element 0: %conv - %conv.
 526   %sub19 = fsub double %conv, %conv
 527   %conv20 = fptrunc double %sub19 to float
 528   store float %conv20, ptr %m, align 4
       ; Element 1: the only lane that uses fadd and the only one reading %conv2.
 529   %add = fadd double %conv2, 0.000000e+00
 530   %conv239 = fptrunc double %add to float
 531   %arrayidx25 = getelementptr [4 x float], ptr %m, i64 0, i64 1
 532   store float %conv239, ptr %arrayidx25, align 4
       ; Element 2: despite the name %add26 this is an fsub (0.0 - %conv).
 533   %add26 = fsub double 0.000000e+00, %conv
 534   %conv27 = fptrunc double %add26 to float
 535   %arrayidx29 = getelementptr [4 x float], ptr %m, i64 0, i64 2
 536   store float %conv27, ptr %arrayidx29, align 4
 537   ret void
 538 }
 539
 540 define void @vec3_extract(<3 x i16> %pixel.sroa.0.4.vec.insert606, ptr %call3.i536) {
 541 ; CHECK-LABEL: define void @vec3_extract(
 542 ; CHECK-SAME: <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606:%.*]], ptr [[CALL3_I536:%.*]]) {
 543 ; CHECK-NEXT:  entry:
 544 ; CHECK-NEXT:    [[PIXEL_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 2
 545 ; CHECK-NEXT:    [[RED668:%.*]] = getelementptr i16, ptr [[CALL3_I536]], i64 2
 546 ; CHECK-NEXT:    store i16 [[PIXEL_SROA_0_4_VEC_EXTRACT]], ptr [[RED668]], align 2
 547 ; CHECK-NEXT:    [[PIXEL_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 1
 548 ; CHECK-NEXT:    [[GREEN670:%.*]] = getelementptr i16, ptr [[CALL3_I536]], i64 1
 549 ; CHECK-NEXT:    store i16 [[PIXEL_SROA_0_2_VEC_EXTRACT]], ptr [[GREEN670]], align 2
 550 ; CHECK-NEXT:    [[PIXEL_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 0
 551 ; CHECK-NEXT:    store i16 [[PIXEL_SROA_0_0_VEC_EXTRACT]], ptr [[CALL3_I536]], align 2
 552 ; CHECK-NEXT:    ret void
 553 ;
     ; Test input: each element of the <3 x i16> argument is extracted and stored
     ; to the i16 slot with the same index relative to %call3.i536. The elements
     ; are visited in descending order (2, 1, 0). The assertions above use only
     ; the CHECK prefix shared by both RUN lines and mirror this body, i.e. the
     ; extract/store pairs are expected to remain scalar in both configurations.
 554 entry:
       ; Element 2 -> slot 2.
 555   %pixel.sroa.0.4.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 2
 556   %red668 = getelementptr i16, ptr %call3.i536, i64 2
 557   store i16 %pixel.sroa.0.4.vec.extract, ptr %red668, align 2
       ; Element 1 -> slot 1.
 558   %pixel.sroa.0.2.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 1
 559   %green670 = getelementptr i16, ptr %call3.i536, i64 1
 560   store i16 %pixel.sroa.0.2.vec.extract, ptr %green670, align 2
       ; Element 0 -> slot 0 (stored through the base pointer directly).
 561   %pixel.sroa.0.0.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 0
 562   store i16 %pixel.sroa.0.0.vec.extract, ptr %call3.i536, align 2
 563   ret void
 564 }
 565
 566 define void @can_reorder_vec3_op_with_padding(ptr %A, <3 x float> %in) {
 567 ; NON-POW2-LABEL: define void @can_reorder_vec3_op_with_padding(
 568 ; NON-POW2-SAME: ptr [[A:%.*]], <3 x float> [[IN:%.*]]) {
 569 ; NON-POW2-NEXT:  entry:
 570 ; NON-POW2-NEXT:    [[TMP1:%.*]] = fsub <3 x float> [[IN]], [[IN]]
 571 ; NON-POW2-NEXT:    [[TMP2:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> splat (float 2.000000e+00), <3 x float> splat (float 3.000000e+00))
 572 ; NON-POW2-NEXT:    [[TMP3:%.*]] = fmul <3 x float> [[TMP2]], splat (float 3.000000e+00)
 573 ; NON-POW2-NEXT:    [[TMP4:%.*]] = shufflevector <3 x float> [[TMP3]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 574 ; NON-POW2-NEXT:    store <3 x float> [[TMP4]], ptr [[A]], align 4
 575 ; NON-POW2-NEXT:    ret void
 576 ;
 577 ; POW2-ONLY-LABEL: define void @can_reorder_vec3_op_with_padding(
 578 ; POW2-ONLY-SAME: ptr [[A:%.*]], <3 x float> [[IN:%.*]]) {
 579 ; POW2-ONLY-NEXT:  entry:
 580 ; POW2-ONLY-NEXT:    [[ARRAYIDX42_I:%.*]] = getelementptr float, ptr [[A]], i64 2
 581 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = extractelement <3 x float> [[IN]], i64 0
 582 ; POW2-ONLY-NEXT:    [[SUB_I362:%.*]] = fsub float [[TMP0]], [[TMP0]]
 583 ; POW2-ONLY-NEXT:    [[TMP1:%.*]] = call float @llvm.fmuladd.f32(float [[SUB_I362]], float 2.000000e+00, float 3.000000e+00)
 584 ; POW2-ONLY-NEXT:    [[MUL6_I_I_I_I:%.*]] = fmul float [[TMP1]], 3.000000e+00
 585 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = shufflevector <3 x float> [[IN]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
 586 ; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fsub <2 x float> [[TMP2]], [[TMP2]]
 587 ; POW2-ONLY-NEXT:    [[TMP4:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> splat (float 2.000000e+00), <2 x float> splat (float 3.000000e+00))
 588 ; POW2-ONLY-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP4]], splat (float 3.000000e+00)
 589 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP5]], ptr [[A]], align 4
 590 ; POW2-ONLY-NEXT:    store float [[MUL6_I_I_I_I]], ptr [[ARRAYIDX42_I]], align 4
 591 ; POW2-ONLY-NEXT:    ret void
 592 ;
     ; Test input: the same fsub -> fmuladd -> fmul chain is applied to each of
     ; the three elements of %in, and the results are stored to A[0..2] in a
     ; rotated order: A[0] <- element 1, A[1] <- element 2, A[2] <- element 0.
     ; The two assertion blocks above differ per RUN line:
     ;  * NON-POW2 expects one <3 x float> chain, a shufflevector with mask
     ;    <1, 2, 0> and a single <3 x float> store.
     ;  * POW2-ONLY expects elements 1 and 2 handled as a <2 x float> chain
     ;    stored at A[0], with element 0 left scalar and stored at A[2].
 593 entry:
 594   %arrayidx42.i = getelementptr float, ptr %A, i64 2
 595   %arrayidx35.i = getelementptr float, ptr %A, i64 1
       ; Each fsub subtracts an element from a second extract of that same element.
 596   %0 = extractelement <3 x float> %in, i64 0
 597   %1 = extractelement <3 x float> %in, i64 0
 598   %sub.i362 = fsub float %0, %1
 599   %2 = extractelement <3 x float> %in, i64 1
 600   %3 = extractelement <3 x float> %in, i64 1
 601   %sub5.i = fsub float %2, %3
 602   %4 = extractelement <3 x float> %in, i64 2
 603   %5 = extractelement <3 x float> %in, i64 2
 604   %sub9.i = fsub float %4, %5
       ; The fmuladd calls are issued in element order 1, 2, 0 -- the same order
       ; in which the results are stored below.
 605   %6 = call float @llvm.fmuladd.f32(float %sub5.i, float 2.000000e+00, float 3.000000e+00)
 606   %7 = call float @llvm.fmuladd.f32(float %sub9.i, float 2.000000e+00, float 3.000000e+00)
 607   %8 = call float @llvm.fmuladd.f32(float %sub.i362, float 2.000000e+00, float 3.000000e+00)
 608   %mul.i.i.i.i373 = fmul float %6, 3.000000e+00
 609   %mul3.i.i.i.i = fmul float %7, 3.000000e+00
 610   %mul6.i.i.i.i = fmul float %8, 3.000000e+00
       ; A[0] <- element 1, A[1] <- element 2, A[2] <- element 0.
 611   store float %mul.i.i.i.i373, ptr %A, align 4
 612   store float %mul3.i.i.i.i, ptr %arrayidx35.i, align 4
 613   store float %mul6.i.i.i.i, ptr %arrayidx42.i, align 4
 614   ret void
 615 }
 616
 617 declare float @llvm.fmuladd.f32(float, float, float) ; called by @can_reorder_vec3_op_with_padding
 618 declare double @llvm.fmuladd.f64(double, double, double) ; NOTE(review): no caller from @common_mask onward; presumably used by an earlier function -- confirm before removing