1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
3 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s
; Test: three adjacent i32 elements of %src are loaded, each is multiplied by
; the constant 10 (mul nsw), and the products are stored to three adjacent i32
; slots of %dst.
; The assertions below use the shared CHECK prefix, so both run configurations
; (-slp-vectorize-non-power-of-2 enabled and =false) expect the same output:
; all three lanes remain scalar.
5 define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
6 ; CHECK-LABEL: @v3_load_i32_mul_by_constant_store(
8 ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
9 ; CHECK-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
10 ; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
11 ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
12 ; CHECK-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
13 ; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
14 ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
15 ; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
16 ; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
17 ; CHECK-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
18 ; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
19 ; CHECK-NEXT: store i32 [[MUL_1]], ptr [[DST_1]], align 4
20 ; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
21 ; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
22 ; CHECK-NEXT: ret void
; Lane 0: load src[0] and multiply by 10.
25 %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
26 %l.src.0 = load i32, ptr %gep.src.0, align 4
27 %mul.0 = mul nsw i32 %l.src.0, 10
; Lane 1: load src[1] and multiply by 10.
29 %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
30 %l.src.1 = load i32, ptr %gep.src.1, align 4
31 %mul.1 = mul nsw i32 %l.src.1, 10
; Lane 2: load src[2] and multiply by 10.
33 %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
34 %l.src.2 = load i32, ptr %gep.src.2, align 4
35 %mul.2 = mul nsw i32 %l.src.2, 10
; Store the three products to dst[0..2]. The input stores carry no explicit
; alignment; the expected output above shows them printed with align 4.
37 store i32 %mul.0, ptr %dst
39 %dst.1 = getelementptr i32, ptr %dst, i32 1
40 store i32 %mul.1, ptr %dst.1
42 %dst.2 = getelementptr i32, ptr %dst, i32 2
43 store i32 %mul.2, ptr %dst.2
; Test: for each of three adjacent indices, one i32 is loaded from %src.1 and
; one from %src.2, the two are multiplied (mul nsw), and the product is stored
; to the matching i32 slot of %dst.
; The assertions use the shared CHECK prefix, so both run configurations expect
; the same output: all three lanes remain scalar.
48 define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) {
49 ; CHECK-LABEL: @v3_load_i32_mul_store(
51 ; CHECK-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
52 ; CHECK-NEXT: [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4
53 ; CHECK-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
54 ; CHECK-NEXT: [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4
55 ; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]]
56 ; CHECK-NEXT: [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1
57 ; CHECK-NEXT: [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4
58 ; CHECK-NEXT: [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1
59 ; CHECK-NEXT: [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4
60 ; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]]
61 ; CHECK-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
62 ; CHECK-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
63 ; CHECK-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
64 ; CHECK-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
65 ; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
66 ; CHECK-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
67 ; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
68 ; CHECK-NEXT: store i32 [[MUL_1]], ptr [[DST_1]], align 4
69 ; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
70 ; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
71 ; CHECK-NEXT: ret void
; Lane 0: src.1[0] * src.2[0].
74 %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
75 %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
76 %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
77 %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
78 %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
; Lane 1: src.1[1] * src.2[1].
80 %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
81 %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
82 %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
83 %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
84 %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
; Lane 2: src.1[2] * src.2[2].
86 %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
87 %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
88 %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
89 %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
90 %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
; Store the three products to dst[0..2]. The input stores carry no explicit
; alignment; the expected output above shows them printed with align 4.
92 store i32 %mul.0, ptr %dst
94 %dst.1 = getelementptr i32, ptr %dst, i32 1
95 store i32 %mul.1, ptr %dst.1
97 %dst.2 = getelementptr i32, ptr %dst, i32 2
98 store i32 %mul.2, ptr %dst.2
; Test: for each of three adjacent indices, one i32 is loaded from %src.1 and
; one from %src.2, the two are multiplied (mul nsw), the constant 9 is added
; (plain add, no wrap flags), and the sum is stored to the matching i32 slot of
; %dst.
; The assertions use the shared CHECK prefix, so both run configurations expect
; the same output: all three lanes remain scalar.
103 define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
104 ; CHECK-LABEL: @v3_load_i32_mul_add_const_store(
106 ; CHECK-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
107 ; CHECK-NEXT: [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4
108 ; CHECK-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
109 ; CHECK-NEXT: [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4
110 ; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]]
111 ; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], 9
112 ; CHECK-NEXT: [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1
113 ; CHECK-NEXT: [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4
114 ; CHECK-NEXT: [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1
115 ; CHECK-NEXT: [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4
116 ; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]]
117 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_1]], 9
118 ; CHECK-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
119 ; CHECK-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
120 ; CHECK-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
121 ; CHECK-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
122 ; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
123 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], 9
124 ; CHECK-NEXT: store i32 [[ADD_0]], ptr [[DST:%.*]], align 4
125 ; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
126 ; CHECK-NEXT: store i32 [[ADD_1]], ptr [[DST_1]], align 4
127 ; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
128 ; CHECK-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4
129 ; CHECK-NEXT: ret void
; Lane 0: src.1[0] * src.2[0] + 9.
132 %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
133 %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
134 %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
135 %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
136 %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
137 %add.0 = add i32 %mul.0, 9
; Lane 1: src.1[1] * src.2[1] + 9.
139 %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
140 %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
141 %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
142 %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
143 %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
144 %add.1 = add i32 %mul.1, 9
; Lane 2: src.1[2] * src.2[2] + 9.
146 %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
147 %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
148 %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
149 %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
150 %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
151 %add.2 = add i32 %mul.2, 9
; Store the three sums to dst[0..2]. The input stores carry no explicit
; alignment; the expected output above shows them printed with align 4.
153 store i32 %add.0, ptr %dst
155 %dst.1 = getelementptr i32, ptr %dst, i32 1
156 store i32 %add.1, ptr %dst.1
158 %dst.2 = getelementptr i32, ptr %dst, i32 2
159 store i32 %add.2, ptr %dst.2
; Test: three adjacent float elements of %src are loaded, the constant 10.0 is
; added to each (fadd), and the sums are stored to three adjacent float slots
; of %dst.
; This is the one function here whose expectations differ per configuration:
;  - under the NON-POW2 prefix the three lanes become a single <3 x float>
;    load, fadd and store;
;  - under the POW2-ONLY prefix lanes 0 and 1 become a <2 x float> load, fadd
;    and store, while lane 2 stays a scalar load, fadd and store.
164 define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
165 ; NON-POW2-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
166 ; NON-POW2-NEXT: entry:
167 ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
168 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
169 ; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>
170 ; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
171 ; NON-POW2-NEXT: ret void
173 ; POW2-ONLY-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
174 ; POW2-ONLY-NEXT: entry:
175 ; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
176 ; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
177 ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
178 ; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
179 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
180 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01>
181 ; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
182 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
183 ; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4
184 ; POW2-ONLY-NEXT: ret void
; Lane 0: src[0] + 10.0.
187 %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
188 %l.src.0 = load float , ptr %gep.src.0, align 4
189 %fadd.0 = fadd float %l.src.0, 10.0
; Lane 1: src[1] + 10.0.
191 %gep.src.1 = getelementptr inbounds float , ptr %src, i32 1
192 %l.src.1 = load float, ptr %gep.src.1, align 4
193 %fadd.1 = fadd float %l.src.1, 10.0
; Lane 2: src[2] + 10.0.
195 %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
196 %l.src.2 = load float, ptr %gep.src.2, align 4
197 %fadd.2 = fadd float %l.src.2, 10.0
; Store the three sums to dst[0..2]. The input stores carry no explicit
; alignment; the expected output above shows them printed with align 4.
199 store float %fadd.0, ptr %dst
201 %dst.1 = getelementptr float, ptr %dst, i32 1
202 store float %fadd.1, ptr %dst.1
204 %dst.2 = getelementptr float, ptr %dst, i32 2
205 store float %fadd.2, ptr %dst.2
; Test: three i32 phis are stored to dst[0], dst[1] and dst[2]. Each phi takes
; a distinct constant (1, 2, 3) from %entry and 0 from %invoke.cont8.loopexit,
; a block the input itself annotates as having no predecessors.
; The assertions use the shared CHECK prefix, so both run configurations expect
; the same output: %p.0 and %p.1 are merged into one <2 x i32> phi (incoming
; <1, 2> from entry, poison from invoke.cont8.loopexit) that is stored to %dst
; as a vector, while %p.2 stays a scalar phi stored to dst[2].
210 define void @phi_store3(ptr %dst) {
211 ; CHECK-LABEL: @phi_store3(
213 ; CHECK-NEXT: br label [[EXIT:%.*]]
214 ; CHECK: invoke.cont8.loopexit:
215 ; CHECK-NEXT: br label [[EXIT]]
217 ; CHECK-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
218 ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ]
219 ; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2
220 ; CHECK-NEXT: store <2 x i32> [[TMP0]], ptr [[DST]], align 4
221 ; CHECK-NEXT: store i32 [[P_2]], ptr [[DST_2]], align 4
222 ; CHECK-NEXT: ret void
227 invoke.cont8.loopexit: ; No predecessors!
; One phi per stored element; only the %entry incoming values differ.
231 %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ]
232 %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ]
233 %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ]
; Addresses of dst[1] and dst[2]; dst[0] is %dst itself.
235 %dst.1 = getelementptr i32, ptr %dst, i32 1
236 %dst.2 = getelementptr i32, ptr %dst, i32 2
; Three adjacent scalar stores of the phi values.
238 store i32 %p.0, ptr %dst, align 4
239 store i32 %p.1, ptr %dst.1, align 4
240 store i32 %p.2, ptr %dst.2, align 4
; Test: three adjacent i32 stores to dst[0], dst[1] and dst[2]. dst[0] receives
; %add (shown as `add i32 0, 0` in the expected output), while dst[1] and
; dst[2] each receive the result of a separate `sub i32 0, 0`.
; The assertions use the shared CHECK prefix, so both run configurations expect
; the same output: the %add store to dst[0] stays scalar, and the two sub
; results are replaced by one <2 x i32> zeroinitializer store starting at
; dst[1].
244 define void @store_try_reorder(ptr %dst) {
245 ; CHECK-LABEL: @store_try_reorder(
247 ; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0
248 ; CHECK-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
249 ; CHECK-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
250 ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
251 ; CHECK-NEXT: ret void
; dst[0] <- %add.
255 store i32 %add, ptr %dst, align 4
; dst[1] <- 0 - 0. Despite the "add" in its name, %add207 is a sub.
256 %add207 = sub i32 0, 0
257 %arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1
258 store i32 %add207, ptr %arrayidx.i1887, align 4
; dst[2] <- 0 - 0. Despite the "add" in its name, %add216 is a sub.
259 %add216 = sub i32 0, 0
260 %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2
261 store i32 %add216, ptr %arrayidx.i1891, align 4
; Test: the same scalar chain is repeated three times on the float argument %0:
; fpext to double, llvm.fmuladd.f64 with 0.0 as multiplier and addend, fptrunc
; back to float. The three results are stored to Colour[0], Colour[1] and
; Colour[2].
; The assertions use the shared CHECK prefix, so both run configurations expect
; the same output: the first two chains become <2 x ...> operations fed by a
; broadcast of %0 (insertelement + shufflevector) and stored as <2 x float> to
; %Colour, while the third chain stays scalar and is stored to Colour[2].
265 define void @vec3_fpext_cost(ptr %Colour, float %0) {
266 ; CHECK-LABEL: @vec3_fpext_cost(
268 ; CHECK-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2
269 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0
270 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
271 ; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
272 ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
273 ; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
274 ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4
275 ; CHECK-NEXT: [[CONV78:%.*]] = fpext float [[TMP0]] to double
276 ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00)
277 ; CHECK-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP6]] to float
278 ; CHECK-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4
279 ; CHECK-NEXT: ret void
; Addresses of Colour[1] and Colour[2]; Colour[0] is %Colour itself.
282 %arrayidx72 = getelementptr float, ptr %Colour, i64 1
283 %arrayidx80 = getelementptr float, ptr %Colour, i64 2
; Chain 0: fpext -> fmuladd -> fptrunc, stored to Colour[0].
284 %conv62 = fpext float %0 to double
285 %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00)
286 %conv66 = fptrunc double %1 to float
287 store float %conv66, ptr %Colour, align 4
; Chain 1: identical computation, stored to Colour[1].
288 %conv70 = fpext float %0 to double
289 %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00)
290 %conv74 = fptrunc double %2 to float
291 store float %conv74, ptr %arrayidx72, align 4
; Chain 2: identical computation, stored to Colour[2].
292 %conv78 = fpext float %0 to double
293 %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00)
294 %conv82 = fptrunc double %3 to float
295 store float %conv82, ptr %arrayidx80, align 4
; Test: the double argument %conv is truncated to float twice. The first
; result (%conv25) is stored to both dst[0] and dst[1]; the second (%conv34) is
; stored to dst[2].
; The assertions use the shared CHECK prefix, so both run configurations expect
; the same output: %conv is broadcast into a <2 x double>, truncated once as a
; vector, element 0 is extracted for the scalar store to dst[0], and the whole
; <2 x float> is stored starting at dst[1].
; NOTE(review): the function name says "fpext" but the visible body only
; contains fptrunc -- confirm the name is intentional.
299 define void @fpext_gather(ptr %dst, double %conv) {
300 ; CHECK-LABEL: @fpext_gather(
302 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV:%.*]], i32 0
303 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
304 ; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
305 ; CHECK-NEXT: [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0
306 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
307 ; CHECK-NEXT: store float [[TMP3]], ptr [[LENGTHS]], align 4
308 ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1
309 ; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[ARRAYIDX32]], align 4
310 ; CHECK-NEXT: ret void
; First truncation; its result is reused for dst[0] and dst[1].
313 %conv25 = fptrunc double %conv to float
314 %Lengths = getelementptr float, ptr %dst, i64 0
315 store float %conv25, ptr %Lengths, align 4
316 %arrayidx32 = getelementptr float, ptr %dst, i64 1
317 store float %conv25, ptr %arrayidx32, align 4
; Second truncation of the same input, stored to dst[2].
318 %conv34 = fptrunc double %conv to float
319 %arrayidx37 = getelementptr float, ptr %dst, i64 2
320 store float %conv34, ptr %arrayidx37, align 4
; Declarations of the llvm.fmuladd intrinsic for float and double operands.
; The f64 variant is called by @vec3_fpext_cost.
; NOTE(review): no call to the f32 variant is visible among the functions
; above -- confirm whether this declaration is still needed.
324 declare float @llvm.fmuladd.f32(float, float, float)
326 declare double @llvm.fmuladd.f64(double, double, double)