1 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
2 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
3 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK
; Loop under test: walks i64 elements of %a and, whenever an element equals 3,
; replaces the running value %rdx (initially 3) with the current index %iv.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
5 define i64 @select_icmp_const_1(ptr nocapture readonly %a, i64 %n) {
6 ; CHECK-LABEL: define i64 @select_icmp_const_1
7 ; CHECK-NOT: vector.body:
12 for.body: ; preds = %entry, %for.body
13 %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
14 %rdx = phi i64 [ %cond, %for.body ], [ 3, %entry ]
15 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
16 %0 = load i64, ptr %arrayidx, align 8
17 %cmp2 = icmp eq i64 %0, 3
; True arm is the induction variable; false arm keeps the previous %rdx.
18 %cond = select i1 %cmp2, i64 %iv, i64 %rdx
19 %inc = add nuw nsw i64 %iv, 1
; The loop is left once the incremented index equals %n.
20 %exitcond.not = icmp eq i64 %inc, %n
21 br i1 %exitcond.not, label %exit, label %for.body
23 exit: ; preds = %for.body
; Variant of the constant-compare loop with the select arms swapped: the
; running value %rdx (initially 3) is kept when the loaded element equals 3 and
; is overwritten with the index %iv otherwise.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
27 define i64 @select_icmp_const_2(ptr nocapture readonly %a, i64 %n) {
28 ; CHECK-LABEL: define i64 @select_icmp_const_2
29 ; CHECK-NOT: vector.body:
34 for.body: ; preds = %entry, %for.body
35 %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
36 %rdx = phi i64 [ %cond, %for.body ], [ 3, %entry ]
37 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
38 %0 = load i64, ptr %arrayidx, align 8
39 %cmp2 = icmp eq i64 %0, 3
; Here the induction variable sits in the FALSE arm of the select.
40 %cond = select i1 %cmp2, i64 %rdx, i64 %iv
41 %inc = add nuw nsw i64 %iv, 1
42 %exitcond.not = icmp eq i64 %inc, %n
43 br i1 %exitcond.not, label %exit, label %for.body
45 exit: ; preds = %for.body
; Constant-compare loop whose running value starts from the %rdx.start
; argument instead of a literal: %rdx is replaced by the index %iv whenever the
; loaded i64 element equals 3.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
49 define i64 @select_icmp_const_3_variable_rdx_start(ptr nocapture readonly %a, i64 %rdx.start, i64 %n) {
50 ; CHECK-LABEL: define i64 @select_icmp_const_3_variable_rdx_start
51 ; CHECK-NOT: vector.body:
56 for.body: ; preds = %entry, %for.body
57 %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
; The initial running value is not a compile-time constant in this variant.
58 %rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
59 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
60 %0 = load i64, ptr %arrayidx, align 8
61 %cmp2 = icmp eq i64 %0, 3
62 %cond = select i1 %cmp2, i64 %iv, i64 %rdx
63 %inc = add nuw nsw i64 %iv, 1
64 %exitcond.not = icmp eq i64 %inc, %n
65 br i1 %exitcond.not, label %exit, label %for.body
67 exit: ; preds = %for.body
; Floating-point variant: loads float elements of %a and compares each against
; 3.0 with a `fast`-flagged `fcmp ueq`; on a true result the running value %rdx
; (initially 2) is replaced by the index %iv.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
71 define i64 @select_fcmp_const_fast(ptr nocapture readonly %a, i64 %n) {
72 ; CHECK-LABEL: define i64 @select_fcmp_const_fast
73 ; CHECK-NOT: vector.body:
78 for.body: ; preds = %entry, %for.body
79 %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
80 %rdx = phi i64 [ %cond, %for.body ], [ 2, %entry ]
81 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
82 %0 = load float, ptr %arrayidx, align 4
; The compare carries fast-math flags; the sibling test select_fcmp_const uses
; the same predicate without them.
83 %cmp2 = fcmp fast ueq float %0, 3.0
84 %cond = select i1 %cmp2, i64 %iv, i64 %rdx
85 %inc = add nuw nsw i64 %iv, 1
86 %exitcond.not = icmp eq i64 %inc, %n
87 br i1 %exitcond.not, label %exit, label %for.body
89 exit: ; preds = %for.body
; Floating-point variant without fast-math flags: loads float elements of %a
; and compares each against 3.0 with `fcmp ueq`; on a true result the running
; value %rdx (initially 2) is replaced by the index %iv.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
93 define i64 @select_fcmp_const(ptr nocapture readonly %a, i64 %n) {
94 ; CHECK-LABEL: define i64 @select_fcmp_const
95 ; CHECK-NOT: vector.body:
100 for.body: ; preds = %entry, %for.body
101 %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
102 %rdx = phi i64 [ %cond, %for.body ], [ 2, %entry ]
103 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
104 %0 = load float, ptr %arrayidx, align 4
; `ueq` is the unordered-or-equal predicate, so a NaN element also selects %iv.
105 %cmp2 = fcmp ueq float %0, 3.0
106 %cond = select i1 %cmp2, i64 %iv, i64 %rdx
107 %inc = add nuw nsw i64 %iv, 1
108 %exitcond.not = icmp eq i64 %inc, %n
109 br i1 %exitcond.not, label %exit, label %for.body
111 exit: ; preds = %for.body
; Two-array variant: for each index %iv the i64 elements of %a and %b are
; compared with a signed greater-than; when %a's element is larger, the running
; value %rdx (initially %rdx.start) is replaced by %iv.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
115 define i64 @select_icmp(ptr nocapture readonly %a, ptr nocapture readonly %b, i64 %rdx.start, i64 %n) {
116 ; CHECK-LABEL: define i64 @select_icmp
117 ; CHECK-NOT: vector.body:
122 for.body: ; preds = %entry, %for.body
123 %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
124 %rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
125 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
126 %0 = load i64, ptr %arrayidx, align 8
127 %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv
128 %1 = load i64, ptr %arrayidx1, align 8
; The compare operands are both loop-varying loads rather than a constant.
129 %cmp2 = icmp sgt i64 %0, %1
130 %cond = select i1 %cmp2, i64 %iv, i64 %rdx
131 %inc = add nuw nsw i64 %iv, 1
132 %exitcond.not = icmp eq i64 %inc, %n
133 br i1 %exitcond.not, label %exit, label %for.body
135 exit: ; preds = %for.body
; Two-array floating-point variant: for each index %iv the float elements of
; %a and %b are compared with `fcmp ogt`; when the compare is true, the running
; value %rdx (initially %rdx.start) is replaced by %iv.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
139 define i64 @select_fcmp(ptr nocapture readonly %a, ptr nocapture readonly %b, i64 %rdx.start, i64 %n) {
140 ; CHECK-LABEL: define i64 @select_fcmp
141 ; CHECK-NOT: vector.body:
146 for.body: ; preds = %entry, %for.body
147 %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
148 %rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
149 %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
150 %0 = load float, ptr %arrayidx, align 4
151 %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %iv
152 %1 = load float, ptr %arrayidx1, align 4
; `ogt` is the ordered greater-than predicate (false if either operand is NaN).
153 %cmp2 = fcmp ogt float %0, %1
154 %cond = select i1 %cmp2, i64 %iv, i64 %rdx
155 %inc = add nuw nsw i64 %iv, 1
156 %exitcond.not = icmp eq i64 %inc, %n
157 br i1 %exitcond.not, label %exit, label %for.body
159 exit: ; preds = %for.body
; Two-induction variant: %iv.i (from 0) indexes %a and %b and controls the
; exit, while a second induction %iv.j, starting at -9223372036854775807
; (the signed i64 minimum plus one), is the value selected into %rdx.
; NOTE(review): per the test name this is presumably the smallest start value
; that still leaves the signed minimum free for use as a sentinel - confirm
; against the vectorizer's recurrence analysis.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
163 define i64 @select_icmp_min_valid_iv_start(ptr nocapture readonly %a, ptr nocapture readonly %b, i64 %rdx.start, i64 %n) {
164 ; CHECK-LABEL: define i64 @select_icmp_min_valid_iv_start
165 ; CHECK-NOT: vector.body:
170 for.body: ; preds = %entry, %for.body
171 %iv.j = phi i64 [ %inc3, %for.body ], [ -9223372036854775807, %entry]
172 %iv.i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
173 %rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
174 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv.i
175 %0 = load i64, ptr %arrayidx, align 8
176 %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv.i
177 %1 = load i64, ptr %arrayidx1, align 8
178 %cmp2 = icmp sgt i64 %0, %1
; The selected value is %iv.j, not the indexing induction %iv.i.
179 %cond = select i1 %cmp2, i64 %iv.j, i64 %rdx
180 %inc = add nuw nsw i64 %iv.i, 1
; %iv.j advances by one per iteration; its add is flagged nsw only.
181 %inc3 = add nsw i64 %iv.j, 1
182 %exitcond.not = icmp eq i64 %inc, %n
183 br i1 %exitcond.not, label %exit, label %for.body
185 exit: ; preds = %for.body
191 ; This test can theoretically be vectorized without a runtime-check, by
192 ; pattern-matching on the constructs that are introduced by IndVarSimplify.
193 ; We can check two things:
194 ; %1 = trunc i64 %iv to i32
195 ; This indicates that the %iv is truncated to i32. We can then check the loop
196 ; guard is a signed i32:
197 ; %cmp.sgt = icmp sgt i32 %n, 0
198 ; and successfully vectorize the case without a runtime-check.
;
; Shape of the IR below: an i64 induction %iv indexes i64 elements of %a; its
; i32 truncation is selected into the i32 running value %rdx (initially 331)
; whenever the loaded element is signed-greater than 3. The trip count is the
; zero-extension of the i32 argument %n, and the loop is guarded by %n > 0.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
199 define i32 @not_vectorized_select_icmp_const_truncated_iv_widened_exit(ptr nocapture readonly %a, i32 %n) {
200 ; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_widened_exit
201 ; CHECK-NOT: vector.body:
204 %cmp.sgt = icmp sgt i32 %n, 0
205 br i1 %cmp.sgt, label %for.body.preheader, label %exit
207 for.body.preheader: ; preds = %entry
208 %wide.trip.count = zext i32 %n to i64
211 for.body: ; preds = %for.body.preheader, %for.body
212 %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
213 %rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ]
214 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
215 %0 = load i64, ptr %arrayidx, align 8
216 %cmp = icmp sgt i64 %0, 3
; The selected value is the i32 truncation of the i64 induction variable.
217 %1 = trunc i64 %iv to i32
218 %spec.select = select i1 %cmp, i32 %1, i32 %rdx
219 %inc = add nuw nsw i64 %iv, 1
220 %exitcond.not = icmp eq i64 %inc, %wide.trip.count
221 br i1 %exitcond.not, label %exit, label %for.body
223 exit: ; preds = %for.body, %entry
; Merges the untouched start value 331 (guard failed) with the loop result.
224 %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
228 ; This test can theoretically be vectorized without a runtime-check, by
229 ; pattern-matching on the constructs that are introduced by IndVarSimplify.
230 ; We can check two things:
231 ; %1 = trunc i64 %iv to i32
232 ; This indicates that the %iv is truncated to i32. We can then check the loop
233 ; exit condition, which compares to a constant that fits within i32:
234 ; %exitcond.not = icmp eq i64 %inc, 20000
235 ; and successfully vectorize the case without a runtime-check.
;
; Shape of the IR below: an i64 induction %iv indexes i64 elements of %a; its
; i32 truncation is selected into the i32 running value %rdx (initially 331)
; whenever the loaded element is signed-greater than 3. The loop exits when
; the incremented index reaches the constant 20000.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
236 define i32 @not_vectorized_select_icmp_const_truncated_iv_const_exit(ptr nocapture readonly %a) {
237 ; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_const_exit
238 ; CHECK-NOT: vector.body:
243 for.body: ; preds = %entry, %for.body
244 %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
245 %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
246 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
247 %0 = load i64, ptr %arrayidx, align 8
248 %cmp = icmp sgt i64 %0, 3
; The selected value is the i32 truncation of the i64 induction variable.
249 %1 = trunc i64 %iv to i32
250 %spec.select = select i1 %cmp, i32 %1, i32 %rdx
251 %inc = add nuw nsw i64 %iv, 1
; Constant trip count; 20000 is representable in i32.
252 %exitcond.not = icmp eq i64 %inc, 20000
253 br i1 %exitcond.not, label %exit, label %for.body
255 exit: ; preds = %for.body
259 ; This test can theoretically be vectorized, but only with a runtime-check.
260 ; The construct that is introduced by IndVarSimplify is:
261 ; %1 = trunc i64 %iv to i32
262 ; However, the loop guard is an i64:
263 ; %cmp.sgt = icmp sgt i64 %n, 0
264 ; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
265 ; sentinel value), and need a runtime-check to vectorize this case.
;
; Shape of the IR below: an i64 induction %iv indexes i32 elements of %a; its
; i32 truncation is selected into the i32 running value %rdx (initially 331)
; whenever the loaded element is signed-greater than 3. Both the guard and the
; exit compare operate directly on the i64 argument %n.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
266 define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr nocapture readonly %a, i64 %n) {
267 ; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit
268 ; CHECK-NOT: vector.body:
271 %cmp.sgt = icmp sgt i64 %n, 0
272 br i1 %cmp.sgt, label %for.body, label %exit
274 for.body: ; preds = %entry, %for.body
275 %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
276 %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
277 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
278 %0 = load i32, ptr %arrayidx, align 4
279 %cmp = icmp sgt i32 %0, 3
; The selected value is the i32 truncation of the i64 induction variable.
280 %1 = trunc i64 %iv to i32
281 %spec.select = select i1 %cmp, i32 %1, i32 %rdx
282 %inc = add nuw nsw i64 %iv, 1
; The bound is the full-width i64 %n, with no zext from a narrower type.
283 %exitcond.not = icmp eq i64 %inc, %n
284 br i1 %exitcond.not, label %exit, label %for.body
286 exit: ; preds = %for.body, %entry
; Merges the untouched start value 331 (guard failed) with the loop result.
287 %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
291 ; This test can theoretically be vectorized, but only with a runtime-check.
292 ; The construct that is introduced by IndVarSimplify is:
293 ; %1 = trunc i64 %iv to i32
294 ; However, the loop guard is unsigned:
295 ; %cmp.not = icmp eq i32 %n, 0
296 ; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
297 ; sentinel value), and need a runtime-check to vectorize this case.
;
; Shape of the IR below: an i64 induction %iv indexes i32 elements of %a; its
; i32 truncation is selected into the i32 running value %rdx (initially 331)
; whenever the loaded element is signed-greater than 3. The guard only rules
; out %n == 0, and the trip count is the zero-extension of %n, so it can be any
; non-zero 32-bit unsigned value.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
298 define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(ptr nocapture readonly %a, i32 %n) {
299 ; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard
300 ; CHECK-NOT: vector.body:
303 %cmp.not = icmp eq i32 %n, 0
304 br i1 %cmp.not, label %exit, label %for.body.preheader
306 for.body.preheader: ; preds = %entry
307 %wide.trip.count = zext i32 %n to i64
310 for.body: ; preds = %for.body.preheader, %for.body
311 %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
312 %rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ]
313 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
314 %0 = load i32, ptr %arrayidx, align 4
315 %cmp1 = icmp sgt i32 %0, 3
; The selected value is the i32 truncation of the i64 induction variable.
316 %1 = trunc i64 %iv to i32
317 %spec.select = select i1 %cmp1, i32 %1, i32 %rdx
318 %inc = add nuw nsw i64 %iv, 1
319 %exitcond.not = icmp eq i64 %inc, %wide.trip.count
320 br i1 %exitcond.not, label %exit, label %for.body
322 exit: ; preds = %for.body, %entry
; Merges the untouched start value 331 (guard taken) with the loop result.
323 %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
327 ; This test cannot be vectorized, even with a runtime check.
328 ; The construct that is introduced by IndVarSimplify is:
329 ; %1 = trunc i64 %iv to i32
330 ; However, the loop exit condition is a constant that overflows i32:
331 ; %exitcond.not = icmp eq i64 %inc, 4294967294
332 ; Hence, the i32 will most certainly wrap and hit the sentinel value, and we
333 ; cannot vectorize this case.
;
; Shape of the IR below: the i64 induction %iv starts at 2147483646 and runs
; until its increment equals 4294967294, indexing i32 elements of %a; its i32
; truncation is selected into the i32 running value %rdx (initially 331)
; whenever the loaded element is signed-greater than 3.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
334 define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr nocapture readonly %a) {
335 ; CHECK-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound
336 ; CHECK-NOT: vector.body:
341 for.body: ; preds = %entry, %for.body
; Start value is two below 2^31, so the range crosses the signed i32 maximum.
342 %iv = phi i64 [ 2147483646, %entry ], [ %inc, %for.body ]
343 %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
344 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
345 %0 = load i32, ptr %arrayidx, align 4
346 %cmp = icmp sgt i32 %0, 3
347 %conv = trunc i64 %iv to i32
348 %spec.select = select i1 %cmp, i32 %conv, i32 %rdx
349 %inc = add nuw nsw i64 %iv, 1
350 %exitcond.not = icmp eq i64 %inc, 4294967294
351 br i1 %exitcond.not, label %exit, label %for.body
353 exit: ; preds = %for.body
; Float-induction variant: alongside the integer index %iv, a float induction
; %fiv starts at 0.0 and grows by 1.0 per iteration; %fiv is selected into the
; float running value %rdx (initially %rdx.start) whenever the i64 element of
; %a is signed-greater than the matching element of %b.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
357 define float @not_vectorized_select_float_induction_icmp(ptr nocapture readonly %a, ptr nocapture readonly %b, float %rdx.start, i64 %n) {
358 ; CHECK-LABEL: @not_vectorized_select_float_induction_icmp
359 ; CHECK-NOT: vector.body:
364 for.body: ; preds = %entry, %for.body
365 %iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
366 %fiv = phi float [ %conv3, %for.body ], [ 0.000000e+00, %entry ]
367 %rdx = phi float [ %cond, %for.body ], [ %rdx.start, %entry ]
368 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
369 %0 = load i64, ptr %arrayidx, align 8
370 %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv
371 %1 = load i64, ptr %arrayidx1, align 8
372 %cmp2 = icmp sgt i64 %0, %1
; The selected value is the floating-point induction, not the integer index.
373 %cond = select i1 %cmp2, float %fiv, float %rdx
; Float induction step; the fadd carries no fast-math flags.
374 %conv3 = fadd float %fiv, 1.000000e+00
375 %inc = add nuw nsw i64 %iv, 1
376 %exitcond.not = icmp eq i64 %inc, %n
377 br i1 %exitcond.not, label %exit, label %for.body
379 exit: ; preds = %for.body
; Decreasing-induction variant with a constant start: %iv counts down from
; 19999 by one per iteration and the loop exits after the iteration in which
; %iv is 0. %iv is selected into the running value %rdx (initially 331)
; whenever the loaded i64 element of %a is signed-greater than 3.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
383 define i64 @not_vectorized_select_decreasing_induction_icmp_const_start(ptr nocapture readonly %a) {
384 ; CHECK-LABEL: @not_vectorized_select_decreasing_induction_icmp_const_start
385 ; CHECK-NOT: vector.body:
390 for.body: ; preds = %entry, %for.body
391 %iv = phi i64 [ 19999, %entry ], [ %dec, %for.body ]
392 %rdx = phi i64 [ 331, %entry ], [ %spec.select, %for.body ]
393 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
394 %0 = load i64, ptr %arrayidx, align 8
395 %cmp = icmp sgt i64 %0, 3
396 %spec.select = select i1 %cmp, i64 %iv, i64 %rdx
; Step is -1, so the selected induction variable decreases.
397 %dec = add nsw i64 %iv, -1
; The exit test uses the current (pre-decrement) %iv, so index 0 is processed.
398 %cmp.not = icmp eq i64 %iv, 0
399 br i1 %cmp.not, label %exit, label %for.body
401 exit: ; preds = %for.body
; Decreasing-induction variant with a non-constant start: the counter
; %i.0.in10 starts at %n, and %iv is that counter minus one. %iv indexes the
; i64 elements of %a and %b and is selected into the running value %rdx
; (initially %rdx.start) whenever %a's element is signed-greater than %b's.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
405 define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr nocapture readonly %a, ptr nocapture readonly %b, i64 %rdx.start, i64 %n) {
406 ; CHECK-LABEL: @not_vectorized_select_decreasing_induction_icmp_non_const_start
407 ; CHECK-NOT: vector.body:
412 for.body: ; preds = %entry, %for.body
; The phi carries the previous %iv around the back edge, starting from %n.
413 %i.0.in10 = phi i64 [ %iv, %for.body ], [ %n, %entry ]
414 %rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
415 %iv = add nsw i64 %i.0.in10, -1
416 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
417 %0 = load i64, ptr %arrayidx, align 8
418 %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv
419 %1 = load i64, ptr %arrayidx1, align 8
420 %cmp2 = icmp sgt i64 %0, %1
421 %cond = select i1 %cmp2, i64 %iv, i64 %rdx
; Continue while the pre-decrement counter is unsigned-greater than 1.
422 %cmp = icmp ugt i64 %i.0.in10, 1
423 br i1 %cmp, label %for.body, label %exit
425 exit: ; preds = %for.body
; Two-induction variant in which the selected induction %iv.j starts at
; -9223372036854775808, the minimum signed i64 value. %iv.i (from 0) indexes
; %a and %b and controls the exit; %iv.j is selected into the running value
; %rdx (initially %rdx.start) whenever %a's element is signed-greater than
; %b's.
; NOTE(review): presumably the signed-minimum start collides with the value
; the vectorizer would reserve as a sentinel, in contrast to
; select_icmp_min_valid_iv_start, which starts one higher - confirm.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
429 define i64 @not_vectorized_select_icmp_iv_out_of_bound(ptr nocapture readonly %a, ptr nocapture readonly %b, i64 %rdx.start, i64 %n) {
430 ; CHECK-LABEL: @not_vectorized_select_icmp_iv_out_of_bound
431 ; CHECK-NOT: vector.body:
436 for.body: ; preds = %entry, %for.body
437 %iv.j = phi i64 [ %inc3, %for.body ], [ -9223372036854775808, %entry]
438 %iv.i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
439 %rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
440 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv.i
441 %0 = load i64, ptr %arrayidx, align 8
442 %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv.i
443 %1 = load i64, ptr %arrayidx1, align 8
444 %cmp2 = icmp sgt i64 %0, %1
; The selected value is %iv.j, not the indexing induction %iv.i.
445 %cond = select i1 %cmp2, i64 %iv.j, i64 %rdx
446 %inc = add nuw nsw i64 %iv.i, 1
447 %inc3 = add nsw i64 %iv.j, 1
448 %exitcond.not = icmp eq i64 %inc, %n
449 br i1 %exitcond.not, label %exit, label %for.body
451 exit: ; preds = %for.body
; Variant whose induction variable starts at the %ivstart argument rather than
; a constant. %iv indexes the i64 elements of %a and %b and is selected into
; the running value %rdx (initially %rdx.start) whenever %a's element is
; signed-greater than %b's; the loop exits when the incremented %iv equals %n.
; The negative FileCheck directive below requires that no vector.body label
; appears in the output for this function.
455 define i64 @not_vectorized_select_icmp_non_const_iv_start_value(ptr nocapture readonly %a, ptr nocapture readonly %b, i64 %ivstart, i64 %rdx.start, i64 %n) {
456 ; CHECK-LABEL: define i64 @not_vectorized_select_icmp_non_const_iv_start_value
457 ; CHECK-NOT: vector.body:
462 for.body: ; preds = %entry, %for.body
; The start of the selected induction is unknown at compile time.
463 %iv = phi i64 [ %inc, %for.body ], [ %ivstart, %entry ]
464 %rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
465 %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
466 %0 = load i64, ptr %arrayidx, align 8
467 %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv
468 %1 = load i64, ptr %arrayidx1, align 8
469 %cmp2 = icmp sgt i64 %0, %1
470 %cond = select i1 %cmp2, i64 %iv, i64 %rdx
471 %inc = add nuw nsw i64 %iv, 1
472 %exitcond.not = icmp eq i64 %inc, %n
473 br i1 %exitcond.not, label %exit, label %for.body
475 exit: ; preds = %for.body