1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
3 ; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 -disable-output 2>&1 | FileCheck %s
; External helper that is only declared in this file. The tests below call it
; on their local buffers (pointer plus an i64 argument) ahead of the loop code.
5 declare void @init_mem(ptr, i64);
7 ; == SOME LEGAL EXAMPLES ==
9 ; The form of the induction variables requires SCEV predicates.
; The latch counter %ind is an i8 that is zero-extended to i32 before being
; compared (ult) against %end.clamped, i.e. %end masked with 1023. The address
; index %gep.ind is a separate i64 induction variable. The early exit branches
; to %found when the two loaded i32 values are equal.
; Expected debug output: an early exit loop with the symbolic max backedge
; taken count shown below, reported as vectorizable, and no later
; "Not vectorizing:" message.
10 define i32 @diff_exit_block_needs_scev_check(i32 %end) {
11 ; CHECK-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check'
12 ; CHECK: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32)))<nsw>
13 ; CHECK-NEXT: LV: We can vectorize this loop!
14 ; CHECK-NOT: LV: Not vectorizing:
16 %p1 = alloca [1024 x i32]
17 %p2 = alloca [1024 x i32]
18 call void @init_mem(ptr %p1, i64 1024)
19 call void @init_mem(ptr %p2, i64 1024)
20 %end.clamped = and i32 %end, 1023
24 %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
25 %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
26 %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
27 %0 = load i32, ptr %arrayidx1, align 4
28 %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
29 %1 = load i32, ptr %arrayidx2, align 4
30 %cmp.early = icmp eq i32 %0, %1
31 br i1 %cmp.early, label %found, label %for.inc
34 %ind.next = add i8 %ind, 1
35 %conv = zext i8 %ind.next to i32
36 %gep.ind.next = add i64 %gep.ind, 1
37 %cmp = icmp ult i32 %conv, %end.clamped
38 br i1 %cmp, label %for.body, label %exit
; Baseline legal case: %index starts at 3 and the counted exit fires once
; %index.next reaches 67. The early exit leaves the loop when the i8 values
; loaded from the two 1024-byte allocas differ. Both exits target %loop.end,
; whose phi yields %index for the early exit and 67 for the counted exit.
; Expected debug output: max backedge taken count 63, reported as
; vectorizable, and no later "Not vectorizing" message.
48 define i64 @same_exit_block_pre_inc_use1() {
49 ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
50 ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
51 ; CHECK-NEXT: LV: We can vectorize this loop!
52 ; CHECK-NOT: LV: Not vectorizing
54 %p1 = alloca [1024 x i8]
55 %p2 = alloca [1024 x i8]
56 call void @init_mem(ptr %p1, i64 1024)
57 call void @init_mem(ptr %p2, i64 1024)
61 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
62 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
63 %ld1 = load i8, ptr %arrayidx, align 1
64 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
65 %ld2 = load i8, ptr %arrayidx1, align 1
66 %cmp3 = icmp eq i8 %ld1, %ld2
67 br i1 %cmp3, label %loop.inc, label %loop.end
70 %index.next = add i64 %index, 1
71 %exitcond = icmp ne i64 %index.next, 67
72 br i1 %exitcond, label %loop, label %loop.end
75 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; The early-exit condition is computed from a call to the llvm.sqrt.f32
; intrinsic on a float loaded from %p1; the result is compared (fcmp fast ult)
; against 3.0. The induction variable runs from 3 until %index.next equals 67.
; Expected debug output: max backedge taken count 63 and the loop reported as
; vectorizable.
80 define i64 @loop_contains_safe_call() {
81 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_safe_call'
82 ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
83 ; CHECK-NEXT: LV: We can vectorize this loop!
85 %p1 = alloca [1024 x i8]
86 %p2 = alloca [1024 x i8]
87 call void @init_mem(ptr %p1, i64 1024)
88 call void @init_mem(ptr %p2, i64 1024)
92 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
93 %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index
94 %ld1 = load float, ptr %arrayidx, align 1
95 %sqrt = tail call fast float @llvm.sqrt.f32(float %ld1)
96 %cmp = fcmp fast ult float %sqrt, 3.0e+00
97 br i1 %cmp, label %loop.inc, label %loop.end
100 %index.next = add i64 %index, 1
101 %exitcond = icmp ne i64 %index.next, 67
102 br i1 %exitcond, label %loop, label %loop.end
105 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; The early-exit condition depends on a udiv whose divisor is the constant
; 20000 and whose dividend is an i32 loaded from %p1. The induction variable
; runs from 3 until %index.next equals 67.
; Expected debug output: max backedge taken count 63 and the loop reported as
; vectorizable.
110 define i64 @loop_contains_safe_div() {
111 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_safe_div'
112 ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
113 ; CHECK-NEXT: LV: We can vectorize this loop!
115 %p1 = alloca [1024 x i8]
116 %p2 = alloca [1024 x i8]
117 call void @init_mem(ptr %p1, i64 1024)
118 call void @init_mem(ptr %p2, i64 1024)
122 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
123 %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
124 %ld1 = load i32, ptr %arrayidx, align 1
125 %div = udiv i32 %ld1, 20000
126 %cmp = icmp eq i32 %div, 1
127 br i1 %cmp, label %loop.inc, label %loop.end
130 %index.next = add i64 %index, 1
131 %exitcond = icmp ne i64 %index.next, 67
132 br i1 %exitcond, label %loop, label %loop.end
135 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; Besides the i32 load from %p1 that feeds the early-exit compare, the loop
; performs a second i64 load from the argument %p2 after the early-exit
; branch. %p2 is declared dereferenceable(1024) and align(8). The exit phi
; yields %index for the early exit and the loaded %ld2 for the counted exit.
; Expected debug output: max backedge taken count 63, reported as
; vectorizable, and no later "Not vectorizing" message.
140 define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) {
141 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
142 ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
143 ; CHECK-NEXT: LV: We can vectorize this loop!
144 ; CHECK-NOT: LV: Not vectorizing
146 %p1 = alloca [1024 x i8]
147 call void @init_mem(ptr %p1, i64 1024)
151 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
152 %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
153 %ld1 = load i32, ptr %arrayidx, align 1
154 %cmp = icmp eq i32 %ld1, 1
155 br i1 %cmp, label %loop.inc, label %loop.end
158 %arrayidx2 = getelementptr inbounds i64, ptr %p2, i64 %index
159 %ld2 = load i64, ptr %arrayidx2, align 8
160 %index.next = add i64 %index, 1
161 %exitcond = icmp ne i64 %index.next, 67
162 br i1 %exitcond, label %loop, label %loop.end
165 %retval = phi i64 [ %index, %loop ], [ %ld2, %loop.inc ]
; Three exits share %loop.end: a counted exit taken when %index equals 64, a
; data-dependent exit taken when the two loaded i8 values are equal, and a
; second counted exit taken when %index.next equals 128. The exit phi only
; merges the constants 0, 1 and 0.
; Expected debug output: max backedge taken count 61 and the legality message
; "We can vectorize this loop!", immediately followed by the diagnostic that
; early exit loops requiring a scalar epilogue are unsupported.
170 define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() {
171 ; CHECK-LABEL: LV: Checking a loop in 'one_uncountable_two_countable_same_exit_phi_of_consts'
172 ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 61
173 ; CHECK-NEXT: LV: We can vectorize this loop!
174 ; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of early exit loops requiring a scalar epilogue is unsupported.
176 %p1 = alloca [1024 x i8]
177 %p2 = alloca [1024 x i8]
178 call void @init_mem(ptr %p1, i64 1024)
179 call void @init_mem(ptr %p2, i64 1024)
183 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
184 %cmp1 = icmp ne i64 %index, 64
185 br i1 %cmp1, label %search, label %loop.end
188 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
189 %ld1 = load i8, ptr %arrayidx, align 1
190 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
191 %ld2 = load i8, ptr %arrayidx1, align 1
192 %cmp3 = icmp eq i8 %ld1, %ld2
193 br i1 %cmp3, label %loop.end, label %loop.inc
196 %index.next = add i64 %index, 1
197 %exitcond = icmp ne i64 %index.next, 128
198 br i1 %exitcond, label %loop, label %loop.end
201 %retval = phi i64 [ 0, %loop ], [ 1, %search ], [ 0, %loop.inc ]
206 ; == SOME ILLEGAL EXAMPLES ==
; Same loop shape as same_exit_block_pre_inc_use1, but the two buffers are
; only [42 x i8] allocas while %index runs from 3 until %index.next equals 67.
; Expected debug output: "Not vectorizing: Loop may fault."
; NOTE(review): init_mem is still passed 1024 although the allocas hold 42
; bytes; the callee is only declared in this file, so confirm whether that
; size argument is intentional.
209 define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
210 ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas'
211 ; CHECK: LV: Not vectorizing: Loop may fault.
213 %p1 = alloca [42 x i8]
214 %p2 = alloca [42 x i8]
215 call void @init_mem(ptr %p1, i64 1024)
216 call void @init_mem(ptr %p2, i64 1024)
220 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
221 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
222 %ld1 = load i8, ptr %arrayidx, align 1
223 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
224 %ld2 = load i8, ptr %arrayidx1, align 1
225 %cmp3 = icmp eq i8 %ld1, %ld2
226 br i1 %cmp3, label %loop.inc, label %loop.end
229 %index.next = add i64 %index, 1
230 %exitcond = icmp ne i64 %index.next, 67
231 br i1 %exitcond, label %loop, label %loop.end
234 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; Same loop shape as same_exit_block_pre_inc_use1, but the buffers are pointer
; arguments marked dereferenceable(42) while %index runs from 3 until
; %index.next equals 67.
; Expected debug output: "Not vectorizing: Loop may fault."
239 define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) {
240 ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_deref_ptrs'
241 ; CHECK: LV: Not vectorizing: Loop may fault.
246 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
247 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
248 %ld1 = load i8, ptr %arrayidx, align 1
249 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
250 %ld2 = load i8, ptr %arrayidx1, align 1
251 %cmp3 = icmp eq i8 %ld1, %ld2
252 br i1 %cmp3, label %loop.inc, label %loop.end
255 %index.next = add i64 %index, 1
256 %exitcond = icmp ne i64 %index.next, 67
257 br i1 %exitcond, label %loop, label %loop.end
260 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; Same loop shape as same_exit_block_pre_inc_use1, but the buffers are plain
; pointer arguments that carry no dereferenceable attribute.
; Expected debug output: "Not vectorizing: Loop may fault."
265 define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) {
266 ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_unknown_ptrs'
267 ; CHECK: LV: Not vectorizing: Loop may fault.
272 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
273 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
274 %ld1 = load i8, ptr %arrayidx, align 1
275 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
276 %ld2 = load i8, ptr %arrayidx1, align 1
277 %cmp3 = icmp eq i8 %ld1, %ld2
278 br i1 %cmp3, label %loop.inc, label %loop.end
281 %index.next = add i64 %index, 1
282 %exitcond = icmp ne i64 %index.next, 67
283 br i1 %exitcond, label %loop, label %loop.end
286 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
291 ; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't
; currently support this.
; Here the counted exit (%index.next compared against 67) is evaluated first
; and branches to %search, while the data-dependent compare of the two loaded
; i8 values is the branch that returns to %loop.
; Expected debug output: "Not vectorizing: Early exit is not the latch
; predecessor."
293 define i64 @uncountable_exit_on_last_block() {
294 ; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_on_last_block'
295 ; CHECK: LV: Not vectorizing: Early exit is not the latch predecessor.
297 %p1 = alloca [1024 x i8]
298 %p2 = alloca [1024 x i8]
299 call void @init_mem(ptr %p1, i64 1024)
300 call void @init_mem(ptr %p2, i64 1024)
304 %index = phi i64 [ %index.next, %search ], [ 3, %entry ]
305 %index.next = add i64 %index, 1
306 %exitcond = icmp ne i64 %index.next, 67
307 br i1 %exitcond, label %search, label %loop.end
310 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
311 %ld1 = load i8, ptr %arrayidx, align 1
312 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
313 %ld2 = load i8, ptr %arrayidx1, align 1
314 %cmp3 = icmp eq i8 %ld1, %ld2
315 br i1 %cmp3, label %loop.end, label %loop
318 %retval = phi i64 [ 64, %loop ], [ %index, %search ]
323 ; We don't currently support multiple uncountable early exits.
; Two data-dependent branches can leave the loop for %loop.end: one when the
; two loaded i8 values are equal, and one when %ld1 is unsigned-less-than 34.
; A counted exit (%index.next compared against 67) targets %loop.end as well.
; Expected debug output: "Not vectorizing: Loop has too many uncountable
; exits."
324 define i64 @multiple_uncountable_exits() {
325 ; CHECK-LABEL: LV: Checking a loop in 'multiple_uncountable_exits'
326 ; CHECK: LV: Not vectorizing: Loop has too many uncountable exits.
328 %p1 = alloca [1024 x i8]
329 %p2 = alloca [1024 x i8]
330 call void @init_mem(ptr %p1, i64 1024)
331 call void @init_mem(ptr %p2, i64 1024)
335 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
336 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
337 %ld1 = load i8, ptr %arrayidx, align 1
338 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
339 %ld2 = load i8, ptr %arrayidx1, align 1
340 %cmp1 = icmp eq i8 %ld1, %ld2
341 br i1 %cmp1, label %loop.end, label %search2
344 %cmp2 = icmp ult i8 %ld1, 34
345 br i1 %cmp2, label %loop.end, label %loop.inc
348 %index.next = add i64 %index, 1
349 %exitcond = icmp ne i64 %index.next, 67
350 br i1 %exitcond, label %search1, label %loop.end
353 %retval = phi i64 [ %index, %search1 ], [ 100, %search2 ], [ 43, %loop.inc ]
; The only incoming value of the exit phi comes from %loop, i.e. from the
; data-dependent compare of the two loaded i8 values.
; Expected debug output: "Not vectorizing: Cannot vectorize uncountable loop."
; NOTE(review): %exitcond is computed but no branch using it is visible in
; this listing - presumably the latch branches back to %loop unconditionally;
; confirm against the complete file.
358 define i64 @uncountable_exit_infinite_loop() {
359 ; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_infinite_loop'
360 ; CHECK: LV: Not vectorizing: Cannot vectorize uncountable loop.
362 %p1 = alloca [1024 x i8]
363 %p2 = alloca [1024 x i8]
364 call void @init_mem(ptr %p1, i64 1024)
365 call void @init_mem(ptr %p2, i64 1024)
369 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
370 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
371 %ld1 = load i8, ptr %arrayidx, align 1
372 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
373 %ld2 = load i8, ptr %arrayidx1, align 1
374 %cmp3 = icmp eq i8 %ld1, %ld2
375 br i1 %cmp3, label %loop.inc, label %loop.end
378 %index.next = add i64 %index, 1
379 %exitcond = icmp ne i64 %index.next, 67
383 %retval = phi i64 [ %index, %loop ]
; The early-exit condition is computed from a call to the external function
; @foo (call site tagged with attribute group #0) on an i32 loaded from %p1.
; Expected debug output: "Not vectorizing: Early exit loop contains operations
; that cannot be speculatively executed."
388 define i64 @loop_contains_unsafe_call() {
389 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_unsafe_call'
390 ; CHECK: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed.
392 %p1 = alloca [1024 x i8]
393 %p2 = alloca [1024 x i8]
394 call void @init_mem(ptr %p1, i64 1024)
395 call void @init_mem(ptr %p2, i64 1024)
399 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
400 %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
401 %ld1 = load i32, ptr %arrayidx, align 1
402 %bad_call = call i32 @foo(i32 %ld1) #0
403 %cmp = icmp eq i32 %bad_call, 34
404 br i1 %cmp, label %loop.inc, label %loop.end
407 %index.next = add i64 %index, 1
408 %exitcond = icmp ne i64 %index.next, 67
409 br i1 %exitcond, label %loop, label %loop.end
412 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; Counterpart of loop_contains_safe_div: here the udiv divides the constant
; 20000 by the value loaded from %p1, so the divisor is not a constant.
; Expected debug output: "Not vectorizing: Early exit loop contains operations
; that cannot be speculatively executed."
417 define i64 @loop_contains_unsafe_div() {
418 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_unsafe_div'
419 ; CHECK: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed.
421 %p1 = alloca [1024 x i8]
422 %p2 = alloca [1024 x i8]
423 call void @init_mem(ptr %p1, i64 1024)
424 call void @init_mem(ptr %p2, i64 1024)
428 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
429 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
430 %ld1 = load i32, ptr %arrayidx, align 1
431 %div = udiv i32 20000, %ld1
432 %cmp = icmp eq i32 %div, 1
433 br i1 %cmp, label %loop.inc, label %loop.end
436 %index.next = add i64 %index, 1
437 %exitcond = icmp ne i64 %index.next, 67
438 br i1 %exitcond, label %loop, label %loop.end
441 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; The loop stores each i32 loaded from %p1 to the argument %dest before the
; early-exit compare of that loaded value against 1.
; Expected debug output: "Not vectorizing: Writes to memory unsupported in
; early exit loops".
446 define i64 @loop_contains_store(ptr %dest) {
447 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
448 ; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops
450 %p1 = alloca [1024 x i8]
451 call void @init_mem(ptr %p1, i64 1024)
455 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
456 %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
457 %ld1 = load i32, ptr %arrayidx, align 1
458 %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %index
459 store i32 %ld1, ptr %arrayidx2, align 4
460 %cmp = icmp eq i32 %ld1, 1
461 br i1 %cmp, label %loop.inc, label %loop.end
464 %index.next = add i64 %index, 1
465 %exitcond = icmp ne i64 %index.next, 67
466 br i1 %exitcond, label %loop, label %loop.end
469 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; The data-dependent exit sits behind a guard: an i8 loaded from %mask is
; tested against zero, and only a non-zero value branches to %loop.search,
; where the two buffers are compared and the loop may exit. A zero mask value
; branches straight to %loop.inc.
; Expected debug output: "Not vectorizing: Early exit is not the latch
; predecessor."
474 define i64 @uncountable_exit_in_conditional_block(ptr %mask) {
475 ; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_in_conditional_block'
476 ; CHECK: LV: Not vectorizing: Early exit is not the latch predecessor.
478 %p1 = alloca [1024 x i8]
479 %p2 = alloca [1024 x i8]
480 call void @init_mem(ptr %p1, i64 1024)
481 call void @init_mem(ptr %p2, i64 1024)
485 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
486 %arrayidx1 = getelementptr inbounds i8, ptr %mask, i64 %index
487 %ld1 = load i8, ptr %arrayidx1, align 1
488 %cmp1 = icmp ne i8 %ld1, 0
489 br i1 %cmp1, label %loop.search, label %loop.inc
492 %arrayidx2 = getelementptr inbounds i8, ptr %p1, i64 %index
493 %ld2 = load i8, ptr %arrayidx2, align 1
494 %arrayidx3 = getelementptr inbounds i8, ptr %p2, i64 %index
495 %ld3 = load i8, ptr %arrayidx3, align 1
496 %cmp2 = icmp eq i8 %ld2, %ld3
497 br i1 %cmp2, label %loop.inc, label %loop.end
500 %index.next = add i64 %index, 1
501 %exitcond = icmp ne i64 %index.next, 67
502 br i1 %exitcond, label %loop, label %loop.end
505 %retval = phi i64 [ %index, %loop.search ], [ 67, %loop.inc ]
; Same loop shape as same_exit_block_pre_inc_use1 plus an accumulator: the phi
; %red starts at 0 and adds the zero-extended %ld2 on every iteration, and
; %red.next is added to the final index value to form the result.
; Expected debug output: "Not vectorizing: Found reductions or recurrences in
; early-exit loop."
510 define i64 @same_exit_block_pre_inc_use1_with_reduction() {
511 ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_with_reduction'
512 ; CHECK: LV: Not vectorizing: Found reductions or recurrences in early-exit loop.
514 %p1 = alloca [1024 x i8]
515 %p2 = alloca [1024 x i8]
516 call void @init_mem(ptr %p1, i64 1024)
517 call void @init_mem(ptr %p2, i64 1024)
521 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
522 %red = phi i64 [ %red.next, %loop.inc ], [ 0, %entry ]
523 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
524 %ld1 = load i8, ptr %arrayidx, align 1
525 %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
526 %ld2 = load i8, ptr %arrayidx1, align 1
527 %ld2.zext = zext i8 %ld2 to i64
528 %red.next = add i64 %red, %ld2.zext
529 %cmp3 = icmp eq i8 %ld1, %ld2
530 br i1 %cmp3, label %loop.inc, label %loop.end
533 %index.next = add i64 %index, 1
534 %exitcond = icmp ne i64 %index.next, 67
535 br i1 %exitcond, label %loop, label %loop.end
538 %final.ind = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
539 %retval = add i64 %red.next, %final.ind
; The data-dependent exit is a switch on the loaded i8: value 2 goes to
; %loop.end, value 3 goes to %loop.surprise, and every other value continues
; to %loop.inc.
; Expected debug output: "Not vectorizing: Loop contains an unsupported
; switch".
544 define i64 @uncountable_exit_has_multiple_outside_successors() {
545 ; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_has_multiple_outside_successors'
546 ; CHECK: LV: Not vectorizing: Loop contains an unsupported switch
548 %p1 = alloca [1024 x i8]
549 call void @init_mem(ptr %p1, i64 1024)
553 %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
554 %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
555 %ld1 = load i8, ptr %arrayidx, align 1
556 switch i8 %ld1, label %loop.inc [
557 i8 2, label %loop.end
558 i8 3, label %loop.surprise
562 %index.next = add i64 %index, 1
563 %exitcond = icmp ne i64 %index.next, 67
564 br i1 %exitcond, label %loop, label %loop.end
570 %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
; @foo is the scalar callee used by loop_contains_unsafe_call and is declared
; readonly. Attribute group #0, applied at that call site, carries a
; "vector-function-abi-variant" string that names @foo_vec as a vector variant
; of @foo; @foo_vec takes and returns <vscale x 4 x i32>.
575 declare i32 @foo(i32) readonly
576 declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
578 attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }