1 ; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
4 ; Verify that LV can handle explicitly vectorized outer loops with uniform branches
5 ; but bails out on outer loops with divergent branches.
7 ; Root C/C++ source code for the test cases
8 ; void foo(int *a, int *b, int N, int M)
11 ; #pragma clang loop vectorize(enable) vectorize_width(8)
12 ; for (i = 0; i < N; i++) {
13 ; // Tested conditional branch. COND will be replaced per test.
15 ; for (j = 0; j < M; j++) {
16 ; a[i*M+j] = b[i*M+j] * b[i*M+j];
21 ; Case 1 (COND => M == N): Outer loop with uniform conditional branch.
23 ; CHECK-LABEL: uniform_branch
24 ; CHECK: LV: We can vectorize this outer loop!
; Data layout for the module: little-endian ('e'), ELF-style name mangling
; ('m:e'), 64-bit aligned i64, 128-bit aligned f80, native integer widths of
; 8/16/32/64 bits, and a 128-bit natural stack alignment ('S128').
26 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Case 1 kernel. The branch that guards the inner loop tests %brmerge, which is
; computed once in outer.ph from the arguments %M and %N only, so its value is
; the same for every iteration of the outer loop.
;   %a - destination array (written at index i*M+j)
;   %b - source array (read at index i*M and i*M+j)
;   %N - outer-loop bound
;   %M - inner-loop bound and row stride
28 define void @uniform_branch(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
30 %cmp39 = icmp sgt i32 %N, 0 ; N > 0 ?
31 br i1 %cmp39, label %outer.ph, label %for.end19 ; skip the whole loop nest when N <= 0
33 outer.ph: ; preds = %entry
34 %cmp337 = icmp slt i32 %M, 1 ; M < 1
35 %0 = sext i32 %M to i64 ; M sign-extended; multiplied by the outer IV below
36 %N64 = zext i32 %N to i64 ; outer-loop exit bound
37 %M64 = zext i32 %M to i64 ; inner-loop exit bound
38 %cmp1 = icmp ne i32 %M, %N ; Uniform condition: M != N, i.e. the negation of COND (M == N)
39 %brmerge = or i1 %cmp1, %cmp337 ; Uniform condition: true (skip inner loop) when M != N or M < 1
42 outer.body: ; preds = %outer.inc, %outer.ph
43 %indvars.iv42 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next43, %outer.inc ] ; outer induction variable i, starts at 0
44 %1 = mul nsw i64 %indvars.iv42, %0 ; i * M
45 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %1 ; &b[i*M]
46 %2 = load i32, i32* %arrayidx, align 4, !tbaa !2 ; b[i*M]; %2 is not referenced by the other lines shown for this function
47 br i1 %brmerge, label %outer.inc, label %inner.ph ; Supported uniform branch
49 inner.ph: ; preds = %outer.body
52 inner.body: ; preds = %inner.ph, %inner.body
53 %indvars.iv = phi i64 [ %indvars.iv.next, %inner.body ], [ 0, %inner.ph ] ; inner induction variable j, starts at 0
54 %3 = add nsw i64 %indvars.iv, %1 ; i*M + j
55 %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %3 ; &b[i*M+j]
56 %4 = load i32, i32* %arrayidx7, align 4, !tbaa !2 ; b[i*M+j]
57 %mul12 = mul nsw i32 %4, %4 ; b[i*M+j] * b[i*M+j]
58 %arrayidx16 = getelementptr inbounds i32, i32* %a, i64 %3 ; &a[i*M+j]
59 store i32 %mul12, i32* %arrayidx16, align 4, !tbaa !2 ; a[i*M+j] = squared value
60 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; j + 1
61 %exitcond = icmp eq i64 %indvars.iv.next, %M64 ; j + 1 == M ?
62 br i1 %exitcond, label %outer.inc, label %inner.body ; leave the inner loop once j + 1 reaches M
64 outer.inc: ; preds = %inner.body, %outer.body
65 %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1 ; i + 1
66 %exitcond46 = icmp eq i64 %indvars.iv.next43, %N64 ; i + 1 == N ?
67 br i1 %exitcond46, label %for.end19, label %outer.body, !llvm.loop !6 ; outer-loop latch; !6 carries the vectorize enable / width 8 hints
69 for.end19: ; preds = %outer.inc, %entry
74 ; Case 2 (COND => B[i * M] == 0): Outer loop with divergent conditional branch.
76 ; CHECK-LABEL: divergent_branch
77 ; CHECK: Unsupported conditional branch.
78 ; CHECK: LV: Not vectorizing: Unsupported outer loop.
; Repeats, character for character, the layout string already declared before
; @uniform_branch.
80 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Case 2 kernel. Same loop nest as @uniform_branch, except that the branch
; guarding the inner loop tests a value derived from b[i*M], which is loaded
; inside outer.body and therefore depends on the outer induction variable.
;   %a - destination array (written at index i*M+j)
;   %b - source array (read at index i*M and i*M+j)
;   %N - outer-loop bound
;   %M - inner-loop bound and row stride
82 define void @divergent_branch(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
84 %cmp39 = icmp sgt i32 %N, 0 ; N > 0 ?
85 br i1 %cmp39, label %outer.ph, label %for.end19 ; skip the whole loop nest when N <= 0
87 outer.ph: ; preds = %entry
88 %cmp337 = icmp slt i32 %M, 1 ; M < 1
89 %0 = sext i32 %M to i64 ; M sign-extended; multiplied by the outer IV below
90 %N64 = zext i32 %N to i64 ; outer-loop exit bound
91 %M64 = zext i32 %M to i64 ; inner-loop exit bound
94 outer.body: ; preds = %outer.inc, %outer.ph
95 %indvars.iv42 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next43, %outer.inc ] ; outer induction variable i, starts at 0
96 %1 = mul nsw i64 %indvars.iv42, %0 ; i * M
97 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %1 ; &b[i*M]
98 %2 = load i32, i32* %arrayidx, align 4, !tbaa !2 ; b[i*M], feeds the branch condition below
99 %cmp1 = icmp ne i32 %2, 0 ; Divergent condition: b[i*M] != 0, i.e. the negation of COND (B[i * M] == 0)
100 %brmerge = or i1 %cmp1, %cmp337 ; Divergent condition: true (skip inner loop) when b[i*M] != 0 or M < 1
101 br i1 %brmerge, label %outer.inc, label %inner.ph ; Unsupported divergent branch.
103 inner.ph: ; preds = %outer.body
106 inner.body: ; preds = %inner.ph, %inner.body
107 %indvars.iv = phi i64 [ %indvars.iv.next, %inner.body ], [ 0, %inner.ph ] ; inner induction variable j, starts at 0
108 %3 = add nsw i64 %indvars.iv, %1 ; i*M + j
109 %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %3 ; &b[i*M+j]
110 %4 = load i32, i32* %arrayidx7, align 4, !tbaa !2 ; b[i*M+j]
111 %mul12 = mul nsw i32 %4, %4 ; b[i*M+j] * b[i*M+j]
112 %arrayidx16 = getelementptr inbounds i32, i32* %a, i64 %3 ; &a[i*M+j]
113 store i32 %mul12, i32* %arrayidx16, align 4, !tbaa !2 ; a[i*M+j] = squared value
114 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; j + 1
115 %exitcond = icmp eq i64 %indvars.iv.next, %M64 ; j + 1 == M ?
116 br i1 %exitcond, label %outer.inc, label %inner.body ; leave the inner loop once j + 1 reaches M
118 outer.inc: ; preds = %inner.body, %outer.body
119 %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1 ; i + 1
120 %exitcond46 = icmp eq i64 %indvars.iv.next43, %N64 ; i + 1 == N ?
121 br i1 %exitcond46, label %for.end19, label %outer.body, !llvm.loop !6 ; outer-loop latch; !6 carries the vectorize enable / width 8 hints
123 for.end19: ; preds = %outer.inc, %entry
; Module-level metadata: one module flag, the TBAA type chain used by the
; !tbaa !2 loads and stores, and the loop hints attached to both outer-loop
; latches through !llvm.loop !6.
127 !llvm.module.flags = !{!0} ; the module flag list contains only !0
130 !0 = !{i32 1, !"wchar_size", i32 4} ; "wchar_size" module flag with value 4
131 !1 = !{!"clang version 6.0.0"} ; producer string; nothing in the lines shown here references !1
132 !2 = !{!3, !3, i64 0} ; TBAA access tag: base type !3, access type !3, offset 0
133 !3 = !{!"int", !4, i64 0} ; TBAA type node "int", parent !4
134 !4 = !{!"omnipotent char", !5, i64 0} ; TBAA type node "omnipotent char", parent !5
135 !5 = !{!"Simple C/C++ TBAA"} ; TBAA root node
136 !6 = distinct !{!6, !7, !8} ; self-referential loop ID bundling the hints !7 and !8
137 !7 = !{!"llvm.loop.vectorize.width", i32 8} ; requested vectorization width of 8
138 !8 = !{!"llvm.loop.vectorize.enable", i1 true} ; vectorization explicitly enabled for the loop