2 ; RUN: opt -p loop-vectorize -debug-only=loop-vectorize -S -disable-output < %s 2>&1 | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
; @no_outer_loop: baseline case. The loop below is entered straight from
; %entry (see the %inner.iv phi), so no enclosing loop is visible here.
; Each iteration performs a[i + off] += b[i + off] on i8 elements.
; Neither %a nor %b carries a noalias attribute in the signature.
; Expected debug output: the "hoisted out of the outer loop" message must be
; absent, and the total runtime-check cost is reported as 4.
6 define void @no_outer_loop(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %off, i64 noundef %n) {
7 ; CHECK-LABEL: LV: Checking a loop in 'no_outer_loop'
8 ; CHECK: Calculating cost of runtime checks:
9 ; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop.
10 ; CHECK: Total cost of runtime checks: 4
11 ; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; Induction variable: 0 on entry from %entry, otherwise the incremented value.
16 %inner.iv = phi i64 [ 0, %entry ], [ %inner.iv.next, %inner.loop ]
; Byte index shared by the %b read and the %a read-modify-write.
17 %add.us = add nuw nsw i64 %inner.iv, %off
18 %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
19 %0 = load i8, ptr %arrayidx.us, align 1
20 %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
21 %1 = load i8, ptr %arrayidx7.us, align 1
22 %add9.us = add i8 %1, %0
; The sum is written back to the same %a element that was just loaded.
23 store i8 %add9.us, ptr %arrayidx7.us, align 1
24 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
; Leave the loop once the incremented induction variable equals %n.
25 %exitcond.not = icmp eq i64 %inner.iv.next, %n
26 br i1 %exitcond.not, label %inner.exit, label %inner.loop
; @outer_no_tc: inner loop nested in an outer loop whose exit bound is the
; function argument %m, with no !prof metadata on the outer latch branch.
; The inner loop performs a[j + i*n] += b[j + i*n] on i8 elements, where i is
; the outer induction variable and j the inner one.
; Expected debug output: the hoisting message reports the cost dropping from
; 6 to 3, and the total runtime-check cost is 3.
; NOTE(review): 6 -> 3 is consistent with dividing the cost by an assumed
; outer trip count of 2 when none is known -- confirm against the cost model.
32 define void @outer_no_tc(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
33 ; CHECK-LABEL: LV: Checking a loop in 'outer_no_tc'
34 ; CHECK: Calculating cost of runtime checks:
35 ; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 3
36 ; CHECK: Total cost of runtime checks: 3
37 ; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; Outer induction variable: 0 from %entry, otherwise the value from %inner.exit.
42 %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
; Row offset for this outer iteration (outer index times %n).
43 %mul.us = mul nsw i64 %outer.iv, %n
; Inner induction variable restarts at 0 on every entry from %outer.loop.
47 %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
; Flattened byte index used for both the %b read and the %a update.
48 %add.us = add nuw nsw i64 %inner.iv, %mul.us
49 %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
50 %0 = load i8, ptr %arrayidx.us, align 1
51 %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
52 %1 = load i8, ptr %arrayidx7.us, align 1
53 %add9.us = add i8 %1, %0
54 store i8 %add9.us, ptr %arrayidx7.us, align 1
55 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
56 %exitcond.not = icmp eq i64 %inner.iv.next, %n
57 br i1 %exitcond.not, label %inner.exit, label %inner.loop
60 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
; Outer exit bound is the runtime argument %m.
61 %exitcond27.not = icmp eq i64 %outer.iv.next, %m
62 br i1 %exitcond27.not, label %outer.exit, label %outer.loop
; @outer_known_tc3: same i8 loop nest, a[j + i*n] += b[j + i*n], but the outer
; loop exits when the incremented outer induction variable equals the
; constant 3.
; Expected debug output: the hoisting message reports the cost dropping from
; 6 to 2, and the total runtime-check cost is 2.
; NOTE(review): 6 -> 2 is consistent with dividing the cost by the constant
; outer trip count of 3 -- confirm against the cost model.
69 define void @outer_known_tc3(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %n) {
70 ; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3'
71 ; CHECK: Calculating cost of runtime checks:
72 ; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
73 ; CHECK: Total cost of runtime checks: 2
74 ; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; Outer induction variable: 0 from %entry, otherwise the value from %inner.exit.
79 %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
; Row offset for this outer iteration (outer index times %n).
80 %mul.us = mul nsw i64 %outer.iv, %n
; Inner induction variable restarts at 0 on every entry from %outer.loop.
84 %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
; Flattened byte index used for both the %b read and the %a update.
85 %add.us = add nuw nsw i64 %inner.iv, %mul.us
86 %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
87 %0 = load i8, ptr %arrayidx.us, align 1
88 %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
89 %1 = load i8, ptr %arrayidx7.us, align 1
90 %add9.us = add i8 %1, %0
91 store i8 %add9.us, ptr %arrayidx7.us, align 1
92 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
93 %exitcond.not = icmp eq i64 %inner.iv.next, %n
94 br i1 %exitcond.not, label %inner.exit, label %inner.loop
97 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
; Constant outer bound: the outer loop exits when the next index reaches 3.
98 %exitcond26.not = icmp eq i64 %outer.iv.next, 3
99 br i1 %exitcond26.not, label %outer.exit, label %outer.loop
; @outer_known_tc64: same i8 loop nest, a[j + i*n] += b[j + i*n], but the
; outer loop exits when the incremented outer induction variable equals the
; constant 64.
; Expected debug output: the hoisting message reports the cost dropping from
; 6 to 1, and the total runtime-check cost is 1.
; NOTE(review): an integer division 6/64 would give 0, so the reported 1
; suggests the reduced cost has a lower bound of 1 -- confirm against the
; cost model.
106 define void @outer_known_tc64(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %n) {
107 ; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc64'
108 ; CHECK: Calculating cost of runtime checks:
109 ; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 1
110 ; CHECK: Total cost of runtime checks: 1
111 ; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; Outer induction variable: 0 from %entry, otherwise the value from %inner.exit.
116 %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
; Row offset for this outer iteration (outer index times %n).
117 %mul.us = mul nsw i64 %outer.iv, %n
; Inner induction variable restarts at 0 on every entry from %outer.loop.
121 %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
; Flattened byte index used for both the %b read and the %a update.
122 %add.us = add nuw nsw i64 %inner.iv, %mul.us
123 %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
124 %0 = load i8, ptr %arrayidx.us, align 1
125 %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
126 %1 = load i8, ptr %arrayidx7.us, align 1
127 %add9.us = add i8 %1, %0
128 store i8 %add9.us, ptr %arrayidx7.us, align 1
129 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
130 %exitcond.not = icmp eq i64 %inner.iv.next, %n
131 br i1 %exitcond.not, label %inner.exit, label %inner.loop
134 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
; Constant outer bound: the outer loop exits when the next index reaches 64.
135 %exitcond26.not = icmp eq i64 %outer.iv.next, 64
136 br i1 %exitcond26.not, label %outer.exit, label %outer.loop
; @outer_pgo_3: same i8 loop nest, a[j + i*n] += b[j + i*n]. The outer exit
; bound is the runtime argument %m, and the outer latch branch carries
; !prof !0 (branch_weights 10 and 20, defined at the end of this file).
; Expected debug output: the hoisting message reports the cost dropping from
; 6 to 2, the same figures expected for @outer_known_tc3.
; NOTE(review): presumably the profile weights yield an estimated outer trip
; count of 3 -- confirm against the trip-count estimation code.
143 define void @outer_pgo_3(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
144 ; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_3'
145 ; CHECK: Calculating cost of runtime checks:
146 ; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
147 ; CHECK: Total cost of runtime checks: 2
148 ; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; Outer induction variable: 0 from %entry, otherwise the value from %inner.exit.
153 %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
; Row offset for this outer iteration (outer index times %n).
154 %mul.us = mul nsw i64 %outer.iv, %n
; Inner induction variable restarts at 0 on every entry from %outer.loop.
158 %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
; Flattened byte index used for both the %b read and the %a update.
159 %add.us = add nuw nsw i64 %inner.iv, %mul.us
160 %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
161 %0 = load i8, ptr %arrayidx.us, align 1
162 %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
163 %1 = load i8, ptr %arrayidx7.us, align 1
164 %add9.us = add i8 %1, %0
165 store i8 %add9.us, ptr %arrayidx7.us, align 1
166 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
167 %exitcond.not = icmp eq i64 %inner.iv.next, %n
168 br i1 %exitcond.not, label %inner.exit, label %inner.loop
171 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
172 %exitcond26.not = icmp eq i64 %outer.iv.next, %m
; Profile data on the outer latch: the only source of trip-count information
; visible here, since %m is a runtime value.
173 br i1 %exitcond26.not, label %outer.exit, label %outer.loop, !prof !0
; @outer_pgo_minus1: same i8 loop nest, a[j + i*n] += b[j + i*n]. The outer
; exit bound is the runtime argument %m, and the outer latch branch carries
; !prof !1 (branch_weights 1 and -1, defined at the end of this file).
; Expected debug output: no "Cost reduced" hoisting message, and the total
; runtime-check cost stays at 6.
; NOTE(review): presumably these weights produce an outer trip-count estimate
; for which no cost reduction is reported -- confirm against the cost model.
180 define void @outer_pgo_minus1(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
181 ; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_minus1'
182 ; CHECK: Calculating cost of runtime checks:
183 ; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced
184 ; CHECK: Total cost of runtime checks: 6
185 ; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; Outer induction variable: 0 from %entry, otherwise the value from %inner.exit.
190 %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
; Row offset for this outer iteration (outer index times %n).
191 %mul.us = mul nsw i64 %outer.iv, %n
; Inner induction variable restarts at 0 on every entry from %outer.loop.
195 %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
; Flattened byte index used for both the %b read and the %a update.
196 %add.us = add nuw nsw i64 %inner.iv, %mul.us
197 %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
198 %0 = load i8, ptr %arrayidx.us, align 1
199 %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
200 %1 = load i8, ptr %arrayidx7.us, align 1
201 %add9.us = add i8 %1, %0
202 store i8 %add9.us, ptr %arrayidx7.us, align 1
203 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
204 %exitcond.not = icmp eq i64 %inner.iv.next, %n
205 br i1 %exitcond.not, label %inner.exit, label %inner.loop
208 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
209 %exitcond26.not = icmp eq i64 %outer.iv.next, %m
; Profile data on the outer latch uses the unusual weight pair (1, -1).
210 br i1 %exitcond26.not, label %outer.exit, label %outer.loop, !prof !1
; @outer_known_tc3_full_range_checks: loop nest over i32 elements (align 4)
; computing dst[j + i*n] += src[j + i*n], with the outer loop exiting when the
; incremented outer induction variable equals the constant 3.
; Expected debug output: the hoisting message reports the cost dropping from
; 6 to 2, and the minimum profitable trip count is 4 (the i8 variants in this
; file expect 16).
; NOTE(review): the name suggests the runtime checks are expected to cover
; the whole range accessed across the outer loop -- confirm against the
; runtime-check generation code; nothing in this function's IR states it.
217 define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %n) {
218 ; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3_full_range_checks'
219 ; CHECK: Calculating cost of runtime checks:
220 ; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
221 ; CHECK: Total cost of runtime checks: 2
222 ; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:4
; Outer induction variable: 0 from %entry, otherwise the value from %inner.exit.
227 %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
; Row offset in elements for this outer iteration (outer index times %n).
228 %0 = mul nsw i64 %outer.iv, %n
; Inner induction variable restarts at 0 on every entry from %outer.loop.
232 %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
; Flattened element index used for both the %src read and the %dst update.
233 %1 = add nuw nsw i64 %iv.inner, %0
234 %arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %1
235 %2 = load i32, ptr %arrayidx.us, align 4
236 %arrayidx8.us = getelementptr inbounds i32, ptr %dst, i64 %1
237 %3 = load i32, ptr %arrayidx8.us, align 4
238 %add9.us = add nsw i32 %3, %2
; The sum is written back to the same %dst element that was just loaded.
239 store i32 %add9.us, ptr %arrayidx8.us, align 4
240 %iv.inner.next = add nuw nsw i64 %iv.inner, 1
241 %inner.exit.cond = icmp eq i64 %iv.inner.next, %n
242 br i1 %inner.exit.cond, label %inner.exit, label %inner.loop
245 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
; Constant outer bound: the outer loop exits when the next index reaches 3.
246 %outer.exit.cond = icmp eq i64 %outer.iv.next, 3
247 br i1 %outer.exit.cond, label %outer.exit, label %outer.loop
; Branch-weight profile metadata referenced through !prof by the outer latch
; branches above. In each use the first successor is %outer.exit and the
; second is %outer.loop.
; !0 is attached to the outer latch branch of @outer_pgo_3.
254 !0 = !{!"branch_weights", i32 10, i32 20}
; !1 is attached to the outer latch branch of @outer_pgo_minus1.
; NOTE(review): the second weight is written as i32 -1; presumably this is
; meant to exercise an extreme weight value -- confirm the intent.
255 !1 = !{!"branch_weights", i32 1, i32 -1}