1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
3 ; RUN: opt < %s -p 'loop-vectorize' -force-vector-interleave=1 -S \
4 ; RUN: -force-vector-width=4 -debug-only=loop-accesses,loop-vectorize,loop-utils 2> %t | FileCheck %s
5 ; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
7 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
9 ; Equivalent example in C:
10 ; void diff_checks(int32_t *dst, int32_t *src, int m, int n) {
11 ; for (int i = 0; i < m; i++) {
12 ; for (int j = 0; j < n; j++) {
13 ; dst[(i * (n + 1)) + j] = src[(i * n) + j];
17 ; NOTE: The strides of the starting address values in the inner loop differ, i.e.
18 ; '(i * (n + 1))' vs '(i * n)'.
20 ; DEBUG-LABEL: 'diff_checks'
21 ; DEBUG: LAA: Found an analyzable loop: inner.loop
22 ; DEBUG: LAA: Not creating diff runtime check, since these cannot be hoisted out of the outer loop
23 ; DEBUG: LAA: Adding RT check for range:
24 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
25 ; DEBUG-NEXT: LAA: ... but need to check stride is positive: (4 * (sext i32 (1 + %n)<nuw> to i64))<nsw>
26 ; DEBUG-NEXT: Start: %dst End: ((4 * (zext i32 %n to i64))<nuw><nsw> + (4 * (sext i32 (1 + %n)<nuw> to i64) * (-1 + (zext i32 %m to i64))<nsw>) + %dst)
27 ; DEBUG-NEXT: LAA: Adding RT check for range:
28 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
29 ; DEBUG-NEXT: Start: %src End: ((4 * (zext i32 %m to i64) * (zext i32 %n to i64)) + %src)
31 define void @diff_checks(ptr nocapture noundef writeonly %dst, ptr nocapture noundef readonly %src, i32 noundef %m, i32 noundef %n) {
32 ; CHECK-LABEL: define void @diff_checks
33 ; CHECK-SAME: (ptr noundef writeonly captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) {
35 ; CHECK-NEXT: [[ADD5:%.*]] = add nuw i32 [[N]], 1
36 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
37 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[ADD5]] to i64
38 ; CHECK-NEXT: [[WIDE_M:%.*]] = zext i32 [[M]] to i64
39 ; CHECK-NEXT: [[WIDE_N:%.*]] = zext i32 [[N]] to i64
40 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[WIDE_M]], -1
41 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP1]]
42 ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 2
43 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[WIDE_N]], 2
44 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP4]], [[TMP5]]
45 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
46 ; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP1]], 2
47 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[WIDE_N]], [[WIDE_M]]
48 ; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 2
49 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP9]]
50 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
52 ; CHECK-NEXT: [[IV_OUTER:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_OUTER_NEXT:%.*]], [[INNER_EXIT:%.*]] ]
53 ; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i64 [[IV_OUTER]], [[TMP0]]
54 ; CHECK-NEXT: [[TMP11:%.*]] = mul nsw i64 [[IV_OUTER]], [[TMP1]]
55 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_N]], 4
56 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
57 ; CHECK: vector.memcheck:
58 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
59 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
60 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
61 ; CHECK-NEXT: [[STRIDE_CHECK:%.*]] = icmp slt i64 [[TMP7]], 0
62 ; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[FOUND_CONFLICT]], [[STRIDE_CHECK]]
63 ; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
65 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_N]], 4
66 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_N]], [[N_MOD_VF]]
67 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
69 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
70 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
71 ; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP13]], [[TMP10]]
72 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP14]]
73 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
74 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4, !alias.scope [[META0:![0-9]+]]
75 ; CHECK-NEXT: [[TMP17:%.*]] = add nsw i64 [[TMP13]], [[TMP11]]
76 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP17]]
77 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
78 ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP19]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
79 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
80 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
81 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
82 ; CHECK: middle.block:
83 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]]
84 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_EXIT]], label [[SCALAR_PH]]
86 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
87 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
89 ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ]
90 ; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[IV_INNER]], [[TMP10]]
91 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP21]]
92 ; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
93 ; CHECK-NEXT: [[TMP23:%.*]] = add nsw i64 [[IV_INNER]], [[TMP11]]
94 ; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP23]]
95 ; CHECK-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX9_US]], align 4
96 ; CHECK-NEXT: [[IV_INNER_NEXT]] = add nuw nsw i64 [[IV_INNER]], 1
97 ; CHECK-NEXT: [[INNER_EXIT_COND:%.*]] = icmp eq i64 [[IV_INNER_NEXT]], [[WIDE_N]]
98 ; CHECK-NEXT: br i1 [[INNER_EXIT_COND]], label [[INNER_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
100 ; CHECK-NEXT: [[IV_OUTER_NEXT]] = add nuw nsw i64 [[IV_OUTER]], 1
101 ; CHECK-NEXT: [[OUTER_EXIT_COND:%.*]] = icmp eq i64 [[IV_OUTER_NEXT]], [[WIDE_M]]
102 ; CHECK-NEXT: br i1 [[OUTER_EXIT_COND]], label [[OUTER_EXIT:%.*]], label [[OUTER_LOOP]]
104 ; CHECK-NEXT: ret void
107 %add5 = add nuw i32 %n, 1
108 %0 = zext i32 %n to i64
109 %1 = sext i32 %add5 to i64
110 %wide.m = zext i32 %m to i64
111 %wide.n = zext i32 %n to i64
115 %iv.outer = phi i64 [ 0, %entry ], [ %iv.outer.next, %inner.exit ]
116 %2 = mul nsw i64 %iv.outer, %0
117 %3 = mul nsw i64 %iv.outer, %1
121 %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
122 %4 = add nuw nsw i64 %iv.inner, %2
123 %arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %4
124 %5 = load i32, ptr %arrayidx.us, align 4
125 %6 = add nsw i64 %iv.inner, %3
126 %arrayidx9.us = getelementptr inbounds i32, ptr %dst, i64 %6
127 store i32 %5, ptr %arrayidx9.us, align 4
128 %iv.inner.next = add nuw nsw i64 %iv.inner, 1
129 %inner.exit.cond = icmp eq i64 %iv.inner.next, %wide.n
130 br i1 %inner.exit.cond, label %inner.exit, label %inner.loop
133 %iv.outer.next = add nuw nsw i64 %iv.outer, 1
134 %outer.exit.cond = icmp eq i64 %iv.outer.next, %wide.m
135 br i1 %outer.exit.cond, label %outer.exit, label %outer.loop
142 ; Equivalent example in C:
143 ; void full_checks(int32_t *dst, int32_t *src, int m, int n) {
144 ; for (int i = 0; i < m; i++) {
145 ; for (int j = 0; j < n; j++) {
146 ; dst[(i * n) + j] += src[(i * n) + j];
150 ; We decide to do full runtime checks here (as opposed to diff checks) due to
151 ; the additional load of 'dst[(i * n) + j]' in the loop.
153 ; DEBUG-LABEL: 'full_checks'
154 ; DEBUG: LAA: Found an analyzable loop: inner.loop
155 ; DEBUG-NOT: LAA: Creating diff runtime check for:
156 ; DEBUG: LAA: Adding RT check for range:
157 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
158 ; DEBUG-NEXT: Start: %dst End: ((4 * (zext i32 %m to i64) * (zext i32 %n to i64)) + %dst)
159 ; DEBUG-NEXT: LAA: Adding RT check for range:
160 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
161 ; DEBUG-NEXT: Start: %src End: ((4 * (zext i32 %m to i64) * (zext i32 %n to i64)) + %src)
163 define void @full_checks(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i32 noundef %m, i32 noundef %n) {
164 ; CHECK-LABEL: define void @full_checks
165 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) {
167 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
168 ; CHECK-NEXT: [[WIDE_M:%.*]] = zext i32 [[M]] to i64
169 ; CHECK-NEXT: [[WIDE_N:%.*]] = zext i32 [[N]] to i64
170 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[WIDE_N]], [[WIDE_M]]
171 ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2
172 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]]
173 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
174 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
176 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_EXIT:%.*]] ]
177 ; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP0]]
178 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_N]], 4
179 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
180 ; CHECK: vector.memcheck:
181 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
182 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
183 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
184 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
186 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_N]], 4
187 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_N]], [[N_MOD_VF]]
188 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
189 ; CHECK: vector.body:
190 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
191 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
192 ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], [[TMP3]]
193 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP5]]
194 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
195 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META9:![0-9]+]]
196 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP5]]
197 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
198 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META9]]
199 ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD]]
200 ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP9]], align 4, !alias.scope [[META12]], !noalias [[META9]]
201 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
202 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
203 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
204 ; CHECK: middle.block:
205 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]]
206 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_EXIT]], label [[SCALAR_PH]]
208 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
209 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
211 ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ]
212 ; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[IV_INNER]], [[TMP3]]
213 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP12]]
214 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
215 ; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP12]]
216 ; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX8_US]], align 4
217 ; CHECK-NEXT: [[ADD9_US:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
218 ; CHECK-NEXT: store i32 [[ADD9_US]], ptr [[ARRAYIDX8_US]], align 4
219 ; CHECK-NEXT: [[IV_INNER_NEXT]] = add nuw nsw i64 [[IV_INNER]], 1
220 ; CHECK-NEXT: [[INNER_EXIT_COND:%.*]] = icmp eq i64 [[IV_INNER_NEXT]], [[WIDE_N]]
221 ; CHECK-NEXT: br i1 [[INNER_EXIT_COND]], label [[INNER_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
223 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
224 ; CHECK-NEXT: [[OUTER_EXIT_COND:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[WIDE_M]]
225 ; CHECK-NEXT: br i1 [[OUTER_EXIT_COND]], label [[OUTER_EXIT:%.*]], label [[OUTER_LOOP]]
227 ; CHECK-NEXT: ret void
230 %0 = zext i32 %n to i64
231 %wide.m = zext i32 %m to i64
232 %wide.n = zext i32 %n to i64
236 %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
237 %1 = mul nsw i64 %outer.iv, %0
241 %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
242 %2 = add nuw nsw i64 %iv.inner, %1
243 %arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %2
244 %3 = load i32, ptr %arrayidx.us, align 4
245 %arrayidx8.us = getelementptr inbounds i32, ptr %dst, i64 %2
246 %4 = load i32, ptr %arrayidx8.us, align 4
247 %add9.us = add nsw i32 %4, %3
248 store i32 %add9.us, ptr %arrayidx8.us, align 4
249 %iv.inner.next = add nuw nsw i64 %iv.inner, 1
250 %inner.exit.cond = icmp eq i64 %iv.inner.next, %wide.n
251 br i1 %inner.exit.cond, label %inner.exit, label %inner.loop
254 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
255 %outer.exit.cond = icmp eq i64 %outer.iv.next, %wide.m
256 br i1 %outer.exit.cond, label %outer.exit, label %outer.loop
263 ; Equivalent example in C:
264 ; void full_checks_diff_strides(int32_t *dst, int32_t *src, int m, int n) {
265 ; for (int i = 0; i < m; i++) {
266 ; for (int j = 0; j < n; j++) {
267 ; dst[(i * (n + 1)) + j] += src[(i * n) + j];
271 ; We decide to do full runtime checks here (as opposed to diff checks) due to
272 ; the additional load of 'dst[(i * (n + 1)) + j]' in the loop.
273 ; NOTE: This is different from the test above (@full_checks) because the dst array
274 ; is accessed with a higher stride compared to src, and therefore the inner loop
275 ; runtime checks will vary for each outer loop iteration.
277 ; DEBUG-LABEL: 'full_checks_diff_strides'
278 ; DEBUG: LAA: Found an analyzable loop: inner.loop
279 ; DEBUG-NOT: LAA: Creating diff runtime check for:
280 ; DEBUG: LAA: Adding RT check for range:
281 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
282 ; DEBUG-NEXT: Start: %dst End: ((4 * (zext i32 %n to i64))<nuw><nsw> + ((4 + (4 * (zext i32 %n to i64))<nuw><nsw>)<nuw><nsw> * (-1 + (zext i32 %m to i64))<nsw>) + %dst)
283 ; DEBUG-NEXT: LAA: Adding RT check for range:
284 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
285 ; DEBUG-NEXT: Start: %src End: ((4 * (zext i32 %m to i64) * (zext i32 %n to i64)) + %src)
287 define void @full_checks_diff_strides(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i32 noundef %m, i32 noundef %n) {
288 ; CHECK-LABEL: define void @full_checks_diff_strides
289 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) {
291 ; CHECK-NEXT: [[WIDE_M:%.*]] = zext i32 [[M]] to i64
292 ; CHECK-NEXT: [[WIDE_N:%.*]] = zext i32 [[N]] to i64
293 ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_M]], -1
294 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[WIDE_N]], 2
295 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 4
296 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP0]], [[TMP2]]
297 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], [[TMP1]]
298 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
299 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[WIDE_N]], [[WIDE_M]]
300 ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2
301 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]]
302 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
304 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_EXIT:%.*]] ]
305 ; CHECK-NEXT: [[NPLUS1:%.*]] = add nuw nsw i32 [[N]], 1
306 ; CHECK-NEXT: [[WIDE_NPLUS1:%.*]] = zext i32 [[NPLUS1]] to i64
307 ; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i64 [[OUTER_IV]], [[WIDE_N]]
308 ; CHECK-NEXT: [[TMP8:%.*]] = mul nsw i64 [[OUTER_IV]], [[WIDE_NPLUS1]]
309 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_N]], 4
310 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
311 ; CHECK: vector.memcheck:
312 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
313 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
314 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
315 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
317 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_N]], 4
318 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_N]], [[N_MOD_VF]]
319 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
320 ; CHECK: vector.body:
321 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
322 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
323 ; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP9]], [[TMP7]]
324 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP10]]
325 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
326 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4, !alias.scope [[META16:![0-9]+]]
327 ; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP9]], [[TMP8]]
328 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP13]]
329 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
330 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP15]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META16]]
331 ; CHECK-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD]]
332 ; CHECK-NEXT: store <4 x i32> [[TMP16]], ptr [[TMP15]], align 4, !alias.scope [[META19]], !noalias [[META16]]
333 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
334 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
335 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
336 ; CHECK: middle.block:
337 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]]
338 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_EXIT]], label [[SCALAR_PH]]
340 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
341 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
343 ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ]
344 ; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[IV_INNER]], [[TMP7]]
345 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP18]]
346 ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
347 ; CHECK-NEXT: [[TMP20:%.*]] = add nuw nsw i64 [[IV_INNER]], [[TMP8]]
348 ; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP20]]
349 ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX8_US]], align 4
350 ; CHECK-NEXT: [[ADD9_US:%.*]] = add nsw i32 [[TMP21]], [[TMP19]]
351 ; CHECK-NEXT: store i32 [[ADD9_US]], ptr [[ARRAYIDX8_US]], align 4
352 ; CHECK-NEXT: [[IV_INNER_NEXT]] = add nuw nsw i64 [[IV_INNER]], 1
353 ; CHECK-NEXT: [[INNER_EXIT_COND:%.*]] = icmp eq i64 [[IV_INNER_NEXT]], [[WIDE_N]]
354 ; CHECK-NEXT: br i1 [[INNER_EXIT_COND]], label [[INNER_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
356 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
357 ; CHECK-NEXT: [[OUTER_EXIT_COND:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[WIDE_M]]
358 ; CHECK-NEXT: br i1 [[OUTER_EXIT_COND]], label [[OUTER_EXIT:%.*]], label [[OUTER_LOOP]]
360 ; CHECK-NEXT: ret void
363 %wide.m = zext i32 %m to i64
364 %wide.n = zext i32 %n to i64
368 %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
369 %nplus1 = add nuw nsw i32 %n, 1
370 %wide.nplus1 = zext i32 %nplus1 to i64
371 %0 = mul nsw i64 %outer.iv, %wide.n
372 %1 = mul nsw i64 %outer.iv, %wide.nplus1
376 %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
377 %2 = add nuw nsw i64 %iv.inner, %0
378 %arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %2
379 %3 = load i32, ptr %arrayidx.us, align 4
380 %4 = add nuw nsw i64 %iv.inner, %1
381 %arrayidx8.us = getelementptr inbounds i32, ptr %dst, i64 %4
382 %5 = load i32, ptr %arrayidx8.us, align 4
383 %add9.us = add nsw i32 %5, %3
384 store i32 %add9.us, ptr %arrayidx8.us, align 4
385 %iv.inner.next = add nuw nsw i64 %iv.inner, 1
386 %inner.exit.cond = icmp eq i64 %iv.inner.next, %wide.n
387 br i1 %inner.exit.cond, label %inner.exit, label %inner.loop
390 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
391 %outer.exit.cond = icmp eq i64 %outer.iv.next, %wide.m
392 br i1 %outer.exit.cond, label %outer.exit, label %outer.loop
399 ; Equivalent example in C:
400 ; void diff_checks_src_start_invariant(int32_t *dst, int32_t *src, int m, int n) {
401 ; for (int i = 0; i < m; i++) {
402 ; for (int j = 0; j < n; j++) {
403 ; dst[(i * n) + j] = src[j];
408 ; DEBUG-LABEL: 'diff_checks_src_start_invariant'
409 ; DEBUG: LAA: Found an analyzable loop: inner.loop
410 ; DEBUG-NOT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
412 define void @diff_checks_src_start_invariant(ptr nocapture noundef writeonly %dst, ptr nocapture noundef readonly %src, i32 noundef %m, i32 noundef %n) {
413 ; CHECK-LABEL: define void @diff_checks_src_start_invariant
414 ; CHECK-SAME: (ptr noundef writeonly captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) {
416 ; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64
417 ; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64
418 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
419 ; CHECK-NEXT: [[WIDE_M:%.*]] = zext i32 [[M]] to i64
420 ; CHECK-NEXT: [[WIDE_N:%.*]] = zext i32 [[N]] to i64
421 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[SRC2]]
422 ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[WIDE_N]], 2
423 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
425 ; CHECK-NEXT: [[IV_OUTER:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_OUTER_NEXT:%.*]], [[INNER_LOOP_EXIT:%.*]] ]
426 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], [[IV_OUTER]]
427 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP1]], [[TMP3]]
428 ; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[IV_OUTER]], [[TMP0]]
429 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_N]], 4
430 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
431 ; CHECK: vector.memcheck:
432 ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 16
433 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
435 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_N]], 4
436 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_N]], [[N_MOD_VF]]
437 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
438 ; CHECK: vector.body:
439 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
440 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
441 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP6]]
442 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
443 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4
444 ; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], [[TMP5]]
445 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP9]]
446 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
447 ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP11]], align 4
448 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
449 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
450 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
451 ; CHECK: middle.block:
452 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]]
453 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
455 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
456 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
458 ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ]
459 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV_INNER]]
460 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
461 ; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[IV_INNER]], [[TMP5]]
462 ; CHECK-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP14]]
463 ; CHECK-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX6_US]], align 4
464 ; CHECK-NEXT: [[IV_INNER_NEXT]] = add nuw nsw i64 [[IV_INNER]], 1
465 ; CHECK-NEXT: [[INNER_EXIT_COND:%.*]] = icmp eq i64 [[IV_INNER_NEXT]], [[WIDE_N]]
466 ; CHECK-NEXT: br i1 [[INNER_EXIT_COND]], label [[INNER_LOOP_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP24:![0-9]+]]
467 ; CHECK: inner.loop.exit:
468 ; CHECK-NEXT: [[IV_OUTER_NEXT]] = add nuw nsw i64 [[IV_OUTER]], 1
469 ; CHECK-NEXT: [[OUTER_EXIT_COND:%.*]] = icmp eq i64 [[IV_OUTER_NEXT]], [[WIDE_M]]
470 ; CHECK-NEXT: br i1 [[OUTER_EXIT_COND]], label [[OUTER_LOOP_EXIT:%.*]], label [[OUTER_LOOP]]
471 ; CHECK: outer.loop.exit:
472 ; CHECK-NEXT: ret void
475 %0 = zext i32 %n to i64
476 %wide.m = zext i32 %m to i64
477 %wide.n = zext i32 %n to i64
481 %iv.outer = phi i64 [ 0, %entry ], [ %iv.outer.next, %inner.loop.exit ]
482 %1 = mul nsw i64 %iv.outer, %0
486 %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
487 %arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %iv.inner
488 %2 = load i32, ptr %arrayidx.us, align 4
489 %3 = add nuw nsw i64 %iv.inner, %1
490 %arrayidx6.us = getelementptr inbounds i32, ptr %dst, i64 %3
491 store i32 %2, ptr %arrayidx6.us, align 4
492 %iv.inner.next = add nuw nsw i64 %iv.inner, 1
493 %inner.exit.cond = icmp eq i64 %iv.inner.next, %wide.n
494 br i1 %inner.exit.cond, label %inner.loop.exit, label %inner.loop
497 %iv.outer.next = add nuw nsw i64 %iv.outer, 1
498 %outer.exit.cond = icmp eq i64 %iv.outer.next, %wide.m
499 br i1 %outer.exit.cond, label %outer.loop.exit, label %outer.loop
506 ; Equivalent example in C:
507 ; void full_checks_src_start_invariant(int32_t *dst, int32_t *src, int m, int n) {
508 ; for (int i = 0; i < m; i++) {
509 ; for (int j = 0; j < n; j++) {
510 ; dst[(i * n) + j] += src[j];
515 ; DEBUG-LABEL: 'full_checks_src_start_invariant'
516 ; DEBUG: LAA: Found an analyzable loop: inner.loop
517 ; DEBUG: LAA: Adding RT check for range:
518 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
519 ; DEBUG-NEXT: Start: %dst End: ((4 * (zext i32 %m to i64) * (zext i32 %n to i64)) + %dst)
520 ; DEBUG-NEXT: LAA: Adding RT check for range:
521 ; DEBUG-NEXT: Start: %src End: ((4 * (zext i32 %n to i64))<nuw><nsw> + %src)
523 define void @full_checks_src_start_invariant(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i32 noundef %m, i32 noundef %n) {
524 ; CHECK-LABEL: define void @full_checks_src_start_invariant
525 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) {
527 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
528 ; CHECK-NEXT: [[WIDE_M:%.*]] = zext i32 [[M]] to i64
529 ; CHECK-NEXT: [[WIDE_N:%.*]] = zext i32 [[N]] to i64
530 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[WIDE_N]], [[WIDE_M]]
531 ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2
532 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]]
533 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[WIDE_N]], 2
534 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
535 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
537 ; CHECK-NEXT: [[IV_OUTER:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_OUTER_NEXT:%.*]], [[INNER_LOOP_EXIT:%.*]] ]
538 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 [[IV_OUTER]], [[TMP0]]
539 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_N]], 4
540 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
541 ; CHECK: vector.memcheck:
542 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
543 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
544 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
545 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
547 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_N]], 4
548 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_N]], [[N_MOD_VF]]
549 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
550 ; CHECK: vector.body:
551 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
552 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
553 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP5]]
554 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
555 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META25:![0-9]+]]
556 ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP5]], [[TMP4]]
557 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP8]]
558 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
559 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META28:![0-9]+]], !noalias [[META25]]
560 ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD]]
561 ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP10]], align 4, !alias.scope [[META28]], !noalias [[META25]]
562 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
563 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
564 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
565 ; CHECK: middle.block:
566 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]]
567 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
569 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
570 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
572 ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ]
573 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV_INNER]]
574 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
575 ; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[IV_INNER]], [[TMP4]]
576 ; CHECK-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP14]]
577 ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6_US]], align 4
578 ; CHECK-NEXT: [[ADD7_US:%.*]] = add nsw i32 [[TMP15]], [[TMP13]]
579 ; CHECK-NEXT: store i32 [[ADD7_US]], ptr [[ARRAYIDX6_US]], align 4
580 ; CHECK-NEXT: [[IV_INNER_NEXT]] = add nuw nsw i64 [[IV_INNER]], 1
581 ; CHECK-NEXT: [[INNER_EXIT_COND:%.*]] = icmp eq i64 [[IV_INNER_NEXT]], [[WIDE_N]]
582 ; CHECK-NEXT: br i1 [[INNER_EXIT_COND]], label [[INNER_LOOP_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
583 ; CHECK: inner.loop.exit:
584 ; CHECK-NEXT: [[IV_OUTER_NEXT]] = add nuw nsw i64 [[IV_OUTER]], 1
585 ; CHECK-NEXT: [[OUTER_EXIT_COND:%.*]] = icmp eq i64 [[IV_OUTER_NEXT]], [[WIDE_M]]
586 ; CHECK-NEXT: br i1 [[OUTER_EXIT_COND]], label [[OUTER_LOOP_EXIT:%.*]], label [[OUTER_LOOP]]
587 ; CHECK: outer.loop.exit:
588 ; CHECK-NEXT: ret void
591 %0 = zext i32 %n to i64
592 %wide.m = zext i32 %m to i64
593 %wide.n = zext i32 %n to i64
597 %iv.outer = phi i64 [ 0, %entry ], [ %iv.outer.next, %inner.loop.exit ]
598 %1 = mul nsw i64 %iv.outer, %0
602 %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
603 %arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %iv.inner
604 %2 = load i32, ptr %arrayidx.us, align 4
605 %3 = add nuw nsw i64 %iv.inner, %1
606 %arrayidx6.us = getelementptr inbounds i32, ptr %dst, i64 %3
607 %4 = load i32, ptr %arrayidx6.us, align 4
608 %add7.us = add nsw i32 %4, %2
609 store i32 %add7.us, ptr %arrayidx6.us, align 4
610 %iv.inner.next = add nuw nsw i64 %iv.inner, 1
611 %inner.exit.cond = icmp eq i64 %iv.inner.next, %wide.n
612 br i1 %inner.exit.cond, label %inner.loop.exit, label %inner.loop
615 %iv.outer.next = add nuw nsw i64 %iv.outer, 1
616 %outer.exit.cond = icmp eq i64 %iv.outer.next, %wide.m
617 br i1 %outer.exit.cond, label %outer.loop.exit, label %outer.loop
624 ; Equivalent example in C:
625 ; void triple_nested_loop_mixed_access(int *dst, int *src, int m, int n, int o) {
626 ; for (int i = 0; i < m; i++) {
627 ; for (int j = 0; j < n; j++) {
628 ; for (int l = 0; l < o; l++) {
629 ; dst[(i * n * (o + 1)) + (j * o) + l] += src[(i * n * o) + l];
634 ; The 'src' access varies with the outermost loop, rather than the parent of the
635 ; innermost loop. Hence we don't expand `src`, although in theory we could.
637 ; DEBUG-LABEL: 'triple_nested_loop_mixed_access'
638 ; DEBUG: LAA: Found an analyzable loop: inner.loop
639 ; DEBUG-NOT: LAA: Creating diff runtime check for:
640 ; DEBUG: LAA: Adding RT check for range:
641 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
642 ; DEBUG-NEXT: Start: {%dst,+,(4 * (zext i32 (1 + %o)<nsw> to i64) * (zext i32 %n to i64))}<%outer.outer.loop> End: {((4 * (zext i32 %n to i64) * (zext i32 %o to i64)) + %dst),+,(4 * (zext i32 (1 + %o)<nsw> to i64) * (zext i32 %n to i64))}<%outer.outer.loop>
643 ; DEBUG-NEXT: LAA: Adding RT check for range:
644 ; DEBUG-NEXT: Start: {%src,+,(4 * (zext i32 %n to i64) * (zext i32 %o to i64))}<%outer.outer.loop> End: {((4 * (zext i32 %o to i64))<nuw><nsw> + %src),+,(4 * (zext i32 %n to i64) * (zext i32 %o to i64))}<%outer.outer.loop>
646 define void @triple_nested_loop_mixed_access(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i32 noundef %m, i32 noundef %n, i32 noundef %o) {
647 ; CHECK-LABEL: define void @triple_nested_loop_mixed_access
648 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]], i32 noundef [[O:%.*]]) {
650 ; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[O]], 1
651 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[O]] to i64
652 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64
653 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[ADD11]] to i64
654 ; CHECK-NEXT: [[WIDE_TRIP_COUNT68:%.*]] = zext i32 [[M]] to i64
655 ; CHECK-NEXT: [[WIDE_TRIP_COUNT60:%.*]] = zext i32 [[N]] to i64
656 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[O]] to i64
657 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], [[TMP2]]
658 ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 2
659 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
660 ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2
661 ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
662 ; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 2
663 ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
664 ; CHECK-NEXT: br label [[OUTER_OUTER_LOOP:%.*]]
665 ; CHECK: outer.outer.loop:
666 ; CHECK-NEXT: [[OUTER_OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_OUTER_IV_NEXT:%.*]], [[OUTER_LOOP_END:%.*]] ]
667 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP4]], [[OUTER_OUTER_IV]]
668 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]]
669 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP6]], [[TMP10]]
670 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
671 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP8]], [[OUTER_OUTER_IV]]
672 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]]
673 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP9]], [[TMP12]]
674 ; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]]
675 ; CHECK-NEXT: [[TMP14:%.*]] = mul nsw i64 [[OUTER_OUTER_IV]], [[TMP1]]
676 ; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 [[TMP14]], [[TMP0]]
677 ; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 [[TMP14]], [[TMP2]]
678 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
680 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[INNER_LOOP_END:%.*]] ], [ 0, [[OUTER_OUTER_LOOP]] ]
681 ; CHECK-NEXT: [[TMP17:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP0]]
682 ; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[TMP17]], [[TMP16]]
683 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
684 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
685 ; CHECK: vector.memcheck:
686 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP3]]
687 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP1]]
688 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
689 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
691 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
692 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
693 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
694 ; CHECK: vector.body:
695 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
696 ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 0
697 ; CHECK-NEXT: [[TMP20:%.*]] = add nuw nsw i64 [[TMP19]], [[TMP15]]
698 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP20]]
699 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 0
700 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4, !alias.scope [[META32:![0-9]+]]
701 ; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[TMP18]], [[TMP19]]
702 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP23]]
703 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0
704 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP25]], align 4, !alias.scope [[META35:![0-9]+]], !noalias [[META32]]
705 ; CHECK-NEXT: [[TMP26:%.*]] = add nsw <4 x i32> [[WIDE_LOAD4]], [[WIDE_LOAD]]
706 ; CHECK-NEXT: store <4 x i32> [[TMP26]], ptr [[TMP25]], align 4, !alias.scope [[META35]], !noalias [[META32]]
707 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
708 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
709 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
710 ; CHECK: middle.block:
711 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
712 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_END]], label [[SCALAR_PH]]
714 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
715 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
717 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
718 ; CHECK-NEXT: [[TMP28:%.*]] = add nuw nsw i64 [[INNER_IV]], [[TMP15]]
719 ; CHECK-NEXT: [[ARRAYIDX_US_US_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP28]]
720 ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX_US_US_US]], align 4
721 ; CHECK-NEXT: [[TMP30:%.*]] = add nuw nsw i64 [[TMP18]], [[INNER_IV]]
722 ; CHECK-NEXT: [[ARRAYIDX17_US_US_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP30]]
723 ; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX17_US_US_US]], align 4
724 ; CHECK-NEXT: [[ADD18_US_US_US:%.*]] = add nsw i32 [[TMP31]], [[TMP29]]
725 ; CHECK-NEXT: store i32 [[ADD18_US_US_US]], ptr [[ARRAYIDX17_US_US_US]], align 4
726 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
727 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], [[WIDE_TRIP_COUNT]]
728 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[INNER_LOOP_END]], label [[INNER_LOOP]], !llvm.loop [[LOOP38:![0-9]+]]
729 ; CHECK: inner.loop.end:
730 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
731 ; CHECK-NEXT: [[EXIT_OUTER:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[WIDE_TRIP_COUNT60]]
732 ; CHECK-NEXT: br i1 [[EXIT_OUTER]], label [[OUTER_LOOP_END]], label [[OUTER_LOOP]]
733 ; CHECK: outer.loop.end:
734 ; CHECK-NEXT: [[OUTER_OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_OUTER_IV]], 1
735 ; CHECK-NEXT: [[EXIT_OUTER_OUTER:%.*]] = icmp eq i64 [[OUTER_OUTER_IV_NEXT]], [[WIDE_TRIP_COUNT68]]
736 ; CHECK-NEXT: br i1 [[EXIT_OUTER_OUTER]], label [[EXIT:%.*]], label [[OUTER_OUTER_LOOP]]
738 ; CHECK-NEXT: ret void
741 %add11 = add nsw i32 %o, 1
742 %0 = zext i32 %o to i64
743 %1 = zext i32 %n to i64
744 %2 = zext i32 %add11 to i64
745 %wide.trip.count68 = zext i32 %m to i64
746 %wide.trip.count60 = zext i32 %n to i64
747 %wide.trip.count = zext i32 %o to i64
748 br label %outer.outer.loop
751 %outer.outer.iv = phi i64 [ 0, %entry ], [ %outer.outer.iv.next, %outer.loop.end ]
752 %3 = mul nsw i64 %outer.outer.iv, %1
753 %4 = mul nsw i64 %3, %0
754 %5 = mul nsw i64 %3, %2
758 %outer.iv = phi i64 [ %outer.iv.next, %inner.loop.end ], [ 0, %outer.outer.loop ]
759 %6 = mul nsw i64 %outer.iv, %0
760 %7 = add nuw nsw i64 %6, %5
764 %inner.iv = phi i64 [ %inner.iv.next, %inner.loop ], [ 0, %outer.loop ]
765 %8 = add nuw nsw i64 %inner.iv, %4
766 %arrayidx.us.us.us = getelementptr inbounds i32, ptr %src, i64 %8
767 %9 = load i32, ptr %arrayidx.us.us.us, align 4
768 %10 = add nuw nsw i64 %7, %inner.iv
769 %arrayidx17.us.us.us = getelementptr inbounds i32, ptr %dst, i64 %10
770 %11 = load i32, ptr %arrayidx17.us.us.us, align 4
771 %add18.us.us.us = add nsw i32 %11, %9
772 store i32 %add18.us.us.us, ptr %arrayidx17.us.us.us, align 4
773 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
774 %exitcond.not = icmp eq i64 %inner.iv.next, %wide.trip.count
775 br i1 %exitcond.not, label %inner.loop.end, label %inner.loop
778 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
779 %exit.outer = icmp eq i64 %outer.iv.next, %wide.trip.count60
780 br i1 %exit.outer, label %outer.loop.end, label %outer.loop
783 %outer.outer.iv.next = add nuw nsw i64 %outer.outer.iv, 1
784 %exit.outer.outer = icmp eq i64 %outer.outer.iv.next, %wide.trip.count68
785 br i1 %exit.outer.outer, label %exit, label %outer.outer.loop
792 ; Equivalent example in C:
793 ; void uncomputable_outer_tc(int32_t *dst, int32_t *src, char *str, int n) {
795 ; while (str[i] != '\0') {
796 ; for (int j = 0; j < n; j++) {
797 ; dst[(i * (n + 1)) + j] += src[(i * n) + j];
802 ; The outer loop trip count is uncomputable, so we shouldn't expand the ranges.
804 ; DEBUG-LABEL: 'uncomputable_outer_tc'
805 ; DEBUG: LAA: Found an analyzable loop: inner.loop
806 ; DEBUG: LAA: Adding RT check for range:
807 ; DEBUG-NEXT: Start: {%dst,+,(4 * (zext i32 (1 + %n) to i64))<nuw><nsw>}<%outer.loop> End: {((4 * (zext i32 %n to i64))<nuw><nsw> + %dst),+,(4 * (zext i32 (1 + %n) to i64))<nuw><nsw>}<%outer.loop>
808 ; DEBUG-NEXT: LAA: Adding RT check for range:
809 ; DEBUG-NEXT: Start: {%src,+,(4 * (zext i32 %n to i64))<nuw><nsw>}<%outer.loop> End: {((4 * (zext i32 %n to i64))<nuw><nsw> + %src),+,(4 * (zext i32 %n to i64))<nuw><nsw>}<%outer.loop>
811 define void @uncomputable_outer_tc(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, ptr nocapture noundef readonly %str, i32 noundef %n) {
812 ; CHECK-LABEL: define void @uncomputable_outer_tc
813 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], ptr noundef readonly captures(none) [[STR:%.*]], i32 noundef [[N:%.*]]) {
815 ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR]], align 1
816 ; CHECK-NEXT: [[CMP_NOT23:%.*]] = icmp ne i8 [[TMP0]], 0
817 ; CHECK-NEXT: [[CMP221:%.*]] = icmp sgt i32 [[N]], 0
818 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT23]], [[CMP221]]
819 ; CHECK-NEXT: br i1 [[OR_COND]], label [[OUTER_LOOP_PREHEADER:%.*]], label [[WHILE_END:%.*]]
820 ; CHECK: outer.loop.preheader:
821 ; CHECK-NEXT: [[ADD6:%.*]] = add nuw nsw i32 [[N]], 1
822 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64
823 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[ADD6]] to i64
824 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
825 ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2
826 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
827 ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[WIDE_TRIP_COUNT]], 2
828 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
830 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[OUTER_LOOP_PREHEADER]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_LOOP_EXIT:%.*]] ]
831 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP3]], [[OUTER_IV]]
832 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
833 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], [[TMP6]]
834 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
835 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP5]], [[OUTER_IV]]
836 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP8]]
837 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP4]], [[TMP8]]
838 ; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP9]]
839 ; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP1]]
840 ; CHECK-NEXT: [[TMP11:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP2]]
841 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
842 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
843 ; CHECK: vector.memcheck:
844 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP3]]
845 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP1]]
846 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
847 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
849 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
850 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
851 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
852 ; CHECK: vector.body:
853 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
854 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
855 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw i64 [[TMP12]], [[TMP10]]
856 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP13]]
857 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
858 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP15]], align 4, !alias.scope [[META39:![0-9]+]]
859 ; CHECK-NEXT: [[TMP16:%.*]] = add nsw i64 [[TMP12]], [[TMP11]]
860 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP16]]
861 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
862 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP18]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META39]]
863 ; CHECK-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD4]], [[WIDE_LOAD]]
864 ; CHECK-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP18]], align 4, !alias.scope [[META42]], !noalias [[META39]]
865 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
866 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
867 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
868 ; CHECK: middle.block:
869 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
870 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
872 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
873 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
875 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ]
876 ; CHECK-NEXT: [[TMP21:%.*]] = add nsw i64 [[INNER_IV]], [[TMP10]]
877 ; CHECK-NEXT: [[ARRAYIDX5_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP21]]
878 ; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX5_US]], align 4
879 ; CHECK-NEXT: [[TMP23:%.*]] = add nsw i64 [[INNER_IV]], [[TMP11]]
880 ; CHECK-NEXT: [[ARRAYIDX10_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP23]]
881 ; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX10_US]], align 4
882 ; CHECK-NEXT: [[ADD11_US:%.*]] = add nsw i32 [[TMP24]], [[TMP22]]
883 ; CHECK-NEXT: store i32 [[ADD11_US]], ptr [[ARRAYIDX10_US]], align 4
884 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
885 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], [[WIDE_TRIP_COUNT]]
886 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[INNER_LOOP_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP45:![0-9]+]]
887 ; CHECK: inner.loop.exit:
888 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1
889 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i8, ptr [[STR]], i64 [[OUTER_IV_NEXT]]
890 ; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX_US]], align 1
891 ; CHECK-NEXT: [[CMP_NOT_US:%.*]] = icmp eq i8 [[TMP25]], 0
892 ; CHECK-NEXT: br i1 [[CMP_NOT_US]], label [[WHILE_END_LOOPEXIT:%.*]], label [[OUTER_LOOP]]
893 ; CHECK: while.end.loopexit:
894 ; CHECK-NEXT: br label [[WHILE_END]]
896 ; CHECK-NEXT: ret void
899 %0 = load i8, ptr %str, align 1
900 %cmp.not23 = icmp ne i8 %0, 0
901 %cmp221 = icmp sgt i32 %n, 0
902 %or.cond = and i1 %cmp.not23, %cmp221
903 br i1 %or.cond, label %outer.loop.preheader, label %while.end
905 outer.loop.preheader:
906 %add6 = add nuw nsw i32 %n, 1
907 %1 = zext i32 %n to i64
908 %2 = zext i32 %add6 to i64
909 %wide.trip.count = zext i32 %n to i64
913 %outer.iv = phi i64 [ 0, %outer.loop.preheader ], [ %outer.iv.next, %inner.loop.exit ]
914 %3 = mul nsw i64 %outer.iv, %1
915 %4 = mul nsw i64 %outer.iv, %2
919 %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
920 %5 = add nsw i64 %inner.iv, %3
921 %arrayidx5.us = getelementptr inbounds i32, ptr %src, i64 %5
922 %6 = load i32, ptr %arrayidx5.us, align 4
923 %7 = add nsw i64 %inner.iv, %4
924 %arrayidx10.us = getelementptr inbounds i32, ptr %dst, i64 %7
925 %8 = load i32, ptr %arrayidx10.us, align 4
926 %add11.us = add nsw i32 %8, %6
927 store i32 %add11.us, ptr %arrayidx10.us, align 4
928 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
929 %exitcond.not = icmp eq i64 %inner.iv.next, %wide.trip.count
930 br i1 %exitcond.not, label %inner.loop.exit, label %inner.loop
933 %outer.iv.next = add i64 %outer.iv, 1
934 %arrayidx.us = getelementptr inbounds i8, ptr %str, i64 %outer.iv.next
935 %9 = load i8, ptr %arrayidx.us, align 1
936 %cmp.not.us = icmp eq i8 %9, 0
937 br i1 %cmp.not.us, label %while.end, label %outer.loop
944 ; Equivalent example in C:
945 ; void decreasing_inner_iv(int32_t *dst, int32_t *src, int stride1, int stride2, int m, int n) {
946 ; for (int i = 0; i < m; i++) {
947 ; for (int j = n; j >= 0; j--) {
948 ; dst[(i * stride1) + j] += src[(i * stride2) + j];
952 ; Inner IV is decreasing, but this isn't a problem and we can still expand the
953 ; runtime checks correctly to cover the whole loop.
955 ; DEBUG-LABEL: 'decreasing_inner_iv'
956 ; DEBUG: LAA: Found an analyzable loop: inner.loop
957 ; DEBUG: LAA: Adding RT check for range:
958 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
959 ; DEBUG-NEXT: LAA: ... but need to check stride is positive: (4 * (sext i32 %stride1 to i64))<nsw>
960 ; DEBUG-NEXT: Start: %dst End: (4 + (4 * (zext i32 %n to i64))<nuw><nsw> + (4 * (sext i32 %stride1 to i64) * (-1 + (zext i32 %m to i64))<nsw>) + %dst)
961 ; DEBUG-NEXT: LAA: Adding RT check for range:
962 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
963 ; DEBUG-NEXT: LAA: ... but need to check stride is positive: (4 * (sext i32 %stride2 to i64))<nsw>
964 ; DEBUG-NEXT: Start: %src End: (4 + (4 * (zext i32 %n to i64))<nuw><nsw> + (4 * (sext i32 %stride2 to i64) * (-1 + (zext i32 %m to i64))<nsw>) + %src)
966 define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i32 noundef %stride1, i32 noundef %stride2, i32 noundef %m, i32 noundef %n) {
967 ; CHECK-LABEL: define void @decreasing_inner_iv
968 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[STRIDE1:%.*]], i32 noundef [[STRIDE2:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) {
970 ; CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[M]], 0
971 ; CHECK-NEXT: [[CMP218:%.*]] = icmp sgt i32 [[N]], -1
972 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP20]], [[CMP218]]
973 ; CHECK-NEXT: br i1 [[OR_COND]], label [[OUTER_LOOP_PRE:%.*]], label [[EXIT:%.*]]
974 ; CHECK: outer.loop.pre:
975 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
976 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[STRIDE2]] to i64
977 ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[STRIDE1]] to i64
978 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64
979 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
980 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP2]]
981 ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2
982 ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 2
983 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP5]], [[TMP6]]
984 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 4
985 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
986 ; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP2]], 2
987 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP3]], [[TMP1]]
988 ; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 2
989 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[TMP6]]
990 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 4
991 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]]
992 ; CHECK-NEXT: [[TMP14:%.*]] = shl nsw i64 [[TMP1]], 2
993 ; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP0]], 1
994 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
996 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[OUTER_LOOP_PRE]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_LOOP_EXIT:%.*]] ]
997 ; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP1]]
998 ; CHECK-NEXT: [[TMP17:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP2]]
999 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP15]], 4
1000 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1001 ; CHECK: vector.memcheck:
1002 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
1003 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
1004 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1005 ; CHECK-NEXT: [[STRIDE_CHECK:%.*]] = icmp slt i64 [[TMP9]], 0
1006 ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[FOUND_CONFLICT]], [[STRIDE_CHECK]]
1007 ; CHECK-NEXT: [[STRIDE_CHECK2:%.*]] = icmp slt i64 [[TMP14]], 0
1008 ; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[STRIDE_CHECK2]]
1009 ; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1011 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP15]], 4
1012 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP15]], [[N_MOD_VF]]
1013 ; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
1014 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1015 ; CHECK: vector.body:
1016 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1017 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[TMP0]], [[INDEX]]
1018 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0
1019 ; CHECK-NEXT: [[TMP22:%.*]] = add nsw i64 [[TMP21]], [[TMP16]]
1020 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP22]]
1021 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0
1022 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 -3
1023 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP25]], align 4, !alias.scope [[META46:![0-9]+]]
1024 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1025 ; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP21]], [[TMP17]]
1026 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP26]]
1027 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 0
1028 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 -3
1029 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4, !alias.scope [[META49:![0-9]+]], !noalias [[META46]]
1030 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1031 ; CHECK-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[REVERSE4]], [[REVERSE]]
1032 ; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i32> [[TMP30]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1033 ; CHECK-NEXT: store <4 x i32> [[REVERSE5]], ptr [[TMP29]], align 4, !alias.scope [[META49]], !noalias [[META46]]
1034 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1035 ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1036 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP51:![0-9]+]]
1037 ; CHECK: middle.block:
1038 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP15]], [[N_VEC]]
1039 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
1041 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[OUTER_LOOP]] ], [ [[TMP0]], [[VECTOR_MEMCHECK]] ]
1042 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
1043 ; CHECK: inner.loop:
1044 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ]
1045 ; CHECK-NEXT: [[TMP32:%.*]] = add nsw i64 [[INNER_IV]], [[TMP16]]
1046 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP32]]
1047 ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
1048 ; CHECK-NEXT: [[TMP34:%.*]] = add nsw i64 [[INNER_IV]], [[TMP17]]
1049 ; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP34]]
1050 ; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX8_US]], align 4
1051 ; CHECK-NEXT: [[ADD9_US:%.*]] = add nsw i32 [[TMP35]], [[TMP33]]
1052 ; CHECK-NEXT: store i32 [[ADD9_US]], ptr [[ARRAYIDX8_US]], align 4
1053 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], -1
1054 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[INNER_IV]], 0
1055 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[INNER_LOOP]], label [[INNER_LOOP_EXIT]], !llvm.loop [[LOOP52:![0-9]+]]
1056 ; CHECK: inner.loop.exit:
1057 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
1058 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[WIDE_TRIP_COUNT]]
1059 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[OUTER_LOOP_EXIT:%.*]], label [[OUTER_LOOP]]
1060 ; CHECK: outer.loop.exit:
1061 ; CHECK-NEXT: br label [[EXIT]]
1063 ; CHECK-NEXT: ret void
1066 %cmp20 = icmp sgt i32 %m, 0
1067 %cmp218 = icmp sgt i32 %n, -1
1068 %or.cond = and i1 %cmp20, %cmp218
1069 br i1 %or.cond, label %outer.loop.pre, label %exit
1072 %0 = zext i32 %n to i64
1073 %1 = sext i32 %stride2 to i64
1074 %2 = sext i32 %stride1 to i64
1075 %wide.trip.count = zext i32 %m to i64
1076 br label %outer.loop
1079 %outer.iv = phi i64 [ 0, %outer.loop.pre ], [ %outer.iv.next, %inner.loop.exit ]
1080 %3 = mul nsw i64 %outer.iv, %1
1081 %4 = mul nsw i64 %outer.iv, %2
1082 br label %inner.loop
1085 %inner.iv = phi i64 [ %0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
1086 %5 = add nsw i64 %inner.iv, %3
1087 %arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %5
1088 %6 = load i32, ptr %arrayidx.us, align 4
1089 %7 = add nsw i64 %inner.iv, %4
1090 %arrayidx8.us = getelementptr inbounds i32, ptr %dst, i64 %7
1091 %8 = load i32, ptr %arrayidx8.us, align 4
1092 %add9.us = add nsw i32 %8, %6
1093 store i32 %add9.us, ptr %arrayidx8.us, align 4
1094 %inner.iv.next = add nsw i64 %inner.iv, -1
1095 %cmp2.us = icmp sgt i64 %inner.iv, 0
1096 br i1 %cmp2.us, label %inner.loop, label %inner.loop.exit
1099 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
1100 %exitcond.not = icmp eq i64 %outer.iv.next, %wide.trip.count
1101 br i1 %exitcond.not, label %outer.loop.exit, label %outer.loop
1111 ; Equivalent example in C:
1112 ; void decreasing_outer_iv(int32_t *dst, int32_t *src, int stride1, int stride2, int m, int n) {
1113 ; for (int i = m - 1; i >= 0; i--) {
1114 ; for (int j = 0; j <= n; j++) {
1115 ; dst[(i * stride1) + j] += src[(i * stride2) + j];
1119 ; Outer IV is decreasing, but the direction of memory accesses also depends
1120 ; upon the signedness of stride1.
1122 ; DEBUG-LABEL: 'decreasing_outer_iv'
1123 ; DEBUG: LAA: Found an analyzable loop: inner.loop
1124 ; DEBUG: LAA: Adding RT check for range:
1125 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
1126 ; DEBUG-NEXT: LAA: ... but need to check stride is positive: (-4 * (sext i32 %stride1 to i64))<nsw>
1127 ; DEBUG-NEXT: Start: ((4 * (zext i32 %m to i64) * (sext i32 %stride1 to i64)) + %dst) End: ((4 * (zext i32 (1 + %n) to i64))<nuw><nsw> + (4 * (sext i32 %stride1 to i64))<nsw> + %dst)
1128 ; DEBUG-NEXT: LAA: Adding RT check for range:
1129 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
1130 ; DEBUG-NEXT: LAA: ... but need to check stride is positive: (-4 * (sext i32 %stride2 to i64))<nsw>
1131 ; DEBUG-NEXT: Start: ((4 * (zext i32 %m to i64) * (sext i32 %stride2 to i64)) + %src) End: ((4 * (zext i32 (1 + %n) to i64))<nuw><nsw> + (4 * (sext i32 %stride2 to i64))<nsw> + %src)
1133 define void @decreasing_outer_iv(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i32 noundef %stride1, i32 noundef %stride2, i32 noundef %m, i32 noundef %n) {
1134 ; CHECK-LABEL: define void @decreasing_outer_iv
1135 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[STRIDE1:%.*]], i32 noundef [[STRIDE2:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) {
1136 ; CHECK-NEXT: entry:
1137 ; CHECK-NEXT: [[CMP21:%.*]] = icmp slt i32 [[M]], 1
1138 ; CHECK-NEXT: [[CMP2_NOT18:%.*]] = icmp slt i32 [[N]], 0
1139 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP21]], [[CMP2_NOT18]]
1140 ; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[OUTER_LOOP_PRE:%.*]]
1141 ; CHECK: outer.loop.pre:
1142 ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[N]], 1
1143 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[M]] to i64
1144 ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[STRIDE1]] to i64
1145 ; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[STRIDE2]] to i64
1146 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
1147 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP1]]
1148 ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2
1149 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
1150 ; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP2]], 2
1151 ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
1152 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP6]], [[TMP7]]
1153 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
1154 ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw i64 [[TMP2]], -4
1155 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP3]], [[TMP1]]
1156 ; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 2
1157 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]]
1158 ; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i64 [[TMP3]], 2
1159 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[TMP7]]
1160 ; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]]
1161 ; CHECK-NEXT: [[TMP14:%.*]] = mul nsw i64 [[TMP3]], -4
1162 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
1163 ; CHECK: outer.loop:
1164 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[TMP1]], [[OUTER_LOOP_PRE]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_LOOP_EXIT:%.*]] ]
1165 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], -1
1166 ; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP3]]
1167 ; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP2]]
1168 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
1169 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1170 ; CHECK: vector.memcheck:
1171 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP3]]
1172 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP1]]
1173 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1174 ; CHECK-NEXT: [[STRIDE_CHECK:%.*]] = icmp slt i64 [[TMP9]], 0
1175 ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[FOUND_CONFLICT]], [[STRIDE_CHECK]]
1176 ; CHECK-NEXT: [[STRIDE_CHECK4:%.*]] = icmp slt i64 [[TMP14]], 0
1177 ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[STRIDE_CHECK4]]
1178 ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1180 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
1181 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
1182 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1183 ; CHECK: vector.body:
1184 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1185 ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 0
1186 ; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[TMP19]], [[TMP15]]
1187 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP20]]
1188 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 0
1189 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4, !alias.scope [[META53:![0-9]+]]
1190 ; CHECK-NEXT: [[TMP23:%.*]] = add nsw i64 [[TMP19]], [[TMP16]]
1191 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP23]]
1192 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0
1193 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP25]], align 4, !alias.scope [[META56:![0-9]+]], !noalias [[META53]]
1194 ; CHECK-NEXT: [[TMP26:%.*]] = add nsw <4 x i32> [[WIDE_LOAD5]], [[WIDE_LOAD]]
1195 ; CHECK-NEXT: store <4 x i32> [[TMP26]], ptr [[TMP25]], align 4, !alias.scope [[META56]], !noalias [[META53]]
1196 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1197 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1198 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
1199 ; CHECK: middle.block:
1200 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
1201 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
1203 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1204 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
1205 ; CHECK: inner.loop:
1206 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ]
1207 ; CHECK-NEXT: [[TMP28:%.*]] = add nsw i64 [[INNER_IV]], [[TMP15]]
1208 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP28]]
1209 ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1210 ; CHECK-NEXT: [[TMP30:%.*]] = add nsw i64 [[INNER_IV]], [[TMP16]]
1211 ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP30]]
1212 ; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4
1213 ; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP29]]
1214 ; CHECK-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX8]], align 4
1215 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
1216 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], [[WIDE_TRIP_COUNT]]
1217 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[INNER_LOOP_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
1218 ; CHECK: inner.loop.exit:
1219 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[OUTER_IV]], 1
1220 ; CHECK-NEXT: br i1 [[CMP]], label [[OUTER_LOOP]], label [[OUTER_LOOP_EXIT:%.*]]
1221 ; CHECK: outer.loop.exit:
1222 ; CHECK-NEXT: br label [[EXIT]]
1224 ; CHECK-NEXT: ret void
1227 %cmp21 = icmp slt i32 %m, 1
1228 %cmp2.not18 = icmp slt i32 %n, 0
1229 %or.cond = or i1 %cmp21, %cmp2.not18
1230 br i1 %or.cond, label %exit, label %outer.loop.pre
1233 %0 = add nuw i32 %n, 1
1234 %1 = zext i32 %m to i64
1235 %2 = sext i32 %stride1 to i64
1236 %3 = sext i32 %stride2 to i64
1237 %wide.trip.count = zext i32 %0 to i64
1238 br label %outer.loop
1241 %outer.iv = phi i64 [ %1, %outer.loop.pre ], [ %outer.iv.next, %inner.loop.exit ]
1242 %outer.iv.next = add nsw i64 %outer.iv, -1
1243 %4 = mul nsw i64 %outer.iv, %3
1244 %5 = mul nsw i64 %outer.iv, %2
1245 br label %inner.loop
1248 %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
1249 %6 = add nsw i64 %inner.iv, %4
1250 %arrayidx = getelementptr inbounds i32, ptr %src, i64 %6
1251 %7 = load i32, ptr %arrayidx, align 4
1252 %8 = add nsw i64 %inner.iv, %5
1253 %arrayidx8 = getelementptr inbounds i32, ptr %dst, i64 %8
1254 %9 = load i32, ptr %arrayidx8, align 4
1255 %add9 = add nsw i32 %9, %7
1256 store i32 %add9, ptr %arrayidx8, align 4
1257 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
1258 %exitcond.not = icmp eq i64 %inner.iv.next, %wide.trip.count
1259 br i1 %exitcond.not, label %inner.loop.exit, label %inner.loop
1262 %cmp = icmp sgt i64 %outer.iv, 1
1263 br i1 %cmp, label %outer.loop, label %outer.loop.exit
1273 ; Equivalent example in C:
1274 ; void unknown_inner_stride(int32_t *dst, int32_t *src, int stride1, int stride2, int m, int n) {
1275 ; for (int i = 0; i < m; i++) {
1276 ; for (int j = 0; j < n; j++) {
1277 ; dst[(i * (n + 1)) + (j * stride1)] += src[(i * n) + (j * stride2)];
1283 ; DEBUG-LABEL: 'unknown_inner_stride'
1284 ; DEBUG: LAA: Found an analyzable loop: inner.loop
1285 ; DEBUG: LAA: Adding RT check for range:
1286 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
1287 ; DEBUG-NEXT: Start: %dst End: ((4 * (zext i32 %n to i64))<nuw><nsw> + (4 * (zext i32 (1 + %n) to i64) * (-1 + (zext i32 %m to i64))<nsw>) + %dst)
1288 ; DEBUG-NEXT: LAA: Adding RT check for range:
1289 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
1290 ; DEBUG-NEXT: Start: %src End: ((4 * (zext i32 %m to i64) * (zext i32 %n to i64)) + %src)
1292 define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i32 noundef %stride1, i32 noundef %stride2, i32 noundef %m, i32 noundef %n) {
1293 ; CHECK-LABEL: define void @unknown_inner_stride
1294 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i32 noundef [[STRIDE1:%.*]], i32 noundef [[STRIDE2:%.*]], i32 noundef [[M:%.*]], i32 noundef [[N:%.*]]) {
1295 ; CHECK-NEXT: entry:
1296 ; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[M]], 0
1297 ; CHECK-NEXT: [[CMP224:%.*]] = icmp sgt i32 [[N]], 0
1298 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP26]], [[CMP224]]
1299 ; CHECK-NEXT: br i1 [[OR_COND]], label [[OUTER_LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
1300 ; CHECK: outer.loop.preheader:
1301 ; CHECK-NEXT: [[ADD6:%.*]] = add nuw nsw i32 [[N]], 1
1302 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[STRIDE2]] to i64
1303 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[STRIDE1]] to i64
1304 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64
1305 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[ADD6]] to i64
1306 ; CHECK-NEXT: [[WIDE_TRIP_COUNT39:%.*]] = zext i32 [[M]] to i64
1307 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1308 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT39]], -1
1309 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP3]]
1310 ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2
1311 ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
1312 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP6]], [[TMP7]]
1313 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
1314 ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[WIDE_TRIP_COUNT]], [[WIDE_TRIP_COUNT39]]
1315 ; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 2
1316 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP10]]
1317 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
1318 ; CHECK: outer.loop:
1319 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[OUTER_LOOP_PREHEADER]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_LOOP_EXIT:%.*]] ]
1320 ; CHECK-NEXT: [[TMP11:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP2]]
1321 ; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 [[OUTER_IV]], [[TMP3]]
1322 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
1323 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
1324 ; CHECK: vector.scevcheck:
1325 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STRIDE1]], 1
1326 ; CHECK-NEXT: [[IDENT_CHECK1:%.*]] = icmp ne i32 [[STRIDE2]], 1
1327 ; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK1]]
1328 ; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
1329 ; CHECK: vector.memcheck:
1330 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
1331 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
1332 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1333 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1335 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
1336 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
1337 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1338 ; CHECK: vector.body:
1339 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1340 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0
1341 ; CHECK-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP14]], [[TMP11]]
1342 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP15]]
1343 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
1344 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4, !alias.scope [[META60:![0-9]+]]
1345 ; CHECK-NEXT: [[TMP18:%.*]] = add nsw i64 [[TMP14]], [[TMP12]]
1346 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP18]]
1347 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
1348 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META60]]
1349 ; CHECK-NEXT: [[TMP21:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
1350 ; CHECK-NEXT: store <4 x i32> [[TMP21]], ptr [[TMP20]], align 4, !alias.scope [[META63]], !noalias [[META60]]
1351 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1352 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1353 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP65:![0-9]+]]
1354 ; CHECK: middle.block:
1355 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
1356 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
1358 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1359 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
1360 ; CHECK: inner.loop:
1361 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ]
1362 ; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i64 [[INNER_IV]], [[TMP0]]
1363 ; CHECK-NEXT: [[TMP24:%.*]] = add nsw i64 [[TMP23]], [[TMP11]]
1364 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP24]]
1365 ; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
1366 ; CHECK-NEXT: [[TMP26:%.*]] = mul nsw i64 [[INNER_IV]], [[TMP1]]
1367 ; CHECK-NEXT: [[TMP27:%.*]] = add nsw i64 [[TMP26]], [[TMP12]]
1368 ; CHECK-NEXT: [[ARRAYIDX11_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP27]]
1369 ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_US]], align 4
1370 ; CHECK-NEXT: [[ADD12_US:%.*]] = add nsw i32 [[TMP28]], [[TMP25]]
1371 ; CHECK-NEXT: store i32 [[ADD12_US]], ptr [[ARRAYIDX11_US]], align 4
1372 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
1373 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], [[WIDE_TRIP_COUNT]]
1374 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[INNER_LOOP_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP66:![0-9]+]]
1375 ; CHECK: inner.loop.exit:
1376 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
1377 ; CHECK-NEXT: [[EXITCOND40_NOT:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[WIDE_TRIP_COUNT39]]
1378 ; CHECK-NEXT: br i1 [[EXITCOND40_NOT]], label [[EXIT_LOOPEXIT:%.*]], label [[OUTER_LOOP]]
1379 ; CHECK: exit.loopexit:
1380 ; CHECK-NEXT: br label [[EXIT]]
1382 ; CHECK-NEXT: ret void
1385 %cmp26 = icmp sgt i32 %m, 0
1386 %cmp224 = icmp sgt i32 %n, 0
1387 %or.cond = and i1 %cmp26, %cmp224
1388 br i1 %or.cond, label %outer.loop.preheader, label %exit
1390 outer.loop.preheader:
1391 %add6 = add nuw nsw i32 %n, 1
1392 %0 = sext i32 %stride2 to i64
1393 %1 = sext i32 %stride1 to i64
1394 %2 = zext i32 %n to i64
1395 %3 = zext i32 %add6 to i64
1396 %wide.trip.count39 = zext i32 %m to i64
1397 %wide.trip.count = zext i32 %n to i64
1398 br label %outer.loop
1401 %outer.iv = phi i64 [ 0, %outer.loop.preheader ], [ %outer.iv.next, %inner.loop.exit ]
1402 %4 = mul nsw i64 %outer.iv, %2
1403 %5 = mul nsw i64 %outer.iv, %3
1404 br label %inner.loop
1407 %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
1408 %6 = mul nsw i64 %inner.iv, %0
1409 %7 = add nsw i64 %6, %4
1410 %arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %7
1411 %8 = load i32, ptr %arrayidx.us, align 4
1412 %9 = mul nsw i64 %inner.iv, %1
1413 %10 = add nsw i64 %9, %5
1414 %arrayidx11.us = getelementptr inbounds i32, ptr %dst, i64 %10
1415 %11 = load i32, ptr %arrayidx11.us, align 4
1416 %add12.us = add nsw i32 %11, %8
1417 store i32 %add12.us, ptr %arrayidx11.us, align 4
1418 %inner.iv.next = add nuw nsw i64 %inner.iv, 1
1419 %exitcond.not = icmp eq i64 %inner.iv.next, %wide.trip.count
1420 br i1 %exitcond.not, label %inner.loop.exit, label %inner.loop
1423 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
1424 %exitcond40.not = icmp eq i64 %outer.iv.next, %wide.trip.count39
1425 br i1 %exitcond40.not, label %exit, label %outer.loop
1432 ; Test case where the AddRecs for the pointers in the inner loop have the AddRec
1433 ; of the outer loop as start value. It is sufficient to subtract the start
1434 ; values (%dst, %src) of the outer AddRecs.
1435 define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %m, i64 noundef %n) {
1436 ; CHECK-LABEL: define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec
1437 ; CHECK-SAME: (ptr noundef captures(none) [[DST:%.*]], ptr noundef readonly captures(none) [[SRC:%.*]], i64 noundef [[M:%.*]], i64 noundef [[N:%.*]]) {
1438 ; CHECK-NEXT: entry:
1439 ; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64
1440 ; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64
1441 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]]
1442 ; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
1443 ; CHECK: outer.loop:
1444 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_EXIT:%.*]] ]
1445 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[OUTER_IV]], [[N]]
1446 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
1447 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1448 ; CHECK: vector.memcheck:
1449 ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
1450 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1452 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
1453 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1454 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1455 ; CHECK: vector.body:
1456 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1457 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
1458 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], [[MUL]]
1459 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]]
1460 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
1461 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
1462 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]]
1463 ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 10)
1464 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
1465 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4
1466 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1467 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1468 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP67:![0-9]+]]
1469 ; CHECK: middle.block:
1470 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1471 ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_EXIT]], label [[SCALAR_PH]]
1473 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1474 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
1475 ; CHECK: inner.loop:
1476 ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ]
1477 ; CHECK-NEXT: [[IDX:%.*]] = add nuw nsw i64 [[IV_INNER]], [[MUL]]
1478 ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IDX]]
1479 ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
1480 ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IDX]]
1481 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L]], 10
1482 ; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP_DST]], align 4
1483 ; CHECK-NEXT: [[IV_INNER_NEXT]] = add nuw nsw i64 [[IV_INNER]], 1
1484 ; CHECK-NEXT: [[INNER_EXIT_COND:%.*]] = icmp eq i64 [[IV_INNER_NEXT]], [[N]]
1485 ; CHECK-NEXT: br i1 [[INNER_EXIT_COND]], label [[INNER_EXIT]], label [[INNER_LOOP]], !llvm.loop [[LOOP68:![0-9]+]]
1486 ; CHECK: inner.exit:
1487 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
1488 ; CHECK-NEXT: [[OUTER_EXIT_COND:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[M]]
1489 ; CHECK-NEXT: br i1 [[OUTER_EXIT_COND]], label [[OUTER_EXIT:%.*]], label [[OUTER_LOOP]]
1490 ; CHECK: outer.exit:
1491 ; CHECK-NEXT: ret void
1494 br label %outer.loop
1497 %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
1498 %mul = mul nsw i64 %outer.iv, %n
1499 br label %inner.loop
1502 %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
1503 %idx = add nuw nsw i64 %iv.inner, %mul
1504 %gep.src = getelementptr inbounds i32, ptr %src, i64 %idx
1505 %l = load i32, ptr %gep.src, align 4
1506 %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %idx
1507 %add = add nsw i32 %l, 10
1508 store i32 %add, ptr %gep.dst, align 4
1509 %iv.inner.next = add nuw nsw i64 %iv.inner, 1
1510 %inner.exit.cond = icmp eq i64 %iv.inner.next, %n
1511 br i1 %inner.exit.cond, label %inner.exit, label %inner.loop
1514 %outer.iv.next = add nuw nsw i64 %outer.iv, 1
1515 %outer.exit.cond = icmp eq i64 %outer.iv.next, %m
1516 br i1 %outer.exit.cond, label %outer.exit, label %outer.loop
1522 ; The stride for the access in the inner loop is known to be non-negative via
1524 define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
1525 ; CHECK-LABEL: define void @stride_check_known_via_loop_guard
1526 ; CHECK-SAME: (ptr [[C:%.*]], ptr [[A:%.*]], i32 [[ACOLS:%.*]]) {
1527 ; CHECK-NEXT: entry:
1528 ; CHECK-NEXT: [[PRE_C:%.*]] = icmp ugt i32 [[ACOLS]], 0
1529 ; CHECK-NEXT: br i1 [[PRE_C]], label [[EXIT:%.*]], label [[OUTER_HEADER_PREHEADER:%.*]]
1530 ; CHECK: outer.header.preheader:
1531 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[C]], i64 8000
1532 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
1533 ; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
1534 ; CHECK: outer.header:
1535 ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i32 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER]] ]
1536 ; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[OUTER_IV]], [[ACOLS]]
1537 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr double, ptr [[A]], i32 [[MUL_US]]
1538 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1539 ; CHECK: vector.memcheck:
1540 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[C]], [[SCEVGEP1]]
1541 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
1542 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1543 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1545 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1546 ; CHECK: vector.body:
1547 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1548 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
1549 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[TMP0]]
1550 ; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]]
1551 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP2]], i64 0
1552 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1553 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
1554 ; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8, !alias.scope [[META72:![0-9]+]], !noalias [[META69]]
1555 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1556 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
1557 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]]
1558 ; CHECK: middle.block:
1559 ; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[SCALAR_PH]]
1561 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_HEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1562 ; CHECK-NEXT: br label [[INNER:%.*]]
1564 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
1565 ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[INNER_IV]]
1566 ; CHECK-NEXT: [[L:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8
1567 ; CHECK-NEXT: store double [[L]], ptr [[GEP_C]], align 8
1568 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add i32 [[INNER_IV]], 1
1569 ; CHECK-NEXT: [[INNER_C:%.*]] = icmp eq i32 [[INNER_IV_NEXT]], 1000
1570 ; CHECK-NEXT: br i1 [[INNER_C]], label [[OUTER_LATCH]], label [[INNER]], !llvm.loop [[LOOP75:![0-9]+]]
1571 ; CHECK: outer.latch:
1572 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i32 [[OUTER_IV]], 1
1573 ; CHECK-NEXT: [[OUTER_C:%.*]] = icmp ult i32 [[OUTER_IV]], 128
1574 ; CHECK-NEXT: br i1 [[OUTER_C]], label [[EXIT_LOOPEXIT:%.*]], label [[OUTER_HEADER]]
1575 ; CHECK: exit.loopexit:
1576 ; CHECK-NEXT: br label [[EXIT]]
1578 ; CHECK-NEXT: ret void
1581 %pre.c = icmp ugt i32 %Acols, 0
1582 br i1 %pre.c, label %exit, label %outer.header
1585 %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
1586 %mul.us = mul i32 %outer.iv, %Acols
1587 %arrayidx.us = getelementptr double, ptr %A, i32 %mul.us
1591 %inner.iv = phi i32 [ 0, %outer.header ], [ %inner.iv.next, %inner ]
1592 %gep.C = getelementptr inbounds double, ptr %C, i32 %inner.iv
1593 %l = load double, ptr %arrayidx.us, align 8
1594 store double %l, ptr %gep.C, align 8
1595 %inner.iv.next = add i32 %inner.iv, 1
1596 %inner.c = icmp eq i32 %inner.iv.next, 1000
1597 br i1 %inner.c, label %outer.latch, label %inner
1600 %outer.iv.next = add i32 %outer.iv, 1
1601 %outer.c = icmp ult i32 %outer.iv, 128
1602 br i1 %outer.c, label %exit, label %outer.header