; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-versioning -S < %s | FileCheck %s -check-prefix=LV

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; For this loop:
;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;
; SCEV is unable to prove that A[2 * i] does not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV Predicates.
;
; Once we have an affine expression, we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are not
; inbounds.
;
; The expression for %mul_ext as analyzed by SCEV is
;   (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the SCEV expression:
;   i64 {0,+,2}<%for.body>
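;
; For reference, a minimal C sketch of the complete loop assumed above (the
; signature, the initialization of 'index', and its increment are not shown in
; the fragment; they are inferred from the IR below, where %ind1 starts at 0
; and is incremented by 1 each iteration):
;
;   void f1(short *A, short *B, long n) {
;     unsigned index = 0;
;     for (int i = 0; i < n; i++) {
;       A[2 * index] = A[2 * index] + B[i];
;       index++;
;     }
;   }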
define void @f1(ptr noalias %a,
; LV-NEXT: for.body.lver.check:
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; LV-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
; LV-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
; LV-NEXT: br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV: for.body.lver.orig:
; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT: [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV-NEXT: br label [[FOR_BODY:%.*]]
; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT: [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT]]
; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr i16, ptr [[B]], i64 [[IND]]
; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2
; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1
; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
; LV: for.end.loopexit:
; LV-NEXT: br label [[FOR_END:%.*]]
; LV: for.end.loopexit2:
; LV-NEXT: br label [[FOR_END]]
                ptr noalias %b, i64 %N) {
for.body: ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64
  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2
  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2
  %add = mul i16 %loadA, %loadB
  store i16 %add, ptr %arrayidxA, align 2
  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1
  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
; For this loop:
;
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression, we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are not
; inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.
;
; The expression for %mul_ext as analyzed by SCEV is
;   (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;   i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
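;
; For reference, a minimal C sketch of the complete loop assumed above (the
; signature and the decrement of 'index' are inferred from the IR below,
; where %ind1 starts at %TruncN and is decremented by 1 each iteration):
;
;   void f2(short *A, short *B, long n) {
;     unsigned index = n;
;     for (int i = 0; i < n; i++) {
;       A[2 * index] = A[2 * index] + B[i];
;       index--;
;     }
;   }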
define void @f2(ptr noalias %a,
; LV-NEXT: for.body.lver.check:
; LV-NEXT: [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; LV-NEXT: [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
; LV-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], [[TMP1]]
; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; LV-NEXT: [[TMP8:%.*]] = trunc i64 [[N]] to i31
; LV-NEXT: [[TMP9:%.*]] = zext i31 [[TMP8]] to i64
; LV-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP10]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP11]]
; LV-NEXT: [[TMP13:%.*]] = icmp ugt ptr [[TMP12]], [[SCEVGEP]]
; LV-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP15:%.*]] = or i1 [[TMP7]], [[TMP14]]
; LV-NEXT: br i1 [[TMP15]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV: for.body.lver.orig:
; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT: [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV-NEXT: br label [[FOR_BODY:%.*]]
; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT: [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT]]
; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr i16, ptr [[B]], i64 [[IND]]
; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2
; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1
; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
; LV: for.end.loopexit:
; LV-NEXT: br label [[FOR_END:%.*]]
; LV: for.end.loopexit5:
; LV-NEXT: br label [[FOR_END]]
                ptr noalias %b, i64 %N) {
  %TruncN = trunc i64 %N to i32
for.body: ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64
  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2
  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2
  %add = mul i16 %loadA, %loadB
  store i16 %add, ptr %arrayidxA, align 2
  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1
  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
; We replicate the tests above, but this time sign extend 2 * index instead
; of zero extending it.
;
; The expression for %mul_ext as analyzed by SCEV is
;   i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;   i64 {0,+,2}<%for.body>
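;
; A rough C analogue of this sign-extending variant (hypothetical, for
; illustration only: treating the index expression as signed makes the
; widening cast a sext rather than a zext; the signature mirrors the IR
; below):
;
;   void f3(short *A, short *B, long n) {
;     int index = 0;
;     for (int i = 0; i < n; i++) {
;       A[(long)(2 * index)] = A[(long)(2 * index)] + B[i];
;       index++;
;     }
;   }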
define void @f3(ptr noalias %a,
; LV-NEXT: for.body.lver.check:
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; LV-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP2:%.*]] = icmp slt i32 [[MUL_RESULT]], 0
; LV-NEXT: [[TMP3:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[TMP6:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP8:%.*]] = icmp ult ptr [[TMP7]], [[A]]
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP5]], [[TMP9]]
; LV-NEXT: br i1 [[TMP10]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV: for.body.lver.orig:
; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = sext i32 [[MUL_LVER_ORIG]] to i64
; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT: [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV-NEXT: br label [[FOR_BODY:%.*]]
; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT: [[MUL_EXT:%.*]] = sext i32 [[MUL]] to i64
; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT]]
; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr i16, ptr [[B]], i64 [[IND]]
; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2
; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1
; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
; LV: for.end.loopexit:
; LV-NEXT: br label [[FOR_END:%.*]]
; LV: for.end.loopexit5:
; LV-NEXT: br label [[FOR_END]]
                ptr noalias %b, i64 %N) {
for.body: ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64
  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2
  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2
  %add = mul i16 %loadA, %loadB
  store i16 %add, ptr %arrayidxA, align 2
  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1
  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
define void @f4(ptr noalias %a,
; LV-NEXT: for.body.lver.check:
; LV-NEXT: [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; LV-NEXT: [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
; LV-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP1]]
; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; LV-NEXT: [[TMP8:%.*]] = sext i32 [[TMP1]] to i64
; LV-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP8]], 1
; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP9]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[TMP10:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP10]]
; LV-NEXT: [[TMP12:%.*]] = icmp ugt ptr [[TMP11]], [[SCEVGEP]]
; LV-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP14:%.*]] = or i1 [[TMP7]], [[TMP13]]
; LV-NEXT: br i1 [[TMP14]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV: for.body.lver.orig:
; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = sext i32 [[MUL_LVER_ORIG]] to i64
; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT: [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV-NEXT: br label [[FOR_BODY:%.*]]
; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT: [[MUL_EXT:%.*]] = sext i32 [[MUL]] to i64
; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT]]
; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr i16, ptr [[B]], i64 [[IND]]
; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2
; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1
; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
; LV: for.end.loopexit:
; LV-NEXT: br label [[FOR_END:%.*]]
; LV: for.end.loopexit5:
; LV-NEXT: br label [[FOR_END]]
                ptr noalias %b, i64 %N) {
  %TruncN = trunc i64 %N to i32
for.body: ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64
  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2
  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2
  %add = mul i16 %loadA, %loadB
  store i16 %add, ptr %arrayidxA, align 2
  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1
  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
; The following function is similar to the one above, but the GEP to
; pointer %a is marked inbounds. The index %mul doesn't have the nsw flag.
; This means that the SCEV expression for %mul can wrap, and we need
; a SCEV predicate to continue the analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates.
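;
; A rough C analogue (hypothetical, for illustration only; the signature and
; the decrementing index are modeled on the IR below): the 32-bit index
; computation may wrap because the multiply carries no nsw flag, while the
; address computation itself uses an inbounds GEP:
;
;   void f5(short *A, short *B, long n) {
;     unsigned index = n;
;     for (int i = 0; i < n; i++) {
;       int idx = 2 * index;      /* may wrap in 32 bits: the multiply has no nsw */
;       A[idx] = A[idx] + B[i];   /* &A[idx] corresponds to the inbounds GEP */
;       index--;
;     }
;   }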
define void @f5(ptr noalias %a,
; LV-NEXT: for.body.lver.check:
; LV-NEXT: [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; LV-NEXT: [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
; LV-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP1]]
; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP14:%.*]] = or i1 [[TMP5]], [[TMP6]]
; LV-NEXT: br i1 [[TMP14]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV: for.body.lver.orig:
; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i32 [[MUL_LVER_ORIG]]
; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT: [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV-NEXT: br label [[FOR_BODY:%.*]]
; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ]
; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[MUL]]
; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[IND]]
; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2
; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2
; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1
; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
; LV: for.end.loopexit:
; LV-NEXT: br label [[FOR_END:%.*]]
; LV: for.end.loopexit2:
; LV-NEXT: br label [[FOR_END]]
                ptr noalias %b, i64 %N) {
  %TruncN = trunc i64 %N to i32
for.body: ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
  %mul = mul i32 %ind1, 2
  %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul
  %loadA = load i16, ptr %arrayidxA, align 2
  %arrayidxB = getelementptr inbounds i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2
  %add = mul i16 %loadA, %loadB
  store i16 %add, ptr %arrayidxA, align 2
  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1
  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body