1 ; RUN: opt < %s -O1 -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
; Target layout string: "e" selects little-endian, "p:32:32:32" gives 32-bit
; pointers and "n32" a 32-bit native integer width (LLVM LangRef, data layout).
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
; Two 100-element i32 arrays (fields 0 and 2) separated by a single i32 (field 1).
5 %struct.anon = type { [100 x i32], i32, [100 x i32] }
; Same shape as above, but each array field is 100 x 100 i32.
6 %struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
; Zero-initialized common globals, one per struct type; every array access in
; the functions below that does not go through @PA/@PB addresses one of these.
8 @Foo = common global %struct.anon zeroinitializer, align 4
9 @Bar = common global %struct.anon.0 zeroinitializer, align 4
; Pointer-typed globals that are only declared (external) here; the
; pointer-based tests load their base addresses from these.
11 @PB = external global i32*
12 @PA = external global i32*
15 ;; === First, the tests that should always vectorize, whether statically or by adding run-time checks ===
18 ; /// Different objects, positive induction, constant distance
19 ; int noAlias01 (int a) {
21 ; for (i=0; i<SIZE; i++)
22 ; Foo.A[i] = Foo.B[i] + a;
25 ; CHECK-LABEL: define i32 @noAlias01(
26 ; CHECK: add nsw <4 x i32>
; noAlias01: each iteration loads field 2 of @Foo at [i], adds %a and stores the
; sum into field 0 of @Foo at [i]. Load and store use the same index.
29 define i32 @noAlias01(i32 %a) nounwind {
; Spill the argument and the induction variable (initially 0) to stack slots.
31 %a.addr = alloca i32, align 4
32 %i = alloca i32, align 4
33 store i32 %a, i32* %a.addr, align 4
34 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
37 for.cond: ; preds = %for.inc, %entry
38 %0 = load i32, i32* %i, align 4
39 %cmp = icmp slt i32 %0, 100
40 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [i].
42 for.body: ; preds = %for.cond
43 %1 = load i32, i32* %i, align 4
44 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
45 %2 = load i32, i32* %arrayidx, align 4
46 %3 = load i32, i32* %a.addr, align 4
47 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [i].
48 %4 = load i32, i32* %i, align 4
49 %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
50 store i32 %add, i32* %arrayidx1, align 4
; Latch: i = i + 1.
53 for.inc: ; preds = %for.body
54 %5 = load i32, i32* %i, align 4
55 %inc = add nsw i32 %5, 1
56 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
59 for.end: ; preds = %for.cond
60 %6 = load i32, i32* %a.addr, align 4
61 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
62 %7 = load i32, i32* %arrayidx2, align 4
66 ; /// Different objects, positive induction with widening slide
67 ; int noAlias02 (int a) {
69 ; for (i=0; i<SIZE-10; i++)
70 ; Foo.A[i] = Foo.B[i+10] + a;
73 ; CHECK-LABEL: define i32 @noAlias02(
74 ; CHECK: add nsw <4 x i32>
; noAlias02: each iteration loads field 2 of @Foo at [i+10], adds %a and stores
; the sum into field 0 of @Foo at [i]. The trip bound is 90, not 100.
77 define i32 @noAlias02(i32 %a) {
; Spill the argument and the induction variable (initially 0) to stack slots.
79 %a.addr = alloca i32, align 4
80 %i = alloca i32, align 4
81 store i32 %a, i32* %a.addr, align 4
82 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 90 (signed compare), so i+10 stays below 100.
85 for.cond: ; preds = %for.inc, %entry
86 %0 = load i32, i32* %i, align 4
87 %cmp = icmp slt i32 %0, 90
88 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [i+10].
90 for.body: ; preds = %for.cond
91 %1 = load i32, i32* %i, align 4
92 %add = add nsw i32 %1, 10
93 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
94 %2 = load i32, i32* %arrayidx, align 4
95 %3 = load i32, i32* %a.addr, align 4
96 %add1 = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [i].
97 %4 = load i32, i32* %i, align 4
98 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
99 store i32 %add1, i32* %arrayidx2, align 4
; Latch: i = i + 1.
102 for.inc: ; preds = %for.body
103 %5 = load i32, i32* %i, align 4
104 %inc = add nsw i32 %5, 1
105 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
108 for.end: ; preds = %for.cond
109 %6 = load i32, i32* %a.addr, align 4
110 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
111 %7 = load i32, i32* %arrayidx3, align 4
115 ; /// Different objects, positive induction with shortening slide
116 ; int noAlias03 (int a) {
118 ; for (i=0; i<SIZE; i++)
119 ; Foo.A[i+10] = Foo.B[i] + a;
122 ; CHECK-LABEL: define i32 @noAlias03(
123 ; CHECK: add nsw <4 x i32>
; noAlias03: each iteration loads field 2 of @Foo at [i], adds %a and stores the
; sum into field 0 of @Foo at [i+10].
126 define i32 @noAlias03(i32 %a) {
; Spill the argument and the induction variable (initially 0) to stack slots.
128 %a.addr = alloca i32, align 4
129 %i = alloca i32, align 4
130 store i32 %a, i32* %a.addr, align 4
131 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
134 for.cond: ; preds = %for.inc, %entry
135 %0 = load i32, i32* %i, align 4
136 %cmp = icmp slt i32 %0, 100
137 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [i].
139 for.body: ; preds = %for.cond
140 %1 = load i32, i32* %i, align 4
141 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
142 %2 = load i32, i32* %arrayidx, align 4
143 %3 = load i32, i32* %a.addr, align 4
144 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [i+10].
145 %4 = load i32, i32* %i, align 4
146 %add1 = add nsw i32 %4, 10
147 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
148 store i32 %add, i32* %arrayidx2, align 4
; Latch: i = i + 1.
151 for.inc: ; preds = %for.body
152 %5 = load i32, i32* %i, align 4
153 %inc = add nsw i32 %5, 1
154 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
157 for.end: ; preds = %for.cond
158 %6 = load i32, i32* %a.addr, align 4
159 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
160 %7 = load i32, i32* %arrayidx3, align 4
164 ; /// Pointer access, positive stride, run-time check added
165 ; int noAlias04 (int a) {
167 ; for (i=0; i<SIZE; i++)
168 ; *(PA+i) = *(PB+i) + a;
171 ; CHECK-LABEL: define i32 @noAlias04(
172 ; CHECK-NOT: add nsw <4 x i32>
175 ; TODO: This test vectorizes (with a run-time check) on real targets with -O3.
176 ; Investigate why it is not vectorized here even when vectorization is forced.
; noAlias04: pointer-based variant. Each iteration loads *(PB + i), adds %a and
; stores the sum to *(PA + i). Both base pointers are re-read from the globals
; @PB and @PA inside the loop body in this unoptimized form.
178 define i32 @noAlias04(i32 %a) #0 {
; Spill the argument and the induction variable (initially 0) to stack slots.
180 %a.addr = alloca i32, align 4
181 %i = alloca i32, align 4
182 store i32 %a, i32* %a.addr, align 4
183 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
186 for.cond: ; preds = %for.inc, %entry
187 %0 = load i32, i32* %i, align 4
188 %cmp = icmp slt i32 %0, 100
189 br i1 %cmp, label %for.body, label %for.end
; Loop body: read through the pointer held in @PB, offset by i elements.
191 for.body: ; preds = %for.cond
192 %1 = load i32*, i32** @PB, align 4
193 %2 = load i32, i32* %i, align 4
194 %add.ptr = getelementptr inbounds i32, i32* %1, i32 %2
195 %3 = load i32, i32* %add.ptr, align 4
196 %4 = load i32, i32* %a.addr, align 4
197 %add = add nsw i32 %3, %4
; Write through the pointer held in @PA, offset by i elements.
198 %5 = load i32*, i32** @PA, align 4
199 %6 = load i32, i32* %i, align 4
200 %add.ptr1 = getelementptr inbounds i32, i32* %5, i32 %6
201 store i32 %add, i32* %add.ptr1, align 4
; Latch: i = i + 1.
204 for.inc: ; preds = %for.body
205 %7 = load i32, i32* %i, align 4
206 %inc = add nsw i32 %7, 1
207 store i32 %inc, i32* %i, align 4
; After the loop: load *(PA + a).
210 for.end: ; preds = %for.cond
211 %8 = load i32*, i32** @PA, align 4
212 %9 = load i32, i32* %a.addr, align 4
213 %add.ptr2 = getelementptr inbounds i32, i32* %8, i32 %9
214 %10 = load i32, i32* %add.ptr2, align 4
218 ; /// Different objects, positive induction, multi-array
219 ; int noAlias05 (int a) {
221 ; for (i=0; i<SIZE; i++)
222 ; Bar.A[N][i] = Bar.B[N][i] + a;
223 ; return Bar.A[N][a];
225 ; CHECK-LABEL: define i32 @noAlias05(
226 ; CHECK: add nsw <4 x i32>
; noAlias05: two-dimensional variant on @Bar. With N fixed at 10, each iteration
; loads row N, element i of field 2, adds %a and stores to row N, element i of
; field 0.
229 define i32 @noAlias05(i32 %a) #0 {
; Spill the argument, the induction variable (initially 0) and the row index
; N (constant 10) to stack slots.
231 %a.addr = alloca i32, align 4
232 %i = alloca i32, align 4
233 %N = alloca i32, align 4
234 store i32 %a, i32* %a.addr, align 4
235 store i32 10, i32* %N, align 4
236 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
239 for.cond: ; preds = %for.inc, %entry
240 %0 = load i32, i32* %i, align 4
241 %cmp = icmp slt i32 %0, 100
242 br i1 %cmp, label %for.body, label %for.end
; Loop body: select row N of @Bar field 2, then element i within that row.
244 for.body: ; preds = %for.cond
245 %1 = load i32, i32* %i, align 4
246 %2 = load i32, i32* %N, align 4
247 %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
248 %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %1
249 %3 = load i32, i32* %arrayidx1, align 4
250 %4 = load i32, i32* %a.addr, align 4
251 %add = add nsw i32 %3, %4
; Write the sum to row N, element i of @Bar field 0.
252 %5 = load i32, i32* %i, align 4
253 %6 = load i32, i32* %N, align 4
254 %arrayidx2 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
255 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx2, i32 0, i32 %5
256 store i32 %add, i32* %arrayidx3, align 4
; Latch: i = i + 1.
259 for.inc: ; preds = %for.body
260 %7 = load i32, i32* %i, align 4
261 %inc = add nsw i32 %7, 1
262 store i32 %inc, i32* %i, align 4
; After the loop: load row N, element a of @Bar field 0.
265 for.end: ; preds = %for.cond
266 %8 = load i32, i32* %a.addr, align 4
267 %9 = load i32, i32* %N, align 4
268 %arrayidx4 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
269 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx4, i32 0, i32 %8
270 %10 = load i32, i32* %arrayidx5, align 4
274 ; /// Same objects, positive induction, multi-array, different sub-elements
275 ; int noAlias06 (int a) {
277 ; for (i=0; i<SIZE; i++)
278 ; Bar.A[N][i] = Bar.A[N+1][i] + a;
279 ; return Bar.A[N][a];
281 ; CHECK-LABEL: define i32 @noAlias06(
282 ; CHECK: add nsw <4 x i32>
; noAlias06: both accesses go to field 0 of @Bar but to different rows. With N
; fixed at 10, each iteration loads row N+1, element i, adds %a and stores to
; row N, element i.
285 define i32 @noAlias06(i32 %a) #0 {
; Spill the argument, the induction variable (initially 0) and the row index
; N (constant 10) to stack slots.
287 %a.addr = alloca i32, align 4
288 %i = alloca i32, align 4
289 %N = alloca i32, align 4
290 store i32 %a, i32* %a.addr, align 4
291 store i32 10, i32* %N, align 4
292 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
295 for.cond: ; preds = %for.inc, %entry
296 %0 = load i32, i32* %i, align 4
297 %cmp = icmp slt i32 %0, 100
298 br i1 %cmp, label %for.body, label %for.end
; Loop body: select row N+1 of @Bar field 0, then element i within that row.
300 for.body: ; preds = %for.cond
301 %1 = load i32, i32* %i, align 4
302 %2 = load i32, i32* %N, align 4
303 %add = add nsw i32 %2, 1
304 %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
305 %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %1
306 %3 = load i32, i32* %arrayidx1, align 4
307 %4 = load i32, i32* %a.addr, align 4
308 %add2 = add nsw i32 %3, %4
; Write the sum to row N, element i of the same field.
309 %5 = load i32, i32* %i, align 4
310 %6 = load i32, i32* %N, align 4
311 %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
312 %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx3, i32 0, i32 %5
313 store i32 %add2, i32* %arrayidx4, align 4
; Latch: i = i + 1.
316 for.inc: ; preds = %for.body
317 %7 = load i32, i32* %i, align 4
318 %inc = add nsw i32 %7, 1
319 store i32 %inc, i32* %i, align 4
; After the loop: load row N, element a of @Bar field 0.
322 for.end: ; preds = %for.cond
323 %8 = load i32, i32* %a.addr, align 4
324 %9 = load i32, i32* %N, align 4
325 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
326 %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx5, i32 0, i32 %8
327 %10 = load i32, i32* %arrayidx6, align 4
331 ; /// Different objects, negative induction, constant distance
332 ; int noAlias07 (int a) {
334 ; for (i=0; i<SIZE; i++)
335 ; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
338 ; CHECK-LABEL: define i32 @noAlias07(
339 ; CHECK: store <4 x i32>
; noAlias07: descending-index variant. Each iteration loads field 2 of @Foo at
; [100-i-1], adds %a and stores to field 0 of @Foo at the same index.
341 define i32 @noAlias07(i32 %a) #0 {
; Spill the argument and the induction variable (initially 0) to stack slots.
343 %a.addr = alloca i32, align 4
344 %i = alloca i32, align 4
345 store i32 %a, i32* %a.addr, align 4
346 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
349 for.cond: ; preds = %for.inc, %entry
350 %0 = load i32, i32* %i, align 4
351 %cmp = icmp slt i32 %0, 100
352 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [100 - i - 1], so the index falls as i rises.
354 for.body: ; preds = %for.cond
355 %1 = load i32, i32* %i, align 4
356 %sub = sub nsw i32 100, %1
357 %sub1 = sub nsw i32 %sub, 1
358 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
359 %2 = load i32, i32* %arrayidx, align 4
360 %3 = load i32, i32* %a.addr, align 4
361 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [100 - i - 1].
362 %4 = load i32, i32* %i, align 4
363 %sub2 = sub nsw i32 100, %4
364 %sub3 = sub nsw i32 %sub2, 1
365 %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
366 store i32 %add, i32* %arrayidx4, align 4
; Latch: i = i + 1.
369 for.inc: ; preds = %for.body
370 %5 = load i32, i32* %i, align 4
371 %inc = add nsw i32 %5, 1
372 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
375 for.end: ; preds = %for.cond
376 %6 = load i32, i32* %a.addr, align 4
377 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
378 %7 = load i32, i32* %arrayidx5, align 4
382 ; /// Different objects, negative induction, shortening slide
383 ; int noAlias08 (int a) {
385 ; for (i=0; i<SIZE-10; i++)
386 ; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
389 ; CHECK-LABEL: define i32 @noAlias08(
390 ; CHECK: load <4 x i32>
; noAlias08: descending-index variant with different offsets. Each iteration
; loads field 2 of @Foo at [100-i-10], adds %a and stores to field 0 of @Foo at
; [100-i-1]. The trip bound is 90.
393 define i32 @noAlias08(i32 %a) #0 {
; Spill the argument and the induction variable (initially 0) to stack slots.
395 %a.addr = alloca i32, align 4
396 %i = alloca i32, align 4
397 store i32 %a, i32* %a.addr, align 4
398 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 90 (signed compare).
401 for.cond: ; preds = %for.inc, %entry
402 %0 = load i32, i32* %i, align 4
403 %cmp = icmp slt i32 %0, 90
404 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [100 - i - 10].
406 for.body: ; preds = %for.cond
407 %1 = load i32, i32* %i, align 4
408 %sub = sub nsw i32 100, %1
409 %sub1 = sub nsw i32 %sub, 10
410 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
411 %2 = load i32, i32* %arrayidx, align 4
412 %3 = load i32, i32* %a.addr, align 4
413 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [100 - i - 1].
414 %4 = load i32, i32* %i, align 4
415 %sub2 = sub nsw i32 100, %4
416 %sub3 = sub nsw i32 %sub2, 1
417 %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
418 store i32 %add, i32* %arrayidx4, align 4
; Latch: i = i + 1.
421 for.inc: ; preds = %for.body
422 %5 = load i32, i32* %i, align 4
423 %inc = add nsw i32 %5, 1
424 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
427 for.end: ; preds = %for.cond
428 %6 = load i32, i32* %a.addr, align 4
429 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
430 %7 = load i32, i32* %arrayidx5, align 4
434 ; /// Different objects, negative induction, widening slide
435 ; int noAlias09 (int a) {
437 ; for (i=0; i<SIZE; i++)
438 ; Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
441 ; CHECK-LABEL: define i32 @noAlias09(
442 ; CHECK: load <4 x i32>
; noAlias09: descending-index variant with the offsets of noAlias08 swapped.
; Each iteration loads field 2 of @Foo at [100-i-1], adds %a and stores to
; field 0 of @Foo at [100-i-10].
445 define i32 @noAlias09(i32 %a) #0 {
; Spill the argument and the induction variable (initially 0) to stack slots.
447 %a.addr = alloca i32, align 4
448 %i = alloca i32, align 4
449 store i32 %a, i32* %a.addr, align 4
450 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
453 for.cond: ; preds = %for.inc, %entry
454 %0 = load i32, i32* %i, align 4
455 %cmp = icmp slt i32 %0, 100
456 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [100 - i - 1].
458 for.body: ; preds = %for.cond
459 %1 = load i32, i32* %i, align 4
460 %sub = sub nsw i32 100, %1
461 %sub1 = sub nsw i32 %sub, 1
462 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
463 %2 = load i32, i32* %arrayidx, align 4
464 %3 = load i32, i32* %a.addr, align 4
465 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [100 - i - 10].
466 %4 = load i32, i32* %i, align 4
467 %sub2 = sub nsw i32 100, %4
468 %sub3 = sub nsw i32 %sub2, 10
469 %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
470 store i32 %add, i32* %arrayidx4, align 4
; Latch: i = i + 1.
473 for.inc: ; preds = %for.body
474 %5 = load i32, i32* %i, align 4
475 %inc = add nsw i32 %5, 1
476 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
479 for.end: ; preds = %for.cond
480 %6 = load i32, i32* %a.addr, align 4
481 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
482 %7 = load i32, i32* %arrayidx5, align 4
486 ; /// Pointer access, negative stride, run-time check added
487 ; int noAlias10 (int a) {
489 ; for (i=0; i<SIZE; i++)
490 ; *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
493 ; CHECK-LABEL: define i32 @noAlias10(
494 ; CHECK-NOT: sub {{.*}} <4 x i32>
497 ; TODO: This test vectorizes (with a run-time check) on real targets with -O3.
498 ; Investigate why it is not vectorized here even when vectorization is forced.
; noAlias10: pointer-based, descending variant. Each iteration loads
; *(PB + 100 - i - 1), adds %a and stores to *(PA + 100 - i - 1). Each address
; is built from three chained getelementptr steps: +100, -i, then -1.
500 define i32 @noAlias10(i32 %a) #0 {
; Spill the argument and the induction variable (initially 0) to stack slots.
502 %a.addr = alloca i32, align 4
503 %i = alloca i32, align 4
504 store i32 %a, i32* %a.addr, align 4
505 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
508 for.cond: ; preds = %for.inc, %entry
509 %0 = load i32, i32* %i, align 4
510 %cmp = icmp slt i32 %0, 100
511 br i1 %cmp, label %for.body, label %for.end
; Loop body: read through the pointer held in @PB at offset 100 - i - 1.
513 for.body: ; preds = %for.cond
514 %1 = load i32*, i32** @PB, align 4
515 %add.ptr = getelementptr inbounds i32, i32* %1, i32 100
516 %2 = load i32, i32* %i, align 4
; Negate i (0 - i) so it can be applied as a getelementptr offset.
517 %idx.neg = sub i32 0, %2
518 %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg
519 %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1
520 %3 = load i32, i32* %add.ptr2, align 4
521 %4 = load i32, i32* %a.addr, align 4
522 %add = add nsw i32 %3, %4
; Write through the pointer held in @PA at offset 100 - i - 1.
523 %5 = load i32*, i32** @PA, align 4
524 %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 100
525 %6 = load i32, i32* %i, align 4
526 %idx.neg4 = sub i32 0, %6
527 %add.ptr5 = getelementptr inbounds i32, i32* %add.ptr3, i32 %idx.neg4
528 %add.ptr6 = getelementptr inbounds i32, i32* %add.ptr5, i32 -1
529 store i32 %add, i32* %add.ptr6, align 4
; Latch: i = i + 1.
532 for.inc: ; preds = %for.body
533 %7 = load i32, i32* %i, align 4
534 %inc = add nsw i32 %7, 1
535 store i32 %inc, i32* %i, align 4
; After the loop: load *(PA + a).
538 for.end: ; preds = %for.cond
539 %8 = load i32*, i32** @PA, align 4
540 %9 = load i32, i32* %a.addr, align 4
541 %add.ptr7 = getelementptr inbounds i32, i32* %8, i32 %9
542 %10 = load i32, i32* %add.ptr7, align 4
546 ; /// Different objects, negative induction, multi-array
547 ; int noAlias11 (int a) {
549 ; for (i=0; i<SIZE; i++)
550 ; Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
551 ; return Bar.A[N][a];
553 ; CHECK-LABEL: define i32 @noAlias11(
554 ; CHECK: store <4 x i32>
; noAlias11: two-dimensional, descending variant on @Bar. With N fixed at 10,
; each iteration loads row N, element 100-i-1 of field 2, adds %a and stores to
; row N, element 100-i-1 of field 0.
557 define i32 @noAlias11(i32 %a) #0 {
; Spill the argument, the induction variable (initially 0) and the row index
; N (constant 10) to stack slots.
559 %a.addr = alloca i32, align 4
560 %i = alloca i32, align 4
561 %N = alloca i32, align 4
562 store i32 %a, i32* %a.addr, align 4
563 store i32 10, i32* %N, align 4
564 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
567 for.cond: ; preds = %for.inc, %entry
568 %0 = load i32, i32* %i, align 4
569 %cmp = icmp slt i32 %0, 100
570 br i1 %cmp, label %for.body, label %for.end
; Loop body: compute column 100 - i - 1, then read row N of @Bar field 2 there.
572 for.body: ; preds = %for.cond
573 %1 = load i32, i32* %i, align 4
574 %sub = sub nsw i32 100, %1
575 %sub1 = sub nsw i32 %sub, 1
576 %2 = load i32, i32* %N, align 4
577 %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
578 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %sub1
579 %3 = load i32, i32* %arrayidx2, align 4
580 %4 = load i32, i32* %a.addr, align 4
581 %add = add nsw i32 %3, %4
; Write the sum to row N, column 100 - i - 1 of @Bar field 0.
582 %5 = load i32, i32* %i, align 4
583 %sub3 = sub nsw i32 100, %5
584 %sub4 = sub nsw i32 %sub3, 1
585 %6 = load i32, i32* %N, align 4
586 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
587 %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx5, i32 0, i32 %sub4
588 store i32 %add, i32* %arrayidx6, align 4
; Latch: i = i + 1.
591 for.inc: ; preds = %for.body
592 %7 = load i32, i32* %i, align 4
593 %inc = add nsw i32 %7, 1
594 store i32 %inc, i32* %i, align 4
; After the loop: load row N, element a of @Bar field 0.
597 for.end: ; preds = %for.cond
598 %8 = load i32, i32* %a.addr, align 4
599 %9 = load i32, i32* %N, align 4
600 %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
601 %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx7, i32 0, i32 %8
602 %10 = load i32, i32* %arrayidx8, align 4
606 ; /// Same objects, negative induction, multi-array, different sub-elements
607 ; int noAlias12 (int a) {
609 ; for (i=0; i<SIZE; i++)
610 ; Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
611 ; return Bar.A[N][a];
613 ; CHECK-LABEL: define i32 @noAlias12(
614 ; CHECK: store <4 x i32>
; noAlias12: descending variant where both accesses go to field 0 of @Bar but to
; different rows. With N fixed at 10, each iteration loads row N+1, element
; 100-i-1, adds %a and stores to row N, element 100-i-1.
617 define i32 @noAlias12(i32 %a) #0 {
; Spill the argument, the induction variable (initially 0) and the row index
; N (constant 10) to stack slots.
619 %a.addr = alloca i32, align 4
620 %i = alloca i32, align 4
621 %N = alloca i32, align 4
622 store i32 %a, i32* %a.addr, align 4
623 store i32 10, i32* %N, align 4
624 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
627 for.cond: ; preds = %for.inc, %entry
628 %0 = load i32, i32* %i, align 4
629 %cmp = icmp slt i32 %0, 100
630 br i1 %cmp, label %for.body, label %for.end
; Loop body: compute column 100 - i - 1, then read row N+1 of @Bar field 0 there.
632 for.body: ; preds = %for.cond
633 %1 = load i32, i32* %i, align 4
634 %sub = sub nsw i32 100, %1
635 %sub1 = sub nsw i32 %sub, 1
636 %2 = load i32, i32* %N, align 4
637 %add = add nsw i32 %2, 1
638 %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
639 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %sub1
640 %3 = load i32, i32* %arrayidx2, align 4
641 %4 = load i32, i32* %a.addr, align 4
642 %add3 = add nsw i32 %3, %4
; Write the sum to row N, column 100 - i - 1 of the same field.
643 %5 = load i32, i32* %i, align 4
644 %sub4 = sub nsw i32 100, %5
645 %sub5 = sub nsw i32 %sub4, 1
646 %6 = load i32, i32* %N, align 4
647 %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
648 %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx6, i32 0, i32 %sub5
649 store i32 %add3, i32* %arrayidx7, align 4
; Latch: i = i + 1.
652 for.inc: ; preds = %for.body
653 %7 = load i32, i32* %i, align 4
654 %inc = add nsw i32 %7, 1
655 store i32 %inc, i32* %i, align 4
; After the loop: load row N, element a of @Bar field 0.
658 for.end: ; preds = %for.cond
659 %8 = load i32, i32* %a.addr, align 4
660 %9 = load i32, i32* %N, align 4
661 %arrayidx8 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
662 %arrayidx9 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx8, i32 0, i32 %8
663 %10 = load i32, i32* %arrayidx9, align 4
667 ; /// Same objects, positive induction, constant distance, just enough for vector size
668 ; int noAlias13 (int a) {
670 ; for (i=0; i<SIZE; i++)
671 ; Foo.A[i] = Foo.A[i+4] + a;
674 ; CHECK-LABEL: define i32 @noAlias13(
675 ; CHECK: add nsw <4 x i32>
; noAlias13: load and store both target field 0 of @Foo. Each iteration loads
; element [i+4], adds %a and stores to element [i]; the read runs 4 elements
; ahead of the write, matching the forced vector width of 4 on the opt command.
678 define i32 @noAlias13(i32 %a) #0 {
; Spill the argument and the induction variable (initially 0) to stack slots.
680 %a.addr = alloca i32, align 4
681 %i = alloca i32, align 4
682 store i32 %a, i32* %a.addr, align 4
683 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
686 for.cond: ; preds = %for.inc, %entry
687 %0 = load i32, i32* %i, align 4
688 %cmp = icmp slt i32 %0, 100
689 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 0 at [i+4].
691 for.body: ; preds = %for.cond
692 %1 = load i32, i32* %i, align 4
693 %add = add nsw i32 %1, 4
694 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
695 %2 = load i32, i32* %arrayidx, align 4
696 %3 = load i32, i32* %a.addr, align 4
697 %add1 = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [i].
698 %4 = load i32, i32* %i, align 4
699 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
700 store i32 %add1, i32* %arrayidx2, align 4
; Latch: i = i + 1.
703 for.inc: ; preds = %for.body
704 %5 = load i32, i32* %i, align 4
705 %inc = add nsw i32 %5, 1
706 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
709 for.end: ; preds = %for.cond
710 %6 = load i32, i32* %a.addr, align 4
711 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
712 %7 = load i32, i32* %arrayidx3, align 4
716 ; /// Same objects, negative induction, constant distance, just enough for vector size
717 ; int noAlias14 (int a) {
719 ; for (i=0; i<SIZE; i++)
720 ; Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
723 ; CHECK-LABEL: define i32 @noAlias14(
724 ; CHECK: load <4 x i32>
; noAlias14: descending counterpart of the same-array case. Each iteration
; loads field 0 of @Foo at [100-i-5], adds %a and stores to field 0 of @Foo at
; [100-i-1]; the two indices are 4 elements apart.
727 define i32 @noAlias14(i32 %a) #0 {
; Spill the argument and the induction variable (initially 0) to stack slots.
729 %a.addr = alloca i32, align 4
730 %i = alloca i32, align 4
731 store i32 %a, i32* %a.addr, align 4
732 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
735 for.cond: ; preds = %for.inc, %entry
736 %0 = load i32, i32* %i, align 4
737 %cmp = icmp slt i32 %0, 100
738 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 0 at [100 - i - 5].
740 for.body: ; preds = %for.cond
741 %1 = load i32, i32* %i, align 4
742 %sub = sub nsw i32 100, %1
743 %sub1 = sub nsw i32 %sub, 5
744 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
745 %2 = load i32, i32* %arrayidx, align 4
746 %3 = load i32, i32* %a.addr, align 4
747 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [100 - i - 1].
748 %4 = load i32, i32* %i, align 4
749 %sub2 = sub nsw i32 100, %4
750 %sub3 = sub nsw i32 %sub2, 1
751 %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
752 store i32 %add, i32* %arrayidx4, align 4
; Latch: i = i + 1.
755 for.inc: ; preds = %for.body
756 %5 = load i32, i32* %i, align 4
757 %inc = add nsw i32 %5, 1
758 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
761 for.end: ; preds = %for.cond
762 %6 = load i32, i32* %a.addr, align 4
763 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
764 %7 = load i32, i32* %arrayidx5, align 4
769 ;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
772 ; /// Different objects, swapped induction, alias at the end
773 ; int mayAlias01 (int a) {
775 ; for (i=0; i<SIZE; i++)
776 ; Foo.A[i] = Foo.B[SIZE-i-1] + a;
779 ; CHECK-LABEL: define i32 @mayAlias01(
780 ; CHECK-NOT: add nsw <4 x i32>
; mayAlias01: the load and store indices move in opposite directions. Each
; iteration loads field 2 of @Foo at [100-i-1], adds %a and stores to field 0
; of @Foo at [i].
783 define i32 @mayAlias01(i32 %a) nounwind {
; Spill the argument and the induction variable (initially 0) to stack slots.
785 %a.addr = alloca i32, align 4
786 %i = alloca i32, align 4
787 store i32 %a, i32* %a.addr, align 4
788 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
791 for.cond: ; preds = %for.inc, %entry
792 %0 = load i32, i32* %i, align 4
793 %cmp = icmp slt i32 %0, 100
794 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at the descending index [100 - i - 1].
796 for.body: ; preds = %for.cond
797 %1 = load i32, i32* %i, align 4
798 %sub = sub nsw i32 100, %1
799 %sub1 = sub nsw i32 %sub, 1
800 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
801 %2 = load i32, i32* %arrayidx, align 4
802 %3 = load i32, i32* %a.addr, align 4
803 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at the ascending index [i].
804 %4 = load i32, i32* %i, align 4
805 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
806 store i32 %add, i32* %arrayidx2, align 4
; Latch: i = i + 1.
809 for.inc: ; preds = %for.body
810 %5 = load i32, i32* %i, align 4
811 %inc = add nsw i32 %5, 1
812 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
815 for.end: ; preds = %for.cond
816 %6 = load i32, i32* %a.addr, align 4
817 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
818 %7 = load i32, i32* %arrayidx3, align 4
822 ; /// Different objects, swapped induction, alias at the beginning
823 ; int mayAlias02 (int a) {
825 ; for (i=0; i<SIZE; i++)
826 ; Foo.A[SIZE-i-1] = Foo.B[i] + a;
829 ; CHECK-LABEL: define i32 @mayAlias02(
830 ; CHECK-NOT: add nsw <4 x i32>
; mayAlias02: mirror image of mayAlias01. Each iteration loads field 2 of @Foo
; at [i], adds %a and stores to field 0 of @Foo at [100-i-1].
833 define i32 @mayAlias02(i32 %a) nounwind {
; Spill the argument and the induction variable (initially 0) to stack slots.
835 %a.addr = alloca i32, align 4
836 %i = alloca i32, align 4
837 store i32 %a, i32* %a.addr, align 4
838 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
841 for.cond: ; preds = %for.inc, %entry
842 %0 = load i32, i32* %i, align 4
843 %cmp = icmp slt i32 %0, 100
844 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at the ascending index [i].
846 for.body: ; preds = %for.cond
847 %1 = load i32, i32* %i, align 4
848 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
849 %2 = load i32, i32* %arrayidx, align 4
850 %3 = load i32, i32* %a.addr, align 4
851 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at the descending index [100 - i - 1].
852 %4 = load i32, i32* %i, align 4
853 %sub = sub nsw i32 100, %4
854 %sub1 = sub nsw i32 %sub, 1
855 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
856 store i32 %add, i32* %arrayidx2, align 4
; Latch: i = i + 1.
859 for.inc: ; preds = %for.body
860 %5 = load i32, i32* %i, align 4
861 %inc = add nsw i32 %5, 1
862 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
865 for.end: ; preds = %for.cond
866 %6 = load i32, i32* %a.addr, align 4
867 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
868 %7 = load i32, i32* %arrayidx3, align 4
872 ; /// Pointer access, run-time check added
873 ; int mayAlias03 (int a) {
875 ; for (i=0; i<SIZE; i++)
876 ; *(PA+i) = *(PB+SIZE-i-1) + a;
879 ; CHECK-LABEL: define i32 @mayAlias03(
880 ; CHECK-NOT: add nsw <4 x i32>
; mayAlias03: pointer-based variant with opposite directions. Each iteration
; loads *(PB + 100 - i - 1), adds %a and stores to *(PA + i).
883 define i32 @mayAlias03(i32 %a) nounwind {
; Spill the argument and the induction variable (initially 0) to stack slots.
885 %a.addr = alloca i32, align 4
886 %i = alloca i32, align 4
887 store i32 %a, i32* %a.addr, align 4
888 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
891 for.cond: ; preds = %for.inc, %entry
892 %0 = load i32, i32* %i, align 4
893 %cmp = icmp slt i32 %0, 100
894 br i1 %cmp, label %for.body, label %for.end
; Loop body: read through the pointer held in @PB at offset 100 - i - 1,
; built from three chained getelementptr steps (+100, -i, -1).
896 for.body: ; preds = %for.cond
897 %1 = load i32*, i32** @PB, align 4
898 %add.ptr = getelementptr inbounds i32, i32* %1, i32 100
899 %2 = load i32, i32* %i, align 4
900 %idx.neg = sub i32 0, %2
901 %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg
902 %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1
903 %3 = load i32, i32* %add.ptr2, align 4
904 %4 = load i32, i32* %a.addr, align 4
905 %add = add nsw i32 %3, %4
; Write through the pointer held in @PA at offset i.
906 %5 = load i32*, i32** @PA, align 4
907 %6 = load i32, i32* %i, align 4
908 %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 %6
909 store i32 %add, i32* %add.ptr3, align 4
; Latch: i = i + 1.
912 for.inc: ; preds = %for.body
913 %7 = load i32, i32* %i, align 4
914 %inc = add nsw i32 %7, 1
915 store i32 %inc, i32* %i, align 4
; After the loop: load *(PA + a).
918 for.end: ; preds = %for.cond
919 %8 = load i32*, i32** @PA, align 4
920 %9 = load i32, i32* %a.addr, align 4
921 %add.ptr4 = getelementptr inbounds i32, i32* %8, i32 %9
922 %10 = load i32, i32* %add.ptr4, align 4
927 ;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour altogether) ===
930 ; int mustAlias01 (int a) {
932 ; for (i=0; i<SIZE; i++)
933 ; Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
936 ; CHECK-LABEL: define i32 @mustAlias01(
937 ; CHECK-NOT: add nsw <4 x i32>
; mustAlias01: opposite directions plus an offset on the store. Each iteration
; loads field 2 of @Foo at [100-i-1], adds %a and stores to field 0 of @Foo at
; [i+10].
940 define i32 @mustAlias01(i32 %a) nounwind {
; Spill the argument and the induction variable (initially 0) to stack slots.
942 %a.addr = alloca i32, align 4
943 %i = alloca i32, align 4
944 store i32 %a, i32* %a.addr, align 4
945 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
948 for.cond: ; preds = %for.inc, %entry
949 %0 = load i32, i32* %i, align 4
950 %cmp = icmp slt i32 %0, 100
951 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [100 - i - 1].
953 for.body: ; preds = %for.cond
954 %1 = load i32, i32* %i, align 4
955 %sub = sub nsw i32 100, %1
956 %sub1 = sub nsw i32 %sub, 1
957 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
958 %2 = load i32, i32* %arrayidx, align 4
959 %3 = load i32, i32* %a.addr, align 4
960 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [i + 10].
961 %4 = load i32, i32* %i, align 4
962 %add2 = add nsw i32 %4, 10
963 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
964 store i32 %add, i32* %arrayidx3, align 4
; Latch: i = i + 1.
967 for.inc: ; preds = %for.body
968 %5 = load i32, i32* %i, align 4
969 %inc = add nsw i32 %5, 1
970 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
973 for.end: ; preds = %for.cond
974 %6 = load i32, i32* %a.addr, align 4
975 %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
976 %7 = load i32, i32* %arrayidx4, align 4
980 ; int mustAlias02 (int a) {
982 ; for (i=0; i<SIZE; i++)
983 ; Foo.A[i] = Foo.B[SIZE-i-10] + a;
986 ; CHECK-LABEL: define i32 @mustAlias02(
987 ; CHECK-NOT: add nsw <4 x i32>
; mustAlias02: opposite directions plus an offset on the load. Each iteration
; loads field 2 of @Foo at [100-i-10], adds %a and stores to field 0 of @Foo at
; [i].
990 define i32 @mustAlias02(i32 %a) nounwind {
; Spill the argument and the induction variable (initially 0) to stack slots.
992 %a.addr = alloca i32, align 4
993 %i = alloca i32, align 4
994 store i32 %a, i32* %a.addr, align 4
995 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
998 for.cond: ; preds = %for.inc, %entry
999 %0 = load i32, i32* %i, align 4
1000 %cmp = icmp slt i32 %0, 100
1001 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [100 - i - 10].
1003 for.body: ; preds = %for.cond
1004 %1 = load i32, i32* %i, align 4
1005 %sub = sub nsw i32 100, %1
1006 %sub1 = sub nsw i32 %sub, 10
1007 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
1008 %2 = load i32, i32* %arrayidx, align 4
1009 %3 = load i32, i32* %a.addr, align 4
1010 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [i].
1011 %4 = load i32, i32* %i, align 4
1012 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
1013 store i32 %add, i32* %arrayidx2, align 4
; Latch: i = i + 1.
1016 for.inc: ; preds = %for.body
1017 %5 = load i32, i32* %i, align 4
1018 %inc = add nsw i32 %5, 1
1019 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
1022 for.end: ; preds = %for.cond
1023 %6 = load i32, i32* %a.addr, align 4
1024 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
1025 %7 = load i32, i32* %arrayidx3, align 4
1029 ; int mustAlias03 (int a) {
1031 ; for (i=0; i<SIZE; i++)
1032 ; Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
1035 ; CHECK-LABEL: define i32 @mustAlias03(
1036 ; CHECK-NOT: add nsw <4 x i32>
; mustAlias03: opposite directions with offsets on both sides. Each iteration
; loads field 2 of @Foo at [100-i-10], adds %a and stores to field 0 of @Foo at
; [i+10].
1039 define i32 @mustAlias03(i32 %a) nounwind {
; Spill the argument and the induction variable (initially 0) to stack slots.
1041 %a.addr = alloca i32, align 4
1042 %i = alloca i32, align 4
1043 store i32 %a, i32* %a.addr, align 4
1044 store i32 0, i32* %i, align 4
; Loop header: keep iterating while i < 100 (signed compare).
1047 for.cond: ; preds = %for.inc, %entry
1048 %0 = load i32, i32* %i, align 4
1049 %cmp = icmp slt i32 %0, 100
1050 br i1 %cmp, label %for.body, label %for.end
; Loop body: read @Foo field 2 at [100 - i - 10].
1052 for.body: ; preds = %for.cond
1053 %1 = load i32, i32* %i, align 4
1054 %sub = sub nsw i32 100, %1
1055 %sub1 = sub nsw i32 %sub, 10
1056 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
1057 %2 = load i32, i32* %arrayidx, align 4
1058 %3 = load i32, i32* %a.addr, align 4
1059 %add = add nsw i32 %2, %3
; Write the sum to @Foo field 0 at [i + 10].
1060 %4 = load i32, i32* %i, align 4
1061 %add2 = add nsw i32 %4, 10
1062 %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
1063 store i32 %add, i32* %arrayidx3, align 4
; Latch: i = i + 1.
1066 for.inc: ; preds = %for.body
1067 %5 = load i32, i32* %i, align 4
1068 %inc = add nsw i32 %5, 1
1069 store i32 %inc, i32* %i, align 4
; After the loop: load element [a] of @Foo field 0.
1072 for.end: ; preds = %for.cond
1073 %6 = load i32, i32* %a.addr, align 4
1074 %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
1075 %7 = load i32, i32* %arrayidx4, align 4