1 ; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s
3 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
5 ; Following cases are no dependence.
7 ; void nodep_Read_Write(int *A) {
9 ; for (unsigned i = 0; i < 1024; i+=3)
13 ; CHECK: function 'nodep_Read_Write':
14 ; CHECK-NEXT: for.body:
15 ; CHECK-NEXT: Memory dependences are safe
16 ; CHECK-NEXT: Dependences:
17 ; CHECK-NEXT: Run-time memory checks:
; NOTE(review): this dump appears to be sampled — the entry label, `ret`, and
; closing `}` of each function are not visible here; do not assume they are absent
; from the full file.
; Stride-3 loop: reads A[i] and writes A[i+1] for i = 0, 3, 6, ...
; Read indices {0,3,6,...} and write indices {1,4,7,...} never intersect,
; so LAA reports the memory dependences as safe with no runtime checks.
19 define void @nodep_Read_Write(i32* nocapture %A) {
21 %add.ptr = getelementptr inbounds i32, i32* %A, i64 1
24 for.cond.cleanup: ; preds = %for.body
27 for.body: ; preds = %entry, %for.body
28 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
29 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
30 %0 = load i32, i32* %arrayidx, align 4
31 %add = add nsw i32 %0, 1
; Store target is (A+1)[i], i.e. A[i+1].
32 %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
33 store i32 %add, i32* %arrayidx2, align 4
34 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
35 %cmp = icmp ult i64 %indvars.iv.next, 1024
36 br i1 %cmp, label %for.body, label %for.cond.cleanup
39 ; int nodep_Write_Read(int *A) {
41 ; for (unsigned i = 0; i < 1024; i+=4) {
49 ; CHECK: function 'nodep_Write_Read':
50 ; CHECK-NEXT: for.body:
51 ; CHECK-NEXT: Memory dependences are safe
52 ; CHECK-NEXT: Dependences:
53 ; CHECK-NEXT: Run-time memory checks:
; Stride-4 loop: stores A[i] and loads A[i|3] (= A[i+3], since i is a
; multiple of 4). Write indices {0,4,8,...} and read indices {3,7,11,...}
; are disjoint, so there is no dependence.
55 define i32 @nodep_Write_Read(i32* nocapture %A) {
59 for.cond.cleanup: ; preds = %for.body
62 for.body: ; preds = %entry, %for.body
63 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
64 %sum.013 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
65 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
66 %0 = trunc i64 %indvars.iv to i32
67 store i32 %0, i32* %arrayidx, align 4
; `or` acts as an addition here because the low 2 bits of %indvars.iv are 0.
68 %1 = or i64 %indvars.iv, 3
69 %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %1
70 %2 = load i32, i32* %arrayidx2, align 4
71 %add3 = add nsw i32 %2, %sum.013
72 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
73 %cmp = icmp ult i64 %indvars.iv.next, 1024
74 br i1 %cmp, label %for.body, label %for.cond.cleanup
77 ; void nodep_Write_Write(int *A) {
78 ; for (unsigned i = 0; i < 1024; i+=2) {
84 ; CHECK: function 'nodep_Write_Write':
85 ; CHECK-NEXT: for.body:
86 ; CHECK-NEXT: Memory dependences are safe
87 ; CHECK-NEXT: Dependences:
88 ; CHECK-NEXT: Run-time memory checks:
; Stride-2 loop: stores A[i] (even indices) and A[i|1] = A[i+1] (odd
; indices). The two write sets never overlap across iterations, so the
; dependences are safe.
90 define void @nodep_Write_Write(i32* nocapture %A) {
94 for.cond.cleanup: ; preds = %for.body
97 for.body: ; preds = %entry, %for.body
98 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
99 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
100 %0 = trunc i64 %indvars.iv to i32
101 store i32 %0, i32* %arrayidx, align 4
; %indvars.iv is even, so `or 1` is equivalent to adding 1.
102 %1 = or i64 %indvars.iv, 1
103 %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %1
104 %2 = trunc i64 %1 to i32
105 store i32 %2, i32* %arrayidx3, align 4
106 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
107 %cmp = icmp ult i64 %indvars.iv.next, 1024
108 br i1 %cmp, label %for.body, label %for.cond.cleanup
111 ; Following cases have unsafe dependences and are not vectorizable.
113 ; void unsafe_Read_Write(int *A) {
114 ; for (unsigned i = 0; i < 1024; i+=3)
118 ; CHECK: function 'unsafe_Read_Write':
119 ; CHECK-NEXT: for.body:
120 ; CHECK-NEXT: Report: unsafe dependent memory operations in loop
121 ; CHECK-NEXT: Dependences:
122 ; CHECK-NEXT: Backward:
123 ; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ->
124 ; CHECK-NEXT: store i32 %add, i32* %arrayidx3, align 4
; Stride-3 loop: reads A[i], then writes A[i+3] — the exact element the
; NEXT iteration reads. Distance 3 elements is too small to vectorize, so
; LAA reports an unsafe Backward dependence (load -> store).
; Note the induction variable here is i32 zero-extended per access, unlike
; the i64 %indvars.iv used by the other functions in this file.
126 define void @unsafe_Read_Write(i32* nocapture %A) {
130 for.cond.cleanup: ; preds = %for.body
133 for.body: ; preds = %entry, %for.body
134 %i.010 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
135 %idxprom = zext i32 %i.010 to i64
136 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
137 %0 = load i32, i32* %arrayidx, align 4
138 %add = add nsw i32 %0, 1
139 %add1 = add i32 %i.010, 3
140 %idxprom2 = zext i32 %add1 to i64
141 %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %idxprom2
142 store i32 %add, i32* %arrayidx3, align 4
143 %cmp = icmp ult i32 %add1, 1024
144 br i1 %cmp, label %for.body, label %for.cond.cleanup
147 ; int unsafe_Write_Read(int *A) {
149 ; for (unsigned i = 0; i < 1024; i+=4) {
157 ; CHECK: function 'unsafe_Write_Read':
158 ; CHECK-NEXT: for.body:
159 ; CHECK-NEXT: Report: unsafe dependent memory operations in loop
160 ; CHECK-NEXT: Dependences:
161 ; CHECK-NEXT: Backward:
162 ; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
163 ; CHECK-NEXT: %1 = load i32, i32* %arrayidx2, align 4
; Stride-4 loop: stores A[i], then loads A[i+4] — the element the NEXT
; iteration stores. Unsafe Backward dependence (store -> load).
165 define i32 @unsafe_Write_Read(i32* nocapture %A) {
169 for.cond.cleanup: ; preds = %for.body
172 for.body: ; preds = %entry, %for.body
173 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
174 %sum.013 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
175 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
176 %0 = trunc i64 %indvars.iv to i32
177 store i32 %0, i32* %arrayidx, align 4
; The load indexes with the incremented IV, i.e. A[i+4].
178 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
179 %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
180 %1 = load i32, i32* %arrayidx2, align 4
181 %add3 = add nsw i32 %1, %sum.013
182 %cmp = icmp ult i64 %indvars.iv.next, 1024
183 br i1 %cmp, label %for.body, label %for.cond.cleanup
186 ; void unsafe_Write_Write(int *A) {
187 ; for (unsigned i = 0; i < 1024; i+=2) {
193 ; CHECK: function 'unsafe_Write_Write':
194 ; CHECK-NEXT: for.body:
195 ; CHECK-NEXT: Report: unsafe dependent memory operations in loop
196 ; CHECK-NEXT: Dependences:
197 ; CHECK-NEXT: Backward:
198 ; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
199 ; CHECK-NEXT: store i32 %2, i32* %arrayidx3, align 4
; Stride-2 loop: stores A[i] and then A[i+2] (via %indvars.iv.next) — the
; second store hits the slot the NEXT iteration's first store writes.
; Unsafe Backward dependence (store -> store). %1 = i|1 feeds only the
; stored VALUE here, not the address.
201 define void @unsafe_Write_Write(i32* nocapture %A) {
205 for.cond.cleanup: ; preds = %for.body
208 for.body: ; preds = %entry, %for.body
209 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
210 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
211 %0 = trunc i64 %indvars.iv to i32
212 store i32 %0, i32* %arrayidx, align 4
213 %1 = or i64 %indvars.iv, 1
214 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
215 %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
216 %2 = trunc i64 %1 to i32
217 store i32 %2, i32* %arrayidx3, align 4
218 %cmp = icmp ult i64 %indvars.iv.next, 1024
219 br i1 %cmp, label %for.body, label %for.cond.cleanup
222 ; Following cases check that strided accesses can be vectorized.
224 ; void vectorizable_Read_Write(int *A) {
226 ; for (unsigned i = 0; i < 1024; i+=2)
230 ; CHECK: function 'vectorizable_Read_Write':
231 ; CHECK-NEXT: for.body:
232 ; CHECK-NEXT: Memory dependences are safe
233 ; CHECK-NEXT: Dependences:
234 ; CHECK-NEXT: BackwardVectorizable:
235 ; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ->
236 ; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
; Stride-2 loop: reads A[i], writes A[i+4]. Dependence distance is 4
; elements with stride 2 — far enough apart that LAA classifies it as
; BackwardVectorizable (safe, with a max vectorization factor).
238 define void @vectorizable_Read_Write(i32* nocapture %A) {
240 %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
243 for.cond.cleanup: ; preds = %for.body
246 for.body: ; preds = %entry, %for.body
247 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
248 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
249 %0 = load i32, i32* %arrayidx, align 4
250 %add = add nsw i32 %0, 1
; Store target is (A+4)[i], i.e. A[i+4].
251 %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
252 store i32 %add, i32* %arrayidx2, align 4
253 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
254 %cmp = icmp ult i64 %indvars.iv.next, 1024
255 br i1 %cmp, label %for.body, label %for.cond.cleanup
258 ; int vectorizable_Write_Read(int *A) {
261 ; for (unsigned i = 0; i < 1024; i+=2) {
269 ; CHECK: function 'vectorizable_Write_Read':
270 ; CHECK-NEXT: for.body:
271 ; CHECK-NEXT: Memory dependences are safe
272 ; CHECK-NEXT: Dependences:
273 ; CHECK-NEXT: BackwardVectorizable:
274 ; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
275 ; CHECK-NEXT: %1 = load i32, i32* %arrayidx2, align 4
; Stride-2 loop: stores A[i], loads A[i+4]. Same 4-element distance as
; above but with the store first — still BackwardVectorizable (safe).
277 define i32 @vectorizable_Write_Read(i32* nocapture %A) {
279 %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
282 for.cond.cleanup: ; preds = %for.body
285 for.body: ; preds = %entry, %for.body
286 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
287 %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ]
288 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
289 %0 = trunc i64 %indvars.iv to i32
290 store i32 %0, i32* %arrayidx, align 4
; Load source is (A+4)[i], i.e. A[i+4].
291 %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
292 %1 = load i32, i32* %arrayidx2, align 4
293 %add = add nsw i32 %1, %sum.013
294 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
295 %cmp = icmp ult i64 %indvars.iv.next, 1024
296 br i1 %cmp, label %for.body, label %for.cond.cleanup
299 ; void vectorizable_Write_Write(int *A) {
301 ; for (unsigned i = 0; i < 1024; i+=2) {
307 ; CHECK: function 'vectorizable_Write_Write':
308 ; CHECK-NEXT: for.body:
309 ; CHECK-NEXT: Memory dependences are safe
310 ; CHECK-NEXT: Dependences:
311 ; CHECK-NEXT: BackwardVectorizable:
312 ; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
313 ; CHECK-NEXT: store i32 %2, i32* %arrayidx2, align 4
; Stride-2 loop: stores A[i] and (A+4)[i] = A[i+4]. Distance 4 with
; stride 2 => BackwardVectorizable (safe). As in @unsafe_Write_Write,
; %1 = i|1 is only the VALUE stored, not an address.
315 define void @vectorizable_Write_Write(i32* nocapture %A) {
317 %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
320 for.cond.cleanup: ; preds = %for.body
323 for.body: ; preds = %entry, %for.body
324 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
325 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
326 %0 = trunc i64 %indvars.iv to i32
327 store i32 %0, i32* %arrayidx, align 4
328 %1 = or i64 %indvars.iv, 1
329 %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
330 %2 = trunc i64 %1 to i32
331 store i32 %2, i32* %arrayidx2, align 4
332 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
333 %cmp = icmp ult i64 %indvars.iv.next, 1024
334 br i1 %cmp, label %for.body, label %for.cond.cleanup
337 ; void vectorizable_unscaled_Read_Write(int *A) {
338 ; int *B = (int *)((char *)A + 14);
339 ; for (unsigned i = 0; i < 1024; i+=2)
343 ; FIXME: This case looks like previous case @vectorizable_Read_Write. It should
346 ; CHECK: function 'vectorizable_unscaled_Read_Write':
347 ; CHECK-NEXT: for.body:
348 ; CHECK-NEXT: Report: unsafe dependent memory operations in loop
349 ; CHECK-NEXT: Dependences:
350 ; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
351 ; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
352 ; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
; Byte-offset variant: B = (int*)((char*)A + 14); reads A[i], writes B[i].
; The 14-byte distance is not a multiple of the 4-byte element size, so
; the store-to-load forwarding check fails:
; BackwardVectorizableButPreventsForwarding, reported as unsafe.
354 define void @vectorizable_unscaled_Read_Write(i32* nocapture %A) {
356 %0 = bitcast i32* %A to i8*
357 %add.ptr = getelementptr inbounds i8, i8* %0, i64 14
358 %1 = bitcast i8* %add.ptr to i32*
361 for.cond.cleanup: ; preds = %for.body
364 for.body: ; preds = %entry, %for.body
365 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
366 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
367 %2 = load i32, i32* %arrayidx, align 4
368 %add = add nsw i32 %2, 1
369 %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
370 store i32 %add, i32* %arrayidx2, align 4
371 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
372 %cmp = icmp ult i64 %indvars.iv.next, 1024
373 br i1 %cmp, label %for.body, label %for.cond.cleanup
376 ; int vectorizable_unscaled_Write_Read(int *A) {
377 ; int *B = (int *)((char *)A + 17);
379 ; for (unsigned i = 0; i < 1024; i+=2) {
387 ; CHECK: function 'vectorizable_unscaled_Write_Read':
388 ; CHECK-NEXT: for.body:
389 ; CHECK-NEXT: Memory dependences are safe
390 ; CHECK-NEXT: Dependences:
391 ; CHECK-NEXT: BackwardVectorizable:
392 ; CHECK-NEXT: store i32 %2, i32* %arrayidx, align 4 ->
393 ; CHECK-NEXT: %3 = load i32, i32* %arrayidx2, align 4
; Byte-offset variant: B = (int*)((char*)A + 17); stores A[i], loads B[i].
; With the store first, forwarding is not a concern and the 17-byte
; distance is classified BackwardVectorizable (safe).
395 define i32 @vectorizable_unscaled_Write_Read(i32* nocapture %A) {
397 %0 = bitcast i32* %A to i8*
398 %add.ptr = getelementptr inbounds i8, i8* %0, i64 17
399 %1 = bitcast i8* %add.ptr to i32*
402 for.cond.cleanup: ; preds = %for.body
405 for.body: ; preds = %entry, %for.body
406 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
407 %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ]
408 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
409 %2 = trunc i64 %indvars.iv to i32
410 store i32 %2, i32* %arrayidx, align 4
411 %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
412 %3 = load i32, i32* %arrayidx2, align 4
413 %add = add nsw i32 %3, %sum.013
414 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
415 %cmp = icmp ult i64 %indvars.iv.next, 1024
416 br i1 %cmp, label %for.body, label %for.cond.cleanup
419 ; void unsafe_unscaled_Read_Write(int *A) {
420 ; int *B = (int *)((char *)A + 11);
421 ; for (unsigned i = 0; i < 1024; i+=2)
425 ; CHECK: function 'unsafe_unscaled_Read_Write':
426 ; CHECK-NEXT: for.body:
427 ; CHECK-NEXT: Report: unsafe dependent memory operations in loop
428 ; CHECK-NEXT: Dependences:
429 ; CHECK-NEXT: Backward:
430 ; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
431 ; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
; Byte-offset variant: B = (int*)((char*)A + 11); reads A[i], writes B[i].
; An 11-byte distance is too small for the stride-2 (8-byte) advance, so
; the dependence is an unsafe Backward (load -> store).
433 define void @unsafe_unscaled_Read_Write(i32* nocapture %A) {
435 %0 = bitcast i32* %A to i8*
436 %add.ptr = getelementptr inbounds i8, i8* %0, i64 11
437 %1 = bitcast i8* %add.ptr to i32*
440 for.cond.cleanup: ; preds = %for.body
443 for.body: ; preds = %entry, %for.body
444 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
445 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
446 %2 = load i32, i32* %arrayidx, align 4
447 %add = add nsw i32 %2, 1
448 %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
449 store i32 %add, i32* %arrayidx2, align 4
450 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
451 %cmp = icmp ult i64 %indvars.iv.next, 1024
452 br i1 %cmp, label %for.body, label %for.cond.cleanup
455 ; CHECK: function 'unsafe_unscaled_Read_Write2':
456 ; CHECK-NEXT: for.body:
457 ; CHECK-NEXT: Report: unsafe dependent memory operations in loop
458 ; CHECK-NEXT: Dependences:
459 ; CHECK-NEXT: Backward:
460 ; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
461 ; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
463 ; void unsafe_unscaled_Read_Write2(int *A) {
464 ; int *B = (int *)((char *)A + 1);
465 ; for (unsigned i = 0; i < 1024; i+=2)
; Byte-offset variant: B = (int*)((char*)A + 1); reads A[i], writes B[i].
; The 1-byte distance makes the read and write overlap within and across
; iterations — unsafe Backward dependence (load -> store).
469 define void @unsafe_unscaled_Read_Write2(i32* nocapture %A) {
471 %0 = bitcast i32* %A to i8*
472 %add.ptr = getelementptr inbounds i8, i8* %0, i64 1
473 %1 = bitcast i8* %add.ptr to i32*
476 for.cond.cleanup: ; preds = %for.body
479 for.body: ; preds = %entry, %for.body
480 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
481 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
482 %2 = load i32, i32* %arrayidx, align 4
483 %add = add nsw i32 %2, 1
484 %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
485 store i32 %add, i32* %arrayidx2, align 4
486 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
487 %cmp = icmp ult i64 %indvars.iv.next, 1024
488 br i1 %cmp, label %for.body, label %for.cond.cleanup
491 ; Following case checks that interleaved stores have dependences with another
492 ; store and can not pass dependence check.
494 ; void interleaved_stores(int *A) {
495 ; int *B = (int *) ((char *)A + 1);
496 ; for(int i = 0; i < 1024; i+=2) {
498 ; A[i+1] = i + 1; // (2)
499 ; B[i+1] = i + 1; // (3)
503 ; The access (2) has overlaps with (1) and (3).
505 ; CHECK: function 'interleaved_stores':
506 ; CHECK-NEXT: for.body:
507 ; CHECK-NEXT: Report: unsafe dependent memory operations in loop
508 ; CHECK-NEXT: Dependences:
509 ; CHECK-NEXT: Backward:
510 ; CHECK-NEXT: store i32 %4, i32* %arrayidx5, align 4 ->
511 ; CHECK-NEXT: store i32 %4, i32* %arrayidx9, align 4
513 ; CHECK-NEXT: store i32 %2, i32* %arrayidx2, align 4 ->
514 ; CHECK-NEXT: store i32 %4, i32* %arrayidx5, align 4
; Interleaved stores through two 1-byte-offset views of the same buffer:
; B = (int*)((char*)A + 1). Each iteration stores B[i] (1), A[i+1] (2),
; and B[i+1] (3). Store (2) overlaps both (1) and (3), producing two
; unsafe Backward store->store dependences.
; Unlike the other loops here, the exit compare is SIGNED (icmp slt),
; matching the `int i` loop counter in the C sketch above.
516 define void @interleaved_stores(i32* nocapture %A) {
518 %0 = bitcast i32* %A to i8*
519 %incdec.ptr = getelementptr inbounds i8, i8* %0, i64 1
520 %1 = bitcast i8* %incdec.ptr to i32*
523 for.cond.cleanup: ; preds = %for.body
526 for.body: ; preds = %entry, %for.body
527 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
528 %2 = trunc i64 %indvars.iv to i32
529 %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
530 store i32 %2, i32* %arrayidx2, align 4
531 %3 = or i64 %indvars.iv, 1
532 %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %3
533 %4 = trunc i64 %3 to i32
534 store i32 %4, i32* %arrayidx5, align 4
535 %arrayidx9 = getelementptr inbounds i32, i32* %1, i64 %3
536 store i32 %4, i32* %arrayidx9, align 4
537 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
538 %cmp = icmp slt i64 %indvars.iv.next, 1024
539 br i1 %cmp, label %for.body, label %for.cond.cleanup