; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; RUN: opt -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' -disable-output < %s 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; The following cases have no dependences.

; void nodep_Read_Write(int *A) {
;   int *B = A + 1;
;   for (unsigned i = 0; i < 1024; i+=3)
;     B[i] = A[i] + 1;
; }

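; With a stride of 3, the loads touch A[0], A[3], A[6], ... (indices == 0 mod 3)
; while the stores touch A[1], A[4], A[7], ... (indices == 1 mod 3), so the two
; access streams never reference the same element and there is no dependence.
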
; CHECK: function 'nodep_Read_Write':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Memory dependences are safe
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:     Run-time memory checks:

define void @nodep_Read_Write(i32* nocapture %A) {
entry:
  %add.ptr = getelementptr inbounds i32, i32* %A, i64 1
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; int nodep_Write_Read(int *A) {
;   int sum = 0;
;   for (unsigned i = 0; i < 1024; i+=4) {
;     A[i] = i;
;     sum += A[i+3];
;   }
;   return sum;
; }

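; With a stride of 4, the stores hit indices == 0 mod 4 and the loads hit
; indices == 3 mod 4, so the streams are disjoint. Note the IR computes i+3 as
; 'or i64 %indvars.iv, 3', which is equivalent to an add because the two low
; bits of %indvars.iv are always zero.
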
; CHECK: function 'nodep_Write_Read':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Memory dependences are safe
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:     Run-time memory checks:

define i32 @nodep_Write_Read(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add3

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %sum.013 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = trunc i64 %indvars.iv to i32
  store i32 %0, i32* %arrayidx, align 4
  %1 = or i64 %indvars.iv, 3
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %1
  %2 = load i32, i32* %arrayidx2, align 4
  %add3 = add nsw i32 %2, %sum.013
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; void nodep_Write_Write(int *A) {
;   for (unsigned i = 0; i < 1024; i+=2) {
;     A[i] = i;
;     A[i+1] = i+1;
;   }
; }

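; With a stride of 2, one store covers the even indices and the other the odd
; indices; every element is written exactly once, so the two store streams
; never collide.
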
; CHECK: function 'nodep_Write_Write':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Memory dependences are safe
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:     Run-time memory checks:

define void @nodep_Write_Write(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = trunc i64 %indvars.iv to i32
  store i32 %0, i32* %arrayidx, align 4
  %1 = or i64 %indvars.iv, 1
  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %1
  %2 = trunc i64 %1 to i32
  store i32 %2, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; The following cases have unsafe dependences and are not vectorizable.

; void unsafe_Read_Write(int *A) {
;   for (unsigned i = 0; i < 1024; i+=3)
;     A[i+3] = A[i] + 1;
; }

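; The value stored to A[i+3] is loaded again as A[i] in the very next
; iteration (the induction variable advances by 3): a loop-carried backward
; dependence at a distance of one iteration, so the loop cannot be vectorized.
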
; CHECK: function 'unsafe_Read_Write':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Report: unsafe dependent memory operations in loop
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       Backward:
; CHECK-NEXT:           %0 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           store i32 %add, i32* %arrayidx3, align 4

define void @unsafe_Read_Write(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %i.010 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
  %idxprom = zext i32 %i.010 to i64
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %add1 = add i32 %i.010, 3
  %idxprom2 = zext i32 %add1 to i64
  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %idxprom2
  store i32 %add, i32* %arrayidx3, align 4
  %cmp = icmp ult i32 %add1, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; int unsafe_Write_Read(int *A) {
;   int sum = 0;
;   for (unsigned i = 0; i < 1024; i+=4) {
;     A[i] = i;
;     sum += A[i+4];
;   }
;   return sum;
; }

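; Each iteration loads A[i+4], exactly the element that the following
; iteration overwrites (the induction variable advances by 4). The backward
; dependence distance is one iteration, which blocks vectorization.
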
; CHECK: function 'unsafe_Write_Read':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Report: unsafe dependent memory operations in loop
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       Backward:
; CHECK-NEXT:           store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           %1 = load i32, i32* %arrayidx2, align 4

define i32 @unsafe_Write_Read(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add3

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %sum.013 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = trunc i64 %indvars.iv to i32
  store i32 %0, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
  %1 = load i32, i32* %arrayidx2, align 4
  %add3 = add nsw i32 %1, %sum.013
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; void unsafe_Write_Write(int *A) {
;   for (unsigned i = 0; i < 1024; i+=2) {
;     A[i] = i;
;     A[i+2] = i+1;
;   }
; }

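; The store to A[i+2] collides with the store to A[i] performed by the next
; iteration (the induction variable advances by 2): a write-write backward
; dependence at a distance of one iteration, so the loop is not vectorizable.
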
; CHECK: function 'unsafe_Write_Write':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Report: unsafe dependent memory operations in loop
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       Backward:
; CHECK-NEXT:           store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           store i32 %2, i32* %arrayidx3, align 4

define void @unsafe_Write_Write(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = trunc i64 %indvars.iv to i32
  store i32 %0, i32* %arrayidx, align 4
  %1 = or i64 %indvars.iv, 1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
  %2 = trunc i64 %1 to i32
  store i32 %2, i32* %arrayidx3, align 4
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; The following cases check that strided accesses can be vectorized.

; void vectorizable_Read_Write(int *A) {
;   int *B = A + 4;
;   for (unsigned i = 0; i < 1024; i+=2)
;     B[i] = A[i] + 1;
; }

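; The store to A[i+4] is not loaded until two iterations later (the induction
; variable advances by 2), so the backward dependence distance is two
; iterations: vectorization stays safe for a vector factor of up to 2, and LAA
; classifies the dependence as BackwardVectorizable.
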
; CHECK: function 'vectorizable_Read_Write':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Memory dependences are safe
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       BackwardVectorizable:
; CHECK-NEXT:           %0 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           store i32 %add, i32* %arrayidx2, align 4

define void @vectorizable_Read_Write(i32* nocapture %A) {
entry:
  %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; int vectorizable_Write_Read(int *A) {
;   int *B = A + 4;
;   int sum = 0;
;   for (unsigned i = 0; i < 1024; i+=2) {
;     A[i] = i;
;     sum += B[i];
;   }
;   return sum;
; }

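; The load of A[i+4] reads an element that is not stored until two iterations
; later, so the dependence distance is again two iterations and the loop is
; vectorizable with a vector factor of up to 2 (BackwardVectorizable).
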
; CHECK: function 'vectorizable_Write_Read':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Memory dependences are safe
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       BackwardVectorizable:
; CHECK-NEXT:           store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           %1 = load i32, i32* %arrayidx2, align 4

define i32 @vectorizable_Write_Read(i32* nocapture %A) {
entry:
  %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = trunc i64 %indvars.iv to i32
  store i32 %0, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %sum.013
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; void vectorizable_Write_Write(int *A) {
;   int *B = A + 4;
;   for (unsigned i = 0; i < 1024; i+=2) {
;     A[i] = i;
;     B[i] = i+1;
;   }
; }

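; The store to A[i+4] collides only with the store to A[i] performed two
; iterations later, so this write-write dependence still allows a vector
; factor of up to 2 (BackwardVectorizable).
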
; CHECK: function 'vectorizable_Write_Write':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Memory dependences are safe
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       BackwardVectorizable:
; CHECK-NEXT:           store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           store i32 %2, i32* %arrayidx2, align 4

define void @vectorizable_Write_Write(i32* nocapture %A) {
entry:
  %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = trunc i64 %indvars.iv to i32
  store i32 %0, i32* %arrayidx, align 4
  %1 = or i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
  %2 = trunc i64 %1 to i32
  store i32 %2, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; void vectorizable_unscaled_Read_Write(int *A) {
;   int *B = (int *)((char *)A + 14);
;   for (unsigned i = 0; i < 1024; i+=2)
;     B[i] = A[i] + 1;
; }

; FIXME: This case looks like the previous case @vectorizable_Read_Write. It
; should be vectorizable.
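; The store lands 14 bytes above the load while the addresses advance 8 bytes
; per iteration, so it partially overlaps the load issued two iterations
; later. The distance would permit a small vector factor, but because 14 is
; not a multiple of the 4-byte access size the overlap defeats store-to-load
; forwarding, and LAA conservatively reports
; BackwardVectorizableButPreventsForwarding.
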
; CHECK: function 'vectorizable_unscaled_Read_Write':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Report: unsafe dependent memory operations in loop
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       BackwardVectorizableButPreventsForwarding:
; CHECK-NEXT:           %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           store i32 %add, i32* %arrayidx2, align 4

define void @vectorizable_unscaled_Read_Write(i32* nocapture %A) {
entry:
  %0 = bitcast i32* %A to i8*
  %add.ptr = getelementptr inbounds i8, i8* %0, i64 14
  %1 = bitcast i8* %add.ptr to i32*
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %2 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %2, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; int vectorizable_unscaled_Write_Read(int *A) {
;   int *B = (int *)((char *)A + 17);
;   int sum = 0;
;   for (unsigned i = 0; i < 1024; i+=2) {
;     A[i] = i;
;     sum += B[i];
;   }
;   return sum;
; }

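; Here the load sits 17 bytes above the store, so the load of a given
; iteration overlaps a store that only happens two iterations later. The
; dependence distance is two iterations, and since the write follows the read
; there is no store-to-load forwarding concern: the loop is vectorizable
; (BackwardVectorizable).
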
; CHECK: function 'vectorizable_unscaled_Write_Read':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Memory dependences are safe
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       BackwardVectorizable:
; CHECK-NEXT:           store i32 %2, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           %3 = load i32, i32* %arrayidx2, align 4

define i32 @vectorizable_unscaled_Write_Read(i32* nocapture %A) {
entry:
  %0 = bitcast i32* %A to i8*
  %add.ptr = getelementptr inbounds i8, i8* %0, i64 17
  %1 = bitcast i8* %add.ptr to i32*
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %2 = trunc i64 %indvars.iv to i32
  store i32 %2, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
  %3 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %3, %sum.013
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; void unsafe_unscaled_Read_Write(int *A) {
;   int *B = (int *)((char *)A + 11);
;   for (unsigned i = 0; i < 1024; i+=2)
;     B[i] = A[i] + 1;
; }

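; An 11-byte offset makes the store overlap the load of the very next
; iteration (addresses advance 8 bytes per iteration), so the backward
; dependence distance is a single iteration and the loop is not vectorizable.
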
; CHECK: function 'unsafe_unscaled_Read_Write':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Report: unsafe dependent memory operations in loop
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       Backward:
; CHECK-NEXT:           %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           store i32 %add, i32* %arrayidx2, align 4

define void @unsafe_unscaled_Read_Write(i32* nocapture %A) {
entry:
  %0 = bitcast i32* %A to i8*
  %add.ptr = getelementptr inbounds i8, i8* %0, i64 11
  %1 = bitcast i8* %add.ptr to i32*
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %2 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %2, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; CHECK: function 'unsafe_unscaled_Read_Write2':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Report: unsafe dependent memory operations in loop
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       Backward:
; CHECK-NEXT:           %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT:           store i32 %add, i32* %arrayidx2, align 4

; void unsafe_unscaled_Read_Write2(int *A) {
;   int *B = (int *)((char *)A + 1);
;   for (unsigned i = 0; i < 1024; i+=2)
;     B[i] = A[i] + 1;
; }

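; With a 1-byte offset the store partially overlaps the load of the same
; iteration at a distance that is not a multiple of the element size, which
; LAA must treat as an unsafe backward dependence.
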
define void @unsafe_unscaled_Read_Write2(i32* nocapture %A) {
entry:
  %0 = bitcast i32* %A to i8*
  %add.ptr = getelementptr inbounds i8, i8* %0, i64 1
  %1 = bitcast i8* %add.ptr to i32*
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %2 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %2, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; The following case checks that interleaved stores have dependences with
; another store and cannot pass the dependence check.

; void interleaved_stores(int *A) {
;   int *B = (int *) ((char *)A + 1);
;   for(int i = 0; i < 1024; i+=2) {
;     B[i]   = i;      // (1)
;     A[i+1] = i + 1;  // (2)
;     B[i+1] = i + 1;  // (3)
;   }
; }

; Access (2) overlaps with both (1) and (3).
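; In byte terms, (1) writes [4i+1, 4i+5), (2) writes [4i+4, 4i+8), and (3)
; writes [4i+5, 4i+9): (2) overlaps (1) by one byte and (3) by three bytes,
; and neither distance is a multiple of the 4-byte store size, so both pairs
; are unsafe backward dependences.
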
; CHECK: function 'interleaved_stores':
; CHECK-NEXT:   for.body:
; CHECK-NEXT:     Report: unsafe dependent memory operations in loop
; CHECK-NEXT:     Dependences:
; CHECK-NEXT:       Backward:
; CHECK-NEXT:           store i32 %4, i32* %arrayidx5, align 4 ->
; CHECK-NEXT:           store i32 %4, i32* %arrayidx9, align 4
; CHECK:               Backward:
; CHECK-NEXT:           store i32 %2, i32* %arrayidx2, align 4 ->
; CHECK-NEXT:           store i32 %4, i32* %arrayidx5, align 4

define void @interleaved_stores(i32* nocapture %A) {
entry:
  %0 = bitcast i32* %A to i8*
  %incdec.ptr = getelementptr inbounds i8, i8* %0, i64 1
  %1 = bitcast i8* %incdec.ptr to i32*
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %2 = trunc i64 %indvars.iv to i32
  %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
  store i32 %2, i32* %arrayidx2, align 4
  %3 = or i64 %indvars.iv, 1
  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %3
  %4 = trunc i64 %3 to i32
  store i32 %4, i32* %arrayidx5, align 4
  %arrayidx9 = getelementptr inbounds i32, i32* %1, i64 %3
  store i32 %4, i32* %arrayidx9, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp slt i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}