; RUN: opt -passes='print<access-info>' -aa-pipeline='basic-aa' -disable-output < %s 2>&1 | FileCheck %s --check-prefix=LAA

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index++;
;   }
;
; SCEV is unable to prove that A[2 * index] does not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV Predicates.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are
; not inbounds.

; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the SCEV expression:
;     i64 {0,+,2}<%for.body>
; LAA: [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64))<nuw><nsw> + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

define void @f1(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are
; not inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.

; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
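;
; Illustrative arithmetic for the negative stride: naively zero-extending
; the i32 byte increment -4 (0xFFFFFFFC) would yield i64 4294967292,
; moving the pointer forward by almost 4 GiB per iteration instead of
; back by 4 bytes. Under the <nusw> assumption the recurrence keeps the
; i64 step -4, matching
;     {...,+,-4}<%for.body>
; in the checks above. (The -4 byte stride is the -2 index step scaled
; by the 2-byte i16 element size.)
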
; LAA: [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nuw><nsw> + %a)
; LAA-NEXT: --> {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body>

define void @f2(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time sign extend 2 * index instead
; of zero extending it.

; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {0,+,2}<%for.body>
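;
; A signed-wrap example for contrast (values chosen for illustration, not
; implied by the test): at %ind1 = 1073741824 (2^30), the i32 product
; %mul = 2 * %ind1 is INT32_MIN, so
;     (sext i32 {0,+,2}<%for.body> to i64)
; would fall from 2147483646 to -2147483648 and would not be an affine
; AddRec in i64. The <nssw> assumption rules out this signed wrap, so the
; sext can be folded away.
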
; LAA: [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

define void @f3(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
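;
; As in @f2, the step must survive the extension (illustrative
; arithmetic): the i32 index step -2 sign-extends to i64 -2, which scaled
; by the 2-byte i16 element size gives the -4 byte stride in
;     {...,+,-4}<%for.body>
; above. Without the <nssw> assumption, a signed wrap of the i32
; recurrence would invalidate this rewrite.
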
; LAA: [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>

define void @f4(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but has the GEP
; to pointer %a inbounds. The index %mul doesn't have the nsw flag.
; This means that the SCEV expression for %mul can wrap and we need
; a SCEV predicate to continue analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates.
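;
; Note (inferred from the single entry under "SCEV assumptions" below):
; because the GEP is inbounds, the pointer recurrence itself needs no
; extra <nusw> assumption here; only the i32 index %mul needs the <nssw>
; predicate, in contrast to @f4 which required two assumptions.
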
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>

; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>

define void @f5(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}