test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll

   1 ; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s -check-prefix=LAA
   2 ; RUN: opt -passes='require<aa>,require<scalar-evolution>,require<aa>,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s  2>&1 | FileCheck %s --check-prefix=LAA
   3 ; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
   4
   5 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
   6
   7 ; For this loop:
   8 ;   unsigned index = 0;
   9 ;   for (int i = 0; i < n; i++) {
  10 ;    A[2 * index] = A[2 * index] + B[i];
  11 ;    index++;
  12 ;   }
  13 ;
  14 ; SCEV is unable to prove that A[2 * index] does not overflow.
  15 ;
  16 ; Analyzing the IR does not help us because the GEPs are not
  17 ; affine AddRecExprs. However, we can turn them into AddRecExprs
  18 ; using SCEV Predicates.
  19 ;
  20 ; Once we have an affine expression we need to add an additional NUSW
  21 ; to check that the pointers don't wrap since the GEPs are not
  22 ; inbounds.
  23
  24 ; LAA-LABEL: f1
  25 ; LAA: Memory dependences are safe{{$}}
  26 ; LAA: SCEV assumptions:
  27 ; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
  28 ; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>
  29
  30 ; The expression for %mul_ext as analyzed by SCEV is
  31 ;    (zext i32 {0,+,2}<%for.body> to i64)
  32 ; We have added the nusw flag to turn this expression into the SCEV expression:
  33 ;    i64 {0,+,2}<%for.body>
  34
  35 ; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
  36 ; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64))<nuw><nsw> + %a)
  37 ; LAA-NEXT: --> {%a,+,4}<%for.body>
  38
  39
  40 ; LV-LABEL: f1
  41 ; LV-LABEL: for.body.lver.check
  42
  43 ; LV:      [[BETrunc:%[^ ]*]] = trunc i64 [[BE:%[^ ]*]] to i32
  44 ; LV-NEXT: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc]])
  45 ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
  46 ; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
  47 ; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 0, [[OFMulResult]]
  48 ; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 0, [[OFMulResult]]
  49 ; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp ugt i32 [[SubEnd]], 0
  50 ; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp ult i32 [[AddEnd]], 0
  51 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg]], i1 [[CmpPos]]
  52 ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
  53 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
  54 ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
  55
  56 ; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
  57
  58 ; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
  59 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
  60 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
  61 ; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
  62 ; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
  63 ; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
  64 ; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
  65 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
  66 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
  67
  68 ; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
  69 ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
  70 define void @f1(i16* noalias %a,                  ; f1: i32 index counts up from 0 and is zero-extended before the GEP
  71                 i16* noalias %b, i64 %N) {
  72 entry:
  73   br label %for.body
  74
  75 for.body:                                         ; preds = %for.body, %entry
  76   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]        ; i64 counter: indexes %b and feeds the exit test
  77   %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]      ; i32 counter ("index" in the pseudo-code above)
  78
  79   %mul = mul i32 %ind1, 2                          ; no nuw/nsw flags on the i32 multiply
  80   %mul_ext = zext i32 %mul to i64                  ; zero-extend the i32 product to the i64 GEP index
  81
  82   %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext    ; GEP is not marked inbounds
  83   %loadA = load i16, i16* %arrayidxA, align 2
  84
  85   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  86   %loadB = load i16, i16* %arrayidxB, align 2
  87
  88   %add = mul i16 %loadA, %loadB                    ; NOTE: named %add but is a multiply (the pseudo-code shows '+')
  89
  90   store i16 %add, i16* %arrayidxA, align 2         ; written back through the same pointer that was loaded
  91
  92   %inc = add nuw nsw i64 %ind, 1
  93   %inc1 = add i32 %ind1, 1                         ; no wrap flags on the i32 increment
  94
  95   %exitcond = icmp eq i64 %inc, %N                 ; leave the loop once %inc equals %N
  96   br i1 %exitcond, label %for.end, label %for.body
  97
  98 for.end:                                          ; preds = %for.body
  99   ret void
 100 }
 101
 102 ; For this loop:
 103 ;   unsigned index = n;
 104 ;   for (int i = 0; i < n; i++) {
 105 ;    A[2 * index] = A[2 * index] + B[i];
 106 ;    index--;
 107 ;   }
 108 ;
 109 ; the SCEV expression for 2 * index is not an AddRecExpr
 110 ; (and implicitly not affine). However, we are able to make assumptions
 111 ; that will turn the expression into an affine one and continue the
 112 ; analysis.
 113 ;
 114 ; Once we have an affine expression we need to add an additional NUSW
 115 ; to check that the pointers don't wrap since the GEPs are not
 116 ; inbounds.
 117 ;
 118 ; This loop has a negative stride for A, and the nusw flag is required in
 119 ; order to properly extend the increment from i32 -4 to i64 -4.
 120
 121 ; LAA-LABEL: f2
 122 ; LAA: Memory dependences are safe{{$}}
 123 ; LAA: SCEV assumptions:
 124 ; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
 125 ; LAA-NEXT: {((4 * (zext i31 (trunc i64 %N to i31) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>
 126
 127 ; The expression for %mul_ext as analyzed by SCEV is
 128 ;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
 129 ; We have added the nusw flag to turn this expression into the following SCEV:
 130 ;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
 131
 132 ; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
 133 ; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nuw><nsw> + %a)
 134 ; LAA-NEXT: --> {((4 * (zext i31 (trunc i64 %N to i31) to i64)) + %a),+,-4}<%for.body>
 135
 136 ; LV-LABEL: f2
 137 ; LV-LABEL: for.body.lver.check
 138
 139 ; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
 140 ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
 141 ; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
 142 ; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
 143 ; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
 144 ; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp ugt i32 [[SubEnd]], [[Start]]
 145 ; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp ult i32 [[AddEnd]], [[Start]]
 146 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
 147 ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
 148 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
 149 ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
 150
 151 ; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
 152
 153 ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
 154 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 155 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
 156 ; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
 157 ; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
 158 ; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
 159 ; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
 160 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 161 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
 162
 163 ; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
 164 ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
 165 define void @f2(i16* noalias %a,                  ; f2: i32 index counts down from trunc(%N) and is zero-extended before the GEP
 166                 i16* noalias %b, i64 %N) {
 167 entry:
 168   %TruncN = trunc i64 %N to i32                    ; starting value of the i32 index
 169   br label %for.body
 170
 171 for.body:                                         ; preds = %for.body, %entry
 172   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]        ; i64 counter: indexes %b and feeds the exit test
 173   %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ] ; i32 counter ("index" in the pseudo-code above)
 174
 175   %mul = mul i32 %ind1, 2                          ; no nuw/nsw flags on the i32 multiply
 176   %mul_ext = zext i32 %mul to i64                  ; zero-extend the i32 product to the i64 GEP index
 177
 178   %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext    ; GEP is not marked inbounds
 179   %loadA = load i16, i16* %arrayidxA, align 2
 180
 181   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
 182   %loadB = load i16, i16* %arrayidxB, align 2
 183
 184   %add = mul i16 %loadA, %loadB                    ; NOTE: named %add but is a multiply (the pseudo-code shows '+')
 185
 186   store i16 %add, i16* %arrayidxA, align 2         ; written back through the same pointer that was loaded
 187
 188   %inc = add nuw nsw i64 %ind, 1
 189   %dec = sub i32 %ind1, 1                          ; no wrap flags on the i32 decrement
 190
 191   %exitcond = icmp eq i64 %inc, %N                 ; leave the loop once %inc equals %N
 192   br i1 %exitcond, label %for.end, label %for.body
 193
 194 for.end:                                          ; preds = %for.body
 195   ret void
 196 }
 197
 198 ; We replicate the tests above, but this time sign extend 2 * index instead
 199 ; of zero extending it.
 200
 201 ; LAA-LABEL: f3
 202 ; LAA: Memory dependences are safe{{$}}
 203 ; LAA: SCEV assumptions:
 204 ; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
 205 ; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>
 206
 207 ; The expression for %mul_ext as analyzed by SCEV is
 208 ;     i64 (sext i32 {0,+,2}<%for.body> to i64)
 209 ; We have added the nssw flag to turn this expression into the following SCEV:
 210 ;     i64 {0,+,2}<%for.body>
 211
 212 ; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
 213 ; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64))<nsw> + %a)
 214 ; LAA-NEXT: --> {%a,+,4}<%for.body>
 215
 216 ; LV-LABEL: f3
 217 ; LV-LABEL: for.body.lver.check
 218
 219 ; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
 220 ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
 221 ; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
 222 ; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 0, [[OFMulResult]]
 223 ; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 0, [[OFMulResult]]
 224 ; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], 0
 225 ; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], 0
 226 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg]], i1 [[CmpPos]]
 227 ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
 228 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
 229 ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
 230
 231 ; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
 232
 233 ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
 234 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 235 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
 236 ; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
 237 ; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
 238 ; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
 239 ; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
 240 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 241 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
 242
 243 ; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
 244 ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
 245 define void @f3(i16* noalias %a,                  ; f3: i32 index counts up from 0 and is sign-extended before the GEP
 246                 i16* noalias %b, i64 %N) {
 247 entry:
 248   br label %for.body
 249
 250 for.body:                                         ; preds = %for.body, %entry
 251   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]        ; i64 counter: indexes %b and feeds the exit test
 252   %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]      ; i32 counter that is doubled to index %a
 253
 254   %mul = mul i32 %ind1, 2                          ; no nuw/nsw flags on the i32 multiply
 255   %mul_ext = sext i32 %mul to i64                  ; sign-extend the i32 product to the i64 GEP index
 256
 257   %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext    ; GEP is not marked inbounds
 258   %loadA = load i16, i16* %arrayidxA, align 2
 259
 260   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
 261   %loadB = load i16, i16* %arrayidxB, align 2
 262
 263   %add = mul i16 %loadA, %loadB                    ; NOTE: named %add but is a multiply
 264
 265   store i16 %add, i16* %arrayidxA, align 2         ; written back through the same pointer that was loaded
 266
 267   %inc = add nuw nsw i64 %ind, 1
 268   %inc1 = add i32 %ind1, 1                         ; no wrap flags on the i32 increment
 269
 270   %exitcond = icmp eq i64 %inc, %N                 ; leave the loop once %inc equals %N
 271   br i1 %exitcond, label %for.end, label %for.body
 272
 273 for.end:                                          ; preds = %for.body
 274   ret void
 275 }
 276
 277 ; LAA-LABEL: f4
 278 ; LAA: Memory dependences are safe{{$}}
 279 ; LAA: SCEV assumptions:
 280 ; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
 281 ; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
 282
 283 ; The expression for %mul_ext as analyzed by SCEV is
 284 ;     i64  (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
 285 ; We have added the nssw flag to turn this expression into the following SCEV:
 286 ;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
 287
 288 ; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
 289 ; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
 290 ; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
 291
 292 ; LV-LABEL: f4
 293 ; LV-LABEL: for.body.lver.check
 294
 295 ; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
 296 ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
 297 ; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
 298 ; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
 299 ; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
 300 ; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], [[Start]]
 301 ; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], [[Start]]
 302 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
 303 ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
 304 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
 305 ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
 306
 307 ; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
 308
 309 ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
 310 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 311 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
 312 ; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
 313 ; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
 314 ; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
 315 ; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
 316 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 317 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
 318
 319 ; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
 320 ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
 321 define void @f4(i16* noalias %a,                  ; f4: i32 index counts down from trunc(%N) and is sign-extended before the GEP
 322                 i16* noalias %b, i64 %N) {
 323 entry:
 324   %TruncN = trunc i64 %N to i32                    ; starting value of the i32 index
 325   br label %for.body
 326
 327 for.body:                                         ; preds = %for.body, %entry
 328   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]        ; i64 counter: indexes %b and feeds the exit test
 329   %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ] ; i32 counter that is doubled to index %a
 330
 331   %mul = mul i32 %ind1, 2                          ; no nuw/nsw flags on the i32 multiply
 332   %mul_ext = sext i32 %mul to i64                  ; sign-extend the i32 product to the i64 GEP index
 333
 334   %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext    ; GEP is not marked inbounds
 335   %loadA = load i16, i16* %arrayidxA, align 2
 336
 337   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
 338   %loadB = load i16, i16* %arrayidxB, align 2
 339
 340   %add = mul i16 %loadA, %loadB                    ; NOTE: named %add but is a multiply
 341
 342   store i16 %add, i16* %arrayidxA, align 2         ; written back through the same pointer that was loaded
 343
 344   %inc = add nuw nsw i64 %ind, 1
 345   %dec = sub i32 %ind1, 1                          ; no wrap flags on the i32 decrement
 346
 347   %exitcond = icmp eq i64 %inc, %N                 ; leave the loop once %inc equals %N
 348   br i1 %exitcond, label %for.end, label %for.body
 349
 350 for.end:                                          ; preds = %for.body
 351   ret void
 352 }
 353
 354 ; The following function is similar to the one above, but has the GEP
 355 ; to pointer %a inbounds. The index %mul doesn't have the nsw flag.
 356 ; This means that the SCEV expression for %mul can wrap and we need
 357 ; a SCEV predicate to continue analysis.
 358 ;
 359 ; We can still analyze this by adding the required no wrap SCEV predicates.
 360
 361 ; LAA-LABEL: f5
 362 ; LAA: Memory dependences are safe{{$}}
 363 ; LAA: SCEV assumptions:
 364 ; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
 365 ; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
 366
 367 ; LAA: [PSE]  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
 368 ; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)<nsw>
 369 ; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
 370
 371 ; LV-LABEL: f5
 372 ; LV-LABEL: for.body.lver.check
 373 ; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
 374 ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
 375 ; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
 376 ; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
 377 ; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
 378 ; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], [[Start]]
 379 ; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], [[Start]]
 380 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
 381 ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
 382 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
 383 ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
 384
 385 ; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
 386
 387 ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
 388 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 389 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
 390 ; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
 391 ; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
 392 ; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
 393 ; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
 394 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 395 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
 396
 397 ; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
 398 ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
 399 define void @f5(i16* noalias %a,                  ; f5: i32 index counts down from trunc(%N); inbounds GEPs, i32 index used directly
 400                 i16* noalias %b, i64 %N) {
 401 entry:
 402   %TruncN = trunc i64 %N to i32                    ; starting value of the i32 index
 403   br label %for.body
 404
 405 for.body:                                         ; preds = %for.body, %entry
 406   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]        ; i64 counter: indexes %b and feeds the exit test
 407   %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ] ; i32 counter that is doubled to index %a
 408
 409   %mul = mul i32 %ind1, 2                          ; no nuw/nsw flags on the i32 multiply
 410
 411   %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul   ; inbounds GEP taking the i32 index with no explicit extension
 412   %loadA = load i16, i16* %arrayidxA, align 2
 413
 414   %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
 415   %loadB = load i16, i16* %arrayidxB, align 2
 416
 417   %add = mul i16 %loadA, %loadB                    ; NOTE: named %add but is a multiply
 418
 419   store i16 %add, i16* %arrayidxA, align 2         ; written back through the same pointer that was loaded
 420
 421   %inc = add nuw nsw i64 %ind, 1
 422   %dec = sub i32 %ind1, 1                          ; no wrap flags on the i32 decrement
 423
 424   %exitcond = icmp eq i64 %inc, %N                 ; leave the loop once %inc equals %N
 425   br i1 %exitcond, label %for.end, label %for.body
 426
 427 for.end:                                          ; preds = %for.body
 428   ret void
 429 }