llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll

   1 ; RUN: opt -basic-aa -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s -check-prefix=LAA
   2 ; RUN: opt -passes='require<aa>,require<scalar-evolution>,require<aa>,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s  2>&1 | FileCheck %s --check-prefix=LAA
   3
   4 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
   5
   6 ; For this loop:
   7 ;   unsigned index = 0;
   8 ;   for (int i = 0; i < n; i++) {
   9 ;    A[2 * index] = A[2 * index] + B[i];
  10 ;    index++;
  11 ;   }
  12 ;
  13 ; SCEV is unable to prove that A[2 * i] does not overflow.
  14 ;
  15 ; Analyzing the IR does not help us because the GEPs are not
  16 ; affine AddRecExprs. However, we can turn them into AddRecExprs
  17 ; using SCEV Predicates.
  18 ;
  19 ; Once we have an affine expression we need to add an additional NUSW
  20 ; to check that the pointers don't wrap since the GEPs are not
  21 ; inbound.
  22
  23 ; LAA-LABEL: f1
  24 ; LAA: Memory dependences are safe{{$}}
  25 ; LAA: SCEV assumptions:
  26 ; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
  27 ; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>
  28
  29 ; The expression for %mul_ext as analyzed by SCEV is
  30 ;    (zext i32 {0,+,2}<%for.body> to i64)
  31 ; We have added the nusw flag to turn this expression into the SCEV expression:
  32 ;    i64 {0,+,2}<%for.body>
  33
  34 ; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
  35 ; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64))<nuw><nsw> + %a)
  36 ; LAA-NEXT: --> {%a,+,4}<%for.body>
  37
  38
  39 define void @f1(i16* noalias %a,
  40                 i16* noalias %b, i64 %N) {
  41 entry:
  42   br label %for.body
  43
  44 for.body:                                         ; preds = %for.body, %entry
  45   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  46   %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
  47
  48   %mul = mul i32 %ind1, 2
  49   %mul_ext = zext i32 %mul to i64
  50
  51   %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  52   %loadA = load i16, i16* %arrayidxA, align 2
  53
  54   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  55   %loadB = load i16, i16* %arrayidxB, align 2
  56
  57   %add = mul i16 %loadA, %loadB
  58
  59   store i16 %add, i16* %arrayidxA, align 2
  60
  61   %inc = add nuw nsw i64 %ind, 1
  62   %inc1 = add i32 %ind1, 1
  63
  64   %exitcond = icmp eq i64 %inc, %N
  65   br i1 %exitcond, label %for.end, label %for.body
  66
  67 for.end:                                          ; preds = %for.body
  68   ret void
  69 }
  70
  71 ; For this loop:
  72 ;   unsigned index = n;
  73 ;   for (int i = 0; i < n; i++) {
  74 ;    A[2 * index] = A[2 * index] + B[i];
  75 ;    index--;
  76 ;   }
  77 ;
  78 ; the SCEV expression for 2 * index is not an AddRecExpr
  79 ; (and implictly not affine). However, we are able to make assumptions
  80 ; that will turn the expression into an affine one and continue the
  81 ; analysis.
  82 ;
  83 ; Once we have an affine expression we need to add an additional NUSW
  84 ; to check that the pointers don't wrap since the GEPs are not
  85 ; inbounds.
  86 ;
  87 ; This loop has a negative stride for A, and the nusw flag is required in
  88 ; order to properly extend the increment from i32 -4 to i64 -4.
  89
  90 ; LAA-LABEL: f2
  91 ; LAA: Memory dependences are safe{{$}}
  92 ; LAA: SCEV assumptions:
  93 ; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
  94 ; LAA-NEXT: {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
  95
  96 ; The expression for %mul_ext as analyzed by SCEV is
  97 ;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
  98 ; We have added the nusw flag to turn this expression into the following SCEV:
  99 ;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
 100
 101 ; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
 102 ; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nuw><nsw> + %a)
 103 ; LAA-NEXT: --> {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body>
 104
 105 define void @f2(i16* noalias %a,
 106                 i16* noalias %b, i64 %N) {
 107 entry:
 108   %TruncN = trunc i64 %N to i32
 109   br label %for.body
 110
 111 for.body:                                         ; preds = %for.body, %entry
 112   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
 113   %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
 114
 115   %mul = mul i32 %ind1, 2
 116   %mul_ext = zext i32 %mul to i64
 117
 118   %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
 119   %loadA = load i16, i16* %arrayidxA, align 2
 120
 121   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
 122   %loadB = load i16, i16* %arrayidxB, align 2
 123
 124   %add = mul i16 %loadA, %loadB
 125
 126   store i16 %add, i16* %arrayidxA, align 2
 127
 128   %inc = add nuw nsw i64 %ind, 1
 129   %dec = sub i32 %ind1, 1
 130
 131   %exitcond = icmp eq i64 %inc, %N
 132   br i1 %exitcond, label %for.end, label %for.body
 133
 134 for.end:                                          ; preds = %for.body
 135   ret void
 136 }
 137
 138 ; We replicate the tests above, but this time sign extend 2 * index instead
 139 ; of zero extending it.
 140
 141 ; LAA-LABEL: f3
 142 ; LAA: Memory dependences are safe{{$}}
 143 ; LAA: SCEV assumptions:
 144 ; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
 145 ; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>
 146
 147 ; The expression for %mul_ext as analyzed by SCEV is
 148 ;     i64 (sext i32 {0,+,2}<%for.body> to i64)
 149 ; We have added the nssw flag to turn this expression into the following SCEV:
 150 ;     i64 {0,+,2}<%for.body>
 151
 152 ; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
 153 ; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64))<nsw> + %a)
 154 ; LAA-NEXT: --> {%a,+,4}<%for.body>
 155
 156 define void @f3(i16* noalias %a,
 157                 i16* noalias %b, i64 %N) {
 158 entry:
 159   br label %for.body
 160
 161 for.body:                                         ; preds = %for.body, %entry
 162   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
 163   %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
 164
 165   %mul = mul i32 %ind1, 2
 166   %mul_ext = sext i32 %mul to i64
 167
 168   %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
 169   %loadA = load i16, i16* %arrayidxA, align 2
 170
 171   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
 172   %loadB = load i16, i16* %arrayidxB, align 2
 173
 174   %add = mul i16 %loadA, %loadB
 175
 176   store i16 %add, i16* %arrayidxA, align 2
 177
 178   %inc = add nuw nsw i64 %ind, 1
 179   %inc1 = add i32 %ind1, 1
 180
 181   %exitcond = icmp eq i64 %inc, %N
 182   br i1 %exitcond, label %for.end, label %for.body
 183
 184 for.end:                                          ; preds = %for.body
 185   ret void
 186 }
 187
 188 ; LAA-LABEL: f4
 189 ; LAA: Memory dependences are safe{{$}}
 190 ; LAA: SCEV assumptions:
 191 ; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
 192 ; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
 193
 194 ; The expression for %mul_ext as analyzed by SCEV is
 195 ;     i64  (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
 196 ; We have added the nssw flag to turn this expression into the following SCEV:
 197 ;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
 198
 199 ; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
 200 ; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
 201 ; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
 202
 203 define void @f4(i16* noalias %a,
 204                 i16* noalias %b, i64 %N) {
 205 entry:
 206   %TruncN = trunc i64 %N to i32
 207   br label %for.body
 208
 209 for.body:                                         ; preds = %for.body, %entry
 210   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
 211   %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
 212
 213   %mul = mul i32 %ind1, 2
 214   %mul_ext = sext i32 %mul to i64
 215
 216   %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
 217   %loadA = load i16, i16* %arrayidxA, align 2
 218
 219   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
 220   %loadB = load i16, i16* %arrayidxB, align 2
 221
 222   %add = mul i16 %loadA, %loadB
 223
 224   store i16 %add, i16* %arrayidxA, align 2
 225
 226   %inc = add nuw nsw i64 %ind, 1
 227   %dec = sub i32 %ind1, 1
 228
 229   %exitcond = icmp eq i64 %inc, %N
 230   br i1 %exitcond, label %for.end, label %for.body
 231
 232 for.end:                                          ; preds = %for.body
 233   ret void
 234 }
 235
 236 ; The following function is similar to the one above, but has the GEP
 237 ; to pointer %A inbounds. The index %mul doesn't have the nsw flag.
 238 ; This means that the SCEV expression for %mul can wrap and we need
 239 ; a SCEV predicate to continue analysis.
 240 ;
 241 ; We can still analyze this by adding the required no wrap SCEV predicates.
 242
 243 ; LAA-LABEL: f5
 244 ; LAA: Memory dependences are safe{{$}}
 245 ; LAA: SCEV assumptions:
 246 ; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
 247 ; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
 248
 249 ; LAA: [PSE]  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
 250 ; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
 251 ; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
 252
 253 define void @f5(i16* noalias %a,
 254                 i16* noalias %b, i64 %N) {
 255 entry:
 256   %TruncN = trunc i64 %N to i32
 257   br label %for.body
 258
 259 for.body:                                         ; preds = %for.body, %entry
 260   %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
 261   %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
 262
 263   %mul = mul i32 %ind1, 2
 264
 265   %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul
 266   %loadA = load i16, i16* %arrayidxA, align 2
 267
 268   %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
 269   %loadB = load i16, i16* %arrayidxB, align 2
 270
 271   %add = mul i16 %loadA, %loadB
 272
 273   store i16 %add, i16* %arrayidxA, align 2
 274
 275   %inc = add nuw nsw i64 %ind, 1
 276   %dec = sub i32 %ind1, 1
 277
 278   %exitcond = icmp eq i64 %inc, %N
 279   br i1 %exitcond, label %for.end, label %for.body
 280
 281 for.end:                                          ; preds = %for.body
 282   ret void
 283 }