; RUN: opt -passes='print<access-info>' -aa-pipeline='basic-aa' -disable-output < %s 2>&1 | FileCheck %s --check-prefix=LAA

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index++;
;   }
;
; SCEV is unable to prove that A[2 * index] does not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV Predicates.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are
; not inbounds.

; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the SCEV expression:
;     i64 {0,+,2}<%for.body>
; LAA: [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64))<nuw><nsw> + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

define void @f1(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are
; not inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.

; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
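;
; Illustrative arithmetic for the negative stride: naively zero-extending
; the i32 byte increment -4 (0xFFFFFFFC) would yield i64 4294967292,
; moving the pointer forward by almost 4 GiB per iteration instead of
; back by 4 bytes. Under the <nusw> assumption the recurrence keeps the
; i64 step -4, matching
;     {...,+,-4}<%for.body>
; in the checks above. (The -4 byte stride is the -2 index step scaled
; by the 2-byte i16 element size.)
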
; LAA: [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nuw><nsw> + %a)
; LAA-NEXT: --> {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body>

define void @f2(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time sign extend 2 * index instead
; of zero extending it.

; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {0,+,2}<%for.body>
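;
; A signed-wrap example for contrast (values chosen for illustration, not
; implied by the test): at %ind1 = 1073741824 (2^30), the i32 product
; %mul = 2 * %ind1 is INT32_MIN, so
;     (sext i32 {0,+,2}<%for.body> to i64)
; would fall from 2147483646 to -2147483648 and would not be an affine
; AddRec in i64. The <nssw> assumption rules out this signed wrap, so the
; sext can be folded away.
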
; LAA: [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

define void @f3(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
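;
; As in @f2, the step must survive the extension (illustrative
; arithmetic): the i32 index step -2 sign-extends to i64 -2, which scaled
; by the 2-byte i16 element size gives the -4 byte stride in
;     {...,+,-4}<%for.body>
; above. Without the <nssw> assumption, a signed wrap of the i32
; recurrence would invalidate this rewrite.
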
; LAA: [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>

define void @f4(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but has the GEP
; to pointer %a inbounds. The index %mul doesn't have the nsw flag.
; This means that the SCEV expression for %mul can wrap and we need
; a SCEV predicate to continue analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates.
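;
; Note (inferred from the single entry under "SCEV assumptions" below):
; because the GEP is inbounds, the pointer recurrence itself needs no
; extra <nusw> assumption here; only the i32 index %mul needs the <nssw>
; predicate, in contrast to @f4 which required two assumptions.
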
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>

; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>

define void @f5(ptr noalias %a,
                ptr noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}