llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll

   1 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
   2 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
   3
   4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; per the LLVM LangRef: 'e' = little-endian, 'n8:16:32:64' = native integer widths
   5
   6 ; Given a loop with an induction variable which is being
   7 ; truncated/extended using casts that had been proven to
   8 ; be redundant under a runtime test, we want to make sure
   9 ; that these casts do not get vectorized/scalarized/widened.
  10 ; This is the case for inductions whose SCEV expression is
  11 ; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
  12 ; can be a result of the IR sequences we check below.
  13 ;
  14 ; See also pr30654.
  15 ;
  16
  17 ; Case1: Check the following induction pattern:
  18 ;
  19 ;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  20 ;  %sext = shl i32 %p.09, 24
  21 ;  %conv = ashr exact i32 %sext, 24
  22 ;  %add = add nsw i32 %conv, %step
  23 ;
  24 ; This is the case in the following code:
  25 ;
  26 ; void doit1(int n, int step) {
  27 ;   int i;
  28 ;   char p = 0;
  29 ;   for (i = 0; i < n; i++) {
  30 ;      a[i] = p;
  31 ;      p = p + step;
  32 ;   }
  33 ; }
  34 ;
  35 ; The "ExtTrunc" IR sequence here is:
  36 ;  "%sext = shl i32 %p.09, 24"
  37 ;  "%conv = ashr exact i32 %sext, 24"
  38 ; We check that it does not appear in the vector loop body, whether
  39 ; we vectorize or scalarize the induction.
  40 ; In the case of widened induction, this means that the induction phi
  41 ; is directly used, without shl/ashr on the way.
  42
  43 ; VF8-LABEL: @doit1
  44 ; VF8: vector.body:
  45 ; VF8: %vec.ind = phi <8 x i32>
  46 ; VF8: store <8 x i32> %vec.ind
  47 ; VF8: middle.block:
  48
  49 ; VF1-LABEL: @doit1
  50 ; VF1: vector.body:
  51 ; VF1-NOT: %{{.*}} = shl i32
  52 ; VF1: middle.block:
  53
  54 @a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16 ; zero-initialized i32 array that @doit1 and @doit3 store into
  55
  56 define void @doit1(i32 %n, i32 %step) { ; Case1 input: stores the shl/ashr-by-24 form of %p.09 to @a[i] on each iteration, then advances it by %step
  57 entry:
  58   %cmp7 = icmp sgt i32 %n, 0 ; the loop is entered only when %n > 0
  59   br i1 %cmp7, label %for.body.lr.ph, label %for.end
  60
  61 for.body.lr.ph:
  62   %wide.trip.count = zext i32 %n to i64 ; trip count widened to the i64 type of %indvars.iv
  63   br label %for.body
  64
  65 for.body:
  66   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; loop counter, also the index into @a
  67   %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; induction under test: starts at 0, fed back from %add
  68   %sext = shl i32 %p.09, 24 ; together with the ashr below this sign-extends the low 8 bits of %p.09
  69   %conv = ashr exact i32 %sext, 24 ; the "ExtTrunc" result the checks expect to be absent from vector.body
  70   %arrayidx = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 %indvars.iv
  71   store i32 %conv, ptr %arrayidx, align 4 ; the cast value (not the raw phi) is what gets stored
  72   %add = add nsw i32 %conv, %step ; next %p.09 is computed from the cast value plus the loop-invariant %step
  73   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  74   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count ; exit once the counter reaches the widened %n
  75   br i1 %exitcond, label %for.end.loopexit, label %for.body
  76
  77 for.end.loopexit:
  78   br label %for.end
  79
  80 for.end:
  81   ret void
  82 }
  83
  84
  85 ; Case2: Another variant of the above pattern is where the induction variable
  86 ; is used only for address computation (i.e. it is a GEP index) and therefore
  87 ; the induction is not vectorized but rather only the step is widened.
  88 ;
  89 ; This is the case in the following code, where the induction variable 'w_ix'
  90 ; is only used to access the array 'in':
  91 ;
  92 ; void doit2(int *in, int *out, size_t size, size_t step)
  93 ; {
  94 ;    int w_ix = 0;
  95 ;    for (size_t offset = 0; offset < size; ++offset)
  96 ;     {
  97 ;        int w = in[w_ix];
  98 ;        out[offset] = w;
  99 ;        w_ix += step;
 100 ;     }
 101 ; }
 102 ;
 103 ; The "ExtTrunc" IR sequence here is similar to the previous case:
 104 ;  "%sext = shl i64 %w_ix.011, 32
 105 ;  %idxprom = ashr exact i64 %sext, 32"
 106 ; We check that it does not appear in the vector loop body, whether
 107 ; we widen or scalarize the induction.
 108 ; In the case of widened induction, this means that the induction phi
 109 ; is directly used, without shl/ashr on the way.
 110
 111 ; VF8-LABEL: @doit2
 112 ; VF8: vector.body:
 113 ; VF8-NEXT:  [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
 114 ; VF8-NEXT:  [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step
 115 ; VF8-NEXT:  [[MUL0:%.+]] = mul i64 0, %step
 116 ; VF8-NEXT:  [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]]
 117 ; VF8:       [[I0:%.+]] = add i64 [[INDEX]], 0
 118 ; VF8:       getelementptr inbounds i32, ptr %in, i64 [[ADD]]
 119 ; VF8: middle.block:
 120
 121 ; VF1-LABEL: @doit2
 122 ; VF1: vector.body:
 123 ; VF1-NOT: %{{.*}} = shl i64
 124 ; VF1: middle.block:
 125 ;
 126
 127 define void @doit2(ptr nocapture readonly %in, ptr nocapture %out, i64 %size, i64 %step)  { ; Case2 input: copies %in[idx] to %out[offset], where idx is the shl/ashr-by-32 form of %w_ix.011 and advances by %step
 128 entry:
 129   %cmp9 = icmp eq i64 %size, 0 ; a zero %size returns immediately without entering the loop
 130   br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
 131
 132 for.body.lr.ph:
 133   br label %for.body
 134
 135 for.cond.cleanup.loopexit:
 136   br label %for.cond.cleanup
 137
 138 for.cond.cleanup:
 139   ret void
 140
 141 for.body:
 142   %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; induction under test: within the loop it only feeds the shl/ashr pair below
 143   %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] ; loop counter, also the index into %out
 144   %sext = shl i64 %w_ix.011, 32 ; together with the ashr below this sign-extends the low 32 bits of %w_ix.011
 145   %idxprom = ashr exact i64 %sext, 32 ; the "ExtTrunc" result, used as the GEP index into %in
 146   %arrayidx = getelementptr inbounds i32, ptr %in, i64 %idxprom
 147   %0 = load i32, ptr %arrayidx, align 4
 148   %arrayidx1 = getelementptr inbounds i32, ptr %out, i64 %offset.010
 149   store i32 %0, ptr %arrayidx1, align 4
 150   %add = add i64 %idxprom, %step ; next %w_ix.011 is computed from the cast value plus the loop-invariant %step
 151   %inc = add nuw i64 %offset.010, 1
 152   %exitcond = icmp eq i64 %inc, %size ; exit once the counter reaches %size
 153   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 154 }
 155
 156 ; Case3: Lastly, check also the following induction pattern:
 157 ;
 158 ;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
 159 ;  %conv = and i32 %p.09, 255
 160 ;  %add = add nsw i32 %conv, %step
 161 ;
 162 ; This is the case in the following code:
 163 ;
 164 ; int a[N];
 165 ; void doit3(int n, int step) {
 166 ;   int i;
 167 ;   unsigned char p = 0;
 168 ;   for (i = 0; i < n; i++) {
 169 ;      a[i] = p;
 170 ;      p = p + step;
 171 ;   }
 172 ; }
 173 ;
 174 ; The "ExtTrunc" IR sequence here is:
 175 ;  "%conv = and i32 %p.09, 255".
 176 ; We check that it does not appear in the vector loop body, whether
 177 ; we vectorize or scalarize the induction.
 178
 179 ; VF8-LABEL: @doit3
 180 ; VF8: vector.body:
 181 ; VF8: %vec.ind = phi <8 x i32>
 182 ; VF8: store <8 x i32> %vec.ind
 183 ; VF8: middle.block:
 184
 185 ; VF1-LABEL: @doit3
 186 ; VF1: vector.body:
 187 ; VF1-NOT: %{{.*}} = and i32
 188 ; VF1: middle.block:
 189
 190 define void @doit3(i32 %n, i32 %step) { ; Case3 input: same loop shape as @doit1, but the cast is a mask with 255 instead of a shl/ashr pair
 191 entry:
 192   %cmp7 = icmp sgt i32 %n, 0 ; the loop is entered only when %n > 0
 193   br i1 %cmp7, label %for.body.lr.ph, label %for.end
 194
 195 for.body.lr.ph:
 196   %wide.trip.count = zext i32 %n to i64 ; trip count widened to the i64 type of %indvars.iv
 197   br label %for.body
 198
 199 for.body:
 200   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; loop counter, also the index into @a
 201   %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; induction under test: starts at 0, fed back from %add
 202   %conv = and i32 %p.09, 255 ; keeps only the low 8 bits of %p.09; the "ExtTrunc" result for this case
 203   %arrayidx = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 %indvars.iv
 204   store i32 %conv, ptr %arrayidx, align 4 ; the masked value (not the raw phi) is what gets stored
 205   %add = add nsw i32 %conv, %step ; next %p.09 is computed from the masked value plus the loop-invariant %step
 206   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 207   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count ; exit once the counter reaches the widened %n
 208   br i1 %exitcond, label %for.end.loopexit, label %for.body
 209
 210 for.end.loopexit:
 211   br label %for.end
 212
 213 for.end:
 214   ret void
 215 }
 216
 217 ; VF8-LABEL: @test_conv_in_latch_block
 218 ; VF8: vector.body:
 219 ; VF8-NEXT: %index = phi i64
 220 ; VF8-NEXT: %vec.ind = phi <8 x i32>
 221 ; VF8: store <8 x i32> %vec.ind
 222 ; VF8: middle.block:
 223 ;
 224 define void @test_conv_in_latch_block(i32 %n, i32 %step, ptr noalias %A, ptr noalias %B) { ; shl/ashr-by-24 induction as in @doit1, but with the casts placed in the latch block of a loop that has a conditional store
 225 entry:
 226   %wide.trip.count = zext i32 %n to i64 ; no guard on %n here: the loop is entered unconditionally
 227   br label %loop
 228
 229 loop:
 230   %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] ; loop counter, indexes both %A and %B
 231   %p.09 = phi i32 [ 0, %entry ], [ %add, %latch ] ; induction under test; its casts are defined in %latch, not in this header block
 232   %B.gep = getelementptr inbounds i32, ptr %B, i64 %iv
 233   %l = load i32, ptr %B.gep
 234   %c = icmp eq i32 %l, 0 ; only elements of %B equal to 0 take the %then path
 235   br i1 %c, label %then, label %latch
 236
 237 then:
 238   %A.gep = getelementptr inbounds i32, ptr %A, i64 %iv
 239   store i32 0, ptr %A.gep ; conditional store: executed only when %c is true
 240   br label %latch
 241
 242 latch:
 243   %sext = shl i32 %p.09, 24 ; together with the ashr below this sign-extends the low 8 bits of %p.09
 244   %conv = ashr exact i32 %sext, 24 ; the "ExtTrunc" result, defined in the latch rather than the header
 245   %add = add nsw i32 %conv, %step ; next %p.09 is computed from the cast value plus the loop-invariant %step
 246   store i32 %conv, ptr %B.gep, align 4 ; unconditional store of the cast value to the %B element loaded in %loop
 247   %iv.next = add nuw nsw i64 %iv, 1
 248   %exitcond = icmp eq i64 %iv.next, %wide.trip.count ; exit once the counter reaches the widened %n
 249   br i1 %exitcond, label %exit, label %loop
 250
 251 exit:
 252   ret void
 253 }