1 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
2 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6 ; Given a loop with an induction variable which is being
7 ; truncated/extended using casts that had been proven to
8 ; be redundant under a runtime test, we want to make sure
9 ; that these casts do not get vectorized/scalarized/widened.
10 ; This is the case for inductions whose SCEV expression is
11 ; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
12 ; can be a result of the IR sequences we check below.
17 ; Case1: Check the following induction pattern:
19 ; %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
20 ; %sext = shl i32 %p.09, 24
21 ; %conv = ashr exact i32 %sext, 24
22 ; %add = add nsw i32 %conv, %step
24 ; This is the case in the following code:
26 ; void doit1(int n, int step) {
29 ; for (i = 0; i < n; i++) {
35 ; The "ExtTrunc" IR sequence here is:
36 ; "%sext = shl i32 %p.09, 24"
37 ; "%conv = ashr exact i32 %sext, 24"
38 ; We check that it does not appear in the vector loop body, whether
39 ; we vectorize or scalarize the induction.
40 ; In the case of widened induction, this means that the induction phi
41 ; is directly used, without shl/ashr on the way.
45 ; VF8: %vec.ind = phi <8 x i32>
46 ; VF8: store <8 x i32> %vec.ind
51 ; VF1-NOT: %{{.*}} = shl i32
54 @a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16 ; zero-initialized 250 x i32 array; destination of the stores in @doit1 and @doit3
56 define void @doit1(i32 %n, i32 %step) { ; Case1 input: i32 induction %p.09 is re-extended from its low 8 bits (shl/ashr) before %step is added
58 %cmp7 = icmp sgt i32 %n, 0 ; loop guard: true only when %n > 0 (signed compare)
59 br i1 %cmp7, label %for.body.lr.ph, label %for.end
62 %wide.trip.count = zext i32 %n to i64 ; i64 bound compared against %indvars.iv.next below
66 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; i64 counter starting at 0, used as the @a index
67 %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; induction under test: starts at 0, next value is %add
68 %sext = shl i32 %p.09, 24 ; shl by 24 followed by ashr by 24 sign-extends the low 8 bits of %p.09
69 %conv = ashr exact i32 %sext, 24 ; the "ExtTrunc" value: it is both stored and fed into the recurrence
70 %arrayidx = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 %indvars.iv
71 store i32 %conv, ptr %arrayidx, align 4 ; a[%indvars.iv] = %conv
72 %add = add nsw i32 %conv, %step ; the recurrence is built on %conv, not directly on %p.09
73 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
74 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count ; exit once the counter reaches the zero-extended %n
75 br i1 %exitcond, label %for.end.loopexit, label %for.body
85 ; Case2: Another variant of the above pattern is where the induction variable
86 ; is used only for address computation (i.e. it is a GEP index) and therefore
87 ; the induction is not vectorized but rather only the step is widened.
89 ; This is the case in the following code, where the induction variable 'w_ix'
90 ; is only used to access the array 'in':
92 ; void doit2(int *in, int *out, size_t size, size_t step)
95 ; for (size_t offset = 0; offset < size; ++offset)
103 ; The "ExtTrunc" IR sequence here is similar to the previous case:
104 ; "%sext = shl i64 %w_ix.011, 32
105 ; %idxprom = ashr exact i64 %sext, 32"
106 ; We check that it does not appear in the vector loop body, whether
107 ; we widen or scalarize the induction.
108 ; In the case of widened induction, this means that the induction phi
109 ; is directly used, without shl/ashr on the way.
113 ; VF8-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
114 ; VF8-NEXT: [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step
115 ; VF8-NEXT: [[MUL0:%.+]] = mul i64 0, %step
116 ; VF8-NEXT: [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]]
117 ; VF8: [[I0:%.+]] = add i64 [[INDEX]], 0
118 ; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD]]
123 ; VF1-NOT: %{{.*}} = shl i64
127 define void @doit2(ptr nocapture readonly %in, ptr nocapture %out, i64 %size, i64 %step) { ; Case2 input: induction %w_ix.011 is consumed only through %idxprom (the %in index and the recurrence)
129 %cmp9 = icmp eq i64 %size, 0 ; skip the loop entirely when %size is 0
130 br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
135 for.cond.cleanup.loopexit:
136 br label %for.cond.cleanup
142 %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; induction under test: starts at 0, next value is %add
143 %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] ; plain counter starting at 0, used as the %out index
144 %sext = shl i64 %w_ix.011, 32 ; shl by 32 followed by ashr by 32 sign-extends the low 32 bits of %w_ix.011
145 %idxprom = ashr exact i64 %sext, 32 ; the "ExtTrunc" value used to index %in
146 %arrayidx = getelementptr inbounds i32, ptr %in, i64 %idxprom
147 %0 = load i32, ptr %arrayidx, align 4
148 %arrayidx1 = getelementptr inbounds i32, ptr %out, i64 %offset.010
149 store i32 %0, ptr %arrayidx1, align 4 ; out[%offset.010] = in[%idxprom]
150 %add = add i64 %idxprom, %step ; the recurrence is built on %idxprom, not directly on %w_ix.011
151 %inc = add nuw i64 %offset.010, 1
152 %exitcond = icmp eq i64 %inc, %size ; exit once the counter reaches %size
153 br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
156 ; Case3: Lastly, check also the following induction pattern:
158 ; %p.09 = phi i32 [ %val0, %scalar.ph ], [ %add, %for.body ]
159 ; %conv = and i32 %p.09, 255
160 ; %add = add nsw i32 %conv, %step
162 ; This is the case in the following code:
165 ; void doit3(int n, int step) {
167 ; unsigned char p = 0;
168 ; for (i = 0; i < n; i++) {
174 ; The "ExtTrunc" IR sequence here is:
175 ; "%conv = and i32 %p.09, 255".
176 ; We check that it does not appear in the vector loop body, whether
177 ; we vectorize or scalarize the induction.
181 ; VF8: %vec.ind = phi <8 x i32>
182 ; VF8: store <8 x i32> %vec.ind
187 ; VF1-NOT: %{{.*}} = and i32
190 define void @doit3(i32 %n, i32 %step) { ; Case3 input: same loop shape as @doit1, but the redundant cast is a mask (and 255) instead of shl/ashr
192 %cmp7 = icmp sgt i32 %n, 0 ; loop guard: true only when %n > 0 (signed compare)
193 br i1 %cmp7, label %for.body.lr.ph, label %for.end
196 %wide.trip.count = zext i32 %n to i64 ; i64 bound compared against %indvars.iv.next below
200 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; i64 counter starting at 0, used as the @a index
201 %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; induction under test: starts at 0, next value is %add
202 %conv = and i32 %p.09, 255 ; keeps only the low 8 bits of %p.09, i.e. a zero-extension from 8 bits
203 %arrayidx = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 %indvars.iv
204 store i32 %conv, ptr %arrayidx, align 4 ; a[%indvars.iv] = %conv
205 %add = add nsw i32 %conv, %step ; the recurrence is built on the masked value, not directly on %p.09
206 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
207 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count ; exit once the counter reaches the zero-extended %n
208 br i1 %exitcond, label %for.end.loopexit, label %for.body
217 ; VF8-LABEL: @test_conv_in_latch_block
219 ; VF8-NEXT: %index = phi i64
220 ; VF8-NEXT: %vec.ind = phi <8 x i32>
221 ; VF8: store <8 x i32> %vec.ind
224 define void @test_conv_in_latch_block(i32 %n, i32 %step, ptr noalias %A, ptr noalias %B) { ; same shl/ashr induction as @doit1, in a loop that also contains a conditional branch
226 %wide.trip.count = zext i32 %n to i64 ; i64 bound compared against %iv.next below
230 %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] ; i64 counter starting at 0, indexes both %A and %B
231 %p.09 = phi i32 [ 0, %entry ], [ %add, %latch ] ; induction under test: starts at 0, next value is %add
232 %B.gep = getelementptr inbounds i32, ptr %B, i64 %iv
233 %l = load i32, ptr %B.gep
234 %c = icmp eq i32 %l, 0 ; true when B[%iv] is 0
235 br i1 %c, label %then, label %latch
238 %A.gep = getelementptr inbounds i32, ptr %A, i64 %iv ; NOTE(review): presumably the %then block; its label is not visible in this excerpt
239 store i32 0, ptr %A.gep ; A[%iv] = 0
243 %sext = shl i32 %p.09, 24 ; NOTE(review): presumably the %latch block; shl by 24 followed by ashr by 24 sign-extends the low 8 bits of %p.09
244 %conv = ashr exact i32 %sext, 24 ; the "ExtTrunc" value: it is both stored and fed into the recurrence
245 %add = add nsw i32 %conv, %step ; the recurrence is built on %conv, not directly on %p.09
246 store i32 %conv, ptr %B.gep, align 4 ; B[%iv] = %conv
247 %iv.next = add nuw nsw i64 %iv, 1
248 %exitcond = icmp eq i64 %iv.next, %wide.trip.count ; exit once the counter reaches the zero-extended %n
249 br i1 %exitcond, label %exit, label %loop