1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2 ; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
6 @f = common global i32 0, align 4
7 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
8 @c = common global i32 0, align 4
9 @a = common global i32 0, align 4
10 @b = common global i32 0, align 4
11 @e = common global i32 0, align 4
13 ; This loop has a value, "tmp17", that is used outside of the loop
14 ; and is not a recognized reduction variable.
15 ; However, tmp17 is a non-header phi, which is an allowed exit value.
19 ; CHECK-LABEL: define i32 @test1() {
20 ; CHECK-NEXT: [[BB:.*]]:
21 ; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
22 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
23 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
24 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
25 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
26 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
27 ; CHECK: [[VECTOR_PH]]:
28 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
29 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
30 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
31 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
32 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
33 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
34 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
35 ; CHECK: [[VECTOR_BODY]]:
36 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
37 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
38 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
39 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
40 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
41 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
42 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
43 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
44 ; CHECK: [[MIDDLE_BLOCK]]:
45 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
46 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
47 ; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
48 ; CHECK: [[_LR_PH_I]]:
49 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
50 ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
51 ; CHECK: [[_LR_PH_I1:.*:]]
52 ; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
53 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10
54 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]]
56 ; CHECK-NEXT: br label %[[BB16]]
58 ; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ 0, %[[BB10]] ], [ 1, %[[DOTLR_PH_I]] ]
59 ; CHECK-NEXT: [[TMP18]] = add nsw i32 [[TMP8]], 1
60 ; CHECK-NEXT: [[TMP19:%.*]] = icmp slt i32 [[TMP18]], 4
61 ; CHECK-NEXT: br i1 [[TMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
62 ; CHECK: [[F1_EXIT_LOOPEXIT]]:
63 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[BB16]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
64 ; CHECK-NEXT: ret i32 [[DOTLCSSA]]
67 %b.promoted = load i32, ptr @b, align 4
71 %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
72 %tmp2 = icmp sgt i32 %tmp8, 10
73 br i1 %tmp2, label %bb16, label %bb10
79 %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
80 %tmp18 = add nsw i32 %tmp8, 1
81 %tmp19 = icmp slt i32 %tmp18, 4
82 br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
85 %.lcssa = phi i32 [ %tmp17, %bb16 ]
89 ; The non-header phi depends on the header phi.
92 ; CHECK-LABEL: define i32 @test2() {
93 ; CHECK-NEXT: [[BB:.*]]:
94 ; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
95 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
96 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
97 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
98 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
99 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
100 ; CHECK: [[VECTOR_PH]]:
101 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
102 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
103 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
104 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
105 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
106 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
107 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
108 ; CHECK: [[VECTOR_BODY]]:
109 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
110 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
111 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
112 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> [[VEC_IND]]
113 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
114 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
115 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
116 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
117 ; CHECK: [[MIDDLE_BLOCK]]:
118 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
119 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
120 ; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
121 ; CHECK: [[_LR_PH_I]]:
122 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
123 ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
124 ; CHECK: [[_LR_PH_I1:.*:]]
125 ; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
126 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10
127 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]]
129 ; CHECK-NEXT: br label %[[BB16]]
131 ; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP8]], %[[BB10]] ], [ 1, %[[DOTLR_PH_I]] ]
132 ; CHECK-NEXT: [[TMP18]] = add nsw i32 [[TMP8]], 1
133 ; CHECK-NEXT: [[TMP19:%.*]] = icmp slt i32 [[TMP18]], 4
134 ; CHECK-NEXT: br i1 [[TMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
135 ; CHECK: [[F1_EXIT_LOOPEXIT]]:
136 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[BB16]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
137 ; CHECK-NEXT: ret i32 [[DOTLCSSA]]
140 %b.promoted = load i32, ptr @b, align 4
144 %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
145 %tmp2 = icmp sgt i32 %tmp8, 10
146 br i1 %tmp2, label %bb16, label %bb10
152 %tmp17 = phi i32 [ %tmp8, %bb10 ], [ 1, %.lr.ph.i ]
153 %tmp18 = add nsw i32 %tmp8, 1
154 %tmp19 = icmp slt i32 %tmp18, 4
155 br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
158 %.lcssa = phi i32 [ %tmp17, %bb16 ]
162 ; more than 2 incoming values for tmp17 phi that is used outside loop.
163 define i32 @test3(i32 %N) {
164 ; CHECK-LABEL: define i32 @test3(
165 ; CHECK-SAME: i32 [[N:%.*]]) {
166 ; CHECK-NEXT: [[BB:.*]]:
167 ; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
168 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
169 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
170 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
171 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
172 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
173 ; CHECK: [[VECTOR_PH]]:
174 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
175 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
176 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
177 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
178 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
179 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
180 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i64 0
181 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
182 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
183 ; CHECK: [[VECTOR_BODY]]:
184 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
185 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
186 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
187 ; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
188 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
189 ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP4]], <2 x i1> zeroinitializer
190 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
191 ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 2), <2 x i32> [[PREDPHI]]
192 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
193 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
194 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
195 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
196 ; CHECK: [[MIDDLE_BLOCK]]:
197 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1
198 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
199 ; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
200 ; CHECK: [[_LR_PH_I]]:
201 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
202 ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
203 ; CHECK: [[_LR_PH_I1:.*:]]
204 ; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
205 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10
206 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]]
208 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], [[N]]
209 ; CHECK-NEXT: br i1 [[CMP]], label %[[BB12:.*]], label %[[BB16]]
211 ; CHECK-NEXT: br label %[[BB16]]
213 ; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ 0, %[[BB10]] ], [ 1, %[[DOTLR_PH_I]] ], [ 2, %[[BB12]] ]
214 ; CHECK-NEXT: [[TMP18]] = add nsw i32 [[TMP8]], 1
215 ; CHECK-NEXT: [[TMP19:%.*]] = icmp slt i32 [[TMP18]], 4
216 ; CHECK-NEXT: br i1 [[TMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
217 ; CHECK: [[F1_EXIT_LOOPEXIT]]:
218 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[BB16]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
219 ; CHECK-NEXT: ret i32 [[DOTLCSSA]]
222 %b.promoted = load i32, ptr @b, align 4
226 %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
227 %tmp2 = icmp sgt i32 %tmp8, 10
228 br i1 %tmp2, label %bb16, label %bb10
231 %cmp = icmp sgt i32 %tmp8, %N
232 br i1 %cmp, label %bb12, label %bb16
238 %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ], [ 2, %bb12 ]
239 %tmp18 = add nsw i32 %tmp8, 1
240 %tmp19 = icmp slt i32 %tmp18, 4
241 br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
244 %.lcssa = phi i32 [ %tmp17, %bb16 ]
248 ; more than one incoming value for outside user: %.lcssa
249 define i32 @test4(i32 %N) {
250 ; CHECK-LABEL: define i32 @test4(
251 ; CHECK-SAME: i32 [[N:%.*]]) {
252 ; CHECK-NEXT: [[BB:.*]]:
253 ; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
254 ; CHECK-NEXT: [[ICMP:%.*]] = icmp slt i32 [[B_PROMOTED]], [[N]]
255 ; CHECK-NEXT: br i1 [[ICMP]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[DOTLR_PH_I_PREHEADER:.*]]
256 ; CHECK: [[_LR_PH_I_PREHEADER:.*:]]
257 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
258 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
259 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
260 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
261 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
262 ; CHECK: [[VECTOR_PH]]:
263 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
264 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
265 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
266 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
267 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
268 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
269 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
270 ; CHECK: [[VECTOR_BODY]]:
271 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
272 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
273 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
274 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
275 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
276 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
277 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
278 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
279 ; CHECK: [[MIDDLE_BLOCK]]:
280 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
281 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
282 ; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
283 ; CHECK: [[_LR_PH_I]]:
284 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ]
285 ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
286 ; CHECK: [[_LR_PH_I1:.*:]]
287 ; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
288 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10
289 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]]
291 ; CHECK-NEXT: br label %[[BB16]]
293 ; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ 0, %[[BB10]] ], [ 1, %[[DOTLR_PH_I]] ]
294 ; CHECK-NEXT: [[TMP18]] = add nsw i32 [[TMP8]], 1
295 ; CHECK-NEXT: [[TMP19:%.*]] = icmp slt i32 [[TMP18]], 4
296 ; CHECK-NEXT: br i1 [[TMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]]
297 ; CHECK: [[F1_EXIT_LOOPEXIT_LOOPEXIT]]:
298 ; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[TMP17]], %[[BB16]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
299 ; CHECK-NEXT: br label %[[F1_EXIT_LOOPEXIT]]
300 ; CHECK: [[F1_EXIT_LOOPEXIT]]:
301 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ 2, %[[BB]] ], [ [[TMP17_LCSSA]], %[[F1_EXIT_LOOPEXIT_LOOPEXIT]] ]
302 ; CHECK-NEXT: ret i32 [[DOTLCSSA]]
305 %b.promoted = load i32, ptr @b, align 4
306 %icmp = icmp slt i32 %b.promoted, %N
307 br i1 %icmp, label %f1.exit.loopexit, label %.lr.ph.i
310 %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
311 %tmp2 = icmp sgt i32 %tmp8, 10
312 br i1 %tmp2, label %bb16, label %bb10
318 %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
319 %tmp18 = add nsw i32 %tmp8, 1
320 %tmp19 = icmp slt i32 %tmp18, 4
321 br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
324 %.lcssa = phi i32 [ %tmp17, %bb16 ], [ 2, %bb ]
328 ; Non-header phi that depends on a reduction and is used outside the loop.
329 ; Reduction phis are only allowed to have bump or reduction operations as the inside user, so we should
330 ; not vectorize this.
331 define i32 @reduction_sum(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) nounwind uwtable readonly noinline ssp {
332 ; CHECK-LABEL: define i32 @reduction_sum(
333 ; CHECK-SAME: i32 [[N:%.*]], ptr noalias nocapture [[A:%.*]], ptr noalias nocapture [[B:%.*]]) #[[ATTR0:[0-9]+]] {
334 ; CHECK-NEXT: [[ENTRY:.*]]:
335 ; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[N]], 0
336 ; CHECK-NEXT: br i1 [[C1]], label %[[HEADER_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
337 ; CHECK: [[HEADER_PREHEADER]]:
338 ; CHECK-NEXT: br label %[[HEADER:.*]]
340 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB16:.*]] ], [ 0, %[[HEADER_PREHEADER]] ]
341 ; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[C9:%.*]], %[[BB16]] ], [ 0, %[[HEADER_PREHEADER]] ]
342 ; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
343 ; CHECK-NEXT: [[C3:%.*]] = load i32, ptr [[C2]], align 4
344 ; CHECK-NEXT: [[C4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
345 ; CHECK-NEXT: [[C5:%.*]] = load i32, ptr [[C4]], align 4
346 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SUM_02]], 10
347 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]]
349 ; CHECK-NEXT: br label %[[BB16]]
351 ; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[SUM_02]], %[[BB10]] ], [ 1, %[[HEADER]] ]
352 ; CHECK-NEXT: [[C6:%.*]] = trunc i64 [[INDVARS_IV]] to i32
353 ; CHECK-NEXT: [[C7:%.*]] = add i32 [[SUM_02]], [[C6]]
354 ; CHECK-NEXT: [[C8:%.*]] = add i32 [[C7]], [[C3]]
355 ; CHECK-NEXT: [[C9]] = add i32 [[C8]], [[C5]]
356 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
357 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
358 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
359 ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]], label %[[HEADER]]
360 ; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
361 ; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[TMP17]], %[[BB16]] ]
362 ; CHECK-NEXT: [[C9_LCSSA:%.*]] = phi i32 [ [[C9]], %[[BB16]] ]
363 ; CHECK-NEXT: br [[DOT_CRIT_EDGE]]
364 ; CHECK: [[__CRIT_EDGE:.*:]]
365 ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[C9_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
366 ; CHECK-NEXT: [[NONHDR_LCSSA:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[TMP17_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
367 ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
370 %c1 = icmp sgt i32 %n, 0
371 br i1 %c1, label %header, label %._crit_edge
373 header: ; preds = %bb16, %entry
374 %indvars.iv = phi i64 [ %indvars.iv.next, %bb16 ], [ 0, %entry ]
375 %sum.02 = phi i32 [ %c9, %bb16 ], [ 0, %entry ]
376 %c2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
377 %c3 = load i32, ptr %c2, align 4
378 %c4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
379 %c5 = load i32, ptr %c4, align 4
380 %tmp2 = icmp sgt i32 %sum.02, 10
381 br i1 %tmp2, label %bb16, label %bb10
387 %tmp17 = phi i32 [ %sum.02, %bb10 ], [ 1, %header ]
388 %c6 = trunc i64 %indvars.iv to i32
389 %c7 = add i32 %sum.02, %c6
390 %c8 = add i32 %c7, %c3
391 %c9 = add i32 %c8, %c5
392 %indvars.iv.next = add i64 %indvars.iv, 1
393 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
394 %exitcond = icmp eq i32 %lftr.wideiv, %n
395 br i1 %exitcond, label %._crit_edge, label %header
397 ._crit_edge: ; preds = %bb16, %entry
398 %sum.0.lcssa = phi i32 [ 0, %entry ], [ %c9, %bb16 ]
399 %nonhdr.lcssa = phi i32 [ 1, %entry], [ %tmp17, %bb16 ]
403 ; invalid cyclic dependency with header phi iv, which prevents iv from being
404 ; recognized as induction var.
406 define i32 @cyclic_dep_with_indvar() {
407 ; CHECK-LABEL: define i32 @cyclic_dep_with_indvar() {
408 ; CHECK-NEXT: [[BB:.*]]:
409 ; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
410 ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
411 ; CHECK: [[_LR_PH_I:.*:]]
412 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[BB16:.*]] ], [ [[B_PROMOTED]], %[[BB]] ]
413 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[IV]], 10
414 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]]
416 ; CHECK-NEXT: br label %[[BB16]]
418 ; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ 0, %[[BB10]] ], [ [[IV]], %[[DOTLR_PH_I]] ]
419 ; CHECK-NEXT: [[IVNEXT]] = add nsw i32 [[TMP17]], 1
420 ; CHECK-NEXT: [[TMP19:%.*]] = icmp slt i32 [[IVNEXT]], 4
421 ; CHECK-NEXT: br i1 [[TMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT:.*]]
422 ; CHECK: [[F1_EXIT_LOOPEXIT]]:
423 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[BB16]] ]
424 ; CHECK-NEXT: ret i32 [[DOTLCSSA]]
427 %b.promoted = load i32, ptr @b, align 4
431 %iv = phi i32 [ %ivnext, %bb16 ], [ %b.promoted, %bb ]
432 %tmp2 = icmp sgt i32 %iv, 10
433 br i1 %tmp2, label %bb16, label %bb10
439 %tmp17 = phi i32 [ 0, %bb10 ], [ %iv, %.lr.ph.i ]
440 %ivnext = add nsw i32 %tmp17, 1
441 %tmp19 = icmp slt i32 %ivnext, 4
442 br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
445 %.lcssa = phi i32 [ %tmp17, %bb16 ]
449 ; non-reduction phi 'tmp17' used outside loop has cyclic dependence with %x.05 phi
451 define i32 @not_valid_reduction(i32 %n, ptr noalias nocapture %A) nounwind uwtable readonly {
452 ; CHECK-LABEL: define i32 @not_valid_reduction(
453 ; CHECK-SAME: i32 [[N:%.*]], ptr noalias nocapture [[A:%.*]]) #[[ATTR1:[0-9]+]] {
454 ; CHECK-NEXT: [[ENTRY:.*]]:
455 ; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
456 ; CHECK-NEXT: br i1 [[CMP4]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
457 ; CHECK: [[FOR_BODY_PREHEADER]]:
458 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
459 ; CHECK: [[FOR_BODY]]:
460 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LATCH:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
461 ; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[TMP17:%.*]], %[[LATCH]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
462 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
463 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
464 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[INDVARS_IV]], 10
465 ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X_05]], [[TMP0]]
466 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16:.*]], label %[[BB10:.*]]
468 ; CHECK-NEXT: br label %[[BB16]]
470 ; CHECK-NEXT: [[TMP17]] = phi i32 [ 1, %[[BB10]] ], [ [[SUB]], %[[FOR_BODY]] ]
471 ; CHECK-NEXT: br label %[[LATCH]]
473 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
474 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
475 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
476 ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
477 ; CHECK: [[FOR_END_LOOPEXIT]]:
478 ; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[TMP17]], %[[LATCH]] ]
479 ; CHECK-NEXT: br label %[[FOR_END]]
480 ; CHECK: [[FOR_END]]:
481 ; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP17_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
482 ; CHECK-NEXT: ret i32 [[X_0_LCSSA]]
485 %cmp4 = icmp sgt i32 %n, 0
486 br i1 %cmp4, label %for.body, label %for.end
488 for.body: ; preds = %latch, %entry
489 %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
490 %x.05 = phi i32 [ %tmp17, %latch ], [ 0, %entry ]
491 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
492 %tmp0 = load i32, ptr %arrayidx, align 4
493 %tmp2 = icmp sgt i64 %indvars.iv, 10
494 %sub = sub nsw i32 %x.05, %tmp0
495 br i1 %tmp2, label %bb16, label %bb10
501 %tmp17 = phi i32 [ 1, %bb10 ], [ %sub, %for.body ]
505 %indvars.iv.next = add i64 %indvars.iv, 1
506 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
507 %exitcond = icmp eq i32 %lftr.wideiv, %n
508 br i1 %exitcond, label %for.end, label %for.body
510 for.end: ; preds = %latch, %entry
511 %x.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17 , %latch ]
515 define i8 @outside_user_non_phi() {
516 ; CHECK-LABEL: define i8 @outside_user_non_phi() {
517 ; CHECK-NEXT: [[BB:.*]]:
518 ; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
519 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
520 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
521 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
522 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
523 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]]
524 ; CHECK: [[VECTOR_PH]]:
525 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
526 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
527 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
528 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
529 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
530 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
531 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
532 ; CHECK: [[VECTOR_BODY]]:
533 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
534 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
535 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
536 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
537 ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[PREDPHI]] to <2 x i8>
538 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
539 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
540 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
541 ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
542 ; CHECK: [[MIDDLE_BLOCK]]:
543 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1
544 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
545 ; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
546 ; CHECK: [[_LR_PH_I]]:
547 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
548 ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
549 ; CHECK: [[_LR_PH_I1:.*:]]
550 ; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
551 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10
552 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]]
554 ; CHECK-NEXT: br label %[[BB16]]
556 ; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ 0, %[[BB10]] ], [ 1, %[[DOTLR_PH_I]] ]
557 ; CHECK-NEXT: [[TMP17_TRUNC:%.*]] = trunc i32 [[TMP17]] to i8
558 ; CHECK-NEXT: [[TMP18]] = add nsw i32 [[TMP8]], 1
559 ; CHECK-NEXT: [[TMP19:%.*]] = icmp slt i32 [[TMP18]], 4
560 ; CHECK-NEXT: br i1 [[TMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP11:![0-9]+]]
561 ; CHECK: [[F1_EXIT_LOOPEXIT]]:
562 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i8 [ [[TMP17_TRUNC]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
563 ; CHECK-NEXT: ret i8 [[DOTLCSSA]]
566 %b.promoted = load i32, ptr @b, align 4
570 %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
571 %tmp2 = icmp sgt i32 %tmp8, 10
572 br i1 %tmp2, label %bb16, label %bb10
578 %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
579 %tmp17.trunc = trunc i32 %tmp17 to i8
580 %tmp18 = add nsw i32 %tmp8, 1
581 %tmp19 = icmp slt i32 %tmp18, 4
582 br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
585 %.lcssa = phi i8 [ %tmp17.trunc, %bb16 ]
589 define i32 @no_vectorize_reduction_with_outside_use(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable readonly {
590 ; CHECK-LABEL: define i32 @no_vectorize_reduction_with_outside_use(
591 ; CHECK-SAME: i32 [[N:%.*]], ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR1]] {
592 ; CHECK-NEXT: [[ENTRY:.*]]:
593 ; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N]], 0
594 ; CHECK-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
595 ; CHECK: [[FOR_BODY_PREHEADER]]:
596 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
597 ; CHECK: [[FOR_BODY]]:
598 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
599 ; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
600 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
601 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
602 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
603 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
604 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
605 ; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]]
606 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
607 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
608 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
609 ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
610 ; CHECK: [[FOR_END_LOOPEXIT]]:
611 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP1]], %[[FOR_BODY]] ]
612 ; CHECK-NEXT: br label %[[FOR_END]]
613 ; CHECK: [[FOR_END]]:
614 ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTLCSSA]], %[[FOR_END_LOOPEXIT]] ]
615 ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
618 %cmp7 = icmp sgt i32 %n, 0
619 br i1 %cmp7, label %for.body, label %for.end
621 for.body: ; preds = %entry, %for.body
622 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
623 %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
624 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
625 %0 = load i32, ptr %arrayidx, align 4
626 %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
627 %1 = load i32, ptr %arrayidx2, align 4
628 %add = add nsw i32 %1, %0
629 %or = or i32 %add, %result.08
630 %indvars.iv.next = add i64 %indvars.iv, 1
631 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
632 %exitcond = icmp eq i32 %lftr.wideiv, %n
633 br i1 %exitcond, label %for.end, label %for.body
635 for.end: ; preds = %for.body, %entry
636 %result.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.body ]
637 ret i32 %result.0.lcssa
640 ; vectorize c[i] = a[i] + b[i] loop where result of c[i] is used outside the
642 define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) {
643 ; CHECK-LABEL: define i32 @sum_arrays_outside_use(
644 ; CHECK-SAME: ptr [[B:%.*]], ptr [[A:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
645 ; CHECK-NEXT: [[BB:.*]]:
646 ; CHECK-NEXT: [[A3:%.*]] = ptrtoint ptr [[A]] to i32
647 ; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i32
648 ; CHECK-NEXT: [[C1:%.*]] = ptrtoint ptr [[C]] to i32
649 ; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
650 ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[B_PROMOTED]], 1
651 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]])
652 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
653 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
654 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]]
655 ; CHECK: [[VECTOR_MEMCHECK]]:
656 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]]
657 ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8
658 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]]
659 ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8
660 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
661 ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]]
662 ; CHECK: [[VECTOR_PH]]:
663 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
664 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
665 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
666 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
667 ; CHECK: [[VECTOR_BODY]]:
668 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
669 ; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = add i32 [[B_PROMOTED]], [[INDEX]]
670 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX5]], 0
671 ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
672 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP5]]
673 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
674 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
675 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]]
676 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
677 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i32>, ptr [[TMP9]], align 4
678 ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD6]]
679 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP5]]
680 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
681 ; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr [[TMP12]], align 4
682 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
683 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
684 ; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
685 ; CHECK: [[MIDDLE_BLOCK]]:
686 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
687 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
688 ; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]]
689 ; CHECK: [[_LR_PH_I]]:
690 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ]
691 ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
692 ; CHECK: [[_LR_PH_I1:.*:]]
693 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ]
694 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64
695 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
696 ; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
697 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
698 ; CHECK-NEXT: [[ALOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
699 ; CHECK-NEXT: [[SUM:%.*]] = add nsw i32 [[BLOAD]], [[ALOAD]]
700 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
701 ; CHECK-NEXT: store i32 [[SUM]], ptr [[ARRAYIDX3]], align 4
702 ; CHECK-NEXT: [[IVNEXT]] = add nsw i32 [[IV]], 1
703 ; CHECK-NEXT: [[TMP19:%.*]] = icmp slt i32 [[IVNEXT]], [[N]]
704 ; CHECK-NEXT: br i1 [[TMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP13:![0-9]+]]
705 ; CHECK: [[F1_EXIT_LOOPEXIT]]:
706 ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[SUM]], %[[DOTLR_PH_I]] ], [ [[TMP14]], %[[MIDDLE_BLOCK]] ]
707 ; CHECK-NEXT: ret i32 [[DOTLCSSA]]
; Input IR for the assertions above. The induction variable %iv starts at the
; value loaded from @b (not at zero) and steps by 1; each iteration stores
; B[iv] + A[iv] to C[iv].
710 %b.promoted = load i32, ptr @b, align 4
714 %iv = phi i32 [ %ivnext, %.lr.ph.i ], [ %b.promoted, %bb ]
; The 32-bit IV is sign-extended to i64 before it is used as a GEP index.
715 %indvars.iv = sext i32 %iv to i64
716 %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
717 %Bload = load i32, ptr %arrayidx2, align 4
718 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
719 %Aload = load i32, ptr %arrayidx, align 4
720 %sum = add nsw i32 %Bload, %Aload
721 %arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
722 store i32 %sum, ptr %arrayidx3, align 4
723 %ivnext = add nsw i32 %iv, 1
; Signed compare on the incremented IV: stay in the loop while %ivnext < %N.
724 %tmp19 = icmp slt i32 %ivnext, %N
725 br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
; %sum is read after the loop through this phi. In the expected output the
; vector path supplies it from lane 1 of the widened add, extracted in the
; middle block.
728 %.lcssa = phi i32 [ %sum, %.lr.ph.i ]
; 32-byte zero-initialized global indexed by @non_uniform_live_out below.
732 @tab = common global [32 x i8] zeroinitializer, align 1
734 define i32 @non_uniform_live_out() {
735 ; CHECK-LABEL: define i32 @non_uniform_live_out() {
736 ; CHECK-NEXT: [[ENTRY:.*]]:
737 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
738 ; CHECK: [[VECTOR_PH]]:
739 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
740 ; CHECK: [[VECTOR_BODY]]:
741 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
742 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
743 ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 7)
744 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
745 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP1]]
746 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
747 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
748 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1)
749 ; CHECK-NEXT: store <2 x i8> [[TMP4]], ptr [[TMP3]], align 1
750 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
751 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
752 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000
753 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
754 ; CHECK: [[MIDDLE_BLOCK]]:
755 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
756 ; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
757 ; CHECK: [[SCALAR_PH]]:
758 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
759 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
760 ; CHECK: [[FOR_BODY]]:
761 ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
762 ; CHECK-NEXT: [[I_09:%.*]] = add i32 [[I_08]], 7
763 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_09]]
764 ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
765 ; CHECK-NEXT: [[BUMP:%.*]] = add i8 [[TMP7]], 1
766 ; CHECK-NEXT: store i8 [[BUMP]], ptr [[ARRAYIDX]], align 1
767 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
768 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 20000
769 ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
770 ; CHECK: [[FOR_END]]:
771 ; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[I_09]], %[[FOR_BODY]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
772 ; CHECK-NEXT: [[ARRAYIDX_OUT:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[LCSSA]]
773 ; CHECK-NEXT: store i8 42, ptr [[ARRAYIDX_OUT]], align 1
774 ; CHECK-NEXT: ret i32 0
; Input IR for the assertions above. Each iteration loads the byte at
; tab[i + 7] and stores %bump back to the same address; the index i + 7
; (%i.09) is also read after the loop.
779 for.body: ; preds = %for.body, %entry
780 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
781 %i.09 = add i32 %i.08, 7
782 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.09
783 %0 = load i8, ptr %arrayidx, align 1
785 store i8 %bump, ptr %arrayidx, align 1
786 %inc = add nsw i32 %i.08, 1
; The exit test compares the pre-increment value %i.08, not %inc.
787 %exitcond = icmp eq i32 %i.08, 20000
788 br i1 %exitcond, label %for.end, label %for.body
790 for.end: ; preds = %for.body
; %i.09 is derived from the induction variable, so it differs per vector lane.
; The expected output takes lane 1 of the widened add in the middle block as
; the vector path's incoming value for this phi.
791 %lcssa = phi i32 [%i.09, %for.body]
; The live-out index is then used to store 42 into @tab after the loop.
792 %arrayidx.out = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %lcssa
793 store i8 42, ptr %arrayidx.out, align 1
797 ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
798 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
799 ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
800 ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
801 ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
802 ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
803 ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
804 ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
805 ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
806 ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
807 ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
808 ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
809 ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
810 ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
811 ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
812 ; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}