2 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -disable-output -debug-only=loop-vectorize 2>&1 | FileCheck %s
4 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
6 ; Test cases for PR50009, which require sinking a replicate-region due to a
7 ; first-order recurrence.
; %rem uses the recurrence phi %0 before the recurrence's next value %conv is
; computed from the predicated load %lv. The expected plan therefore lists the
; pred.srem region after the pred.load region, the widened sext and the
; first-order splice.
9 define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
10 ; CHECK-LABEL: sink_replicate_region_1
11 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
12 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
14 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
16 ; CHECK-NEXT: vector.ph:
17 ; CHECK-NEXT: Successor(s): vector loop
19 ; CHECK-NEXT: <x1> vector loop: {
20 ; CHECK-NEXT: vector.body:
21 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
22 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv>
23 ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
24 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
25 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
26 ; CHECK-NEXT: Successor(s): loop.0
29 ; CHECK-NEXT: Successor(s): pred.load
31 ; CHECK-NEXT: <xVFxUF> pred.load: {
32 ; CHECK-NEXT: pred.load.entry:
33 ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
34 ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
36 ; CHECK-NEXT: pred.load.if:
37 ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
38 ; CHECK-NEXT: REPLICATE ir<%lv> = load ir<%gep> (S->V)
39 ; CHECK-NEXT: Successor(s): pred.load.continue
41 ; CHECK-NEXT: pred.load.continue:
42 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED1:%.+]]> = ir<%lv>
43 ; CHECK-NEXT: No successors
45 ; CHECK-NEXT: Successor(s): loop.1
48 ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED1]]>
49 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv>
50 ; CHECK-NEXT: Successor(s): pred.srem
52 ; CHECK-NEXT: <xVFxUF> pred.srem: {
53 ; CHECK-NEXT: pred.srem.entry:
54 ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
55 ; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
57 ; CHECK-NEXT: pred.srem.if:
58 ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> (S->V)
59 ; CHECK-NEXT: Successor(s): pred.srem.continue
61 ; CHECK-NEXT: pred.srem.continue:
62 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%rem>
63 ; CHECK-NEXT: No successors
65 ; CHECK-NEXT: Successor(s): loop.1.split
67 ; CHECK-NEXT: loop.1.split:
68 ; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<[[PRED2]]>
69 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]>
70 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
71 ; CHECK-NEXT: No successors
73 ; CHECK-NEXT: Successor(s): middle.block
75 ; CHECK-NEXT: middle.block:
76 ; CHECK-NEXT: No successors
83 %0 = phi i32 [ 0, %entry ], [ %conv, %loop ]
84 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; %rem reads the recurrence phi ahead of the definition of %conv below.
85 %rem = srem i32 %0, %x
86 %gep = getelementptr i8, i8* %ptr, i32 %iv
87 %lv = load i8, i8* %gep
88 %conv = sext i8 %lv to i32
89 %add = add i32 %conv, %rem
90 %iv.next = add nsw i32 %iv, 1
91 %ec = icmp eq i32 %iv.next, 20001
92 br i1 %ec, label %exit, label %loop
; %rem uses the recurrence phi %recur, whose next value %recur.next (a sext of
; the loop-invariant argument %y) is defined after it. The expected plan has
; the srem, add, getelementptr and store together in a single pred.store
; region that follows the first-order splice.
98 define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
99 ; CHECK-LABEL: sink_replicate_region_2
100 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
101 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
103 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
105 ; CHECK-NEXT: vector.ph:
106 ; CHECK-NEXT: Successor(s): vector loop
108 ; CHECK-NEXT: <x1> vector loop: {
109 ; CHECK-NEXT: vector.body:
110 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
111 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
112 ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
113 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
114 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
115 ; CHECK-NEXT: Successor(s): loop.0
117 ; CHECK-NEXT: loop.0:
118 ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
119 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
120 ; CHECK-NEXT: Successor(s): loop.0.split
122 ; CHECK-NEXT: loop.0.split:
123 ; CHECK-NEXT: Successor(s): pred.store
125 ; CHECK-NEXT: <xVFxUF> pred.store: {
126 ; CHECK-NEXT: pred.store.entry:
127 ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
128 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
130 ; CHECK-NEXT: pred.store.if:
131 ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
132 ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
133 ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
134 ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
135 ; CHECK-NEXT: Successor(s): pred.store.continue
137 ; CHECK-NEXT: pred.store.continue:
138 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem>
139 ; CHECK-NEXT: No successors
141 ; CHECK-NEXT: Successor(s): loop.1
143 ; CHECK-NEXT: loop.1:
144 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]>
145 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
146 ; CHECK-NEXT: No successors
148 ; CHECK-NEXT: Successor(s): middle.block
150 ; CHECK-NEXT: middle.block:
151 ; CHECK-NEXT: No successors
158 %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
159 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; %rem reads the recurrence phi ahead of the definition of %recur.next below.
160 %rem = srem i32 %recur, %x
161 %recur.next = sext i8 %y to i32
162 %add = add i32 %rem, %recur.next
163 %gep = getelementptr i32, i32* %ptr, i32 %iv
164 store i32 %add, i32* %gep
165 %iv.next = add nsw i32 %iv, 1
166 %ec = icmp eq i32 %iv.next, 20001
167 br i1 %ec, label %exit, label %loop
; Same recurrence shape as sink_replicate_region_2, but the result feeds an
; `and` reduction (%and.red) instead of a store. In the expected plan the mask
; is built from a WIDEN-CANONICAL-INDUCTION rather than a widened %iv, the
; pred.srem region follows the first-order splice, and the reduction update is
; wrapped in a select on the mask. %res is listed as a live-out.
173 define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize {
174 ; CHECK-LABEL: sink_replicate_region_3_reduction
175 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
176 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
178 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
180 ; CHECK-NEXT: vector.ph:
181 ; CHECK-NEXT: Successor(s): vector loop
183 ; CHECK-NEXT: <x1> vector loop: {
184 ; CHECK-NEXT: vector.body:
185 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
186 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
187 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
188 ; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]>
189 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]> vp<[[BTC]]>
190 ; CHECK-NEXT: Successor(s): loop.0
192 ; CHECK-NEXT: loop.0:
193 ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
194 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
195 ; CHECK-NEXT: Successor(s): pred.srem
197 ; CHECK-NEXT: <xVFxUF> pred.srem: {
198 ; CHECK-NEXT: pred.srem.entry:
199 ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
200 ; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
202 ; CHECK-NEXT: pred.srem.if:
203 ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> (S->V)
204 ; CHECK-NEXT: Successor(s): pred.srem.continue
206 ; CHECK-NEXT: pred.srem.continue:
207 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem>
208 ; CHECK-NEXT: No successors
210 ; CHECK-NEXT: Successor(s): loop.0.split
212 ; CHECK-NEXT: loop.0.split:
213 ; CHECK-NEXT: WIDEN ir<%add> = add vp<[[PRED]]>, ir<%recur.next>
214 ; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add>
215 ; CHECK-NEXT: EMIT vp<[[SEL:%.+]]> = select vp<[[MASK]]> ir<%and.red.next> ir<%and.red>
216 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]>
217 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
218 ; CHECK-NEXT: No successors
220 ; CHECK-NEXT: Successor(s): middle.block
222 ; CHECK-NEXT: middle.block:
223 ; CHECK-NEXT: No successors
225 ; CHECK-NEXT: Live-out i32 %res = ir<%and.red.next>
232 %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
233 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
234 %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ]
; %rem reads the recurrence phi ahead of the definition of %recur.next below.
235 %rem = srem i32 %recur, %x
236 %recur.next = sext i8 %y to i32
237 %add = add i32 %rem, %recur.next
238 %and.red.next = and i32 %and.red, %add
239 %iv.next = add nsw i32 %iv, 1
240 %ec = icmp eq i32 %iv.next, 20001
241 br i1 %ec, label %exit, label %loop
; Exit-block phi; it appears as the live-out of the expected plan.
244 %res = phi i32 [ %and.red.next, %loop ]
248 ; To sink the replicate region containing %rem, we need to split the block
249 ; containing %conv at the end, because %conv is the last recipe in the block.
; In the expected plan the first load %lv stays in the first pred.load region,
; while %rem shares a later region with the second load %lv.2; that region
; comes after loop.1.split, i.e. after the sext and the first-order splice.
250 define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8* %ptr) optsize {
251 ; CHECK-LABEL: sink_replicate_region_4_requires_split_at_end_of_block
252 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
253 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
255 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
257 ; CHECK-NEXT: vector.ph:
258 ; CHECK-NEXT: Successor(s): vector loop
260 ; CHECK-NEXT: <x1> vector loop: {
261 ; CHECK-NEXT: vector.body:
262 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
263 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv>
264 ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
265 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
266 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
267 ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
268 ; CHECK-NEXT: Successor(s): loop.0
270 ; CHECK-NEXT: loop.0:
271 ; CHECK-NEXT: Successor(s): pred.load
273 ; CHECK-NEXT: <xVFxUF> pred.load: {
274 ; CHECK-NEXT: pred.load.entry:
275 ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
276 ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
278 ; CHECK-NEXT: pred.load.if:
279 ; CHECK-NEXT: REPLICATE ir<%lv> = load ir<%gep> (S->V)
280 ; CHECK-NEXT: Successor(s): pred.load.continue
282 ; CHECK-NEXT: pred.load.continue:
283 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%lv>
284 ; CHECK-NEXT: No successors
286 ; CHECK-NEXT: Successor(s): loop.1
288 ; CHECK-NEXT: loop.1:
289 ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED]]>
290 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv>
291 ; CHECK-NEXT: Successor(s): loop.1.split
293 ; CHECK: loop.1.split:
294 ; CHECK-NEXT: Successor(s): pred.load
296 ; CHECK: <xVFxUF> pred.load: {
297 ; CHECK-NEXT: pred.load.entry:
298 ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
299 ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
301 ; CHECK: pred.load.if:
302 ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> (S->V)
303 ; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V)
304 ; CHECK-NEXT: Successor(s): pred.load.continue
306 ; CHECK: pred.load.continue:
307 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED1:%.+]]> = ir<%rem>
308 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%lv.2>
309 ; CHECK-NEXT: No successors
313 ; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<[[PRED1]]>
314 ; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<[[PRED2]]>
315 ; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2>
316 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]>
317 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
318 ; CHECK-NEXT: No successors
320 ; CHECK-NEXT: Successor(s): middle.block
322 ; CHECK-NEXT: middle.block:
323 ; CHECK-NEXT: No successors
330 %0 = phi i32 [ 0, %entry ], [ %conv, %loop ]
331 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
332 %gep = getelementptr i8, i8* %ptr, i32 %iv
; %rem reads the recurrence phi ahead of the definition of %conv below.
333 %rem = srem i32 %0, %x
334 %lv = load i8, i8* %gep
335 %conv = sext i8 %lv to i32
; Second load from the same address, placed after %conv.
336 %lv.2 = load i8, i8* %gep
337 %add.1 = add i32 %conv, %rem
338 %conv.lv.2 = sext i8 %lv.2 to i32
339 %add = add i32 %add.1, %conv.lv.2
340 %iv.next = add nsw i32 %iv, 1
341 %ec = icmp eq i32 %iv.next, 20001
342 br i1 %ec, label %exit, label %loop
348 ; Test case that requires sinking a recipe in a replicate region after another replicate region.
; %rem uses the recurrence phi %recur and feeds %rem.div, which is stored.
; The expected plan has a pred.srem region after the first-order splice,
; followed by a separate pred.store region holding the sdiv, getelementptr
; and store.
349 define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 %y) optsize {
350 ; CHECK-LABEL: sink_replicate_region_after_replicate_region
351 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
352 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
354 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
356 ; CHECK-NEXT: vector.ph:
357 ; CHECK-NEXT: Successor(s): vector loop
359 ; CHECK-NEXT: <x1> vector loop: {
360 ; CHECK-NEXT: vector.body:
361 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
362 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
363 ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
364 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
365 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
366 ; CHECK-NEXT: Successor(s): loop.0
368 ; CHECK-NEXT: loop.0:
369 ; CHECK-NEXT: Successor(s): loop.1
371 ; CHECK-NEXT: loop.1:
372 ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
373 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
374 ; CHECK-NEXT: Successor(s): pred.srem
376 ; CHECK-NEXT: <xVFxUF> pred.srem: {
377 ; CHECK-NEXT: pred.srem.entry:
378 ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
379 ; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
381 ; CHECK-NEXT: pred.srem.if:
382 ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
383 ; CHECK-NEXT: Successor(s): pred.srem.continue
385 ; CHECK-NEXT: pred.srem.continue:
386 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem>
387 ; CHECK-NEXT: No successors
389 ; CHECK-NEXT: Successor(s): loop.1.split
391 ; CHECK-NEXT: loop.1.split:
392 ; CHECK-NEXT: Successor(s): pred.store
394 ; CHECK-NEXT: <xVFxUF> pred.store: {
395 ; CHECK-NEXT: pred.store.entry:
396 ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
397 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
399 ; CHECK-NEXT: pred.store.if:
400 ; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<[[PRED]]>
401 ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
402 ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep>
403 ; CHECK-NEXT: Successor(s): pred.store.continue
405 ; CHECK-NEXT: pred.store.continue:
406 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%rem.div>
407 ; CHECK-NEXT: No successors
409 ; CHECK-NEXT: Successor(s): loop.2
411 ; CHECK-NEXT: loop.2:
412 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]>
413 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
414 ; CHECK-NEXT: No successors
416 ; CHECK-NEXT: Successor(s): middle.block
418 ; CHECK-NEXT: middle.block:
419 ; CHECK-NEXT: No successors
425 loop: ; preds = %loop, %entry
426 %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
427 %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; %rem reads the recurrence phi ahead of the definition of %recur.next below.
428 %rem = srem i32 %recur, %x
429 %rem.div = sdiv i32 20, %rem
430 %recur.next = sext i8 %y to i32
431 %gep = getelementptr i32, i32* %ptr, i32 %iv
432 store i32 %rem.div, i32* %gep
433 %iv.next = add nsw i32 %iv, 1
; The exit test compares against %recur.next rather than a constant.
434 %C = icmp sgt i32 %iv.next, %recur.next
435 br i1 %C, label %exit, label %loop
437 exit: ; preds = %loop