; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;; Given the choice between a masked and unmasked variant for the same VF (4)
;; where no mask is required, make sure we choose the unmasked variant.
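;; (Here @foo has both an unmasked and a masked variant at VF 4; see
;; attributes #0 at the end of the file.)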
; CHECK-LABEL: LV: Checking a loop in 'test_v4_v4m'
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: REPLICATE ir<%call> = call @foo(ir<%load>)
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask)
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;; If we have a masked variant at one VF and an unmasked variant at a different
;; VF, ensure we create appropriate recipes (including a synthesized all-true
;; mask for the masked variant).
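;; (Here @foo has an unmasked variant at VF 2 and a masked variant at VF 4;
;; see attributes #1.)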
; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4m'
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask)
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>, ir<true>) (using library function: foo_vector_fixed4_mask)
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;; If we have two variants at different VFs, neither of which is masked, we
;; still expect to see a different vplan per VF.
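;; (Here @foo has unmasked variants at both VF 2 and VF 4; see attributes #2.)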
; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4'
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask)
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask)
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
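
;; All three kernels below implement the same scalar loop, roughly:
;;   for (i = 0; i < 1024; ++i) a[i] = foo(b[i]);
;; They differ only in which vector variants of @foo are made available
;; through the "vector-function-abi-variant" attributes at the end of the
;; file.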
define void @test_v4_v4m(ptr noalias %a, ptr readonly %b) #3 {
; CHECK-LABEL: @test_v4_v4m(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_nomask(<4 x i64> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
  %load = load i64, ptr %gep
  %call = call i64 @foo(i64 %load) #0
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %call, ptr %arrayidx
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
define void @test_v2_v4m(ptr noalias %a, ptr readonly %b) #3 {
; CHECK-LABEL: @test_v2_v4m(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_mask(<4 x i64> [[WIDE_LOAD]], <4 x i1> splat (i1 true))
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
  %load = load i64, ptr %gep
  %call = call i64 @foo(i64 %load) #1
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %call, ptr %arrayidx
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
define void @test_v2_v4(ptr noalias %a, ptr readonly %b) #3 {
; CHECK-LABEL: @test_v2_v4(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_nomask(<4 x i64> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gep = getelementptr i64, ptr %b, i64 %indvars.iv
  %load = load i64, ptr %gep
  %call = call i64 @foo(i64 %load) #2
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %call, ptr %arrayidx
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
declare i64 @foo(i64)

;; Fixed vector variants of @foo.
declare <2 x i64> @foo_vector_fixed2_nomask(<2 x i64>)
declare <4 x i64> @foo_vector_fixed4_nomask(<4 x i64>)
declare <4 x i64> @foo_vector_fixed4_mask(<4 x i64>, <4 x i1>)
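
;; The "vector-function-abi-variant" strings below follow the vector-function
;; ABI mangling _ZGV<isa><mask><vlen><params>_<scalarname>(<vectorname>):
;; "_LLVM_" is the ISA token, 'N' marks an unmasked and 'M' a masked variant,
;; the digit is the vectorization factor, and 'v' denotes a plain vector
;; parameter. So _ZGV_LLVM_N4v_foo(foo_vector_fixed4_nomask) maps @foo to an
;; unmasked VF-4 variant implemented by @foo_vector_fixed4_nomask.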
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N4v_foo(foo_vector_fixed4_nomask),_ZGV_LLVM_M4v_foo(foo_vector_fixed4_mask)" }
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_foo(foo_vector_fixed2_nomask),_ZGV_LLVM_M4v_foo(foo_vector_fixed4_mask)" }
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_foo(foo_vector_fixed2_nomask),_ZGV_LLVM_N4v_foo(foo_vector_fixed4_nomask)" }
attributes #3 = { "target-features"="+sve" vscale_range(2,16) "no-trapping-math"="false" }