1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f -target-abi=lp64f \
; sink_splat_mul: fixed-length <4 x i32> loop over the memory at %a.
; Each iteration loads four i32 values, multiplies them by a splat of %x and
; stores the product back to the same address; %index advances by 4 until it
; reaches 1024.
; The expected assembly below uses the vector-scalar form (vmul.vx with a1)
; inside the loop and contains no separate splat instruction.
5 define void @sink_splat_mul(ptr nocapture %a, i32 signext %x) {
6 ; CHECK-LABEL: sink_splat_mul:
7 ; CHECK: # %bb.0: # %entry
8 ; CHECK-NEXT: lui a2, 1
9 ; CHECK-NEXT: add a2, a0, a2
10 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
11 ; CHECK-NEXT: .LBB0_1: # %vector.body
12 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
13 ; CHECK-NEXT: vle32.v v8, (a0)
14 ; CHECK-NEXT: vmul.vx v8, v8, a1
15 ; CHECK-NEXT: vse32.v v8, (a0)
16 ; CHECK-NEXT: addi a0, a0, 16
17 ; CHECK-NEXT: bne a0, a2, .LBB0_1
18 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. These two instructions sit before the loop block.
21 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
22 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
25 vector.body: ; preds = %vector.body, %entry
26 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
27 %0 = getelementptr inbounds i32, ptr %a, i64 %index
28 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the right-hand operand of the multiply.
29 %1 = mul <4 x i32> %wide.load, %broadcast.splat
30 store <4 x i32> %1, ptr %0, align 4
31 %index.next = add nuw i64 %index, 4
32 %2 = icmp eq i64 %index.next, 1024
33 br i1 %2, label %for.cond.cleanup, label %vector.body
35 for.cond.cleanup: ; preds = %vector.body
; sink_splat_add: fixed-length <4 x i32> loop over the memory at %a.
; Each iteration loads four i32 values, adds a splat of %x and stores the sum
; back to the same address; %index advances by 4 until it reaches 1024.
; The expected assembly below uses the vector-scalar form (vadd.vx with a1)
; inside the loop and contains no separate splat instruction.
39 define void @sink_splat_add(ptr nocapture %a, i32 signext %x) {
40 ; CHECK-LABEL: sink_splat_add:
41 ; CHECK: # %bb.0: # %entry
42 ; CHECK-NEXT: lui a2, 1
43 ; CHECK-NEXT: add a2, a0, a2
44 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
45 ; CHECK-NEXT: .LBB1_1: # %vector.body
46 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
47 ; CHECK-NEXT: vle32.v v8, (a0)
48 ; CHECK-NEXT: vadd.vx v8, v8, a1
49 ; CHECK-NEXT: vse32.v v8, (a0)
50 ; CHECK-NEXT: addi a0, a0, 16
51 ; CHECK-NEXT: bne a0, a2, .LBB1_1
52 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. These two instructions sit before the loop block.
55 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
56 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
59 vector.body: ; preds = %vector.body, %entry
60 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
61 %0 = getelementptr inbounds i32, ptr %a, i64 %index
62 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the right-hand operand of the add.
63 %1 = add <4 x i32> %wide.load, %broadcast.splat
64 store <4 x i32> %1, ptr %0, align 4
65 %index.next = add nuw i64 %index, 4
66 %2 = icmp eq i64 %index.next, 1024
67 br i1 %2, label %for.cond.cleanup, label %vector.body
69 for.cond.cleanup: ; preds = %vector.body
; sink_splat_sub: fixed-length <4 x i32> loop over the memory at %a.
; Each iteration loads four i32 values, subtracts a splat of %x from them and
; stores the difference back; %index advances by 4 until it reaches 1024.
; The expected assembly below uses the vector-scalar form (vsub.vx with a1)
; inside the loop and contains no separate splat instruction.
73 define void @sink_splat_sub(ptr nocapture %a, i32 signext %x) {
74 ; CHECK-LABEL: sink_splat_sub:
75 ; CHECK: # %bb.0: # %entry
76 ; CHECK-NEXT: lui a2, 1
77 ; CHECK-NEXT: add a2, a0, a2
78 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
79 ; CHECK-NEXT: .LBB2_1: # %vector.body
80 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
81 ; CHECK-NEXT: vle32.v v8, (a0)
82 ; CHECK-NEXT: vsub.vx v8, v8, a1
83 ; CHECK-NEXT: vse32.v v8, (a0)
84 ; CHECK-NEXT: addi a0, a0, 16
85 ; CHECK-NEXT: bne a0, a2, .LBB2_1
86 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. These two instructions sit before the loop block.
89 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
90 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
93 vector.body: ; preds = %vector.body, %entry
94 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
95 %0 = getelementptr inbounds i32, ptr %a, i64 %index
96 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the subtrahend (right-hand operand); compare with the rsub
; test, where the operands are swapped.
97 %1 = sub <4 x i32> %wide.load, %broadcast.splat
98 store <4 x i32> %1, ptr %0, align 4
99 %index.next = add nuw i64 %index, 4
100 %2 = icmp eq i64 %index.next, 1024
101 br i1 %2, label %for.cond.cleanup, label %vector.body
103 for.cond.cleanup: ; preds = %vector.body
; sink_splat_rsub: fixed-length <4 x i32> loop over the memory at %a.
; Each iteration loads four i32 values and computes splat(%x) - loaded value,
; i.e. the splat is the left-hand operand of the subtraction; %index advances
; by 4 until it reaches 1024.
; The expected assembly below uses the reverse-subtract vector-scalar form
; (vrsub.vx with a1) inside the loop and contains no separate splat
; instruction.
107 define void @sink_splat_rsub(ptr nocapture %a, i32 signext %x) {
108 ; CHECK-LABEL: sink_splat_rsub:
109 ; CHECK: # %bb.0: # %entry
110 ; CHECK-NEXT: lui a2, 1
111 ; CHECK-NEXT: add a2, a0, a2
112 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
113 ; CHECK-NEXT: .LBB3_1: # %vector.body
114 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
115 ; CHECK-NEXT: vle32.v v8, (a0)
116 ; CHECK-NEXT: vrsub.vx v8, v8, a1
117 ; CHECK-NEXT: vse32.v v8, (a0)
118 ; CHECK-NEXT: addi a0, a0, 16
119 ; CHECK-NEXT: bne a0, a2, .LBB3_1
120 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat is built before the branch into the loop.
123 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
124 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
125 br label %vector.body
127 vector.body: ; preds = %vector.body, %entry
128 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
129 %0 = getelementptr inbounds i32, ptr %a, i64 %index
130 %wide.load = load <4 x i32>, ptr %0, align 4
; Operand order is reversed relative to the plain sub test: the splat is the
; minuend here.
131 %1 = sub <4 x i32> %broadcast.splat, %wide.load
132 store <4 x i32> %1, ptr %0, align 4
133 %index.next = add nuw i64 %index, 4
134 %2 = icmp eq i64 %index.next, 1024
135 br i1 %2, label %for.cond.cleanup, label %vector.body
137 for.cond.cleanup: ; preds = %vector.body
; sink_splat_and: fixed-length <4 x i32> loop over the memory at %a.
; Each iteration loads four i32 values, ANDs them with a splat of %x and
; stores the result back; %index advances by 4 until it reaches 1024.
; The expected assembly below uses the vector-scalar form (vand.vx with a1)
; inside the loop and contains no separate splat instruction.
141 define void @sink_splat_and(ptr nocapture %a, i32 signext %x) {
142 ; CHECK-LABEL: sink_splat_and:
143 ; CHECK: # %bb.0: # %entry
144 ; CHECK-NEXT: lui a2, 1
145 ; CHECK-NEXT: add a2, a0, a2
146 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
147 ; CHECK-NEXT: .LBB4_1: # %vector.body
148 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
149 ; CHECK-NEXT: vle32.v v8, (a0)
150 ; CHECK-NEXT: vand.vx v8, v8, a1
151 ; CHECK-NEXT: vse32.v v8, (a0)
152 ; CHECK-NEXT: addi a0, a0, 16
153 ; CHECK-NEXT: bne a0, a2, .LBB4_1
154 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat is built before the branch into the loop.
157 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
158 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
159 br label %vector.body
161 vector.body: ; preds = %vector.body, %entry
162 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
163 %0 = getelementptr inbounds i32, ptr %a, i64 %index
164 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the right-hand operand of the AND.
165 %1 = and <4 x i32> %wide.load, %broadcast.splat
166 store <4 x i32> %1, ptr %0, align 4
167 %index.next = add nuw i64 %index, 4
168 %2 = icmp eq i64 %index.next, 1024
169 br i1 %2, label %for.cond.cleanup, label %vector.body
171 for.cond.cleanup: ; preds = %vector.body
; sink_splat_or: fixed-length <4 x i32> loop over the memory at %a.
; Each iteration loads four i32 values, ORs them with a splat of %x and
; stores the result back; %index advances by 4 until it reaches 1024.
; The expected assembly below uses the vector-scalar form (vor.vx with a1)
; inside the loop and contains no separate splat instruction.
175 define void @sink_splat_or(ptr nocapture %a, i32 signext %x) {
176 ; CHECK-LABEL: sink_splat_or:
177 ; CHECK: # %bb.0: # %entry
178 ; CHECK-NEXT: lui a2, 1
179 ; CHECK-NEXT: add a2, a0, a2
180 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
181 ; CHECK-NEXT: .LBB5_1: # %vector.body
182 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
183 ; CHECK-NEXT: vle32.v v8, (a0)
184 ; CHECK-NEXT: vor.vx v8, v8, a1
185 ; CHECK-NEXT: vse32.v v8, (a0)
186 ; CHECK-NEXT: addi a0, a0, 16
187 ; CHECK-NEXT: bne a0, a2, .LBB5_1
188 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat is built before the branch into the loop.
191 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
192 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
193 br label %vector.body
195 vector.body: ; preds = %vector.body, %entry
196 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
197 %0 = getelementptr inbounds i32, ptr %a, i64 %index
198 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the right-hand operand of the OR.
199 %1 = or <4 x i32> %wide.load, %broadcast.splat
200 store <4 x i32> %1, ptr %0, align 4
201 %index.next = add nuw i64 %index, 4
202 %2 = icmp eq i64 %index.next, 1024
203 br i1 %2, label %for.cond.cleanup, label %vector.body
205 for.cond.cleanup: ; preds = %vector.body
; sink_splat_xor: fixed-length <4 x i32> loop over the memory at %a.
; Each iteration loads four i32 values, XORs them with a splat of %x and
; stores the result back; %index advances by 4 until it reaches 1024.
; The expected assembly below uses the vector-scalar form (vxor.vx with a1)
; inside the loop and contains no separate splat instruction.
209 define void @sink_splat_xor(ptr nocapture %a, i32 signext %x) {
210 ; CHECK-LABEL: sink_splat_xor:
211 ; CHECK: # %bb.0: # %entry
212 ; CHECK-NEXT: lui a2, 1
213 ; CHECK-NEXT: add a2, a0, a2
214 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
215 ; CHECK-NEXT: .LBB6_1: # %vector.body
216 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
217 ; CHECK-NEXT: vle32.v v8, (a0)
218 ; CHECK-NEXT: vxor.vx v8, v8, a1
219 ; CHECK-NEXT: vse32.v v8, (a0)
220 ; CHECK-NEXT: addi a0, a0, 16
221 ; CHECK-NEXT: bne a0, a2, .LBB6_1
222 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat is built before the branch into the loop.
225 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
226 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
227 br label %vector.body
229 vector.body: ; preds = %vector.body, %entry
230 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
231 %0 = getelementptr inbounds i32, ptr %a, i64 %index
232 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the right-hand operand of the XOR.
233 %1 = xor <4 x i32> %wide.load, %broadcast.splat
234 store <4 x i32> %1, ptr %0, align 4
235 %index.next = add nuw i64 %index, 4
236 %2 = icmp eq i64 %index.next, 1024
237 br i1 %2, label %for.cond.cleanup, label %vector.body
239 for.cond.cleanup: ; preds = %vector.body
; sink_splat_mul_scalable: scalable-vector (<vscale x 4 x i32>) variant of the
; multiply-by-splat loop, laid out as a vector loop plus a scalar remainder:
;   entry              - goes straight to the scalar loop when %1 > 1024
;                        (unsigned), otherwise to vector.ph
;   vector.ph          - computes %n.vec = 1024 - (1024 urem %3) and builds
;                        the splat of %x
;   vector.body        - load / multiply by the splat / store, stepping %index
;                        by %5 until it equals %n.vec
;   middle.block       - skips the scalar loop when the remainder %n.mod.vf is 0
;   for.body.preheader - starting index is 0 (from entry) or %n.vec
;   for.body           - scalar multiply by %x, one element per iteration, up
;                        to index 1024
; The expected assembly below uses vmul.vx with a1 inside the vector loop and
; a scalar mul in the remainder loop.
; NOTE(review): %1, %3 and %5 are used but their defining instructions are
; not visible in this view; presumably each is derived from the adjacent
; llvm.vscale call - confirm against the full file.
243 define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
244 ; CHECK-LABEL: sink_splat_mul_scalable:
245 ; CHECK: # %bb.0: # %entry
246 ; CHECK-NEXT: csrr a5, vlenb
247 ; CHECK-NEXT: srli a2, a5, 1
248 ; CHECK-NEXT: li a3, 1024
249 ; CHECK-NEXT: bgeu a3, a2, .LBB7_2
250 ; CHECK-NEXT: # %bb.1:
251 ; CHECK-NEXT: li a3, 0
252 ; CHECK-NEXT: j .LBB7_5
253 ; CHECK-NEXT: .LBB7_2: # %vector.ph
254 ; CHECK-NEXT: addi a3, a2, -1
255 ; CHECK-NEXT: andi a4, a3, 1024
256 ; CHECK-NEXT: xori a3, a4, 1024
257 ; CHECK-NEXT: slli a5, a5, 1
258 ; CHECK-NEXT: mv a6, a0
259 ; CHECK-NEXT: mv a7, a3
260 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
261 ; CHECK-NEXT: .LBB7_3: # %vector.body
262 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
263 ; CHECK-NEXT: vl2re32.v v8, (a6)
264 ; CHECK-NEXT: vmul.vx v8, v8, a1
265 ; CHECK-NEXT: vs2r.v v8, (a6)
266 ; CHECK-NEXT: sub a7, a7, a2
267 ; CHECK-NEXT: add a6, a6, a5
268 ; CHECK-NEXT: bnez a7, .LBB7_3
269 ; CHECK-NEXT: # %bb.4: # %middle.block
270 ; CHECK-NEXT: beqz a4, .LBB7_7
271 ; CHECK-NEXT: .LBB7_5: # %for.body.preheader
272 ; CHECK-NEXT: slli a2, a3, 2
273 ; CHECK-NEXT: add a2, a0, a2
274 ; CHECK-NEXT: lui a3, 1
275 ; CHECK-NEXT: add a0, a0, a3
276 ; CHECK-NEXT: .LBB7_6: # %for.body
277 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
278 ; CHECK-NEXT: lw a3, 0(a2)
279 ; CHECK-NEXT: mul a3, a3, a1
280 ; CHECK-NEXT: sw a3, 0(a2)
281 ; CHECK-NEXT: addi a2, a2, 4
282 ; CHECK-NEXT: bne a2, a0, .LBB7_6
283 ; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
286 %0 = call i64 @llvm.vscale.i64()
288 %min.iters.check = icmp ugt i64 %1, 1024
289 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
291 vector.ph: ; preds = %entry
292 %2 = call i64 @llvm.vscale.i64()
294 %n.mod.vf = urem i64 1024, %3
295 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat lives in vector.ph, outside the loop that uses it.
296 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
297 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
298 %4 = call i64 @llvm.vscale.i64()
300 br label %vector.body
302 vector.body: ; preds = %vector.body, %vector.ph
303 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
304 %6 = getelementptr inbounds i32, ptr %a, i64 %index
305 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the right-hand operand of the vector multiply.
306 %7 = mul <vscale x 4 x i32> %wide.load, %broadcast.splat
307 store <vscale x 4 x i32> %7, ptr %6, align 4
308 %index.next = add nuw i64 %index, %5
309 %8 = icmp eq i64 %index.next, %n.vec
310 br i1 %8, label %middle.block, label %vector.body
312 middle.block: ; preds = %vector.body
313 %cmp.n = icmp eq i64 %n.mod.vf, 0
314 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
316 for.body.preheader: ; preds = %entry, %middle.block
317 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
320 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: same operation as the vector body, one i32 at a time.
323 for.body: ; preds = %for.body.preheader, %for.body
324 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
325 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
326 %9 = load i32, ptr %arrayidx, align 4
327 %mul = mul i32 %9, %x
328 store i32 %mul, ptr %arrayidx, align 4
329 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
330 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
331 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_add_scalable: scalable-vector (<vscale x 4 x i32>) variant of the
; add-a-splat loop, laid out as a vector loop plus a scalar remainder:
;   entry              - goes straight to the scalar loop when %1 > 1024
;                        (unsigned), otherwise to vector.ph
;   vector.ph          - computes %n.vec = 1024 - (1024 urem %3) and builds
;                        the splat of %x
;   vector.body        - load / add the splat / store, stepping %index by %5
;                        until it equals %n.vec
;   middle.block       - skips the scalar loop when the remainder %n.mod.vf is 0
;   for.body.preheader - starting index is 0 (from entry) or %n.vec
;   for.body           - scalar add of %x, one element per iteration, up to
;                        index 1024
; The expected assembly below uses vadd.vx with a1 inside the vector loop and
; a scalar add in the remainder loop.
; NOTE(review): %1, %3 and %5 are used but their defining instructions are
; not visible in this view; presumably each is derived from the adjacent
; llvm.vscale call - confirm against the full file.
334 define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
335 ; CHECK-LABEL: sink_splat_add_scalable:
336 ; CHECK: # %bb.0: # %entry
337 ; CHECK-NEXT: csrr a5, vlenb
338 ; CHECK-NEXT: srli a2, a5, 1
339 ; CHECK-NEXT: li a3, 1024
340 ; CHECK-NEXT: bgeu a3, a2, .LBB8_2
341 ; CHECK-NEXT: # %bb.1:
342 ; CHECK-NEXT: li a3, 0
343 ; CHECK-NEXT: j .LBB8_5
344 ; CHECK-NEXT: .LBB8_2: # %vector.ph
345 ; CHECK-NEXT: addi a3, a2, -1
346 ; CHECK-NEXT: andi a4, a3, 1024
347 ; CHECK-NEXT: xori a3, a4, 1024
348 ; CHECK-NEXT: slli a5, a5, 1
349 ; CHECK-NEXT: mv a6, a0
350 ; CHECK-NEXT: mv a7, a3
351 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
352 ; CHECK-NEXT: .LBB8_3: # %vector.body
353 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
354 ; CHECK-NEXT: vl2re32.v v8, (a6)
355 ; CHECK-NEXT: vadd.vx v8, v8, a1
356 ; CHECK-NEXT: vs2r.v v8, (a6)
357 ; CHECK-NEXT: sub a7, a7, a2
358 ; CHECK-NEXT: add a6, a6, a5
359 ; CHECK-NEXT: bnez a7, .LBB8_3
360 ; CHECK-NEXT: # %bb.4: # %middle.block
361 ; CHECK-NEXT: beqz a4, .LBB8_7
362 ; CHECK-NEXT: .LBB8_5: # %for.body.preheader
363 ; CHECK-NEXT: slli a2, a3, 2
364 ; CHECK-NEXT: add a2, a0, a2
365 ; CHECK-NEXT: lui a3, 1
366 ; CHECK-NEXT: add a0, a0, a3
367 ; CHECK-NEXT: .LBB8_6: # %for.body
368 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
369 ; CHECK-NEXT: lw a3, 0(a2)
370 ; CHECK-NEXT: add a3, a3, a1
371 ; CHECK-NEXT: sw a3, 0(a2)
372 ; CHECK-NEXT: addi a2, a2, 4
373 ; CHECK-NEXT: bne a2, a0, .LBB8_6
374 ; CHECK-NEXT: .LBB8_7: # %for.cond.cleanup
377 %0 = call i64 @llvm.vscale.i64()
379 %min.iters.check = icmp ugt i64 %1, 1024
380 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
382 vector.ph: ; preds = %entry
383 %2 = call i64 @llvm.vscale.i64()
385 %n.mod.vf = urem i64 1024, %3
386 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat lives in vector.ph, outside the loop that uses it.
387 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
388 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
389 %4 = call i64 @llvm.vscale.i64()
391 br label %vector.body
393 vector.body: ; preds = %vector.body, %vector.ph
394 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
395 %6 = getelementptr inbounds i32, ptr %a, i64 %index
396 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the right-hand operand of the vector add.
397 %7 = add <vscale x 4 x i32> %wide.load, %broadcast.splat
398 store <vscale x 4 x i32> %7, ptr %6, align 4
399 %index.next = add nuw i64 %index, %5
400 %8 = icmp eq i64 %index.next, %n.vec
401 br i1 %8, label %middle.block, label %vector.body
403 middle.block: ; preds = %vector.body
404 %cmp.n = icmp eq i64 %n.mod.vf, 0
405 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
407 for.body.preheader: ; preds = %entry, %middle.block
408 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
411 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: same operation as the vector body, one i32 at a time.
414 for.body: ; preds = %for.body.preheader, %for.body
415 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
416 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
417 %9 = load i32, ptr %arrayidx, align 4
418 %add = add i32 %9, %x
419 store i32 %add, ptr %arrayidx, align 4
420 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
421 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
422 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_sub_scalable: scalable-vector (<vscale x 4 x i32>) variant of the
; subtract-a-splat loop, laid out as a vector loop plus a scalar remainder:
;   entry              - goes straight to the scalar loop when %1 > 1024
;                        (unsigned), otherwise to vector.ph
;   vector.ph          - computes %n.vec = 1024 - (1024 urem %3) and builds
;                        the splat of %x
;   vector.body        - load / subtract the splat / store, stepping %index by
;                        %5 until it equals %n.vec
;   middle.block       - skips the scalar loop when the remainder %n.mod.vf is 0
;   for.body.preheader - starting index is 0 (from entry) or %n.vec
;   for.body           - scalar loop, one element per iteration, up to index
;                        1024
; The expected assembly below uses vsub.vx with a1 inside the vector loop.
; NOTE(review): the scalar remainder loop performs an add of %x, not a sub, so
; it does not mirror the vector body; the expected scalar instruction is an
; add accordingly. This looks like a copy-paste leftover - confirm whether it
; is intentional before regenerating the assertions.
; NOTE(review): %1, %3 and %5 are used but their defining instructions are
; not visible in this view; presumably each is derived from the adjacent
; llvm.vscale call - confirm against the full file.
425 define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
426 ; CHECK-LABEL: sink_splat_sub_scalable:
427 ; CHECK: # %bb.0: # %entry
428 ; CHECK-NEXT: csrr a5, vlenb
429 ; CHECK-NEXT: srli a2, a5, 1
430 ; CHECK-NEXT: li a3, 1024
431 ; CHECK-NEXT: bgeu a3, a2, .LBB9_2
432 ; CHECK-NEXT: # %bb.1:
433 ; CHECK-NEXT: li a3, 0
434 ; CHECK-NEXT: j .LBB9_5
435 ; CHECK-NEXT: .LBB9_2: # %vector.ph
436 ; CHECK-NEXT: addi a3, a2, -1
437 ; CHECK-NEXT: andi a4, a3, 1024
438 ; CHECK-NEXT: xori a3, a4, 1024
439 ; CHECK-NEXT: slli a5, a5, 1
440 ; CHECK-NEXT: mv a6, a0
441 ; CHECK-NEXT: mv a7, a3
442 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
443 ; CHECK-NEXT: .LBB9_3: # %vector.body
444 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
445 ; CHECK-NEXT: vl2re32.v v8, (a6)
446 ; CHECK-NEXT: vsub.vx v8, v8, a1
447 ; CHECK-NEXT: vs2r.v v8, (a6)
448 ; CHECK-NEXT: sub a7, a7, a2
449 ; CHECK-NEXT: add a6, a6, a5
450 ; CHECK-NEXT: bnez a7, .LBB9_3
451 ; CHECK-NEXT: # %bb.4: # %middle.block
452 ; CHECK-NEXT: beqz a4, .LBB9_7
453 ; CHECK-NEXT: .LBB9_5: # %for.body.preheader
454 ; CHECK-NEXT: slli a2, a3, 2
455 ; CHECK-NEXT: add a2, a0, a2
456 ; CHECK-NEXT: lui a3, 1
457 ; CHECK-NEXT: add a0, a0, a3
458 ; CHECK-NEXT: .LBB9_6: # %for.body
459 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
460 ; CHECK-NEXT: lw a3, 0(a2)
461 ; CHECK-NEXT: add a3, a3, a1
462 ; CHECK-NEXT: sw a3, 0(a2)
463 ; CHECK-NEXT: addi a2, a2, 4
464 ; CHECK-NEXT: bne a2, a0, .LBB9_6
465 ; CHECK-NEXT: .LBB9_7: # %for.cond.cleanup
468 %0 = call i64 @llvm.vscale.i64()
470 %min.iters.check = icmp ugt i64 %1, 1024
471 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
473 vector.ph: ; preds = %entry
474 %2 = call i64 @llvm.vscale.i64()
476 %n.mod.vf = urem i64 1024, %3
477 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat lives in vector.ph, outside the loop that uses it.
478 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
479 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
480 %4 = call i64 @llvm.vscale.i64()
482 br label %vector.body
484 vector.body: ; preds = %vector.body, %vector.ph
485 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
486 %6 = getelementptr inbounds i32, ptr %a, i64 %index
487 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the subtrahend (right-hand operand) of the vector sub.
488 %7 = sub <vscale x 4 x i32> %wide.load, %broadcast.splat
489 store <vscale x 4 x i32> %7, ptr %6, align 4
490 %index.next = add nuw i64 %index, %5
491 %8 = icmp eq i64 %index.next, %n.vec
492 br i1 %8, label %middle.block, label %vector.body
494 middle.block: ; preds = %vector.body
495 %cmp.n = icmp eq i64 %n.mod.vf, 0
496 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
498 for.body.preheader: ; preds = %entry, %middle.block
499 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
502 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop. Note the operation here is add, unlike the vector
; body above (see the review note in the function header).
505 for.body: ; preds = %for.body.preheader, %for.body
506 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
507 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
508 %9 = load i32, ptr %arrayidx, align 4
509 %add = add i32 %9, %x
510 store i32 %add, ptr %arrayidx, align 4
511 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
512 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
513 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_rsub_scalable: scalable-vector (<vscale x 4 x i32>) variant of
; the reverse-subtract loop (splat(%x) - loaded value), laid out as a vector
; loop plus a scalar remainder:
;   entry              - goes straight to the scalar loop when %1 > 1024
;                        (unsigned), otherwise to vector.ph
;   vector.ph          - computes %n.vec = 1024 - (1024 urem %3) and builds
;                        the splat of %x
;   vector.body        - load / subtract from the splat / store, stepping
;                        %index by %5 until it equals %n.vec
;   middle.block       - skips the scalar loop when the remainder %n.mod.vf is 0
;   for.body.preheader - starting index is 0 (from entry) or %n.vec
;   for.body           - scalar %x - element, one element per iteration, up to
;                        index 1024
; The expected assembly below uses vrsub.vx with a1 inside the vector loop and
; a scalar subw with a1 as the first source in the remainder loop.
; NOTE(review): %1, %3 and %5 are used but their defining instructions are
; not visible in this view; presumably each is derived from the adjacent
; llvm.vscale call - confirm against the full file.
516 define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
517 ; CHECK-LABEL: sink_splat_rsub_scalable:
518 ; CHECK: # %bb.0: # %entry
519 ; CHECK-NEXT: csrr a5, vlenb
520 ; CHECK-NEXT: srli a2, a5, 1
521 ; CHECK-NEXT: li a3, 1024
522 ; CHECK-NEXT: bgeu a3, a2, .LBB10_2
523 ; CHECK-NEXT: # %bb.1:
524 ; CHECK-NEXT: li a3, 0
525 ; CHECK-NEXT: j .LBB10_5
526 ; CHECK-NEXT: .LBB10_2: # %vector.ph
527 ; CHECK-NEXT: addi a3, a2, -1
528 ; CHECK-NEXT: andi a4, a3, 1024
529 ; CHECK-NEXT: xori a3, a4, 1024
530 ; CHECK-NEXT: slli a5, a5, 1
531 ; CHECK-NEXT: mv a6, a0
532 ; CHECK-NEXT: mv a7, a3
533 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
534 ; CHECK-NEXT: .LBB10_3: # %vector.body
535 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
536 ; CHECK-NEXT: vl2re32.v v8, (a6)
537 ; CHECK-NEXT: vrsub.vx v8, v8, a1
538 ; CHECK-NEXT: vs2r.v v8, (a6)
539 ; CHECK-NEXT: sub a7, a7, a2
540 ; CHECK-NEXT: add a6, a6, a5
541 ; CHECK-NEXT: bnez a7, .LBB10_3
542 ; CHECK-NEXT: # %bb.4: # %middle.block
543 ; CHECK-NEXT: beqz a4, .LBB10_7
544 ; CHECK-NEXT: .LBB10_5: # %for.body.preheader
545 ; CHECK-NEXT: slli a2, a3, 2
546 ; CHECK-NEXT: add a2, a0, a2
547 ; CHECK-NEXT: lui a3, 1
548 ; CHECK-NEXT: add a0, a0, a3
549 ; CHECK-NEXT: .LBB10_6: # %for.body
550 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
551 ; CHECK-NEXT: lw a3, 0(a2)
552 ; CHECK-NEXT: subw a3, a1, a3
553 ; CHECK-NEXT: sw a3, 0(a2)
554 ; CHECK-NEXT: addi a2, a2, 4
555 ; CHECK-NEXT: bne a2, a0, .LBB10_6
556 ; CHECK-NEXT: .LBB10_7: # %for.cond.cleanup
559 %0 = call i64 @llvm.vscale.i64()
561 %min.iters.check = icmp ugt i64 %1, 1024
562 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
564 vector.ph: ; preds = %entry
565 %2 = call i64 @llvm.vscale.i64()
567 %n.mod.vf = urem i64 1024, %3
568 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat lives in vector.ph, outside the loop that uses it.
569 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
570 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
571 %4 = call i64 @llvm.vscale.i64()
573 br label %vector.body
575 vector.body: ; preds = %vector.body, %vector.ph
576 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
577 %6 = getelementptr inbounds i32, ptr %a, i64 %index
578 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; Operand order is reversed relative to the plain sub test: the splat is the
; minuend here.
579 %7 = sub <vscale x 4 x i32> %broadcast.splat, %wide.load
580 store <vscale x 4 x i32> %7, ptr %6, align 4
581 %index.next = add nuw i64 %index, %5
582 %8 = icmp eq i64 %index.next, %n.vec
583 br i1 %8, label %middle.block, label %vector.body
585 middle.block: ; preds = %vector.body
586 %cmp.n = icmp eq i64 %n.mod.vf, 0
587 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
589 for.body.preheader: ; preds = %entry, %middle.block
590 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
593 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: same operation as the vector body, one i32 at a time.
; Despite its name, %add holds the difference %x - %9.
596 for.body: ; preds = %for.body.preheader, %for.body
597 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
598 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
599 %9 = load i32, ptr %arrayidx, align 4
600 %add = sub i32 %x, %9
601 store i32 %add, ptr %arrayidx, align 4
602 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
603 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
604 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_and_scalable: scalable-vector (<vscale x 4 x i32>) variant of the
; AND-with-splat loop, laid out as a vector loop plus a scalar remainder:
;   entry              - goes straight to the scalar loop when %1 > 1024
;                        (unsigned), otherwise to vector.ph
;   vector.ph          - computes %n.vec = 1024 - (1024 urem %3) and builds
;                        the splat of %x
;   vector.body        - load / AND with the splat / store, stepping %index by
;                        %5 until it equals %n.vec
;   middle.block       - skips the scalar loop when the remainder %n.mod.vf is 0
;   for.body.preheader - starting index is 0 (from entry) or %n.vec
;   for.body           - scalar AND with %x, one element per iteration, up to
;                        index 1024
; The expected assembly below uses vand.vx with a1 inside the vector loop and
; a scalar and in the remainder loop.
; NOTE(review): %1, %3 and %5 are used but their defining instructions are
; not visible in this view; presumably each is derived from the adjacent
; llvm.vscale call - confirm against the full file.
607 define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
608 ; CHECK-LABEL: sink_splat_and_scalable:
609 ; CHECK: # %bb.0: # %entry
610 ; CHECK-NEXT: csrr a5, vlenb
611 ; CHECK-NEXT: srli a2, a5, 1
612 ; CHECK-NEXT: li a3, 1024
613 ; CHECK-NEXT: bgeu a3, a2, .LBB11_2
614 ; CHECK-NEXT: # %bb.1:
615 ; CHECK-NEXT: li a3, 0
616 ; CHECK-NEXT: j .LBB11_5
617 ; CHECK-NEXT: .LBB11_2: # %vector.ph
618 ; CHECK-NEXT: addi a3, a2, -1
619 ; CHECK-NEXT: andi a4, a3, 1024
620 ; CHECK-NEXT: xori a3, a4, 1024
621 ; CHECK-NEXT: slli a5, a5, 1
622 ; CHECK-NEXT: mv a6, a0
623 ; CHECK-NEXT: mv a7, a3
624 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
625 ; CHECK-NEXT: .LBB11_3: # %vector.body
626 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
627 ; CHECK-NEXT: vl2re32.v v8, (a6)
628 ; CHECK-NEXT: vand.vx v8, v8, a1
629 ; CHECK-NEXT: vs2r.v v8, (a6)
630 ; CHECK-NEXT: sub a7, a7, a2
631 ; CHECK-NEXT: add a6, a6, a5
632 ; CHECK-NEXT: bnez a7, .LBB11_3
633 ; CHECK-NEXT: # %bb.4: # %middle.block
634 ; CHECK-NEXT: beqz a4, .LBB11_7
635 ; CHECK-NEXT: .LBB11_5: # %for.body.preheader
636 ; CHECK-NEXT: slli a2, a3, 2
637 ; CHECK-NEXT: add a2, a0, a2
638 ; CHECK-NEXT: lui a3, 1
639 ; CHECK-NEXT: add a0, a0, a3
640 ; CHECK-NEXT: .LBB11_6: # %for.body
641 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
642 ; CHECK-NEXT: lw a3, 0(a2)
643 ; CHECK-NEXT: and a3, a3, a1
644 ; CHECK-NEXT: sw a3, 0(a2)
645 ; CHECK-NEXT: addi a2, a2, 4
646 ; CHECK-NEXT: bne a2, a0, .LBB11_6
647 ; CHECK-NEXT: .LBB11_7: # %for.cond.cleanup
650 %0 = call i64 @llvm.vscale.i64()
652 %min.iters.check = icmp ugt i64 %1, 1024
653 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
655 vector.ph: ; preds = %entry
656 %2 = call i64 @llvm.vscale.i64()
658 %n.mod.vf = urem i64 1024, %3
659 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat lives in vector.ph, outside the loop that uses it.
660 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
661 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
662 %4 = call i64 @llvm.vscale.i64()
664 br label %vector.body
666 vector.body: ; preds = %vector.body, %vector.ph
667 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
668 %6 = getelementptr inbounds i32, ptr %a, i64 %index
669 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the right-hand operand of the vector AND.
670 %7 = and <vscale x 4 x i32> %wide.load, %broadcast.splat
671 store <vscale x 4 x i32> %7, ptr %6, align 4
672 %index.next = add nuw i64 %index, %5
673 %8 = icmp eq i64 %index.next, %n.vec
674 br i1 %8, label %middle.block, label %vector.body
676 middle.block: ; preds = %vector.body
677 %cmp.n = icmp eq i64 %n.mod.vf, 0
678 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
680 for.body.preheader: ; preds = %entry, %middle.block
681 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
684 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: same operation as the vector body, one i32 at a time.
687 for.body: ; preds = %for.body.preheader, %for.body
688 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
689 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
690 %9 = load i32, ptr %arrayidx, align 4
691 %and = and i32 %9, %x
692 store i32 %and, ptr %arrayidx, align 4
693 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
694 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
695 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_or_scalable: scalable-vector (<vscale x 4 x i32>) variant of the
; OR-with-splat loop, laid out as a vector loop plus a scalar remainder:
;   entry              - goes straight to the scalar loop when %1 > 1024
;                        (unsigned), otherwise to vector.ph
;   vector.ph          - computes %n.vec = 1024 - (1024 urem %3) and builds
;                        the splat of %x
;   vector.body        - load / OR with the splat / store, stepping %index by
;                        %5 until it equals %n.vec
;   middle.block       - skips the scalar loop when the remainder %n.mod.vf is 0
;   for.body.preheader - starting index is 0 (from entry) or %n.vec
;   for.body           - scalar loop, one element per iteration, up to index
;                        1024
; The expected assembly below uses vor.vx with a1 inside the vector loop and
; a scalar or in the remainder loop.
; NOTE(review): %1, %3 and %5 are used but their defining instructions are
; not visible in this view; presumably each is derived from the adjacent
; llvm.vscale call - confirm against the full file.
698 define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
699 ; CHECK-LABEL: sink_splat_or_scalable:
700 ; CHECK: # %bb.0: # %entry
701 ; CHECK-NEXT: csrr a5, vlenb
702 ; CHECK-NEXT: srli a2, a5, 1
703 ; CHECK-NEXT: li a3, 1024
704 ; CHECK-NEXT: bgeu a3, a2, .LBB12_2
705 ; CHECK-NEXT: # %bb.1:
706 ; CHECK-NEXT: li a3, 0
707 ; CHECK-NEXT: j .LBB12_5
708 ; CHECK-NEXT: .LBB12_2: # %vector.ph
709 ; CHECK-NEXT: addi a3, a2, -1
710 ; CHECK-NEXT: andi a4, a3, 1024
711 ; CHECK-NEXT: xori a3, a4, 1024
712 ; CHECK-NEXT: slli a5, a5, 1
713 ; CHECK-NEXT: mv a6, a0
714 ; CHECK-NEXT: mv a7, a3
715 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
716 ; CHECK-NEXT: .LBB12_3: # %vector.body
717 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
718 ; CHECK-NEXT: vl2re32.v v8, (a6)
719 ; CHECK-NEXT: vor.vx v8, v8, a1
720 ; CHECK-NEXT: vs2r.v v8, (a6)
721 ; CHECK-NEXT: sub a7, a7, a2
722 ; CHECK-NEXT: add a6, a6, a5
723 ; CHECK-NEXT: bnez a7, .LBB12_3
724 ; CHECK-NEXT: # %bb.4: # %middle.block
725 ; CHECK-NEXT: beqz a4, .LBB12_7
726 ; CHECK-NEXT: .LBB12_5: # %for.body.preheader
727 ; CHECK-NEXT: slli a2, a3, 2
728 ; CHECK-NEXT: add a2, a0, a2
729 ; CHECK-NEXT: lui a3, 1
730 ; CHECK-NEXT: add a0, a0, a3
731 ; CHECK-NEXT: .LBB12_6: # %for.body
732 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
733 ; CHECK-NEXT: lw a3, 0(a2)
734 ; CHECK-NEXT: or a3, a3, a1
735 ; CHECK-NEXT: sw a3, 0(a2)
736 ; CHECK-NEXT: addi a2, a2, 4
737 ; CHECK-NEXT: bne a2, a0, .LBB12_6
738 ; CHECK-NEXT: .LBB12_7: # %for.cond.cleanup
741 %0 = call i64 @llvm.vscale.i64()
743 %min.iters.check = icmp ugt i64 %1, 1024
744 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
746 vector.ph: ; preds = %entry
747 %2 = call i64 @llvm.vscale.i64()
749 %n.mod.vf = urem i64 1024, %3
750 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat lives in vector.ph, outside the loop that uses it.
751 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
752 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
753 %4 = call i64 @llvm.vscale.i64()
755 br label %vector.body
757 vector.body: ; preds = %vector.body, %vector.ph
758 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
759 %6 = getelementptr inbounds i32, ptr %a, i64 %index
760 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the right-hand operand of the vector OR.
761 %7 = or <vscale x 4 x i32> %wide.load, %broadcast.splat
762 store <vscale x 4 x i32> %7, ptr %6, align 4
763 %index.next = add nuw i64 %index, %5
764 %8 = icmp eq i64 %index.next, %n.vec
765 br i1 %8, label %middle.block, label %vector.body
767 middle.block: ; preds = %vector.body
768 %cmp.n = icmp eq i64 %n.mod.vf, 0
769 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
771 for.body.preheader: ; preds = %entry, %middle.block
772 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
775 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop, one i32 at a time.
778 for.body: ; preds = %for.body.preheader, %for.body
779 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
780 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
781 %9 = load i32, ptr %arrayidx, align 4
; NOTE(review): %or is stored below but its defining instruction is not
; visible in this view; presumably an `or i32 %9, %x` sits between the load
; and the store, matching the scalar or in the expected assembly - confirm
; against the full file.
783 store i32 %or, ptr %arrayidx, align 4
784 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
785 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
786 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_xor_scalable: scalable-vector (<vscale x 4 x i32>) variant of the
; XOR-with-splat loop, laid out as a vector loop plus a scalar remainder:
;   entry              - goes straight to the scalar loop when %1 > 1024
;                        (unsigned), otherwise to vector.ph
;   vector.ph          - computes %n.vec = 1024 - (1024 urem %3) and builds
;                        the splat of %x
;   vector.body        - load / XOR with the splat / store, stepping %index by
;                        %5 until it equals %n.vec
;   middle.block       - skips the scalar loop when the remainder %n.mod.vf is 0
;   for.body.preheader - starting index is 0 (from entry) or %n.vec
;   for.body           - scalar XOR with %x, one element per iteration, up to
;                        index 1024
; The expected assembly below uses vxor.vx with a1 inside the vector loop and
; a scalar xor in the remainder loop.
; NOTE(review): %1, %3 and %5 are used but their defining instructions are
; not visible in this view; presumably each is derived from the adjacent
; llvm.vscale call - confirm against the full file.
789 define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
790 ; CHECK-LABEL: sink_splat_xor_scalable:
791 ; CHECK: # %bb.0: # %entry
792 ; CHECK-NEXT: csrr a5, vlenb
793 ; CHECK-NEXT: srli a2, a5, 1
794 ; CHECK-NEXT: li a3, 1024
795 ; CHECK-NEXT: bgeu a3, a2, .LBB13_2
796 ; CHECK-NEXT: # %bb.1:
797 ; CHECK-NEXT: li a3, 0
798 ; CHECK-NEXT: j .LBB13_5
799 ; CHECK-NEXT: .LBB13_2: # %vector.ph
800 ; CHECK-NEXT: addi a3, a2, -1
801 ; CHECK-NEXT: andi a4, a3, 1024
802 ; CHECK-NEXT: xori a3, a4, 1024
803 ; CHECK-NEXT: slli a5, a5, 1
804 ; CHECK-NEXT: mv a6, a0
805 ; CHECK-NEXT: mv a7, a3
806 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
807 ; CHECK-NEXT: .LBB13_3: # %vector.body
808 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
809 ; CHECK-NEXT: vl2re32.v v8, (a6)
810 ; CHECK-NEXT: vxor.vx v8, v8, a1
811 ; CHECK-NEXT: vs2r.v v8, (a6)
812 ; CHECK-NEXT: sub a7, a7, a2
813 ; CHECK-NEXT: add a6, a6, a5
814 ; CHECK-NEXT: bnez a7, .LBB13_3
815 ; CHECK-NEXT: # %bb.4: # %middle.block
816 ; CHECK-NEXT: beqz a4, .LBB13_7
817 ; CHECK-NEXT: .LBB13_5: # %for.body.preheader
818 ; CHECK-NEXT: slli a2, a3, 2
819 ; CHECK-NEXT: add a2, a0, a2
820 ; CHECK-NEXT: lui a3, 1
821 ; CHECK-NEXT: add a0, a0, a3
822 ; CHECK-NEXT: .LBB13_6: # %for.body
823 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
824 ; CHECK-NEXT: lw a3, 0(a2)
825 ; CHECK-NEXT: xor a3, a3, a1
826 ; CHECK-NEXT: sw a3, 0(a2)
827 ; CHECK-NEXT: addi a2, a2, 4
828 ; CHECK-NEXT: bne a2, a0, .LBB13_6
829 ; CHECK-NEXT: .LBB13_7: # %for.cond.cleanup
832 %0 = call i64 @llvm.vscale.i64()
834 %min.iters.check = icmp ugt i64 %1, 1024
835 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
837 vector.ph: ; preds = %entry
838 %2 = call i64 @llvm.vscale.i64()
840 %n.mod.vf = urem i64 1024, %3
841 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat idiom: insert %x into lane 0, then broadcast lane 0 with an all-zero
; shuffle mask. The splat lives in vector.ph, outside the loop that uses it.
842 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
843 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
844 %4 = call i64 @llvm.vscale.i64()
846 br label %vector.body
848 vector.body: ; preds = %vector.body, %vector.ph
849 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
850 %6 = getelementptr inbounds i32, ptr %a, i64 %index
851 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the right-hand operand of the vector XOR.
852 %7 = xor <vscale x 4 x i32> %wide.load, %broadcast.splat
853 store <vscale x 4 x i32> %7, ptr %6, align 4
854 %index.next = add nuw i64 %index, %5
855 %8 = icmp eq i64 %index.next, %n.vec
856 br i1 %8, label %middle.block, label %vector.body
858 middle.block: ; preds = %vector.body
859 %cmp.n = icmp eq i64 %n.mod.vf, 0
860 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
862 for.body.preheader: ; preds = %entry, %middle.block
863 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
866 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: same operation as the vector body, one i32 at a time.
869 for.body: ; preds = %for.body.preheader, %for.body
870 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
871 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
872 %9 = load i32, ptr %arrayidx, align 4
873 %xor = xor i32 %9, %x
874 store i32 %xor, ptr %arrayidx, align 4
875 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
876 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
877 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_shl: fixed-length <4 x i32> loop whose shift amount is a splat of
; the scalar argument %x, built before the loop. The expected assembly keeps
; %x in a1 and uses the vector-scalar form vsll.vx inside the loop, with no
; separate splat instruction in the listed sequence.
880 define void @sink_splat_shl(ptr nocapture %a, i32 signext %x) {
881 ; CHECK-LABEL: sink_splat_shl:
882 ; CHECK: # %bb.0: # %entry
883 ; CHECK-NEXT: lui a2, 1
884 ; CHECK-NEXT: add a2, a0, a2
885 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
886 ; CHECK-NEXT: .LBB14_1: # %vector.body
887 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
888 ; CHECK-NEXT: vle32.v v8, (a0)
889 ; CHECK-NEXT: vsll.vx v8, v8, a1
890 ; CHECK-NEXT: vse32.v v8, (a0)
891 ; CHECK-NEXT: addi a0, a0, 16
892 ; CHECK-NEXT: bne a0, a2, .LBB14_1
893 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
896 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
897 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
898 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
900 vector.body: ; preds = %vector.body, %entry
901 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
902 %0 = getelementptr inbounds i32, ptr %a, i64 %index
903 %wide.load = load <4 x i32>, ptr %0, align 4
904 %1 = shl <4 x i32> %wide.load, %broadcast.splat
905 store <4 x i32> %1, ptr %0, align 4
906 %index.next = add nuw i64 %index, 4
907 %2 = icmp eq i64 %index.next, 1024
908 br i1 %2, label %for.cond.cleanup, label %vector.body
910 for.cond.cleanup: ; preds = %vector.body
; sink_splat_lshr: fixed-length <4 x i32> loop whose logical-right-shift amount
; is a splat of the scalar argument %x, built before the loop. The expected
; assembly keeps %x in a1 and uses vsrl.vx inside the loop.
914 define void @sink_splat_lshr(ptr nocapture %a, i32 signext %x) {
915 ; CHECK-LABEL: sink_splat_lshr:
916 ; CHECK: # %bb.0: # %entry
917 ; CHECK-NEXT: lui a2, 1
918 ; CHECK-NEXT: add a2, a0, a2
919 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
920 ; CHECK-NEXT: .LBB15_1: # %vector.body
921 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
922 ; CHECK-NEXT: vle32.v v8, (a0)
923 ; CHECK-NEXT: vsrl.vx v8, v8, a1
924 ; CHECK-NEXT: vse32.v v8, (a0)
925 ; CHECK-NEXT: addi a0, a0, 16
926 ; CHECK-NEXT: bne a0, a2, .LBB15_1
927 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
930 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
931 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
932 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
934 vector.body: ; preds = %vector.body, %entry
935 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
936 %0 = getelementptr inbounds i32, ptr %a, i64 %index
937 %wide.load = load <4 x i32>, ptr %0, align 4
938 %1 = lshr <4 x i32> %wide.load, %broadcast.splat
939 store <4 x i32> %1, ptr %0, align 4
940 %index.next = add nuw i64 %index, 4
941 %2 = icmp eq i64 %index.next, 1024
942 br i1 %2, label %for.cond.cleanup, label %vector.body
944 for.cond.cleanup: ; preds = %vector.body
; sink_splat_ashr: fixed-length <4 x i32> loop whose arithmetic-right-shift
; amount is a splat of the scalar argument %x, built before the loop. The
; expected assembly keeps %x in a1 and uses vsra.vx inside the loop.
948 define void @sink_splat_ashr(ptr nocapture %a, i32 signext %x) {
949 ; CHECK-LABEL: sink_splat_ashr:
950 ; CHECK: # %bb.0: # %entry
951 ; CHECK-NEXT: lui a2, 1
952 ; CHECK-NEXT: add a2, a0, a2
953 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
954 ; CHECK-NEXT: .LBB16_1: # %vector.body
955 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
956 ; CHECK-NEXT: vle32.v v8, (a0)
957 ; CHECK-NEXT: vsra.vx v8, v8, a1
958 ; CHECK-NEXT: vse32.v v8, (a0)
959 ; CHECK-NEXT: addi a0, a0, 16
960 ; CHECK-NEXT: bne a0, a2, .LBB16_1
961 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
964 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
965 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
966 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
968 vector.body: ; preds = %vector.body, %entry
969 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
970 %0 = getelementptr inbounds i32, ptr %a, i64 %index
971 %wide.load = load <4 x i32>, ptr %0, align 4
972 %1 = ashr <4 x i32> %wide.load, %broadcast.splat
973 store <4 x i32> %1, ptr %0, align 4
974 %index.next = add nuw i64 %index, 4
975 %2 = icmp eq i64 %index.next, 1024
976 br i1 %2, label %for.cond.cleanup, label %vector.body
978 for.cond.cleanup: ; preds = %vector.body
; sink_splat_shl_scalable: scalable-vector (<vscale x 4 x i32>) variant with a
; scalar remainder loop. The splat of %x is built in %vector.ph and used as the
; shift amount in %vector.body. The expected assembly uses vsll.vx with a1 in
; the vector loop and sllw with a1 in the scalar %for.body loop.
982 define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
983 ; CHECK-LABEL: sink_splat_shl_scalable:
984 ; CHECK: # %bb.0: # %entry
985 ; CHECK-NEXT: csrr a5, vlenb
986 ; CHECK-NEXT: srli a2, a5, 1
987 ; CHECK-NEXT: li a3, 1024
988 ; CHECK-NEXT: bgeu a3, a2, .LBB17_2
989 ; CHECK-NEXT: # %bb.1:
990 ; CHECK-NEXT: li a3, 0
991 ; CHECK-NEXT: j .LBB17_5
992 ; CHECK-NEXT: .LBB17_2: # %vector.ph
993 ; CHECK-NEXT: addi a3, a2, -1
994 ; CHECK-NEXT: andi a4, a3, 1024
995 ; CHECK-NEXT: xori a3, a4, 1024
996 ; CHECK-NEXT: slli a5, a5, 1
997 ; CHECK-NEXT: mv a6, a0
998 ; CHECK-NEXT: mv a7, a3
999 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
1000 ; CHECK-NEXT: .LBB17_3: # %vector.body
1001 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1002 ; CHECK-NEXT: vl2re32.v v8, (a6)
1003 ; CHECK-NEXT: vsll.vx v8, v8, a1
1004 ; CHECK-NEXT: vs2r.v v8, (a6)
1005 ; CHECK-NEXT: sub a7, a7, a2
1006 ; CHECK-NEXT: add a6, a6, a5
1007 ; CHECK-NEXT: bnez a7, .LBB17_3
1008 ; CHECK-NEXT: # %bb.4: # %middle.block
1009 ; CHECK-NEXT: beqz a4, .LBB17_7
1010 ; CHECK-NEXT: .LBB17_5: # %for.body.preheader
1011 ; CHECK-NEXT: slli a2, a3, 2
1012 ; CHECK-NEXT: add a2, a0, a2
1013 ; CHECK-NEXT: lui a3, 1
1014 ; CHECK-NEXT: add a0, a0, a3
1015 ; CHECK-NEXT: .LBB17_6: # %for.body
1016 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1017 ; CHECK-NEXT: lw a3, 0(a2)
1018 ; CHECK-NEXT: sllw a3, a3, a1
1019 ; CHECK-NEXT: sw a3, 0(a2)
1020 ; CHECK-NEXT: addi a2, a2, 4
1021 ; CHECK-NEXT: bne a2, a0, .LBB17_6
1022 ; CHECK-NEXT: .LBB17_7: # %for.cond.cleanup
1025 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is (unsigned) greater than 1024.
1027 %min.iters.check = icmp ugt i64 %1, 1024
1028 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1030 vector.ph: ; preds = %entry
1031 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 - (1024 urem %3): number of elements handled by the vector loop.
1033 %n.mod.vf = urem i64 1024, %3
1034 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat of %x, built outside the loop that uses it.
1035 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
1036 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1037 %4 = call i64 @llvm.vscale.i64()
1039 br label %vector.body
; Vector loop: shifts one scalable vector in place per iteration, stepping by %5.
1041 vector.body: ; preds = %vector.body, %vector.ph
1042 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1043 %6 = getelementptr inbounds i32, ptr %a, i64 %index
1044 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1045 %7 = shl <vscale x 4 x i32> %wide.load, %broadcast.splat
1046 store <vscale x 4 x i32> %7, ptr %6, align 4
1047 %index.next = add nuw i64 %index, %5
1048 %8 = icmp eq i64 %index.next, %n.vec
1049 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when there is no remainder (%n.mod.vf == 0).
1051 middle.block: ; preds = %vector.body
1052 %cmp.n = icmp eq i64 %n.mod.vf, 0
1053 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; Scalar loop start index: 0 when entered from %entry, %n.vec after the vector loop.
1055 for.body.preheader: ; preds = %entry, %middle.block
1056 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1059 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: one i32 per iteration up to index 1024, using %x directly.
1062 for.body: ; preds = %for.body.preheader, %for.body
1063 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1064 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1065 %9 = load i32, ptr %arrayidx, align 4
1066 %shl = shl i32 %9, %x
1067 store i32 %shl, ptr %arrayidx, align 4
1068 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1069 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1070 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_lshr_scalable: scalable-vector (<vscale x 4 x i32>) variant with a
; scalar remainder loop. The splat of %x is built in %vector.ph and used as the
; logical-right-shift amount in %vector.body. The expected assembly uses
; vsrl.vx with a1 in the vector loop and srlw with a1 in the scalar loop.
1073 define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
1074 ; CHECK-LABEL: sink_splat_lshr_scalable:
1075 ; CHECK: # %bb.0: # %entry
1076 ; CHECK-NEXT: csrr a5, vlenb
1077 ; CHECK-NEXT: srli a2, a5, 1
1078 ; CHECK-NEXT: li a3, 1024
1079 ; CHECK-NEXT: bgeu a3, a2, .LBB18_2
1080 ; CHECK-NEXT: # %bb.1:
1081 ; CHECK-NEXT: li a3, 0
1082 ; CHECK-NEXT: j .LBB18_5
1083 ; CHECK-NEXT: .LBB18_2: # %vector.ph
1084 ; CHECK-NEXT: addi a3, a2, -1
1085 ; CHECK-NEXT: andi a4, a3, 1024
1086 ; CHECK-NEXT: xori a3, a4, 1024
1087 ; CHECK-NEXT: slli a5, a5, 1
1088 ; CHECK-NEXT: mv a6, a0
1089 ; CHECK-NEXT: mv a7, a3
1090 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
1091 ; CHECK-NEXT: .LBB18_3: # %vector.body
1092 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1093 ; CHECK-NEXT: vl2re32.v v8, (a6)
1094 ; CHECK-NEXT: vsrl.vx v8, v8, a1
1095 ; CHECK-NEXT: vs2r.v v8, (a6)
1096 ; CHECK-NEXT: sub a7, a7, a2
1097 ; CHECK-NEXT: add a6, a6, a5
1098 ; CHECK-NEXT: bnez a7, .LBB18_3
1099 ; CHECK-NEXT: # %bb.4: # %middle.block
1100 ; CHECK-NEXT: beqz a4, .LBB18_7
1101 ; CHECK-NEXT: .LBB18_5: # %for.body.preheader
1102 ; CHECK-NEXT: slli a2, a3, 2
1103 ; CHECK-NEXT: add a2, a0, a2
1104 ; CHECK-NEXT: lui a3, 1
1105 ; CHECK-NEXT: add a0, a0, a3
1106 ; CHECK-NEXT: .LBB18_6: # %for.body
1107 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1108 ; CHECK-NEXT: lw a3, 0(a2)
1109 ; CHECK-NEXT: srlw a3, a3, a1
1110 ; CHECK-NEXT: sw a3, 0(a2)
1111 ; CHECK-NEXT: addi a2, a2, 4
1112 ; CHECK-NEXT: bne a2, a0, .LBB18_6
1113 ; CHECK-NEXT: .LBB18_7: # %for.cond.cleanup
1116 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is (unsigned) greater than 1024.
1118 %min.iters.check = icmp ugt i64 %1, 1024
1119 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1121 vector.ph: ; preds = %entry
1122 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 - (1024 urem %3): number of elements handled by the vector loop.
1124 %n.mod.vf = urem i64 1024, %3
1125 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat of %x, built outside the loop that uses it.
1126 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
1127 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1128 %4 = call i64 @llvm.vscale.i64()
1130 br label %vector.body
; Vector loop: shifts one scalable vector in place per iteration, stepping by %5.
1132 vector.body: ; preds = %vector.body, %vector.ph
1133 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1134 %6 = getelementptr inbounds i32, ptr %a, i64 %index
1135 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1136 %7 = lshr <vscale x 4 x i32> %wide.load, %broadcast.splat
1137 store <vscale x 4 x i32> %7, ptr %6, align 4
1138 %index.next = add nuw i64 %index, %5
1139 %8 = icmp eq i64 %index.next, %n.vec
1140 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when there is no remainder (%n.mod.vf == 0).
1142 middle.block: ; preds = %vector.body
1143 %cmp.n = icmp eq i64 %n.mod.vf, 0
1144 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; Scalar loop start index: 0 when entered from %entry, %n.vec after the vector loop.
1146 for.body.preheader: ; preds = %entry, %middle.block
1147 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1150 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: one i32 per iteration up to index 1024, using %x directly.
1153 for.body: ; preds = %for.body.preheader, %for.body
1154 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1155 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1156 %9 = load i32, ptr %arrayidx, align 4
1157 %lshr = lshr i32 %9, %x
1158 store i32 %lshr, ptr %arrayidx, align 4
1159 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1160 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1161 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_ashr_scalable: scalable-vector (<vscale x 4 x i32>) variant with a
; scalar remainder loop. Unlike the shl/lshr scalable tests, this function
; takes no scalar shift argument: the shift amount is the constant
; splat (i32 2). The expected assembly uses the immediate form vsra.vi ..., 2
; in the vector loop.
; NOTE(review): the scalar remainder loop is expected to use srli rather than
; an arithmetic shift for `ashr i32 %9, 2`. Presumably this is valid because
; lw sign-extends on RV64 and sw stores only the low 32 bits - confirm before
; changing this expectation by hand.
1164 define void @sink_splat_ashr_scalable(ptr nocapture %a) {
1165 ; CHECK-LABEL: sink_splat_ashr_scalable:
1166 ; CHECK: # %bb.0: # %entry
1167 ; CHECK-NEXT: csrr a4, vlenb
1168 ; CHECK-NEXT: srli a2, a4, 1
1169 ; CHECK-NEXT: li a1, 1024
1170 ; CHECK-NEXT: bgeu a1, a2, .LBB19_2
1171 ; CHECK-NEXT: # %bb.1:
1172 ; CHECK-NEXT: li a1, 0
1173 ; CHECK-NEXT: j .LBB19_5
1174 ; CHECK-NEXT: .LBB19_2: # %vector.ph
1175 ; CHECK-NEXT: addi a1, a2, -1
1176 ; CHECK-NEXT: andi a3, a1, 1024
1177 ; CHECK-NEXT: xori a1, a3, 1024
1178 ; CHECK-NEXT: slli a4, a4, 1
1179 ; CHECK-NEXT: mv a5, a0
1180 ; CHECK-NEXT: mv a6, a1
1181 ; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, ma
1182 ; CHECK-NEXT: .LBB19_3: # %vector.body
1183 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1184 ; CHECK-NEXT: vl2re32.v v8, (a5)
1185 ; CHECK-NEXT: vsra.vi v8, v8, 2
1186 ; CHECK-NEXT: vs2r.v v8, (a5)
1187 ; CHECK-NEXT: sub a6, a6, a2
1188 ; CHECK-NEXT: add a5, a5, a4
1189 ; CHECK-NEXT: bnez a6, .LBB19_3
1190 ; CHECK-NEXT: # %bb.4: # %middle.block
1191 ; CHECK-NEXT: beqz a3, .LBB19_7
1192 ; CHECK-NEXT: .LBB19_5: # %for.body.preheader
1193 ; CHECK-NEXT: slli a1, a1, 2
1194 ; CHECK-NEXT: add a1, a0, a1
1195 ; CHECK-NEXT: lui a2, 1
1196 ; CHECK-NEXT: add a0, a0, a2
1197 ; CHECK-NEXT: .LBB19_6: # %for.body
1198 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1199 ; CHECK-NEXT: lw a2, 0(a1)
1200 ; CHECK-NEXT: srli a2, a2, 2
1201 ; CHECK-NEXT: sw a2, 0(a1)
1202 ; CHECK-NEXT: addi a1, a1, 4
1203 ; CHECK-NEXT: bne a1, a0, .LBB19_6
1204 ; CHECK-NEXT: .LBB19_7: # %for.cond.cleanup
1207 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is (unsigned) greater than 1024.
1209 %min.iters.check = icmp ugt i64 %1, 1024
1210 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1212 vector.ph: ; preds = %entry
1213 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 - (1024 urem %3): number of elements handled by the vector loop.
1215 %n.mod.vf = urem i64 1024, %3
1216 %n.vec = sub nsw i64 1024, %n.mod.vf
1217 %4 = call i64 @llvm.vscale.i64()
1219 br label %vector.body
; Vector loop: the shift amount is a constant splat, so no splat value is built in %vector.ph.
1221 vector.body: ; preds = %vector.body, %vector.ph
1222 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1223 %6 = getelementptr inbounds i32, ptr %a, i64 %index
1224 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1225 %7 = ashr <vscale x 4 x i32> %wide.load, splat (i32 2)
1226 store <vscale x 4 x i32> %7, ptr %6, align 4
1227 %index.next = add nuw i64 %index, %5
1228 %8 = icmp eq i64 %index.next, %n.vec
1229 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when there is no remainder (%n.mod.vf == 0).
1231 middle.block: ; preds = %vector.body
1232 %cmp.n = icmp eq i64 %n.mod.vf, 0
1233 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; Scalar loop start index: 0 when entered from %entry, %n.vec after the vector loop.
1235 for.body.preheader: ; preds = %entry, %middle.block
1236 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1239 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: one i32 per iteration up to index 1024, shifting by the constant 2.
1242 for.body: ; preds = %for.body.preheader, %for.body
1243 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1244 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1245 %9 = load i32, ptr %arrayidx, align 4
1246 %ashr = ashr i32 %9, 2
1247 store i32 %ashr, ptr %arrayidx, align 4
1248 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1249 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1250 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_fmul: fixed-length <4 x float> loop that multiplies each loaded
; vector by a splat of the scalar argument %x, built before the loop. The
; expected assembly uses the vector-scalar form vfmul.vf with fa0 in the loop.
1253 define void @sink_splat_fmul(ptr nocapture %a, float %x) {
1254 ; CHECK-LABEL: sink_splat_fmul:
1255 ; CHECK: # %bb.0: # %entry
1256 ; CHECK-NEXT: lui a1, 1
1257 ; CHECK-NEXT: add a1, a0, a1
1258 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1259 ; CHECK-NEXT: .LBB20_1: # %vector.body
1260 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1261 ; CHECK-NEXT: vle32.v v8, (a0)
1262 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
1263 ; CHECK-NEXT: vse32.v v8, (a0)
1264 ; CHECK-NEXT: addi a0, a0, 16
1265 ; CHECK-NEXT: bne a0, a1, .LBB20_1
1266 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
1269 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1270 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1271 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
1273 vector.body: ; preds = %vector.body, %entry
1274 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1275 %0 = getelementptr inbounds float, ptr %a, i64 %index
1276 %wide.load = load <4 x float>, ptr %0, align 4
1277 %1 = fmul <4 x float> %wide.load, %broadcast.splat
1278 store <4 x float> %1, ptr %0, align 4
1279 %index.next = add nuw i64 %index, 4
1280 %2 = icmp eq i64 %index.next, 1024
1281 br i1 %2, label %for.cond.cleanup, label %vector.body
1283 for.cond.cleanup: ; preds = %vector.body
; sink_splat_fdiv: fixed-length <4 x float> loop that divides each loaded
; vector by a splat of the scalar argument %x (the splat is the divisor). The
; expected assembly uses the vector-scalar form vfdiv.vf with fa0 in the loop.
1287 define void @sink_splat_fdiv(ptr nocapture %a, float %x) {
1288 ; CHECK-LABEL: sink_splat_fdiv:
1289 ; CHECK: # %bb.0: # %entry
1290 ; CHECK-NEXT: lui a1, 1
1291 ; CHECK-NEXT: add a1, a0, a1
1292 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1293 ; CHECK-NEXT: .LBB21_1: # %vector.body
1294 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1295 ; CHECK-NEXT: vle32.v v8, (a0)
1296 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
1297 ; CHECK-NEXT: vse32.v v8, (a0)
1298 ; CHECK-NEXT: addi a0, a0, 16
1299 ; CHECK-NEXT: bne a0, a1, .LBB21_1
1300 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
1303 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1304 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1305 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
1307 vector.body: ; preds = %vector.body, %entry
1308 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1309 %0 = getelementptr inbounds float, ptr %a, i64 %index
1310 %wide.load = load <4 x float>, ptr %0, align 4
1311 %1 = fdiv <4 x float> %wide.load, %broadcast.splat
1312 store <4 x float> %1, ptr %0, align 4
1313 %index.next = add nuw i64 %index, 4
1314 %2 = icmp eq i64 %index.next, 1024
1315 br i1 %2, label %for.cond.cleanup, label %vector.body
1317 for.cond.cleanup: ; preds = %vector.body
; sink_splat_frdiv: reversed-operand counterpart of sink_splat_fdiv. The splat
; of %x is the dividend (first fdiv operand) and the loaded vector is the
; divisor. The expected assembly uses vfrdiv.vf with fa0 in the loop.
1321 define void @sink_splat_frdiv(ptr nocapture %a, float %x) {
1322 ; CHECK-LABEL: sink_splat_frdiv:
1323 ; CHECK: # %bb.0: # %entry
1324 ; CHECK-NEXT: lui a1, 1
1325 ; CHECK-NEXT: add a1, a0, a1
1326 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1327 ; CHECK-NEXT: .LBB22_1: # %vector.body
1328 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1329 ; CHECK-NEXT: vle32.v v8, (a0)
1330 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
1331 ; CHECK-NEXT: vse32.v v8, (a0)
1332 ; CHECK-NEXT: addi a0, a0, 16
1333 ; CHECK-NEXT: bne a0, a1, .LBB22_1
1334 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
1337 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1338 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1339 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
1341 vector.body: ; preds = %vector.body, %entry
1342 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1343 %0 = getelementptr inbounds float, ptr %a, i64 %index
1344 %wide.load = load <4 x float>, ptr %0, align 4
1345 %1 = fdiv <4 x float> %broadcast.splat, %wide.load
1346 store <4 x float> %1, ptr %0, align 4
1347 %index.next = add nuw i64 %index, 4
1348 %2 = icmp eq i64 %index.next, 1024
1349 br i1 %2, label %for.cond.cleanup, label %vector.body
1351 for.cond.cleanup: ; preds = %vector.body
; sink_splat_fadd: fixed-length <4 x float> loop that adds a splat of the
; scalar argument %x to each loaded vector. The splat is built before the
; loop; the expected assembly uses vfadd.vf with fa0 in the loop.
1355 define void @sink_splat_fadd(ptr nocapture %a, float %x) {
1356 ; CHECK-LABEL: sink_splat_fadd:
1357 ; CHECK: # %bb.0: # %entry
1358 ; CHECK-NEXT: lui a1, 1
1359 ; CHECK-NEXT: add a1, a0, a1
1360 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1361 ; CHECK-NEXT: .LBB23_1: # %vector.body
1362 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1363 ; CHECK-NEXT: vle32.v v8, (a0)
1364 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
1365 ; CHECK-NEXT: vse32.v v8, (a0)
1366 ; CHECK-NEXT: addi a0, a0, 16
1367 ; CHECK-NEXT: bne a0, a1, .LBB23_1
1368 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
1371 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1372 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1373 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
1375 vector.body: ; preds = %vector.body, %entry
1376 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1377 %0 = getelementptr inbounds float, ptr %a, i64 %index
1378 %wide.load = load <4 x float>, ptr %0, align 4
1379 %1 = fadd <4 x float> %wide.load, %broadcast.splat
1380 store <4 x float> %1, ptr %0, align 4
1381 %index.next = add nuw i64 %index, 4
1382 %2 = icmp eq i64 %index.next, 1024
1383 br i1 %2, label %for.cond.cleanup, label %vector.body
1385 for.cond.cleanup: ; preds = %vector.body
; sink_splat_fsub: fixed-length <4 x float> loop that subtracts a splat of the
; scalar argument %x from each loaded vector (the splat is the second fsub
; operand). The expected assembly uses vfsub.vf with fa0 in the loop.
1389 define void @sink_splat_fsub(ptr nocapture %a, float %x) {
1390 ; CHECK-LABEL: sink_splat_fsub:
1391 ; CHECK: # %bb.0: # %entry
1392 ; CHECK-NEXT: lui a1, 1
1393 ; CHECK-NEXT: add a1, a0, a1
1394 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1395 ; CHECK-NEXT: .LBB24_1: # %vector.body
1396 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1397 ; CHECK-NEXT: vle32.v v8, (a0)
1398 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1399 ; CHECK-NEXT: vse32.v v8, (a0)
1400 ; CHECK-NEXT: addi a0, a0, 16
1401 ; CHECK-NEXT: bne a0, a1, .LBB24_1
1402 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
1405 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1406 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1407 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
1409 vector.body: ; preds = %vector.body, %entry
1410 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1411 %0 = getelementptr inbounds float, ptr %a, i64 %index
1412 %wide.load = load <4 x float>, ptr %0, align 4
1413 %1 = fsub <4 x float> %wide.load, %broadcast.splat
1414 store <4 x float> %1, ptr %0, align 4
1415 %index.next = add nuw i64 %index, 4
1416 %2 = icmp eq i64 %index.next, 1024
1417 br i1 %2, label %for.cond.cleanup, label %vector.body
1419 for.cond.cleanup: ; preds = %vector.body
; sink_splat_frsub: reversed-operand counterpart of sink_splat_fsub. The splat
; of %x is the first fsub operand and the loaded vector is subtracted from it.
; The expected assembly uses vfrsub.vf with fa0 in the loop.
1423 define void @sink_splat_frsub(ptr nocapture %a, float %x) {
1424 ; CHECK-LABEL: sink_splat_frsub:
1425 ; CHECK: # %bb.0: # %entry
1426 ; CHECK-NEXT: lui a1, 1
1427 ; CHECK-NEXT: add a1, a0, a1
1428 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1429 ; CHECK-NEXT: .LBB25_1: # %vector.body
1430 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1431 ; CHECK-NEXT: vle32.v v8, (a0)
1432 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0
1433 ; CHECK-NEXT: vse32.v v8, (a0)
1434 ; CHECK-NEXT: addi a0, a0, 16
1435 ; CHECK-NEXT: bne a0, a1, .LBB25_1
1436 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Splat of %x: insert into lane 0, then broadcast with an all-zero shuffle mask.
1439 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1440 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1441 br label %vector.body
; In-place loop: 4 elements per iteration, exits once the index reaches 1024.
1443 vector.body: ; preds = %vector.body, %entry
1444 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1445 %0 = getelementptr inbounds float, ptr %a, i64 %index
1446 %wide.load = load <4 x float>, ptr %0, align 4
1447 %1 = fsub <4 x float> %broadcast.splat, %wide.load
1448 store <4 x float> %1, ptr %0, align 4
1449 %index.next = add nuw i64 %index, 4
1450 %2 = icmp eq i64 %index.next, 1024
1451 br i1 %2, label %for.cond.cleanup, label %vector.body
1453 for.cond.cleanup: ; preds = %vector.body
; sink_splat_fmul_scalable: scalable-vector (<vscale x 2 x float>) variant with
; a scalar remainder loop. The splat of %x is built in %vector.ph and used as
; an fmul operand in %vector.body. The expected assembly uses vfmul.vf with fa0
; in the vector loop and fmul.s with fa0 in the scalar %for.body loop.
1457 define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
1458 ; CHECK-LABEL: sink_splat_fmul_scalable:
1459 ; CHECK: # %bb.0: # %entry
1460 ; CHECK-NEXT: csrr a1, vlenb
1461 ; CHECK-NEXT: srli a2, a1, 2
1462 ; CHECK-NEXT: li a3, 1024
1463 ; CHECK-NEXT: bgeu a3, a2, .LBB26_2
1464 ; CHECK-NEXT: # %bb.1:
1465 ; CHECK-NEXT: li a3, 0
1466 ; CHECK-NEXT: j .LBB26_5
1467 ; CHECK-NEXT: .LBB26_2: # %vector.ph
1468 ; CHECK-NEXT: addi a3, a2, -1
1469 ; CHECK-NEXT: andi a4, a3, 1024
1470 ; CHECK-NEXT: xori a3, a4, 1024
1471 ; CHECK-NEXT: mv a5, a0
1472 ; CHECK-NEXT: mv a6, a3
1473 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
1474 ; CHECK-NEXT: .LBB26_3: # %vector.body
1475 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1476 ; CHECK-NEXT: vl1re32.v v8, (a5)
1477 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
1478 ; CHECK-NEXT: vs1r.v v8, (a5)
1479 ; CHECK-NEXT: sub a6, a6, a2
1480 ; CHECK-NEXT: add a5, a5, a1
1481 ; CHECK-NEXT: bnez a6, .LBB26_3
1482 ; CHECK-NEXT: # %bb.4: # %middle.block
1483 ; CHECK-NEXT: beqz a4, .LBB26_7
1484 ; CHECK-NEXT: .LBB26_5: # %for.body.preheader
1485 ; CHECK-NEXT: slli a1, a3, 2
1486 ; CHECK-NEXT: add a1, a0, a1
1487 ; CHECK-NEXT: lui a2, 1
1488 ; CHECK-NEXT: add a0, a0, a2
1489 ; CHECK-NEXT: .LBB26_6: # %for.body
1490 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1491 ; CHECK-NEXT: flw fa5, 0(a1)
1492 ; CHECK-NEXT: fmul.s fa5, fa5, fa0
1493 ; CHECK-NEXT: fsw fa5, 0(a1)
1494 ; CHECK-NEXT: addi a1, a1, 4
1495 ; CHECK-NEXT: bne a1, a0, .LBB26_6
1496 ; CHECK-NEXT: .LBB26_7: # %for.cond.cleanup
1499 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is (unsigned) greater than 1024.
1501 %min.iters.check = icmp ugt i64 %1, 1024
1502 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1504 vector.ph: ; preds = %entry
1505 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 - (1024 urem %3): number of elements handled by the vector loop.
1507 %n.mod.vf = urem i64 1024, %3
1508 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat of %x, built outside the loop that uses it.
1509 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1510 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1511 %4 = call i64 @llvm.vscale.i64()
1513 br label %vector.body
; Vector loop: multiplies one scalable vector in place per iteration, stepping by %5.
1515 vector.body: ; preds = %vector.body, %vector.ph
1516 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1517 %6 = getelementptr inbounds float, ptr %a, i64 %index
1518 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1519 %7 = fmul <vscale x 2 x float> %wide.load, %broadcast.splat
1520 store <vscale x 2 x float> %7, ptr %6, align 4
1521 %index.next = add nuw i64 %index, %5
1522 %8 = icmp eq i64 %index.next, %n.vec
1523 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when there is no remainder (%n.mod.vf == 0).
1525 middle.block: ; preds = %vector.body
1526 %cmp.n = icmp eq i64 %n.mod.vf, 0
1527 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; Scalar loop start index: 0 when entered from %entry, %n.vec after the vector loop.
1529 for.body.preheader: ; preds = %entry, %middle.block
1530 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1533 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: one float per iteration up to index 1024, using %x directly.
1536 for.body: ; preds = %for.body.preheader, %for.body
1537 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1538 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1539 %9 = load float, ptr %arrayidx, align 4
1540 %mul = fmul float %9, %x
1541 store float %mul, ptr %arrayidx, align 4
1542 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1543 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1544 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_fdiv_scalable: scalable-vector (<vscale x 2 x float>) variant with
; a scalar remainder loop. The splat of %x is built in %vector.ph and used as
; the divisor in %vector.body. The expected assembly uses vfdiv.vf with fa0 in
; the vector loop and fdiv.s (fa5 / fa0) in the scalar %for.body loop.
1547 define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
1548 ; CHECK-LABEL: sink_splat_fdiv_scalable:
1549 ; CHECK: # %bb.0: # %entry
1550 ; CHECK-NEXT: csrr a1, vlenb
1551 ; CHECK-NEXT: srli a2, a1, 2
1552 ; CHECK-NEXT: li a3, 1024
1553 ; CHECK-NEXT: bgeu a3, a2, .LBB27_2
1554 ; CHECK-NEXT: # %bb.1:
1555 ; CHECK-NEXT: li a3, 0
1556 ; CHECK-NEXT: j .LBB27_5
1557 ; CHECK-NEXT: .LBB27_2: # %vector.ph
1558 ; CHECK-NEXT: addi a3, a2, -1
1559 ; CHECK-NEXT: andi a4, a3, 1024
1560 ; CHECK-NEXT: xori a3, a4, 1024
1561 ; CHECK-NEXT: mv a5, a0
1562 ; CHECK-NEXT: mv a6, a3
1563 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
1564 ; CHECK-NEXT: .LBB27_3: # %vector.body
1565 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1566 ; CHECK-NEXT: vl1re32.v v8, (a5)
1567 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
1568 ; CHECK-NEXT: vs1r.v v8, (a5)
1569 ; CHECK-NEXT: sub a6, a6, a2
1570 ; CHECK-NEXT: add a5, a5, a1
1571 ; CHECK-NEXT: bnez a6, .LBB27_3
1572 ; CHECK-NEXT: # %bb.4: # %middle.block
1573 ; CHECK-NEXT: beqz a4, .LBB27_7
1574 ; CHECK-NEXT: .LBB27_5: # %for.body.preheader
1575 ; CHECK-NEXT: slli a1, a3, 2
1576 ; CHECK-NEXT: add a1, a0, a1
1577 ; CHECK-NEXT: lui a2, 1
1578 ; CHECK-NEXT: add a0, a0, a2
1579 ; CHECK-NEXT: .LBB27_6: # %for.body
1580 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1581 ; CHECK-NEXT: flw fa5, 0(a1)
1582 ; CHECK-NEXT: fdiv.s fa5, fa5, fa0
1583 ; CHECK-NEXT: fsw fa5, 0(a1)
1584 ; CHECK-NEXT: addi a1, a1, 4
1585 ; CHECK-NEXT: bne a1, a0, .LBB27_6
1586 ; CHECK-NEXT: .LBB27_7: # %for.cond.cleanup
1589 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is (unsigned) greater than 1024.
1591 %min.iters.check = icmp ugt i64 %1, 1024
1592 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1594 vector.ph: ; preds = %entry
1595 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 - (1024 urem %3): number of elements handled by the vector loop.
1597 %n.mod.vf = urem i64 1024, %3
1598 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat of %x, built outside the loop that uses it.
1599 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1600 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1601 %4 = call i64 @llvm.vscale.i64()
1603 br label %vector.body
; Vector loop: divides one scalable vector in place per iteration, stepping by %5.
1605 vector.body: ; preds = %vector.body, %vector.ph
1606 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1607 %6 = getelementptr inbounds float, ptr %a, i64 %index
1608 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1609 %7 = fdiv <vscale x 2 x float> %wide.load, %broadcast.splat
1610 store <vscale x 2 x float> %7, ptr %6, align 4
1611 %index.next = add nuw i64 %index, %5
1612 %8 = icmp eq i64 %index.next, %n.vec
1613 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when there is no remainder (%n.mod.vf == 0).
1615 middle.block: ; preds = %vector.body
1616 %cmp.n = icmp eq i64 %n.mod.vf, 0
1617 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; Scalar loop start index: 0 when entered from %entry, %n.vec after the vector loop.
1619 for.body.preheader: ; preds = %entry, %middle.block
1620 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1623 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: one float per iteration up to index 1024.
; Note: the result is named %mul although it is produced by fdiv.
1626 for.body: ; preds = %for.body.preheader, %for.body
1627 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1628 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1629 %9 = load float, ptr %arrayidx, align 4
1630 %mul = fdiv float %9, %x
1631 store float %mul, ptr %arrayidx, align 4
1632 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1633 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1634 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_frdiv_scalable: reversed-operand counterpart of
; sink_splat_fdiv_scalable. The splat of %x, built in %vector.ph, is the
; dividend and the loaded vector is the divisor. The expected assembly uses
; vfrdiv.vf with fa0 in the vector loop and fdiv.s (fa0 / fa5) in the scalar
; %for.body loop.
1637 define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
1638 ; CHECK-LABEL: sink_splat_frdiv_scalable:
1639 ; CHECK: # %bb.0: # %entry
1640 ; CHECK-NEXT: csrr a1, vlenb
1641 ; CHECK-NEXT: srli a2, a1, 2
1642 ; CHECK-NEXT: li a3, 1024
1643 ; CHECK-NEXT: bgeu a3, a2, .LBB28_2
1644 ; CHECK-NEXT: # %bb.1:
1645 ; CHECK-NEXT: li a3, 0
1646 ; CHECK-NEXT: j .LBB28_5
1647 ; CHECK-NEXT: .LBB28_2: # %vector.ph
1648 ; CHECK-NEXT: addi a3, a2, -1
1649 ; CHECK-NEXT: andi a4, a3, 1024
1650 ; CHECK-NEXT: xori a3, a4, 1024
1651 ; CHECK-NEXT: mv a5, a0
1652 ; CHECK-NEXT: mv a6, a3
1653 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
1654 ; CHECK-NEXT: .LBB28_3: # %vector.body
1655 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1656 ; CHECK-NEXT: vl1re32.v v8, (a5)
1657 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
1658 ; CHECK-NEXT: vs1r.v v8, (a5)
1659 ; CHECK-NEXT: sub a6, a6, a2
1660 ; CHECK-NEXT: add a5, a5, a1
1661 ; CHECK-NEXT: bnez a6, .LBB28_3
1662 ; CHECK-NEXT: # %bb.4: # %middle.block
1663 ; CHECK-NEXT: beqz a4, .LBB28_7
1664 ; CHECK-NEXT: .LBB28_5: # %for.body.preheader
1665 ; CHECK-NEXT: slli a1, a3, 2
1666 ; CHECK-NEXT: add a1, a0, a1
1667 ; CHECK-NEXT: lui a2, 1
1668 ; CHECK-NEXT: add a0, a0, a2
1669 ; CHECK-NEXT: .LBB28_6: # %for.body
1670 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1671 ; CHECK-NEXT: flw fa5, 0(a1)
1672 ; CHECK-NEXT: fdiv.s fa5, fa0, fa5
1673 ; CHECK-NEXT: fsw fa5, 0(a1)
1674 ; CHECK-NEXT: addi a1, a1, 4
1675 ; CHECK-NEXT: bne a1, a0, .LBB28_6
1676 ; CHECK-NEXT: .LBB28_7: # %for.cond.cleanup
1679 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is (unsigned) greater than 1024.
1681 %min.iters.check = icmp ugt i64 %1, 1024
1682 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1684 vector.ph: ; preds = %entry
1685 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 - (1024 urem %3): number of elements handled by the vector loop.
1687 %n.mod.vf = urem i64 1024, %3
1688 %n.vec = sub nsw i64 1024, %n.mod.vf
; Splat of %x, built outside the loop that uses it.
1689 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1690 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1691 %4 = call i64 @llvm.vscale.i64()
1693 br label %vector.body
; Vector loop: divides the splat by one scalable vector in place per iteration, stepping by %5.
1695 vector.body: ; preds = %vector.body, %vector.ph
1696 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1697 %6 = getelementptr inbounds float, ptr %a, i64 %index
1698 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1699 %7 = fdiv <vscale x 2 x float> %broadcast.splat, %wide.load
1700 store <vscale x 2 x float> %7, ptr %6, align 4
1701 %index.next = add nuw i64 %index, %5
1702 %8 = icmp eq i64 %index.next, %n.vec
1703 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when there is no remainder (%n.mod.vf == 0).
1705 middle.block: ; preds = %vector.body
1706 %cmp.n = icmp eq i64 %n.mod.vf, 0
1707 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; Scalar loop start index: 0 when entered from %entry, %n.vec after the vector loop.
1709 for.body.preheader: ; preds = %entry, %middle.block
1710 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1713 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar remainder loop: one float per iteration up to index 1024, computing %x / element.
; Note: the result is named %mul although it is produced by fdiv.
1716 for.body: ; preds = %for.body.preheader, %for.body
1717 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1718 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1719 %9 = load float, ptr %arrayidx, align 4
1720 %mul = fdiv float %x, %9
1721 store float %mul, ptr %arrayidx, align 4
1722 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1723 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1724 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector fadd with a splat operand. %vector.ph builds a
; <vscale x 2 x float> splat of %x; %vector.body adds it to each chunk loaded
; from %a, and the scalar %for.body loop processes elements from
; %indvars.iv.ph up to 1024. The CHECK lines expect the splat to appear as
; the scalar operand (fa0) of vfadd.vf inside the vector loop.
1727 define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
1728 ; CHECK-LABEL: sink_splat_fadd_scalable:
1729 ; CHECK: # %bb.0: # %entry
1730 ; CHECK-NEXT: csrr a1, vlenb
1731 ; CHECK-NEXT: srli a2, a1, 2
1732 ; CHECK-NEXT: li a3, 1024
1733 ; CHECK-NEXT: bgeu a3, a2, .LBB29_2
1734 ; CHECK-NEXT: # %bb.1:
1735 ; CHECK-NEXT: li a3, 0
1736 ; CHECK-NEXT: j .LBB29_5
1737 ; CHECK-NEXT: .LBB29_2: # %vector.ph
1738 ; CHECK-NEXT: addi a3, a2, -1
1739 ; CHECK-NEXT: andi a4, a3, 1024
1740 ; CHECK-NEXT: xori a3, a4, 1024
1741 ; CHECK-NEXT: mv a5, a0
1742 ; CHECK-NEXT: mv a6, a3
1743 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
1744 ; CHECK-NEXT: .LBB29_3: # %vector.body
1745 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1746 ; CHECK-NEXT: vl1re32.v v8, (a5)
1747 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
1748 ; CHECK-NEXT: vs1r.v v8, (a5)
1749 ; CHECK-NEXT: sub a6, a6, a2
1750 ; CHECK-NEXT: add a5, a5, a1
1751 ; CHECK-NEXT: bnez a6, .LBB29_3
1752 ; CHECK-NEXT: # %bb.4: # %middle.block
1753 ; CHECK-NEXT: beqz a4, .LBB29_7
1754 ; CHECK-NEXT: .LBB29_5: # %for.body.preheader
1755 ; CHECK-NEXT: slli a1, a3, 2
1756 ; CHECK-NEXT: add a1, a0, a1
1757 ; CHECK-NEXT: lui a2, 1
1758 ; CHECK-NEXT: add a0, a0, a2
1759 ; CHECK-NEXT: .LBB29_6: # %for.body
1760 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1761 ; CHECK-NEXT: flw fa5, 0(a1)
1762 ; CHECK-NEXT: fadd.s fa5, fa5, fa0
1763 ; CHECK-NEXT: fsw fa5, 0(a1)
1764 ; CHECK-NEXT: addi a1, a1, 4
1765 ; CHECK-NEXT: bne a1, a0, .LBB29_6
1766 ; CHECK-NEXT: .LBB29_7: # %for.cond.cleanup
1769 %0 = call i64 @llvm.vscale.i64()
1771 %min.iters.check = icmp ugt i64 %1, 1024
1772 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1774 vector.ph: ; preds = %entry
1775 %2 = call i64 @llvm.vscale.i64()
1777 %n.mod.vf = urem i64 1024, %3
1778 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat is defined here, in the block that precedes the vector loop.
1779 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1780 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1781 %4 = call i64 @llvm.vscale.i64()
1783 br label %vector.body
1785 vector.body: ; preds = %vector.body, %vector.ph
1786 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1787 %6 = getelementptr inbounds float, ptr %a, i64 %index
1788 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
; The splat is the right-hand operand of the vector fadd.
1789 %7 = fadd <vscale x 2 x float> %wide.load, %broadcast.splat
1790 store <vscale x 2 x float> %7, ptr %6, align 4
1791 %index.next = add nuw i64 %index, %5
1792 %8 = icmp eq i64 %index.next, %n.vec
1793 br i1 %8, label %middle.block, label %vector.body
1795 middle.block: ; preds = %vector.body
1796 %cmp.n = icmp eq i64 %n.mod.vf, 0
1797 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1799 for.body.preheader: ; preds = %entry, %middle.block
1800 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1803 for.cond.cleanup: ; preds = %for.body, %middle.block
1806 for.body: ; preds = %for.body.preheader, %for.body
1807 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1808 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1809 %9 = load float, ptr %arrayidx, align 4
1810 %mul = fadd float %9, %x
1811 store float %mul, ptr %arrayidx, align 4
1812 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1813 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1814 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector fsub with the splat as the subtrahend. %vector.ph builds a
; <vscale x 2 x float> splat of %x; %vector.body subtracts it from each chunk
; loaded from %a, and the scalar %for.body loop processes elements from
; %indvars.iv.ph up to 1024. The CHECK lines expect vfsub.vf with fa0 inside
; the vector loop.
1817 define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
1818 ; CHECK-LABEL: sink_splat_fsub_scalable:
1819 ; CHECK: # %bb.0: # %entry
1820 ; CHECK-NEXT: csrr a1, vlenb
1821 ; CHECK-NEXT: srli a2, a1, 2
1822 ; CHECK-NEXT: li a3, 1024
1823 ; CHECK-NEXT: bgeu a3, a2, .LBB30_2
1824 ; CHECK-NEXT: # %bb.1:
1825 ; CHECK-NEXT: li a3, 0
1826 ; CHECK-NEXT: j .LBB30_5
1827 ; CHECK-NEXT: .LBB30_2: # %vector.ph
1828 ; CHECK-NEXT: addi a3, a2, -1
1829 ; CHECK-NEXT: andi a4, a3, 1024
1830 ; CHECK-NEXT: xori a3, a4, 1024
1831 ; CHECK-NEXT: mv a5, a0
1832 ; CHECK-NEXT: mv a6, a3
1833 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
1834 ; CHECK-NEXT: .LBB30_3: # %vector.body
1835 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1836 ; CHECK-NEXT: vl1re32.v v8, (a5)
1837 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1838 ; CHECK-NEXT: vs1r.v v8, (a5)
1839 ; CHECK-NEXT: sub a6, a6, a2
1840 ; CHECK-NEXT: add a5, a5, a1
1841 ; CHECK-NEXT: bnez a6, .LBB30_3
1842 ; CHECK-NEXT: # %bb.4: # %middle.block
1843 ; CHECK-NEXT: beqz a4, .LBB30_7
1844 ; CHECK-NEXT: .LBB30_5: # %for.body.preheader
1845 ; CHECK-NEXT: slli a1, a3, 2
1846 ; CHECK-NEXT: add a1, a0, a1
1847 ; CHECK-NEXT: lui a2, 1
1848 ; CHECK-NEXT: add a0, a0, a2
1849 ; CHECK-NEXT: .LBB30_6: # %for.body
1850 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1851 ; CHECK-NEXT: flw fa5, 0(a1)
1852 ; CHECK-NEXT: fsub.s fa5, fa5, fa0
1853 ; CHECK-NEXT: fsw fa5, 0(a1)
1854 ; CHECK-NEXT: addi a1, a1, 4
1855 ; CHECK-NEXT: bne a1, a0, .LBB30_6
1856 ; CHECK-NEXT: .LBB30_7: # %for.cond.cleanup
1859 %0 = call i64 @llvm.vscale.i64()
1861 %min.iters.check = icmp ugt i64 %1, 1024
1862 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1864 vector.ph: ; preds = %entry
1865 %2 = call i64 @llvm.vscale.i64()
1867 %n.mod.vf = urem i64 1024, %3
1868 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat is defined here, in the block that precedes the vector loop.
1869 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1870 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1871 %4 = call i64 @llvm.vscale.i64()
1873 br label %vector.body
1875 vector.body: ; preds = %vector.body, %vector.ph
1876 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1877 %6 = getelementptr inbounds float, ptr %a, i64 %index
1878 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
; The splat is the right-hand operand (subtrahend) of the vector fsub.
1879 %7 = fsub <vscale x 2 x float> %wide.load, %broadcast.splat
1880 store <vscale x 2 x float> %7, ptr %6, align 4
1881 %index.next = add nuw i64 %index, %5
1882 %8 = icmp eq i64 %index.next, %n.vec
1883 br i1 %8, label %middle.block, label %vector.body
1885 middle.block: ; preds = %vector.body
1886 %cmp.n = icmp eq i64 %n.mod.vf, 0
1887 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1889 for.body.preheader: ; preds = %entry, %middle.block
1890 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1893 for.cond.cleanup: ; preds = %for.body, %middle.block
1896 for.body: ; preds = %for.body.preheader, %for.body
1897 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1898 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1899 %9 = load float, ptr %arrayidx, align 4
1900 %mul = fsub float %9, %x
1901 store float %mul, ptr %arrayidx, align 4
1902 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1903 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1904 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector fsub with the splat as the minuend (reversed operands).
; %vector.ph builds a <vscale x 2 x float> splat of %x; %vector.body computes
; splat - chunk for each chunk loaded from %a, and the scalar %for.body loop
; processes elements from %indvars.iv.ph up to 1024. The CHECK lines expect
; vfrsub.vf with fa0 inside the vector loop.
1907 define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
1908 ; CHECK-LABEL: sink_splat_frsub_scalable:
1909 ; CHECK: # %bb.0: # %entry
1910 ; CHECK-NEXT: csrr a1, vlenb
1911 ; CHECK-NEXT: srli a2, a1, 2
1912 ; CHECK-NEXT: li a3, 1024
1913 ; CHECK-NEXT: bgeu a3, a2, .LBB31_2
1914 ; CHECK-NEXT: # %bb.1:
1915 ; CHECK-NEXT: li a3, 0
1916 ; CHECK-NEXT: j .LBB31_5
1917 ; CHECK-NEXT: .LBB31_2: # %vector.ph
1918 ; CHECK-NEXT: addi a3, a2, -1
1919 ; CHECK-NEXT: andi a4, a3, 1024
1920 ; CHECK-NEXT: xori a3, a4, 1024
1921 ; CHECK-NEXT: mv a5, a0
1922 ; CHECK-NEXT: mv a6, a3
1923 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
1924 ; CHECK-NEXT: .LBB31_3: # %vector.body
1925 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1926 ; CHECK-NEXT: vl1re32.v v8, (a5)
1927 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0
1928 ; CHECK-NEXT: vs1r.v v8, (a5)
1929 ; CHECK-NEXT: sub a6, a6, a2
1930 ; CHECK-NEXT: add a5, a5, a1
1931 ; CHECK-NEXT: bnez a6, .LBB31_3
1932 ; CHECK-NEXT: # %bb.4: # %middle.block
1933 ; CHECK-NEXT: beqz a4, .LBB31_7
1934 ; CHECK-NEXT: .LBB31_5: # %for.body.preheader
1935 ; CHECK-NEXT: slli a1, a3, 2
1936 ; CHECK-NEXT: add a1, a0, a1
1937 ; CHECK-NEXT: lui a2, 1
1938 ; CHECK-NEXT: add a0, a0, a2
1939 ; CHECK-NEXT: .LBB31_6: # %for.body
1940 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1941 ; CHECK-NEXT: flw fa5, 0(a1)
1942 ; CHECK-NEXT: fsub.s fa5, fa0, fa5
1943 ; CHECK-NEXT: fsw fa5, 0(a1)
1944 ; CHECK-NEXT: addi a1, a1, 4
1945 ; CHECK-NEXT: bne a1, a0, .LBB31_6
1946 ; CHECK-NEXT: .LBB31_7: # %for.cond.cleanup
1949 %0 = call i64 @llvm.vscale.i64()
1951 %min.iters.check = icmp ugt i64 %1, 1024
1952 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1954 vector.ph: ; preds = %entry
1955 %2 = call i64 @llvm.vscale.i64()
1957 %n.mod.vf = urem i64 1024, %3
1958 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat is defined here, in the block that precedes the vector loop.
1959 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1960 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1961 %4 = call i64 @llvm.vscale.i64()
1963 br label %vector.body
1965 vector.body: ; preds = %vector.body, %vector.ph
1966 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1967 %6 = getelementptr inbounds float, ptr %a, i64 %index
1968 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
; The splat is the left-hand operand (minuend) of the vector fsub.
1969 %7 = fsub <vscale x 2 x float> %broadcast.splat, %wide.load
1970 store <vscale x 2 x float> %7, ptr %6, align 4
1971 %index.next = add nuw i64 %index, %5
1972 %8 = icmp eq i64 %index.next, %n.vec
1973 br i1 %8, label %middle.block, label %vector.body
1975 middle.block: ; preds = %vector.body
1976 %cmp.n = icmp eq i64 %n.mod.vf, 0
1977 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1979 for.body.preheader: ; preds = %entry, %middle.block
1980 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1983 for.cond.cleanup: ; preds = %for.body, %middle.block
1986 for.body: ; preds = %for.body.preheader, %for.body
1987 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1988 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1989 %9 = load float, ptr %arrayidx, align 4
1990 %mul = fsub float %x, %9
1991 store float %mul, ptr %arrayidx, align 4
1992 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1993 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1994 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Fixed-width (<4 x float>) fma with a splat multiplicand. The splat of %x is
; built before the loop and passed as the second operand of llvm.fma.v4f32;
; the loop advances %index by 4 until it reaches 1024. The CHECK lines expect
; vfmacc.vf with fa0 inside the loop.
1997 define void @sink_splat_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
1998 ; CHECK-LABEL: sink_splat_fma:
1999 ; CHECK: # %bb.0: # %entry
2000 ; CHECK-NEXT: lui a2, 1
2001 ; CHECK-NEXT: add a2, a1, a2
2002 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2003 ; CHECK-NEXT: .LBB32_1: # %vector.body
2004 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2005 ; CHECK-NEXT: vle32.v v8, (a0)
2006 ; CHECK-NEXT: vle32.v v9, (a1)
2007 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
2008 ; CHECK-NEXT: vse32.v v9, (a0)
2009 ; CHECK-NEXT: addi a1, a1, 16
2010 ; CHECK-NEXT: addi a0, a0, 16
2011 ; CHECK-NEXT: bne a1, a2, .LBB32_1
2012 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
2015 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
2016 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2017 br label %vector.body
2019 vector.body: ; preds = %vector.body, %entry
2020 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2021 %0 = getelementptr inbounds float, ptr %a, i64 %index
2022 %wide.load = load <4 x float>, ptr %0, align 4
2023 %1 = getelementptr inbounds float, ptr %b, i64 %index
2024 %wide.load12 = load <4 x float>, ptr %1, align 4
; fma(a[i], splat(x), b[i]); the result is stored back to %a.
2025 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12)
2026 store <4 x float> %2, ptr %0, align 4
2027 %index.next = add nuw i64 %index, 4
2028 %3 = icmp eq i64 %index.next, 1024
2029 br i1 %3, label %for.cond.cleanup, label %vector.body
2031 for.cond.cleanup: ; preds = %vector.body
; Same as sink_splat_fma, but the splat of %x is the first operand of
; llvm.fma.v4f32 instead of the second. The CHECK lines expect the same
; vfmacc.vf with fa0 inside the loop.
2035 define void @sink_splat_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
2036 ; CHECK-LABEL: sink_splat_fma_commute:
2037 ; CHECK: # %bb.0: # %entry
2038 ; CHECK-NEXT: lui a2, 1
2039 ; CHECK-NEXT: add a2, a1, a2
2040 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2041 ; CHECK-NEXT: .LBB33_1: # %vector.body
2042 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2043 ; CHECK-NEXT: vle32.v v8, (a0)
2044 ; CHECK-NEXT: vle32.v v9, (a1)
2045 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
2046 ; CHECK-NEXT: vse32.v v9, (a0)
2047 ; CHECK-NEXT: addi a1, a1, 16
2048 ; CHECK-NEXT: addi a0, a0, 16
2049 ; CHECK-NEXT: bne a1, a2, .LBB33_1
2050 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
2053 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
2054 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2055 br label %vector.body
2057 vector.body: ; preds = %vector.body, %entry
2058 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2059 %0 = getelementptr inbounds float, ptr %a, i64 %index
2060 %wide.load = load <4 x float>, ptr %0, align 4
2061 %1 = getelementptr inbounds float, ptr %b, i64 %index
2062 %wide.load12 = load <4 x float>, ptr %1, align 4
; fma(splat(x), a[i], b[i]); the result is stored back to %a.
2063 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
2064 store <4 x float> %2, ptr %0, align 4
2065 %index.next = add nuw i64 %index, 4
2066 %3 = icmp eq i64 %index.next, 1024
2067 br i1 %3, label %for.cond.cleanup, label %vector.body
2069 for.cond.cleanup: ; preds = %vector.body
; Scalable-vector fma with a splat multiplicand. %vector.ph builds a
; <vscale x 2 x float> splat of %x, which %vector.body passes as the second
; operand of llvm.fma.nxv2f32; the scalar %for.body loop processes elements
; from %indvars.iv.ph up to 1024 with llvm.fma.f32. The CHECK lines expect
; vfmacc.vf with fa0 inside the vector loop.
2073 define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
2074 ; CHECK-LABEL: sink_splat_fma_scalable:
2075 ; CHECK: # %bb.0: # %entry
2076 ; CHECK-NEXT: csrr a2, vlenb
2077 ; CHECK-NEXT: srli a3, a2, 2
2078 ; CHECK-NEXT: li a4, 1024
2079 ; CHECK-NEXT: bgeu a4, a3, .LBB34_2
2080 ; CHECK-NEXT: # %bb.1:
2081 ; CHECK-NEXT: li a4, 0
2082 ; CHECK-NEXT: j .LBB34_5
2083 ; CHECK-NEXT: .LBB34_2: # %vector.ph
2084 ; CHECK-NEXT: addi a4, a3, -1
2085 ; CHECK-NEXT: andi a5, a4, 1024
2086 ; CHECK-NEXT: xori a4, a5, 1024
2087 ; CHECK-NEXT: mv a6, a0
2088 ; CHECK-NEXT: mv a7, a1
2089 ; CHECK-NEXT: mv t0, a4
2090 ; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
2091 ; CHECK-NEXT: .LBB34_3: # %vector.body
2092 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2093 ; CHECK-NEXT: vl1re32.v v8, (a6)
2094 ; CHECK-NEXT: vl1re32.v v9, (a7)
2095 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
2096 ; CHECK-NEXT: vs1r.v v9, (a6)
2097 ; CHECK-NEXT: sub t0, t0, a3
2098 ; CHECK-NEXT: add a7, a7, a2
2099 ; CHECK-NEXT: add a6, a6, a2
2100 ; CHECK-NEXT: bnez t0, .LBB34_3
2101 ; CHECK-NEXT: # %bb.4: # %middle.block
2102 ; CHECK-NEXT: beqz a5, .LBB34_7
2103 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader
2104 ; CHECK-NEXT: slli a4, a4, 2
2105 ; CHECK-NEXT: add a2, a1, a4
2106 ; CHECK-NEXT: add a0, a0, a4
2107 ; CHECK-NEXT: lui a3, 1
2108 ; CHECK-NEXT: add a1, a1, a3
2109 ; CHECK-NEXT: .LBB34_6: # %for.body
2110 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2111 ; CHECK-NEXT: flw fa5, 0(a0)
2112 ; CHECK-NEXT: flw fa4, 0(a2)
2113 ; CHECK-NEXT: fmadd.s fa5, fa5, fa0, fa4
2114 ; CHECK-NEXT: fsw fa5, 0(a0)
2115 ; CHECK-NEXT: addi a2, a2, 4
2116 ; CHECK-NEXT: addi a0, a0, 4
2117 ; CHECK-NEXT: bne a2, a1, .LBB34_6
2118 ; CHECK-NEXT: .LBB34_7: # %for.cond.cleanup
2121 %0 = call i64 @llvm.vscale.i64()
2123 %min.iters.check = icmp ugt i64 %1, 1024
2124 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2126 vector.ph: ; preds = %entry
2127 %2 = call i64 @llvm.vscale.i64()
2129 %n.mod.vf = urem i64 1024, %3
2130 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat is defined here, in the block that precedes the vector loop.
2131 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
2132 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
2133 %4 = call i64 @llvm.vscale.i64()
2135 br label %vector.body
2137 vector.body: ; preds = %vector.body, %vector.ph
2138 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2139 %6 = getelementptr inbounds float, ptr %a, i64 %index
2140 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
2141 %7 = getelementptr inbounds float, ptr %b, i64 %index
2142 %wide.load12 = load <vscale x 2 x float>, ptr %7, align 4
; fma(a[i], splat(x), b[i]); the result is stored back to %a.
2143 %8 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %wide.load, <vscale x 2 x float> %broadcast.splat, <vscale x 2 x float> %wide.load12)
2144 store <vscale x 2 x float> %8, ptr %6, align 4
2145 %index.next = add nuw i64 %index, %5
2146 %9 = icmp eq i64 %index.next, %n.vec
2147 br i1 %9, label %middle.block, label %vector.body
2149 middle.block: ; preds = %vector.body
2150 %cmp.n = icmp eq i64 %n.mod.vf, 0
2151 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2153 for.body.preheader: ; preds = %entry, %middle.block
2154 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2157 for.cond.cleanup: ; preds = %for.body, %middle.block
2160 for.body: ; preds = %for.body.preheader, %for.body
2161 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2162 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
2163 %10 = load float, ptr %arrayidx, align 4
2164 %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
2165 %11 = load float, ptr %arrayidx2, align 4
2166 %12 = tail call float @llvm.fma.f32(float %10, float %x, float %11)
2167 store float %12, ptr %arrayidx, align 4
2168 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2169 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2170 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Same as sink_splat_fma_scalable, but the splat of %x is the first operand
; of llvm.fma.nxv2f32 (and %x is the first operand of the scalar llvm.fma.f32
; in %for.body). The CHECK lines expect vfmacc.vf with fa0 inside the vector
; loop.
2173 define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
2174 ; CHECK-LABEL: sink_splat_fma_commute_scalable:
2175 ; CHECK: # %bb.0: # %entry
2176 ; CHECK-NEXT: csrr a2, vlenb
2177 ; CHECK-NEXT: srli a3, a2, 2
2178 ; CHECK-NEXT: li a4, 1024
2179 ; CHECK-NEXT: bgeu a4, a3, .LBB35_2
2180 ; CHECK-NEXT: # %bb.1:
2181 ; CHECK-NEXT: li a4, 0
2182 ; CHECK-NEXT: j .LBB35_5
2183 ; CHECK-NEXT: .LBB35_2: # %vector.ph
2184 ; CHECK-NEXT: addi a4, a3, -1
2185 ; CHECK-NEXT: andi a5, a4, 1024
2186 ; CHECK-NEXT: xori a4, a5, 1024
2187 ; CHECK-NEXT: mv a6, a0
2188 ; CHECK-NEXT: mv a7, a1
2189 ; CHECK-NEXT: mv t0, a4
2190 ; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
2191 ; CHECK-NEXT: .LBB35_3: # %vector.body
2192 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2193 ; CHECK-NEXT: vl1re32.v v8, (a6)
2194 ; CHECK-NEXT: vl1re32.v v9, (a7)
2195 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
2196 ; CHECK-NEXT: vs1r.v v9, (a6)
2197 ; CHECK-NEXT: sub t0, t0, a3
2198 ; CHECK-NEXT: add a7, a7, a2
2199 ; CHECK-NEXT: add a6, a6, a2
2200 ; CHECK-NEXT: bnez t0, .LBB35_3
2201 ; CHECK-NEXT: # %bb.4: # %middle.block
2202 ; CHECK-NEXT: beqz a5, .LBB35_7
2203 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader
2204 ; CHECK-NEXT: slli a4, a4, 2
2205 ; CHECK-NEXT: add a2, a1, a4
2206 ; CHECK-NEXT: add a0, a0, a4
2207 ; CHECK-NEXT: lui a3, 1
2208 ; CHECK-NEXT: add a1, a1, a3
2209 ; CHECK-NEXT: .LBB35_6: # %for.body
2210 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2211 ; CHECK-NEXT: flw fa5, 0(a0)
2212 ; CHECK-NEXT: flw fa4, 0(a2)
2213 ; CHECK-NEXT: fmadd.s fa5, fa0, fa5, fa4
2214 ; CHECK-NEXT: fsw fa5, 0(a0)
2215 ; CHECK-NEXT: addi a2, a2, 4
2216 ; CHECK-NEXT: addi a0, a0, 4
2217 ; CHECK-NEXT: bne a2, a1, .LBB35_6
2218 ; CHECK-NEXT: .LBB35_7: # %for.cond.cleanup
2221 %0 = call i64 @llvm.vscale.i64()
2223 %min.iters.check = icmp ugt i64 %1, 1024
2224 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2226 vector.ph: ; preds = %entry
2227 %2 = call i64 @llvm.vscale.i64()
2229 %n.mod.vf = urem i64 1024, %3
2230 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat is defined here, in the block that precedes the vector loop.
2231 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
2232 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
2233 %4 = call i64 @llvm.vscale.i64()
2235 br label %vector.body
2237 vector.body: ; preds = %vector.body, %vector.ph
2238 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2239 %6 = getelementptr inbounds float, ptr %a, i64 %index
2240 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
2241 %7 = getelementptr inbounds float, ptr %b, i64 %index
2242 %wide.load12 = load <vscale x 2 x float>, ptr %7, align 4
; fma(splat(x), a[i], b[i]); the result is stored back to %a.
2243 %8 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %broadcast.splat, <vscale x 2 x float> %wide.load, <vscale x 2 x float> %wide.load12)
2244 store <vscale x 2 x float> %8, ptr %6, align 4
2245 %index.next = add nuw i64 %index, %5
2246 %9 = icmp eq i64 %index.next, %n.vec
2247 br i1 %9, label %middle.block, label %vector.body
2249 middle.block: ; preds = %vector.body
2250 %cmp.n = icmp eq i64 %n.mod.vf, 0
2251 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2253 for.body.preheader: ; preds = %entry, %middle.block
2254 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2257 for.cond.cleanup: ; preds = %for.body, %middle.block
2260 for.body: ; preds = %for.body.preheader, %for.body
2261 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2262 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
2263 %10 = load float, ptr %arrayidx, align 4
2264 %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
2265 %11 = load float, ptr %arrayidx2, align 4
2266 %12 = tail call float @llvm.fma.f32(float %x, float %10, float %11)
2267 store float %12, ptr %arrayidx, align 4
2268 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2269 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2270 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2273 declare i64 @llvm.vscale.i64()
2274 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
2275 declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
2276 declare float @llvm.fma.f32(float, float, float)
; Integer compare against a splat. Each loaded <4 x i32> chunk of %x is
; compared for equality with a splat of %y (built before the loop), and the
; result masks a store of zeroinitializer back to the same address. The CHECK
; lines expect vmseq.vx with a1 followed by a masked vse32.v inside the loop.
2278 define void @sink_splat_icmp(ptr nocapture %x, i32 signext %y) {
2279 ; CHECK-LABEL: sink_splat_icmp:
2280 ; CHECK: # %bb.0: # %entry
2281 ; CHECK-NEXT: lui a2, 1
2282 ; CHECK-NEXT: add a2, a0, a2
2283 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2284 ; CHECK-NEXT: vmv.v.i v8, 0
2285 ; CHECK-NEXT: .LBB36_1: # %vector.body
2286 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2287 ; CHECK-NEXT: vle32.v v9, (a0)
2288 ; CHECK-NEXT: vmseq.vx v0, v9, a1
2289 ; CHECK-NEXT: vse32.v v8, (a0), v0.t
2290 ; CHECK-NEXT: addi a0, a0, 16
2291 ; CHECK-NEXT: bne a0, a2, .LBB36_1
2292 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
2295 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %y, i32 0
2296 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2297 br label %vector.body
2299 vector.body: ; preds = %vector.body, %entry
2300 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2301 %0 = getelementptr inbounds i32, ptr %x, i64 %index
2302 %wide.load = load <4 x i32>, ptr %0, align 4
; The compare result %1 is the mask operand of the masked store below.
2303 %1 = icmp eq <4 x i32> %wide.load, %broadcast.splat
2304 call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
2305 %index.next = add nuw i64 %index, 4
2306 %2 = icmp eq i64 %index.next, 1024
2307 br i1 %2, label %for.cond.cleanup, label %vector.body
2309 for.cond.cleanup: ; preds = %vector.body
2312 declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
; Floating-point compare against a splat. Each loaded <4 x float> chunk of %x
; is compared (fcmp fast oeq) with a splat of %y built before the loop, and
; the result masks a store of zeroinitializer back to the same address. The
; CHECK lines expect vmfeq.vf with fa0 followed by a masked vse32.v inside
; the loop.
2314 define void @sink_splat_fcmp(ptr nocapture %x, float %y) {
2315 ; CHECK-LABEL: sink_splat_fcmp:
2316 ; CHECK: # %bb.0: # %entry
2317 ; CHECK-NEXT: lui a1, 1
2318 ; CHECK-NEXT: add a1, a0, a1
2319 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2320 ; CHECK-NEXT: vmv.v.i v8, 0
2321 ; CHECK-NEXT: .LBB37_1: # %vector.body
2322 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2323 ; CHECK-NEXT: vle32.v v9, (a0)
2324 ; CHECK-NEXT: vmfeq.vf v0, v9, fa0
2325 ; CHECK-NEXT: vse32.v v8, (a0), v0.t
2326 ; CHECK-NEXT: addi a0, a0, 16
2327 ; CHECK-NEXT: bne a0, a1, .LBB37_1
2328 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
2331 %broadcast.splatinsert = insertelement <4 x float> poison, float %y, i32 0
2332 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2333 br label %vector.body
2335 vector.body: ; preds = %vector.body, %entry
2336 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2337 %0 = getelementptr inbounds float, ptr %x, i64 %index
2338 %wide.load = load <4 x float>, ptr %0, align 4
; The compare result %1 is the mask operand of the masked store below.
2339 %1 = fcmp fast oeq <4 x float> %wide.load, %broadcast.splat
2340 call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
2341 %index.next = add nuw i64 %index, 4
2342 %2 = icmp eq i64 %index.next, 1024
2343 br i1 %2, label %for.cond.cleanup, label %vector.body
2345 for.cond.cleanup: ; preds = %vector.body
2348 declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
; Unsigned division by a splat. Each loaded <4 x i32> chunk of %a is divided
; (udiv) by a splat of %x built before the loop and stored back; the loop
; advances %index by 4 until it reaches 1024. The CHECK lines expect
; vdivu.vx with a1 inside the loop.
2350 define void @sink_splat_udiv(ptr nocapture %a, i32 signext %x) {
2351 ; CHECK-LABEL: sink_splat_udiv:
2352 ; CHECK: # %bb.0: # %entry
2353 ; CHECK-NEXT: lui a2, 1
2354 ; CHECK-NEXT: add a2, a0, a2
2355 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2356 ; CHECK-NEXT: .LBB38_1: # %vector.body
2357 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2358 ; CHECK-NEXT: vle32.v v8, (a0)
2359 ; CHECK-NEXT: vdivu.vx v8, v8, a1
2360 ; CHECK-NEXT: vse32.v v8, (a0)
2361 ; CHECK-NEXT: addi a0, a0, 16
2362 ; CHECK-NEXT: bne a0, a2, .LBB38_1
2363 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
2366 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2367 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2368 br label %vector.body
2370 vector.body: ; preds = %vector.body, %entry
2371 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2372 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2373 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the divisor.
2374 %1 = udiv <4 x i32> %wide.load, %broadcast.splat
2375 store <4 x i32> %1, ptr %0, align 4
2376 %index.next = add nuw i64 %index, 4
2377 %2 = icmp eq i64 %index.next, 1024
2378 br i1 %2, label %for.cond.cleanup, label %vector.body
2380 for.cond.cleanup: ; preds = %vector.body
; Signed division by a splat. Each loaded <4 x i32> chunk of %a is divided
; (sdiv) by a splat of %x built before the loop and stored back; the loop
; advances %index by 4 until it reaches 1024. The CHECK lines expect
; vdiv.vx with a1 inside the loop.
2384 define void @sink_splat_sdiv(ptr nocapture %a, i32 signext %x) {
2385 ; CHECK-LABEL: sink_splat_sdiv:
2386 ; CHECK: # %bb.0: # %entry
2387 ; CHECK-NEXT: lui a2, 1
2388 ; CHECK-NEXT: add a2, a0, a2
2389 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2390 ; CHECK-NEXT: .LBB39_1: # %vector.body
2391 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2392 ; CHECK-NEXT: vle32.v v8, (a0)
2393 ; CHECK-NEXT: vdiv.vx v8, v8, a1
2394 ; CHECK-NEXT: vse32.v v8, (a0)
2395 ; CHECK-NEXT: addi a0, a0, 16
2396 ; CHECK-NEXT: bne a0, a2, .LBB39_1
2397 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
2400 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2401 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2402 br label %vector.body
2404 vector.body: ; preds = %vector.body, %entry
2405 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2406 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2407 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the divisor.
2408 %1 = sdiv <4 x i32> %wide.load, %broadcast.splat
2409 store <4 x i32> %1, ptr %0, align 4
2410 %index.next = add nuw i64 %index, 4
2411 %2 = icmp eq i64 %index.next, 1024
2412 br i1 %2, label %for.cond.cleanup, label %vector.body
2414 for.cond.cleanup: ; preds = %vector.body
; Unsigned remainder by a splat. Each loaded <4 x i32> chunk of %a is reduced
; (urem) by a splat of %x built before the loop and stored back; the loop
; advances %index by 4 until it reaches 1024. The CHECK lines expect
; vremu.vx with a1 inside the loop.
2418 define void @sink_splat_urem(ptr nocapture %a, i32 signext %x) {
2419 ; CHECK-LABEL: sink_splat_urem:
2420 ; CHECK: # %bb.0: # %entry
2421 ; CHECK-NEXT: lui a2, 1
2422 ; CHECK-NEXT: add a2, a0, a2
2423 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2424 ; CHECK-NEXT: .LBB40_1: # %vector.body
2425 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2426 ; CHECK-NEXT: vle32.v v8, (a0)
2427 ; CHECK-NEXT: vremu.vx v8, v8, a1
2428 ; CHECK-NEXT: vse32.v v8, (a0)
2429 ; CHECK-NEXT: addi a0, a0, 16
2430 ; CHECK-NEXT: bne a0, a2, .LBB40_1
2431 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
2434 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2435 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2436 br label %vector.body
2438 vector.body: ; preds = %vector.body, %entry
2439 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2440 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2441 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the divisor of the remainder operation.
2442 %1 = urem <4 x i32> %wide.load, %broadcast.splat
2443 store <4 x i32> %1, ptr %0, align 4
2444 %index.next = add nuw i64 %index, 4
2445 %2 = icmp eq i64 %index.next, 1024
2446 br i1 %2, label %for.cond.cleanup, label %vector.body
2448 for.cond.cleanup: ; preds = %vector.body
; Signed remainder by a splat. Each loaded <4 x i32> chunk of %a is reduced
; (srem) by a splat of %x built before the loop and stored back; the loop
; advances %index by 4 until it reaches 1024. The CHECK lines expect
; vrem.vx with a1 inside the loop.
2452 define void @sink_splat_srem(ptr nocapture %a, i32 signext %x) {
2453 ; CHECK-LABEL: sink_splat_srem:
2454 ; CHECK: # %bb.0: # %entry
2455 ; CHECK-NEXT: lui a2, 1
2456 ; CHECK-NEXT: add a2, a0, a2
2457 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2458 ; CHECK-NEXT: .LBB41_1: # %vector.body
2459 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2460 ; CHECK-NEXT: vle32.v v8, (a0)
2461 ; CHECK-NEXT: vrem.vx v8, v8, a1
2462 ; CHECK-NEXT: vse32.v v8, (a0)
2463 ; CHECK-NEXT: addi a0, a0, 16
2464 ; CHECK-NEXT: bne a0, a2, .LBB41_1
2465 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
2468 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2469 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2470 br label %vector.body
2472 vector.body: ; preds = %vector.body, %entry
2473 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2474 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2475 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the divisor of the remainder operation.
2476 %1 = srem <4 x i32> %wide.load, %broadcast.splat
2477 store <4 x i32> %1, ptr %0, align 4
2478 %index.next = add nuw i64 %index, 4
2479 %2 = icmp eq i64 %index.next, 1024
2480 br i1 %2, label %for.cond.cleanup, label %vector.body
2482 for.cond.cleanup: ; preds = %vector.body
; Scalable-vector variant of the unsigned-divide test: a vector loop over
; <vscale x 4 x i32> followed by a scalar loop over the remaining elements.
; The expected assembly uses vdivu.vx with a1 in the vector loop and divuw in
; the scalar loop.
2486 define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
2487 ; CHECK-LABEL: sink_splat_udiv_scalable:
2488 ; CHECK: # %bb.0: # %entry
2489 ; CHECK-NEXT: csrr a5, vlenb
2490 ; CHECK-NEXT: srli a2, a5, 1
2491 ; CHECK-NEXT: li a3, 1024
2492 ; CHECK-NEXT: bgeu a3, a2, .LBB42_2
2493 ; CHECK-NEXT: # %bb.1:
2494 ; CHECK-NEXT: li a3, 0
2495 ; CHECK-NEXT: j .LBB42_5
2496 ; CHECK-NEXT: .LBB42_2: # %vector.ph
2497 ; CHECK-NEXT: addi a3, a2, -1
2498 ; CHECK-NEXT: andi a4, a3, 1024
2499 ; CHECK-NEXT: xori a3, a4, 1024
2500 ; CHECK-NEXT: slli a5, a5, 1
2501 ; CHECK-NEXT: mv a6, a0
2502 ; CHECK-NEXT: mv a7, a3
2503 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
2504 ; CHECK-NEXT: .LBB42_3: # %vector.body
2505 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2506 ; CHECK-NEXT: vl2re32.v v8, (a6)
2507 ; CHECK-NEXT: vdivu.vx v8, v8, a1
2508 ; CHECK-NEXT: vs2r.v v8, (a6)
2509 ; CHECK-NEXT: sub a7, a7, a2
2510 ; CHECK-NEXT: add a6, a6, a5
2511 ; CHECK-NEXT: bnez a7, .LBB42_3
2512 ; CHECK-NEXT: # %bb.4: # %middle.block
2513 ; CHECK-NEXT: beqz a4, .LBB42_7
2514 ; CHECK-NEXT: .LBB42_5: # %for.body.preheader
2515 ; CHECK-NEXT: slli a2, a3, 2
2516 ; CHECK-NEXT: add a2, a0, a2
2517 ; CHECK-NEXT: lui a3, 1
2518 ; CHECK-NEXT: add a0, a0, a3
2519 ; CHECK-NEXT: .LBB42_6: # %for.body
2520 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2521 ; CHECK-NEXT: lw a3, 0(a2)
2522 ; CHECK-NEXT: divuw a3, a3, a1
2523 ; CHECK-NEXT: sw a3, 0(a2)
2524 ; CHECK-NEXT: addi a2, a2, 4
2525 ; CHECK-NEXT: bne a2, a0, .LBB42_6
2526 ; CHECK-NEXT: .LBB42_7: # %for.cond.cleanup
2529 %0 = call i64 @llvm.vscale.i64()
; NOTE(review): %1 is used below but its definition is not present in this
; listing; presumably it is derived from %0 - confirm against the full file.
; When %1 exceeds 1024 the vector loop is skipped entirely.
2531 %min.iters.check = icmp ugt i64 %1, 1024
2532 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2534 vector.ph: ; preds = %entry
2535 %2 = call i64 @llvm.vscale.i64()
; NOTE(review): %3 is likewise not defined in this listing - confirm.
; %n.vec is 1024 minus the remainder, i.e. the element count the vector loop covers.
2537 %n.mod.vf = urem i64 1024, %3
2538 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat of %x is built in the preheader, outside the vector loop.
2539 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2540 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2541 %4 = call i64 @llvm.vscale.i64()
2543 br label %vector.body
; Vector loop: unsigned divide of each loaded vector by the splat, in place.
2545 vector.body: ; preds = %vector.body, %vector.ph
2546 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2547 %6 = getelementptr inbounds i32, ptr %a, i64 %index
2548 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
2549 %7 = udiv <vscale x 4 x i32> %wide.load, %broadcast.splat
2550 store <vscale x 4 x i32> %7, ptr %6, align 4
; NOTE(review): the step %5 is not defined in this listing - confirm.
2551 %index.next = add nuw i64 %index, %5
2552 %8 = icmp eq i64 %index.next, %n.vec
2553 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when the vector loop covered all 1024 elements.
2555 middle.block: ; preds = %vector.body
2556 %cmp.n = icmp eq i64 %n.mod.vf, 0
2557 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; The scalar loop starts at 0 when entered from %entry, else at %n.vec.
2559 for.body.preheader: ; preds = %entry, %middle.block
2560 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2563 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar loop: one i32 per iteration, up to element 1024.
2566 for.body: ; preds = %for.body.preheader, %for.body
2567 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2568 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2569 %9 = load i32, ptr %arrayidx, align 4
2570 %div = udiv i32 %9, %x
2571 store i32 %div, ptr %arrayidx, align 4
2572 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2573 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2574 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector variant of the signed-divide test: a vector loop over
; <vscale x 4 x i32> followed by a scalar loop over the remaining elements.
; The expected assembly uses vdiv.vx with a1 in the vector loop and divw in
; the scalar loop.
2577 define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
2578 ; CHECK-LABEL: sink_splat_sdiv_scalable:
2579 ; CHECK: # %bb.0: # %entry
2580 ; CHECK-NEXT: csrr a5, vlenb
2581 ; CHECK-NEXT: srli a2, a5, 1
2582 ; CHECK-NEXT: li a3, 1024
2583 ; CHECK-NEXT: bgeu a3, a2, .LBB43_2
2584 ; CHECK-NEXT: # %bb.1:
2585 ; CHECK-NEXT: li a3, 0
2586 ; CHECK-NEXT: j .LBB43_5
2587 ; CHECK-NEXT: .LBB43_2: # %vector.ph
2588 ; CHECK-NEXT: addi a3, a2, -1
2589 ; CHECK-NEXT: andi a4, a3, 1024
2590 ; CHECK-NEXT: xori a3, a4, 1024
2591 ; CHECK-NEXT: slli a5, a5, 1
2592 ; CHECK-NEXT: mv a6, a0
2593 ; CHECK-NEXT: mv a7, a3
2594 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
2595 ; CHECK-NEXT: .LBB43_3: # %vector.body
2596 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2597 ; CHECK-NEXT: vl2re32.v v8, (a6)
2598 ; CHECK-NEXT: vdiv.vx v8, v8, a1
2599 ; CHECK-NEXT: vs2r.v v8, (a6)
2600 ; CHECK-NEXT: sub a7, a7, a2
2601 ; CHECK-NEXT: add a6, a6, a5
2602 ; CHECK-NEXT: bnez a7, .LBB43_3
2603 ; CHECK-NEXT: # %bb.4: # %middle.block
2604 ; CHECK-NEXT: beqz a4, .LBB43_7
2605 ; CHECK-NEXT: .LBB43_5: # %for.body.preheader
2606 ; CHECK-NEXT: slli a2, a3, 2
2607 ; CHECK-NEXT: add a2, a0, a2
2608 ; CHECK-NEXT: lui a3, 1
2609 ; CHECK-NEXT: add a0, a0, a3
2610 ; CHECK-NEXT: .LBB43_6: # %for.body
2611 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2612 ; CHECK-NEXT: lw a3, 0(a2)
2613 ; CHECK-NEXT: divw a3, a3, a1
2614 ; CHECK-NEXT: sw a3, 0(a2)
2615 ; CHECK-NEXT: addi a2, a2, 4
2616 ; CHECK-NEXT: bne a2, a0, .LBB43_6
2617 ; CHECK-NEXT: .LBB43_7: # %for.cond.cleanup
2620 %0 = call i64 @llvm.vscale.i64()
; NOTE(review): %1 is used below but its definition is not present in this
; listing; presumably it is derived from %0 - confirm against the full file.
; When %1 exceeds 1024 the vector loop is skipped entirely.
2622 %min.iters.check = icmp ugt i64 %1, 1024
2623 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2625 vector.ph: ; preds = %entry
2626 %2 = call i64 @llvm.vscale.i64()
; NOTE(review): %3 is likewise not defined in this listing - confirm.
; %n.vec is 1024 minus the remainder, i.e. the element count the vector loop covers.
2628 %n.mod.vf = urem i64 1024, %3
2629 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat of %x is built in the preheader, outside the vector loop.
2630 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2631 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2632 %4 = call i64 @llvm.vscale.i64()
2634 br label %vector.body
; Vector loop: signed divide of each loaded vector by the splat, in place.
2636 vector.body: ; preds = %vector.body, %vector.ph
2637 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2638 %6 = getelementptr inbounds i32, ptr %a, i64 %index
2639 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
2640 %7 = sdiv <vscale x 4 x i32> %wide.load, %broadcast.splat
2641 store <vscale x 4 x i32> %7, ptr %6, align 4
; NOTE(review): the step %5 is not defined in this listing - confirm.
2642 %index.next = add nuw i64 %index, %5
2643 %8 = icmp eq i64 %index.next, %n.vec
2644 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when the vector loop covered all 1024 elements.
2646 middle.block: ; preds = %vector.body
2647 %cmp.n = icmp eq i64 %n.mod.vf, 0
2648 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; The scalar loop starts at 0 when entered from %entry, else at %n.vec.
2650 for.body.preheader: ; preds = %entry, %middle.block
2651 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2654 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar loop: one i32 per iteration, up to element 1024.
2657 for.body: ; preds = %for.body.preheader, %for.body
2658 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2659 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2660 %9 = load i32, ptr %arrayidx, align 4
2661 %div = sdiv i32 %9, %x
2662 store i32 %div, ptr %arrayidx, align 4
2663 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2664 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2665 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector variant of the unsigned-remainder test: a vector loop over
; <vscale x 4 x i32> followed by a scalar loop over the remaining elements.
; The expected assembly uses vremu.vx with a1 in the vector loop and remuw in
; the scalar loop.
2668 define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
2669 ; CHECK-LABEL: sink_splat_urem_scalable:
2670 ; CHECK: # %bb.0: # %entry
2671 ; CHECK-NEXT: csrr a5, vlenb
2672 ; CHECK-NEXT: srli a2, a5, 1
2673 ; CHECK-NEXT: li a3, 1024
2674 ; CHECK-NEXT: bgeu a3, a2, .LBB44_2
2675 ; CHECK-NEXT: # %bb.1:
2676 ; CHECK-NEXT: li a3, 0
2677 ; CHECK-NEXT: j .LBB44_5
2678 ; CHECK-NEXT: .LBB44_2: # %vector.ph
2679 ; CHECK-NEXT: addi a3, a2, -1
2680 ; CHECK-NEXT: andi a4, a3, 1024
2681 ; CHECK-NEXT: xori a3, a4, 1024
2682 ; CHECK-NEXT: slli a5, a5, 1
2683 ; CHECK-NEXT: mv a6, a0
2684 ; CHECK-NEXT: mv a7, a3
2685 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
2686 ; CHECK-NEXT: .LBB44_3: # %vector.body
2687 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2688 ; CHECK-NEXT: vl2re32.v v8, (a6)
2689 ; CHECK-NEXT: vremu.vx v8, v8, a1
2690 ; CHECK-NEXT: vs2r.v v8, (a6)
2691 ; CHECK-NEXT: sub a7, a7, a2
2692 ; CHECK-NEXT: add a6, a6, a5
2693 ; CHECK-NEXT: bnez a7, .LBB44_3
2694 ; CHECK-NEXT: # %bb.4: # %middle.block
2695 ; CHECK-NEXT: beqz a4, .LBB44_7
2696 ; CHECK-NEXT: .LBB44_5: # %for.body.preheader
2697 ; CHECK-NEXT: slli a2, a3, 2
2698 ; CHECK-NEXT: add a2, a0, a2
2699 ; CHECK-NEXT: lui a3, 1
2700 ; CHECK-NEXT: add a0, a0, a3
2701 ; CHECK-NEXT: .LBB44_6: # %for.body
2702 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2703 ; CHECK-NEXT: lw a3, 0(a2)
2704 ; CHECK-NEXT: remuw a3, a3, a1
2705 ; CHECK-NEXT: sw a3, 0(a2)
2706 ; CHECK-NEXT: addi a2, a2, 4
2707 ; CHECK-NEXT: bne a2, a0, .LBB44_6
2708 ; CHECK-NEXT: .LBB44_7: # %for.cond.cleanup
2711 %0 = call i64 @llvm.vscale.i64()
; NOTE(review): %1 is used below but its definition is not present in this
; listing; presumably it is derived from %0 - confirm against the full file.
; When %1 exceeds 1024 the vector loop is skipped entirely.
2713 %min.iters.check = icmp ugt i64 %1, 1024
2714 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2716 vector.ph: ; preds = %entry
2717 %2 = call i64 @llvm.vscale.i64()
; NOTE(review): %3 is likewise not defined in this listing - confirm.
; %n.vec is 1024 minus the remainder, i.e. the element count the vector loop covers.
2719 %n.mod.vf = urem i64 1024, %3
2720 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat of %x is built in the preheader, outside the vector loop.
2721 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2722 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2723 %4 = call i64 @llvm.vscale.i64()
2725 br label %vector.body
; Vector loop: unsigned remainder of each loaded vector by the splat, in place.
2727 vector.body: ; preds = %vector.body, %vector.ph
2728 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2729 %6 = getelementptr inbounds i32, ptr %a, i64 %index
2730 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
2731 %7 = urem <vscale x 4 x i32> %wide.load, %broadcast.splat
2732 store <vscale x 4 x i32> %7, ptr %6, align 4
; NOTE(review): the step %5 is not defined in this listing - confirm.
2733 %index.next = add nuw i64 %index, %5
2734 %8 = icmp eq i64 %index.next, %n.vec
2735 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when the vector loop covered all 1024 elements.
2737 middle.block: ; preds = %vector.body
2738 %cmp.n = icmp eq i64 %n.mod.vf, 0
2739 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; The scalar loop starts at 0 when entered from %entry, else at %n.vec.
2741 for.body.preheader: ; preds = %entry, %middle.block
2742 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2745 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar loop: one i32 per iteration, up to element 1024.
2748 for.body: ; preds = %for.body.preheader, %for.body
2749 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2750 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2751 %9 = load i32, ptr %arrayidx, align 4
2752 %rem = urem i32 %9, %x
2753 store i32 %rem, ptr %arrayidx, align 4
2754 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2755 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2756 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector variant of the signed-remainder test: a vector loop over
; <vscale x 4 x i32> followed by a scalar loop over the remaining elements.
; The expected assembly uses vrem.vx with a1 in the vector loop and remw in
; the scalar loop.
2759 define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
2760 ; CHECK-LABEL: sink_splat_srem_scalable:
2761 ; CHECK: # %bb.0: # %entry
2762 ; CHECK-NEXT: csrr a5, vlenb
2763 ; CHECK-NEXT: srli a2, a5, 1
2764 ; CHECK-NEXT: li a3, 1024
2765 ; CHECK-NEXT: bgeu a3, a2, .LBB45_2
2766 ; CHECK-NEXT: # %bb.1:
2767 ; CHECK-NEXT: li a3, 0
2768 ; CHECK-NEXT: j .LBB45_5
2769 ; CHECK-NEXT: .LBB45_2: # %vector.ph
2770 ; CHECK-NEXT: addi a3, a2, -1
2771 ; CHECK-NEXT: andi a4, a3, 1024
2772 ; CHECK-NEXT: xori a3, a4, 1024
2773 ; CHECK-NEXT: slli a5, a5, 1
2774 ; CHECK-NEXT: mv a6, a0
2775 ; CHECK-NEXT: mv a7, a3
2776 ; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
2777 ; CHECK-NEXT: .LBB45_3: # %vector.body
2778 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2779 ; CHECK-NEXT: vl2re32.v v8, (a6)
2780 ; CHECK-NEXT: vrem.vx v8, v8, a1
2781 ; CHECK-NEXT: vs2r.v v8, (a6)
2782 ; CHECK-NEXT: sub a7, a7, a2
2783 ; CHECK-NEXT: add a6, a6, a5
2784 ; CHECK-NEXT: bnez a7, .LBB45_3
2785 ; CHECK-NEXT: # %bb.4: # %middle.block
2786 ; CHECK-NEXT: beqz a4, .LBB45_7
2787 ; CHECK-NEXT: .LBB45_5: # %for.body.preheader
2788 ; CHECK-NEXT: slli a2, a3, 2
2789 ; CHECK-NEXT: add a2, a0, a2
2790 ; CHECK-NEXT: lui a3, 1
2791 ; CHECK-NEXT: add a0, a0, a3
2792 ; CHECK-NEXT: .LBB45_6: # %for.body
2793 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2794 ; CHECK-NEXT: lw a3, 0(a2)
2795 ; CHECK-NEXT: remw a3, a3, a1
2796 ; CHECK-NEXT: sw a3, 0(a2)
2797 ; CHECK-NEXT: addi a2, a2, 4
2798 ; CHECK-NEXT: bne a2, a0, .LBB45_6
2799 ; CHECK-NEXT: .LBB45_7: # %for.cond.cleanup
2802 %0 = call i64 @llvm.vscale.i64()
; NOTE(review): %1 is used below but its definition is not present in this
; listing; presumably it is derived from %0 - confirm against the full file.
; When %1 exceeds 1024 the vector loop is skipped entirely.
2804 %min.iters.check = icmp ugt i64 %1, 1024
2805 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2807 vector.ph: ; preds = %entry
2808 %2 = call i64 @llvm.vscale.i64()
; NOTE(review): %3 is likewise not defined in this listing - confirm.
; %n.vec is 1024 minus the remainder, i.e. the element count the vector loop covers.
2810 %n.mod.vf = urem i64 1024, %3
2811 %n.vec = sub nsw i64 1024, %n.mod.vf
; The splat of %x is built in the preheader, outside the vector loop.
2812 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2813 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2814 %4 = call i64 @llvm.vscale.i64()
2816 br label %vector.body
; Vector loop: signed remainder of each loaded vector by the splat, in place.
2818 vector.body: ; preds = %vector.body, %vector.ph
2819 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2820 %6 = getelementptr inbounds i32, ptr %a, i64 %index
2821 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
2822 %7 = srem <vscale x 4 x i32> %wide.load, %broadcast.splat
2823 store <vscale x 4 x i32> %7, ptr %6, align 4
; NOTE(review): the step %5 is not defined in this listing - confirm.
2824 %index.next = add nuw i64 %index, %5
2825 %8 = icmp eq i64 %index.next, %n.vec
2826 br i1 %8, label %middle.block, label %vector.body
; Skip the scalar loop when the vector loop covered all 1024 elements.
2828 middle.block: ; preds = %vector.body
2829 %cmp.n = icmp eq i64 %n.mod.vf, 0
2830 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
; The scalar loop starts at 0 when entered from %entry, else at %n.vec.
2832 for.body.preheader: ; preds = %entry, %middle.block
2833 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2836 for.cond.cleanup: ; preds = %for.body, %middle.block
; Scalar loop: one i32 per iteration, up to element 1024.
2839 for.body: ; preds = %for.body.preheader, %for.body
2840 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2841 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2842 %9 = load i32, ptr %arrayidx, align 4
2843 %rem = srem i32 %9, %x
2844 store i32 %rem, ptr %arrayidx, align 4
2845 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2846 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2847 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2850 declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
; Fixed-width <4 x i32> loop applying llvm.smin with a splat of %x as the
; second operand. The expected assembly uses the vector-scalar form vmin.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
2852 define void @sink_splat_min(ptr nocapture %a, i32 signext %x) {
2853 ; CHECK-LABEL: sink_splat_min:
2854 ; CHECK: # %bb.0: # %entry
2855 ; CHECK-NEXT: lui a2, 1
2856 ; CHECK-NEXT: add a2, a0, a2
2857 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2858 ; CHECK-NEXT: .LBB46_1: # %vector.body
2859 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2860 ; CHECK-NEXT: vle32.v v8, (a0)
2861 ; CHECK-NEXT: vmin.vx v8, v8, a1
2862 ; CHECK-NEXT: vse32.v v8, (a0)
2863 ; CHECK-NEXT: addi a0, a0, 16
2864 ; CHECK-NEXT: bne a0, a2, .LBB46_1
2865 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
2868 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2869 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2870 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
2872 vector.body: ; preds = %vector.body, %entry
2873 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2874 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2875 %wide.load = load <4 x i32>, ptr %0, align 4
2876 %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
2877 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
2878 %index.next = add nuw i64 %index, 4
2879 %2 = icmp eq i64 %index.next, 1024
2880 br i1 %2, label %for.cond.cleanup, label %vector.body
2882 for.cond.cleanup: ; preds = %vector.body
; Fixed-width <4 x i32> loop applying llvm.smin with a splat of %x as the
; first operand (commuted form). The expected assembly still uses vmin.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
2886 define void @sink_splat_min_commute(ptr nocapture %a, i32 signext %x) {
2887 ; CHECK-LABEL: sink_splat_min_commute:
2888 ; CHECK: # %bb.0: # %entry
2889 ; CHECK-NEXT: lui a2, 1
2890 ; CHECK-NEXT: add a2, a0, a2
2891 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2892 ; CHECK-NEXT: .LBB47_1: # %vector.body
2893 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2894 ; CHECK-NEXT: vle32.v v8, (a0)
2895 ; CHECK-NEXT: vmin.vx v8, v8, a1
2896 ; CHECK-NEXT: vse32.v v8, (a0)
2897 ; CHECK-NEXT: addi a0, a0, 16
2898 ; CHECK-NEXT: bne a0, a2, .LBB47_1
2899 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
2902 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2903 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2904 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
2906 vector.body: ; preds = %vector.body, %entry
2907 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2908 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2909 %wide.load = load <4 x i32>, ptr %0, align 4
2910 %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
2911 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
2912 %index.next = add nuw i64 %index, 4
2913 %2 = icmp eq i64 %index.next, 1024
2914 br i1 %2, label %for.cond.cleanup, label %vector.body
2916 for.cond.cleanup: ; preds = %vector.body
2920 declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
; Fixed-width <4 x i32> loop applying llvm.smax with a splat of %x as the
; second operand. The expected assembly uses the vector-scalar form vmax.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
2922 define void @sink_splat_max(ptr nocapture %a, i32 signext %x) {
2923 ; CHECK-LABEL: sink_splat_max:
2924 ; CHECK: # %bb.0: # %entry
2925 ; CHECK-NEXT: lui a2, 1
2926 ; CHECK-NEXT: add a2, a0, a2
2927 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2928 ; CHECK-NEXT: .LBB48_1: # %vector.body
2929 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2930 ; CHECK-NEXT: vle32.v v8, (a0)
2931 ; CHECK-NEXT: vmax.vx v8, v8, a1
2932 ; CHECK-NEXT: vse32.v v8, (a0)
2933 ; CHECK-NEXT: addi a0, a0, 16
2934 ; CHECK-NEXT: bne a0, a2, .LBB48_1
2935 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
2938 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2939 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2940 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
2942 vector.body: ; preds = %vector.body, %entry
2943 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2944 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2945 %wide.load = load <4 x i32>, ptr %0, align 4
2946 %1 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
2947 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
2948 %index.next = add nuw i64 %index, 4
2949 %2 = icmp eq i64 %index.next, 1024
2950 br i1 %2, label %for.cond.cleanup, label %vector.body
2952 for.cond.cleanup: ; preds = %vector.body
; Fixed-width <4 x i32> loop applying llvm.smax with a splat of %x as the
; first operand (commuted form). The expected assembly still uses vmax.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
2956 define void @sink_splat_max_commute(ptr nocapture %a, i32 signext %x) {
2957 ; CHECK-LABEL: sink_splat_max_commute:
2958 ; CHECK: # %bb.0: # %entry
2959 ; CHECK-NEXT: lui a2, 1
2960 ; CHECK-NEXT: add a2, a0, a2
2961 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2962 ; CHECK-NEXT: .LBB49_1: # %vector.body
2963 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2964 ; CHECK-NEXT: vle32.v v8, (a0)
2965 ; CHECK-NEXT: vmax.vx v8, v8, a1
2966 ; CHECK-NEXT: vse32.v v8, (a0)
2967 ; CHECK-NEXT: addi a0, a0, 16
2968 ; CHECK-NEXT: bne a0, a2, .LBB49_1
2969 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
2972 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2973 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2974 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
2976 vector.body: ; preds = %vector.body, %entry
2977 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2978 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2979 %wide.load = load <4 x i32>, ptr %0, align 4
2980 %1 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
2981 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
2982 %index.next = add nuw i64 %index, 4
2983 %2 = icmp eq i64 %index.next, 1024
2984 br i1 %2, label %for.cond.cleanup, label %vector.body
2986 for.cond.cleanup: ; preds = %vector.body
2990 declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
; Fixed-width <4 x i32> loop applying llvm.umin with a splat of %x as the
; second operand. The expected assembly uses the vector-scalar form vminu.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
2992 define void @sink_splat_umin(ptr nocapture %a, i32 signext %x) {
2993 ; CHECK-LABEL: sink_splat_umin:
2994 ; CHECK: # %bb.0: # %entry
2995 ; CHECK-NEXT: lui a2, 1
2996 ; CHECK-NEXT: add a2, a0, a2
2997 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2998 ; CHECK-NEXT: .LBB50_1: # %vector.body
2999 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3000 ; CHECK-NEXT: vle32.v v8, (a0)
3001 ; CHECK-NEXT: vminu.vx v8, v8, a1
3002 ; CHECK-NEXT: vse32.v v8, (a0)
3003 ; CHECK-NEXT: addi a0, a0, 16
3004 ; CHECK-NEXT: bne a0, a2, .LBB50_1
3005 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3008 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3009 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3010 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3012 vector.body: ; preds = %vector.body, %entry
3013 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3014 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3015 %wide.load = load <4 x i32>, ptr %0, align 4
3016 %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
3017 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3018 %index.next = add nuw i64 %index, 4
3019 %2 = icmp eq i64 %index.next, 1024
3020 br i1 %2, label %for.cond.cleanup, label %vector.body
3022 for.cond.cleanup: ; preds = %vector.body
; Fixed-width <4 x i32> loop applying llvm.umin with a splat of %x as the
; first operand (commuted form). The expected assembly still uses vminu.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
3026 define void @sink_splat_umin_commute(ptr nocapture %a, i32 signext %x) {
3027 ; CHECK-LABEL: sink_splat_umin_commute:
3028 ; CHECK: # %bb.0: # %entry
3029 ; CHECK-NEXT: lui a2, 1
3030 ; CHECK-NEXT: add a2, a0, a2
3031 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3032 ; CHECK-NEXT: .LBB51_1: # %vector.body
3033 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3034 ; CHECK-NEXT: vle32.v v8, (a0)
3035 ; CHECK-NEXT: vminu.vx v8, v8, a1
3036 ; CHECK-NEXT: vse32.v v8, (a0)
3037 ; CHECK-NEXT: addi a0, a0, 16
3038 ; CHECK-NEXT: bne a0, a2, .LBB51_1
3039 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3042 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3043 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3044 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3046 vector.body: ; preds = %vector.body, %entry
3047 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3048 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3049 %wide.load = load <4 x i32>, ptr %0, align 4
3050 %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
3051 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3052 %index.next = add nuw i64 %index, 4
3053 %2 = icmp eq i64 %index.next, 1024
3054 br i1 %2, label %for.cond.cleanup, label %vector.body
3056 for.cond.cleanup: ; preds = %vector.body
3060 declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
; Fixed-width <4 x i32> loop applying llvm.umax with a splat of %x as the
; second operand. The expected assembly uses the vector-scalar form vmaxu.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
3062 define void @sink_splat_umax(ptr nocapture %a, i32 signext %x) {
3063 ; CHECK-LABEL: sink_splat_umax:
3064 ; CHECK: # %bb.0: # %entry
3065 ; CHECK-NEXT: lui a2, 1
3066 ; CHECK-NEXT: add a2, a0, a2
3067 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3068 ; CHECK-NEXT: .LBB52_1: # %vector.body
3069 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3070 ; CHECK-NEXT: vle32.v v8, (a0)
3071 ; CHECK-NEXT: vmaxu.vx v8, v8, a1
3072 ; CHECK-NEXT: vse32.v v8, (a0)
3073 ; CHECK-NEXT: addi a0, a0, 16
3074 ; CHECK-NEXT: bne a0, a2, .LBB52_1
3075 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3078 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3079 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3080 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3082 vector.body: ; preds = %vector.body, %entry
3083 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3084 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3085 %wide.load = load <4 x i32>, ptr %0, align 4
3086 %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
3087 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3088 %index.next = add nuw i64 %index, 4
3089 %2 = icmp eq i64 %index.next, 1024
3090 br i1 %2, label %for.cond.cleanup, label %vector.body
3092 for.cond.cleanup: ; preds = %vector.body
; Fixed-width <4 x i32> loop applying llvm.umax with a splat of %x as the
; first operand (commuted form). The expected assembly still uses vmaxu.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
3096 define void @sink_splat_umax_commute(ptr nocapture %a, i32 signext %x) {
3097 ; CHECK-LABEL: sink_splat_umax_commute:
3098 ; CHECK: # %bb.0: # %entry
3099 ; CHECK-NEXT: lui a2, 1
3100 ; CHECK-NEXT: add a2, a0, a2
3101 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3102 ; CHECK-NEXT: .LBB53_1: # %vector.body
3103 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3104 ; CHECK-NEXT: vle32.v v8, (a0)
3105 ; CHECK-NEXT: vmaxu.vx v8, v8, a1
3106 ; CHECK-NEXT: vse32.v v8, (a0)
3107 ; CHECK-NEXT: addi a0, a0, 16
3108 ; CHECK-NEXT: bne a0, a2, .LBB53_1
3109 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3112 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3113 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3114 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3116 vector.body: ; preds = %vector.body, %entry
3117 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3118 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3119 %wide.load = load <4 x i32>, ptr %0, align 4
3120 %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
3121 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3122 %index.next = add nuw i64 %index, 4
3123 %2 = icmp eq i64 %index.next, 1024
3124 br i1 %2, label %for.cond.cleanup, label %vector.body
3126 for.cond.cleanup: ; preds = %vector.body
3130 declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
; Fixed-width <4 x i32> loop applying llvm.sadd.sat with a splat of %x as the
; second operand. The expected assembly uses the vector-scalar form vsadd.vx
; with a1 inside the loop; no separate vector splat instruction appears.
3132 define void @sink_splat_sadd_sat(ptr nocapture %a, i32 signext %x) {
3133 ; CHECK-LABEL: sink_splat_sadd_sat:
3134 ; CHECK: # %bb.0: # %entry
3135 ; CHECK-NEXT: lui a2, 1
3136 ; CHECK-NEXT: add a2, a0, a2
3137 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3138 ; CHECK-NEXT: .LBB54_1: # %vector.body
3139 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3140 ; CHECK-NEXT: vle32.v v8, (a0)
3141 ; CHECK-NEXT: vsadd.vx v8, v8, a1
3142 ; CHECK-NEXT: vse32.v v8, (a0)
3143 ; CHECK-NEXT: addi a0, a0, 16
3144 ; CHECK-NEXT: bne a0, a2, .LBB54_1
3145 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3148 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3149 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3150 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3152 vector.body: ; preds = %vector.body, %entry
3153 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3154 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3155 %wide.load = load <4 x i32>, ptr %0, align 4
3156 %1 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
3157 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3158 %index.next = add nuw i64 %index, 4
3159 %2 = icmp eq i64 %index.next, 1024
3160 br i1 %2, label %for.cond.cleanup, label %vector.body
3162 for.cond.cleanup: ; preds = %vector.body
; Fixed-width <4 x i32> loop applying llvm.sadd.sat with a splat of %x as the
; first operand (commuted form). The expected assembly still uses vsadd.vx
; with a1 inside the loop.
3166 define void @sink_splat_sadd_sat_commute(ptr nocapture %a, i32 signext %x) {
3167 ; CHECK-LABEL: sink_splat_sadd_sat_commute:
3168 ; CHECK: # %bb.0: # %entry
3169 ; CHECK-NEXT: lui a2, 1
3170 ; CHECK-NEXT: add a2, a0, a2
3171 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3172 ; CHECK-NEXT: .LBB55_1: # %vector.body
3173 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3174 ; CHECK-NEXT: vle32.v v8, (a0)
3175 ; CHECK-NEXT: vsadd.vx v8, v8, a1
3176 ; CHECK-NEXT: vse32.v v8, (a0)
3177 ; CHECK-NEXT: addi a0, a0, 16
3178 ; CHECK-NEXT: bne a0, a2, .LBB55_1
3179 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3182 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3183 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3184 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3186 vector.body: ; preds = %vector.body, %entry
3187 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3188 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3189 %wide.load = load <4 x i32>, ptr %0, align 4
3190 %1 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
3191 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3192 %index.next = add nuw i64 %index, 4
3193 %2 = icmp eq i64 %index.next, 1024
3194 br i1 %2, label %for.cond.cleanup, label %vector.body
3196 for.cond.cleanup: ; preds = %vector.body
3200 declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
; Fixed-width <4 x i32> loop applying llvm.ssub.sat with a splat of %x as the
; second operand. The expected assembly uses the vector-scalar form vssub.vx
; with a1 inside the loop.
; The induction variable counts up with add nuw, like the other fixed-width
; loops in this file; stepping down with sub nuw from 0 would wrap on the
; first iteration and make the exit condition poison.
; The assertions below follow the output shape of the other forward-counting
; loops; re-run utils/update_llc_test_checks.py to confirm them.
3202 define void @sink_splat_ssub_sat(ptr nocapture %a, i32 signext %x) {
3203 ; CHECK-LABEL: sink_splat_ssub_sat:
3204 ; CHECK: # %bb.0: # %entry
3205 ; CHECK-NEXT: lui a2, 1
3206 ; CHECK-NEXT: add a2, a0, a2
3207 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3208 ; CHECK-NEXT: .LBB56_1: # %vector.body
3209 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3210 ; CHECK-NEXT: vle32.v v8, (a0)
3211 ; CHECK-NEXT: vssub.vx v8, v8, a1
3212 ; CHECK-NEXT: vse32.v v8, (a0)
3213 ; CHECK-NEXT: addi a0, a0, 16
3214 ; CHECK-NEXT: bne a0, a2, .LBB56_1
3215 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3218 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3219 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3220 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3222 vector.body: ; preds = %vector.body, %entry
3223 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3224 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3225 %wide.load = load <4 x i32>, ptr %0, align 4
3226 %1 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
3227 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3228 %index.next = add nuw i64 %index, 4
3229 %2 = icmp eq i64 %index.next, 1024
3230 br i1 %2, label %for.cond.cleanup, label %vector.body
3232 for.cond.cleanup: ; preds = %vector.body
3236 declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
; Fixed-width <4 x i32> loop applying llvm.uadd.sat with a splat of %x as the
; second operand. The expected assembly uses the vector-scalar form vsaddu.vx
; with a1 inside the loop; no separate vector splat instruction appears.
3238 define void @sink_splat_uadd_sat(ptr nocapture %a, i32 signext %x) {
3239 ; CHECK-LABEL: sink_splat_uadd_sat:
3240 ; CHECK: # %bb.0: # %entry
3241 ; CHECK-NEXT: lui a2, 1
3242 ; CHECK-NEXT: add a2, a0, a2
3243 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3244 ; CHECK-NEXT: .LBB57_1: # %vector.body
3245 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3246 ; CHECK-NEXT: vle32.v v8, (a0)
3247 ; CHECK-NEXT: vsaddu.vx v8, v8, a1
3248 ; CHECK-NEXT: vse32.v v8, (a0)
3249 ; CHECK-NEXT: addi a0, a0, 16
3250 ; CHECK-NEXT: bne a0, a2, .LBB57_1
3251 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3254 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3255 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3256 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3258 vector.body: ; preds = %vector.body, %entry
3259 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3260 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3261 %wide.load = load <4 x i32>, ptr %0, align 4
3262 %1 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
3263 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3264 %index.next = add nuw i64 %index, 4
3265 %2 = icmp eq i64 %index.next, 1024
3266 br i1 %2, label %for.cond.cleanup, label %vector.body
3268 for.cond.cleanup: ; preds = %vector.body
; Fixed-width <4 x i32> loop applying llvm.uadd.sat with a splat of %x as the
; first operand (commuted form). The expected assembly still uses vsaddu.vx
; with a1 inside the loop.
3272 define void @sink_splat_uadd_sat_commute(ptr nocapture %a, i32 signext %x) {
3273 ; CHECK-LABEL: sink_splat_uadd_sat_commute:
3274 ; CHECK: # %bb.0: # %entry
3275 ; CHECK-NEXT: lui a2, 1
3276 ; CHECK-NEXT: add a2, a0, a2
3277 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3278 ; CHECK-NEXT: .LBB58_1: # %vector.body
3279 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3280 ; CHECK-NEXT: vle32.v v8, (a0)
3281 ; CHECK-NEXT: vsaddu.vx v8, v8, a1
3282 ; CHECK-NEXT: vse32.v v8, (a0)
3283 ; CHECK-NEXT: addi a0, a0, 16
3284 ; CHECK-NEXT: bne a0, a2, .LBB58_1
3285 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat of %x is built once, ahead of the loop.
3288 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3289 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3290 br label %vector.body
; Each iteration processes 4 i32 elements at %a + %index, in place.
3292 vector.body: ; preds = %vector.body, %entry
3293 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3294 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3295 %wide.load = load <4 x i32>, ptr %0, align 4
3296 %1 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
3297 store <4 x i32> %1, ptr %0, align 4
; Step by 4 elements; the loop exits once the index reaches 1024.
3298 %index.next = add nuw i64 %index, 4
3299 %2 = icmp eq i64 %index.next, 1024
3300 br i1 %2, label %for.cond.cleanup, label %vector.body
3302 for.cond.cleanup: ; preds = %vector.body
3306 declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
; Saturating unsigned subtract with a splat subtrahend: the splat of %x built
; in %entry is operand 1 of @llvm.usub.sat, so it is expected to fold into
; vssubu.vx with the scalar taken from a1.
;
; The induction variable counts up by 4 from 0 until it reaches 1024, like the
; neighbouring loops. It previously stepped with `sub nuw`, which is poison on
; the very first iteration (0 - 4 wraps) and made the expected code walk
; backwards from %a with a wrapped counter.
; NOTE: the assertions below were adjusted by hand to mirror the identical
; loop shape in sink_splat_uadd_sat_commute; re-run
; utils/update_llc_test_checks.py to confirm them.
3308 define void @sink_splat_usub_sat(ptr nocapture %a, i32 signext %x) {
3309 ; CHECK-LABEL: sink_splat_usub_sat:
3310 ; CHECK: # %bb.0: # %entry
3311 ; CHECK-NEXT: lui a2, 1
3312 ; CHECK-NEXT: add a2, a0, a2
3313 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3314 ; CHECK-NEXT: .LBB59_1: # %vector.body
3315 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3316 ; CHECK-NEXT: vle32.v v8, (a0)
3317 ; CHECK-NEXT: vssubu.vx v8, v8, a1
3318 ; CHECK-NEXT: vse32.v v8, (a0)
3319 ; CHECK-NEXT: addi a0, a0, 16
3320 ; CHECK-NEXT: bne a0, a2, .LBB59_1
3321 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3324 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3325 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3326 br label %vector.body
3328 vector.body: ; preds = %vector.body, %entry
3329 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3330 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3331 %wide.load = load <4 x i32>, ptr %0, align 4
3332 %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
3333 store <4 x i32> %1, ptr %0, align 4
3334 %index.next = add nuw i64 %index, 4 ; step forward one <4 x i32> per iteration
3335 %2 = icmp eq i64 %index.next, 1024
3336 br i1 %2, label %for.cond.cleanup, label %vector.body
3338 for.cond.cleanup: ; preds = %vector.body
3342 declare <4 x i32> @llvm.vp.mul.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked, EVL-predicated multiply (@llvm.vp.mul) of a loaded <4 x i32> by a
; splat of %x. The assertions expect the splat to fold into vmul.vx (scalar
; in a1) under mask v0.t. Inside the loop the expected code switches VL to the
; value in a2 for the VP operation and back to the fixed VL of 4 before the
; store.
3344 define void @sink_splat_vp_mul(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3345 ; CHECK-LABEL: sink_splat_vp_mul:
3346 ; CHECK: # %bb.0: # %entry
3347 ; CHECK-NEXT: lui a3, 1
3348 ; CHECK-NEXT: add a3, a0, a3
3349 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3350 ; CHECK-NEXT: .LBB60_1: # %vector.body
3351 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3352 ; CHECK-NEXT: vle32.v v8, (a0)
3353 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3354 ; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t
3355 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3356 ; CHECK-NEXT: vse32.v v8, (a0)
3357 ; CHECK-NEXT: addi a0, a0, 16
3358 ; CHECK-NEXT: bne a0, a3, .LBB60_1
3359 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3362 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3363 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3364 br label %vector.body
3366 vector.body: ; preds = %vector.body, %entry
3367 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3368 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3369 %wide.load = load <4 x i32>, ptr %0, align 4
3370 %1 = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3371 store <4 x i32> %1, ptr %0, align 4
3372 %index.next = add nuw i64 %index, 4
3373 %2 = icmp eq i64 %index.next, 1024
3374 br i1 %2, label %for.cond.cleanup, label %vector.body
3376 for.cond.cleanup: ; preds = %vector.body
3380 declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked, EVL-predicated add (@llvm.vp.add) with the splat of %x as the second
; operand. The assertions expect vadd.vx (scalar in a1) under mask v0.t, with
; VL taken from a2 for the VP operation and reset to 4 for the store.
3382 define void @sink_splat_vp_add(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3383 ; CHECK-LABEL: sink_splat_vp_add:
3384 ; CHECK: # %bb.0: # %entry
3385 ; CHECK-NEXT: lui a3, 1
3386 ; CHECK-NEXT: add a3, a0, a3
3387 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3388 ; CHECK-NEXT: .LBB61_1: # %vector.body
3389 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3390 ; CHECK-NEXT: vle32.v v8, (a0)
3391 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3392 ; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t
3393 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3394 ; CHECK-NEXT: vse32.v v8, (a0)
3395 ; CHECK-NEXT: addi a0, a0, 16
3396 ; CHECK-NEXT: bne a0, a3, .LBB61_1
3397 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3400 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3401 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3402 br label %vector.body
3404 vector.body: ; preds = %vector.body, %entry
3405 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3406 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3407 %wide.load = load <4 x i32>, ptr %0, align 4
3408 %1 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3409 store <4 x i32> %1, ptr %0, align 4
3410 %index.next = add nuw i64 %index, 4
3411 %2 = icmp eq i64 %index.next, 1024
3412 br i1 %2, label %for.cond.cleanup, label %vector.body
3414 for.cond.cleanup: ; preds = %vector.body
; Commuted variant of sink_splat_vp_add: the splat of %x is operand 0 of
; @llvm.vp.add. The expected loop body is the same as in the non-commuted
; test, i.e. vadd.vx with the scalar in a1 under mask v0.t.
3418 define void @sink_splat_vp_add_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3419 ; CHECK-LABEL: sink_splat_vp_add_commute:
3420 ; CHECK: # %bb.0: # %entry
3421 ; CHECK-NEXT: lui a3, 1
3422 ; CHECK-NEXT: add a3, a0, a3
3423 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3424 ; CHECK-NEXT: .LBB62_1: # %vector.body
3425 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3426 ; CHECK-NEXT: vle32.v v8, (a0)
3427 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3428 ; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t
3429 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3430 ; CHECK-NEXT: vse32.v v8, (a0)
3431 ; CHECK-NEXT: addi a0, a0, 16
3432 ; CHECK-NEXT: bne a0, a3, .LBB62_1
3433 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3436 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3437 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3438 br label %vector.body
3440 vector.body: ; preds = %vector.body, %entry
3441 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3442 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3443 %wide.load = load <4 x i32>, ptr %0, align 4
3444 %1 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) ; splat is operand 0
3445 store <4 x i32> %1, ptr %0, align 4
3446 %index.next = add nuw i64 %index, 4
3447 %2 = icmp eq i64 %index.next, 1024
3448 br i1 %2, label %for.cond.cleanup, label %vector.body
3450 for.cond.cleanup: ; preds = %vector.body
3454 declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked, EVL-predicated subtract (@llvm.vp.sub) with the splat of %x as the
; subtrahend (operand 1). The assertions expect vsub.vx (scalar in a1) under
; mask v0.t, with VL taken from a2 for the VP operation and reset to 4 for
; the store.
3456 define void @sink_splat_vp_sub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3457 ; CHECK-LABEL: sink_splat_vp_sub:
3458 ; CHECK: # %bb.0: # %entry
3459 ; CHECK-NEXT: lui a3, 1
3460 ; CHECK-NEXT: add a3, a0, a3
3461 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3462 ; CHECK-NEXT: .LBB63_1: # %vector.body
3463 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3464 ; CHECK-NEXT: vle32.v v8, (a0)
3465 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3466 ; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
3467 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3468 ; CHECK-NEXT: vse32.v v8, (a0)
3469 ; CHECK-NEXT: addi a0, a0, 16
3470 ; CHECK-NEXT: bne a0, a3, .LBB63_1
3471 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3474 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3475 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3476 br label %vector.body
3478 vector.body: ; preds = %vector.body, %entry
3479 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3480 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3481 %wide.load = load <4 x i32>, ptr %0, align 4
3482 %1 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3483 store <4 x i32> %1, ptr %0, align 4
3484 %index.next = add nuw i64 %index, 4
3485 %2 = icmp eq i64 %index.next, 1024
3486 br i1 %2, label %for.cond.cleanup, label %vector.body
3488 for.cond.cleanup: ; preds = %vector.body
; Reversed subtract: @llvm.vp.sub is called with the splat of %x as the
; minuend (operand 0) and the loaded vector as the subtrahend. The assertions
; expect this to select vrsub.vx (scalar in a1) under mask v0.t rather than
; keeping a vector splat.
3492 define void @sink_splat_vp_rsub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3493 ; CHECK-LABEL: sink_splat_vp_rsub:
3494 ; CHECK: # %bb.0: # %entry
3495 ; CHECK-NEXT: lui a3, 1
3496 ; CHECK-NEXT: add a3, a0, a3
3497 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3498 ; CHECK-NEXT: .LBB64_1: # %vector.body
3499 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3500 ; CHECK-NEXT: vle32.v v8, (a0)
3501 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3502 ; CHECK-NEXT: vrsub.vx v8, v8, a1, v0.t
3503 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3504 ; CHECK-NEXT: vse32.v v8, (a0)
3505 ; CHECK-NEXT: addi a0, a0, 16
3506 ; CHECK-NEXT: bne a0, a3, .LBB64_1
3507 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3510 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3511 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3512 br label %vector.body
3514 vector.body: ; preds = %vector.body, %entry
3515 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3516 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3517 %wide.load = load <4 x i32>, ptr %0, align 4
3518 %1 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) ; splat - load
3519 store <4 x i32> %1, ptr %0, align 4
3520 %index.next = add nuw i64 %index, 4
3521 %2 = icmp eq i64 %index.next, 1024
3522 br i1 %2, label %for.cond.cleanup, label %vector.body
3524 for.cond.cleanup: ; preds = %vector.body
3528 declare <4 x i32> @llvm.vp.shl.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked, EVL-predicated shift left (@llvm.vp.shl) where the shift amount is a
; splat of %x. The assertions expect vsll.vx (scalar shift amount in a1) under
; mask v0.t, with VL taken from a2 for the VP operation and reset to 4 for
; the store.
3530 define void @sink_splat_vp_shl(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3531 ; CHECK-LABEL: sink_splat_vp_shl:
3532 ; CHECK: # %bb.0: # %entry
3533 ; CHECK-NEXT: lui a3, 1
3534 ; CHECK-NEXT: add a3, a0, a3
3535 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3536 ; CHECK-NEXT: .LBB65_1: # %vector.body
3537 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3538 ; CHECK-NEXT: vle32.v v8, (a0)
3539 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3540 ; CHECK-NEXT: vsll.vx v8, v8, a1, v0.t
3541 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3542 ; CHECK-NEXT: vse32.v v8, (a0)
3543 ; CHECK-NEXT: addi a0, a0, 16
3544 ; CHECK-NEXT: bne a0, a3, .LBB65_1
3545 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3548 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3549 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3550 br label %vector.body
3552 vector.body: ; preds = %vector.body, %entry
3553 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3554 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3555 %wide.load = load <4 x i32>, ptr %0, align 4
3556 %1 = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3557 store <4 x i32> %1, ptr %0, align 4
3558 %index.next = add nuw i64 %index, 4
3559 %2 = icmp eq i64 %index.next, 1024
3560 br i1 %2, label %for.cond.cleanup, label %vector.body
3562 for.cond.cleanup: ; preds = %vector.body
3566 declare <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked, EVL-predicated logical shift right (@llvm.vp.lshr) where the shift
; amount is a splat of %x. The assertions expect vsrl.vx (scalar shift amount
; in a1) under mask v0.t.
3568 define void @sink_splat_vp_lshr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3569 ; CHECK-LABEL: sink_splat_vp_lshr:
3570 ; CHECK: # %bb.0: # %entry
3571 ; CHECK-NEXT: lui a3, 1
3572 ; CHECK-NEXT: add a3, a0, a3
3573 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3574 ; CHECK-NEXT: .LBB66_1: # %vector.body
3575 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3576 ; CHECK-NEXT: vle32.v v8, (a0)
3577 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3578 ; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
3579 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3580 ; CHECK-NEXT: vse32.v v8, (a0)
3581 ; CHECK-NEXT: addi a0, a0, 16
3582 ; CHECK-NEXT: bne a0, a3, .LBB66_1
3583 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3586 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3587 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3588 br label %vector.body
3590 vector.body: ; preds = %vector.body, %entry
3591 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3592 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3593 %wide.load = load <4 x i32>, ptr %0, align 4
3594 %1 = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3595 store <4 x i32> %1, ptr %0, align 4
3596 %index.next = add nuw i64 %index, 4
3597 %2 = icmp eq i64 %index.next, 1024
3598 br i1 %2, label %for.cond.cleanup, label %vector.body
3600 for.cond.cleanup: ; preds = %vector.body
3604 declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked, EVL-predicated arithmetic shift right (@llvm.vp.ashr) where the
; shift amount is a splat of %x. The assertions expect vsra.vx (scalar shift
; amount in a1) under mask v0.t.
3606 define void @sink_splat_vp_ashr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3607 ; CHECK-LABEL: sink_splat_vp_ashr:
3608 ; CHECK: # %bb.0: # %entry
3609 ; CHECK-NEXT: lui a3, 1
3610 ; CHECK-NEXT: add a3, a0, a3
3611 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3612 ; CHECK-NEXT: .LBB67_1: # %vector.body
3613 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3614 ; CHECK-NEXT: vle32.v v8, (a0)
3615 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3616 ; CHECK-NEXT: vsra.vx v8, v8, a1, v0.t
3617 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3618 ; CHECK-NEXT: vse32.v v8, (a0)
3619 ; CHECK-NEXT: addi a0, a0, 16
3620 ; CHECK-NEXT: bne a0, a3, .LBB67_1
3621 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3624 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3625 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3626 br label %vector.body
3628 vector.body: ; preds = %vector.body, %entry
3629 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3630 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3631 %wide.load = load <4 x i32>, ptr %0, align 4
3632 %1 = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3633 store <4 x i32> %1, ptr %0, align 4
3634 %index.next = add nuw i64 %index, 4
3635 %2 = icmp eq i64 %index.next, 1024
3636 br i1 %2, label %for.cond.cleanup, label %vector.body
3638 for.cond.cleanup: ; preds = %vector.body
3642 declare <4 x float> @llvm.vp.fmul.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
; Floating-point counterpart of the VP multiply test: @llvm.vp.fmul of a
; loaded <4 x float> by a splat of the float %x. The assertions expect
; vfmul.vf with the scalar in fa0 under mask v0.t. Because %x travels in an FP
; register here, VL for the VP operation comes from a1 and the loop bound
; lives in a2.
3644 define void @sink_splat_vp_fmul(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3645 ; CHECK-LABEL: sink_splat_vp_fmul:
3646 ; CHECK: # %bb.0: # %entry
3647 ; CHECK-NEXT: lui a2, 1
3648 ; CHECK-NEXT: add a2, a0, a2
3649 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3650 ; CHECK-NEXT: .LBB68_1: # %vector.body
3651 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3652 ; CHECK-NEXT: vle32.v v8, (a0)
3653 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3654 ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t
3655 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3656 ; CHECK-NEXT: vse32.v v8, (a0)
3657 ; CHECK-NEXT: addi a0, a0, 16
3658 ; CHECK-NEXT: bne a0, a2, .LBB68_1
3659 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3662 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3663 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3664 br label %vector.body
3666 vector.body: ; preds = %vector.body, %entry
3667 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3668 %0 = getelementptr inbounds float, ptr %a, i64 %index
3669 %wide.load = load <4 x float>, ptr %0, align 4
3670 %1 = call <4 x float> @llvm.vp.fmul.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3671 store <4 x float> %1, ptr %0, align 4
3672 %index.next = add nuw i64 %index, 4
3673 %2 = icmp eq i64 %index.next, 1024
3674 br i1 %2, label %for.cond.cleanup, label %vector.body
3676 for.cond.cleanup: ; preds = %vector.body
3680 declare <4 x float> @llvm.vp.fdiv.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
; @llvm.vp.fdiv with the splat of the float %x as the divisor (operand 1).
; The assertions expect vfdiv.vf with the scalar in fa0 under mask v0.t, with
; VL taken from a1 for the VP operation and reset to 4 for the store.
3682 define void @sink_splat_vp_fdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3683 ; CHECK-LABEL: sink_splat_vp_fdiv:
3684 ; CHECK: # %bb.0: # %entry
3685 ; CHECK-NEXT: lui a2, 1
3686 ; CHECK-NEXT: add a2, a0, a2
3687 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3688 ; CHECK-NEXT: .LBB69_1: # %vector.body
3689 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3690 ; CHECK-NEXT: vle32.v v8, (a0)
3691 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3692 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t
3693 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3694 ; CHECK-NEXT: vse32.v v8, (a0)
3695 ; CHECK-NEXT: addi a0, a0, 16
3696 ; CHECK-NEXT: bne a0, a2, .LBB69_1
3697 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3700 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3701 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3702 br label %vector.body
3704 vector.body: ; preds = %vector.body, %entry
3705 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3706 %0 = getelementptr inbounds float, ptr %a, i64 %index
3707 %wide.load = load <4 x float>, ptr %0, align 4
3708 %1 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3709 store <4 x float> %1, ptr %0, align 4
3710 %index.next = add nuw i64 %index, 4
3711 %2 = icmp eq i64 %index.next, 1024
3712 br i1 %2, label %for.cond.cleanup, label %vector.body
3714 for.cond.cleanup: ; preds = %vector.body
; Reversed divide: @llvm.vp.fdiv is called with the splat of the float %x as
; the dividend (operand 0) and the loaded vector as the divisor. The
; assertions expect this to select vfrdiv.vf (scalar in fa0) under mask v0.t.
3718 define void @sink_splat_vp_frdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3719 ; CHECK-LABEL: sink_splat_vp_frdiv:
3720 ; CHECK: # %bb.0: # %entry
3721 ; CHECK-NEXT: lui a2, 1
3722 ; CHECK-NEXT: add a2, a0, a2
3723 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3724 ; CHECK-NEXT: .LBB70_1: # %vector.body
3725 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3726 ; CHECK-NEXT: vle32.v v8, (a0)
3727 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3728 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t
3729 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3730 ; CHECK-NEXT: vse32.v v8, (a0)
3731 ; CHECK-NEXT: addi a0, a0, 16
3732 ; CHECK-NEXT: bne a0, a2, .LBB70_1
3733 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3736 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3737 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3738 br label %vector.body
3740 vector.body: ; preds = %vector.body, %entry
3741 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3742 %0 = getelementptr inbounds float, ptr %a, i64 %index
3743 %wide.load = load <4 x float>, ptr %0, align 4
3744 %1 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl) ; splat / load
3745 store <4 x float> %1, ptr %0, align 4
3746 %index.next = add nuw i64 %index, 4
3747 %2 = icmp eq i64 %index.next, 1024
3748 br i1 %2, label %for.cond.cleanup, label %vector.body
3750 for.cond.cleanup: ; preds = %vector.body
3754 declare <4 x float> @llvm.vp.fadd.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
; @llvm.vp.fadd of a loaded <4 x float> and a splat of the float %x. The
; assertions expect vfadd.vf with the scalar in fa0 under mask v0.t, with VL
; taken from a1 for the VP operation and reset to 4 for the store.
3756 define void @sink_splat_vp_fadd(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3757 ; CHECK-LABEL: sink_splat_vp_fadd:
3758 ; CHECK: # %bb.0: # %entry
3759 ; CHECK-NEXT: lui a2, 1
3760 ; CHECK-NEXT: add a2, a0, a2
3761 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3762 ; CHECK-NEXT: .LBB71_1: # %vector.body
3763 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3764 ; CHECK-NEXT: vle32.v v8, (a0)
3765 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3766 ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t
3767 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3768 ; CHECK-NEXT: vse32.v v8, (a0)
3769 ; CHECK-NEXT: addi a0, a0, 16
3770 ; CHECK-NEXT: bne a0, a2, .LBB71_1
3771 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3774 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3775 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3776 br label %vector.body
3778 vector.body: ; preds = %vector.body, %entry
3779 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3780 %0 = getelementptr inbounds float, ptr %a, i64 %index
3781 %wide.load = load <4 x float>, ptr %0, align 4
3782 %1 = call <4 x float> @llvm.vp.fadd.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3783 store <4 x float> %1, ptr %0, align 4
3784 %index.next = add nuw i64 %index, 4
3785 %2 = icmp eq i64 %index.next, 1024
3786 br i1 %2, label %for.cond.cleanup, label %vector.body
3788 for.cond.cleanup: ; preds = %vector.body
3792 declare <4 x float> @llvm.vp.fsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
; @llvm.vp.fsub with the splat of the float %x as the subtrahend (operand 1).
; The assertions expect vfsub.vf with the scalar in fa0 under mask v0.t, with
; VL taken from a1 for the VP operation and reset to 4 for the store.
3794 define void @sink_splat_vp_fsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3795 ; CHECK-LABEL: sink_splat_vp_fsub:
3796 ; CHECK: # %bb.0: # %entry
3797 ; CHECK-NEXT: lui a2, 1
3798 ; CHECK-NEXT: add a2, a0, a2
3799 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3800 ; CHECK-NEXT: .LBB72_1: # %vector.body
3801 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3802 ; CHECK-NEXT: vle32.v v8, (a0)
3803 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3804 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
3805 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3806 ; CHECK-NEXT: vse32.v v8, (a0)
3807 ; CHECK-NEXT: addi a0, a0, 16
3808 ; CHECK-NEXT: bne a0, a2, .LBB72_1
3809 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3812 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3813 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3814 br label %vector.body
3816 vector.body: ; preds = %vector.body, %entry
3817 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3818 %0 = getelementptr inbounds float, ptr %a, i64 %index
3819 %wide.load = load <4 x float>, ptr %0, align 4
3820 %1 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3821 store <4 x float> %1, ptr %0, align 4
3822 %index.next = add nuw i64 %index, 4
3823 %2 = icmp eq i64 %index.next, 1024
3824 br i1 %2, label %for.cond.cleanup, label %vector.body
3826 for.cond.cleanup: ; preds = %vector.body
; NOTE: there is no separate reversed-subtract VP intrinsic. A declaration of
; @llvm.vp.frsub.v4i32 used to sit here; it named an intrinsic the LangRef does
; not define and no function visible in this part of the file called it.
; sink_splat_vp_frsub gets the reversed form by calling @llvm.vp.fsub with the
; splat as the first operand.
; Reversed subtract: @llvm.vp.fsub is called with the splat of the float %x as
; the minuend (operand 0) and the loaded vector as the subtrahend. The
; assertions expect this to select vfrsub.vf (scalar in fa0) under mask v0.t.
3832 define void @sink_splat_vp_frsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3833 ; CHECK-LABEL: sink_splat_vp_frsub:
3834 ; CHECK: # %bb.0: # %entry
3835 ; CHECK-NEXT: lui a2, 1
3836 ; CHECK-NEXT: add a2, a0, a2
3837 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3838 ; CHECK-NEXT: .LBB73_1: # %vector.body
3839 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3840 ; CHECK-NEXT: vle32.v v8, (a0)
3841 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3842 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t
3843 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3844 ; CHECK-NEXT: vse32.v v8, (a0)
3845 ; CHECK-NEXT: addi a0, a0, 16
3846 ; CHECK-NEXT: bne a0, a2, .LBB73_1
3847 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3850 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3851 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3852 br label %vector.body
3854 vector.body: ; preds = %vector.body, %entry
3855 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3856 %0 = getelementptr inbounds float, ptr %a, i64 %index
3857 %wide.load = load <4 x float>, ptr %0, align 4
3858 %1 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl) ; splat - load
3859 store <4 x float> %1, ptr %0, align 4
3860 %index.next = add nuw i64 %index, 4
3861 %2 = icmp eq i64 %index.next, 1024
3862 br i1 %2, label %for.cond.cleanup, label %vector.body
3864 for.cond.cleanup: ; preds = %vector.body
3868 declare <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; @llvm.vp.udiv with the splat of %x as the divisor (operand 1). The
; assertions expect vdivu.vx (scalar in a1) under mask v0.t, with VL taken
; from a2 for the VP operation and reset to 4 for the store.
3870 define void @sink_splat_vp_udiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3871 ; CHECK-LABEL: sink_splat_vp_udiv:
3872 ; CHECK: # %bb.0: # %entry
3873 ; CHECK-NEXT: lui a3, 1
3874 ; CHECK-NEXT: add a3, a0, a3
3875 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3876 ; CHECK-NEXT: .LBB74_1: # %vector.body
3877 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3878 ; CHECK-NEXT: vle32.v v8, (a0)
3879 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3880 ; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t
3881 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3882 ; CHECK-NEXT: vse32.v v8, (a0)
3883 ; CHECK-NEXT: addi a0, a0, 16
3884 ; CHECK-NEXT: bne a0, a3, .LBB74_1
3885 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3888 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3889 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3890 br label %vector.body
3892 vector.body: ; preds = %vector.body, %entry
3893 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3894 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3895 %wide.load = load <4 x i32>, ptr %0, align 4
3896 %1 = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3897 store <4 x i32> %1, ptr %0, align 4
3898 %index.next = add nuw i64 %index, 4
3899 %2 = icmp eq i64 %index.next, 1024
3900 br i1 %2, label %for.cond.cleanup, label %vector.body
3902 for.cond.cleanup: ; preds = %vector.body
3906 declare <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; @llvm.vp.sdiv with the splat of %x as the divisor (operand 1). The
; assertions expect vdiv.vx (scalar in a1) under mask v0.t, with VL taken
; from a2 for the VP operation and reset to 4 for the store.
3908 define void @sink_splat_vp_sdiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3909 ; CHECK-LABEL: sink_splat_vp_sdiv:
3910 ; CHECK: # %bb.0: # %entry
3911 ; CHECK-NEXT: lui a3, 1
3912 ; CHECK-NEXT: add a3, a0, a3
3913 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3914 ; CHECK-NEXT: .LBB75_1: # %vector.body
3915 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3916 ; CHECK-NEXT: vle32.v v8, (a0)
3917 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3918 ; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
3919 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3920 ; CHECK-NEXT: vse32.v v8, (a0)
3921 ; CHECK-NEXT: addi a0, a0, 16
3922 ; CHECK-NEXT: bne a0, a3, .LBB75_1
3923 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3926 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3927 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3928 br label %vector.body
3930 vector.body: ; preds = %vector.body, %entry
3931 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3932 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3933 %wide.load = load <4 x i32>, ptr %0, align 4
3934 %1 = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3935 store <4 x i32> %1, ptr %0, align 4
3936 %index.next = add nuw i64 %index, 4
3937 %2 = icmp eq i64 %index.next, 1024
3938 br i1 %2, label %for.cond.cleanup, label %vector.body
3940 for.cond.cleanup: ; preds = %vector.body
3944 declare <4 x i32> @llvm.vp.urem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; @llvm.vp.urem with the splat of %x as the divisor (operand 1). The
; assertions expect vremu.vx (scalar in a1) under mask v0.t, with VL taken
; from a2 for the VP operation and reset to 4 for the store.
3946 define void @sink_splat_vp_urem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3947 ; CHECK-LABEL: sink_splat_vp_urem:
3948 ; CHECK: # %bb.0: # %entry
3949 ; CHECK-NEXT: lui a3, 1
3950 ; CHECK-NEXT: add a3, a0, a3
3951 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3952 ; CHECK-NEXT: .LBB76_1: # %vector.body
3953 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3954 ; CHECK-NEXT: vle32.v v8, (a0)
3955 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3956 ; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t
3957 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3958 ; CHECK-NEXT: vse32.v v8, (a0)
3959 ; CHECK-NEXT: addi a0, a0, 16
3960 ; CHECK-NEXT: bne a0, a3, .LBB76_1
3961 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
3964 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3965 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3966 br label %vector.body
3968 vector.body: ; preds = %vector.body, %entry
3969 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3970 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3971 %wide.load = load <4 x i32>, ptr %0, align 4
3972 %1 = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3973 store <4 x i32> %1, ptr %0, align 4
3974 %index.next = add nuw i64 %index, 4
3975 %2 = icmp eq i64 %index.next, 1024
3976 br i1 %2, label %for.cond.cleanup, label %vector.body
3978 for.cond.cleanup: ; preds = %vector.body
3982 declare <4 x i32> @llvm.vp.srem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; @llvm.vp.srem with the splat of %x as the divisor (operand 1). The
; assertions expect vrem.vx (scalar in a1) under mask v0.t. This is the
; foldable baseline for the _commute variant of the same test, where the
; splat is the dividend instead.
3984 define void @sink_splat_vp_srem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3985 ; CHECK-LABEL: sink_splat_vp_srem:
3986 ; CHECK: # %bb.0: # %entry
3987 ; CHECK-NEXT: lui a3, 1
3988 ; CHECK-NEXT: add a3, a0, a3
3989 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3990 ; CHECK-NEXT: .LBB77_1: # %vector.body
3991 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3992 ; CHECK-NEXT: vle32.v v8, (a0)
3993 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3994 ; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t
3995 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3996 ; CHECK-NEXT: vse32.v v8, (a0)
3997 ; CHECK-NEXT: addi a0, a0, 16
3998 ; CHECK-NEXT: bne a0, a3, .LBB77_1
3999 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4002 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
4003 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4004 br label %vector.body
4006 vector.body: ; preds = %vector.body, %entry
4007 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4008 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4009 %wide.load = load <4 x i32>, ptr %0, align 4
4010 %1 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
4011 store <4 x i32> %1, ptr %0, align 4
4012 %index.next = add nuw i64 %index, 4
4013 %2 = icmp eq i64 %index.next, 1024
4014 br i1 %2, label %for.cond.cleanup, label %vector.body
4016 for.cond.cleanup: ; preds = %vector.body
4020 ; Check that we don't sink a splat operand that has no chance of being folded.
; Negative test: the splat of %x is the dividend (operand 0) of
; @llvm.vp.srem. In the expected code for sink_splat_vp_srem the scalar of
; vrem.vx is the divisor, so this operand order cannot use the .vx form. The
; assertions therefore expect the splat to stay in the entry block as
; vmv.v.x v8, a1 and the loop to use the vector-vector form vrem.vv. With a1
; free after the splat, it is reused for the loop bound.
4022 define void @sink_splat_vp_srem_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
4023 ; CHECK-LABEL: sink_splat_vp_srem_commute:
4024 ; CHECK: # %bb.0: # %entry
4025 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4026 ; CHECK-NEXT: vmv.v.x v8, a1
4027 ; CHECK-NEXT: lui a1, 1
4028 ; CHECK-NEXT: add a1, a0, a1
4029 ; CHECK-NEXT: .LBB78_1: # %vector.body
4030 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4031 ; CHECK-NEXT: vle32.v v9, (a0)
4032 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
4033 ; CHECK-NEXT: vrem.vv v9, v8, v9, v0.t
4034 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4035 ; CHECK-NEXT: vse32.v v9, (a0)
4036 ; CHECK-NEXT: addi a0, a0, 16
4037 ; CHECK-NEXT: bne a0, a1, .LBB78_1
4038 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4041 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
4042 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4043 br label %vector.body
4045 vector.body: ; preds = %vector.body, %entry
4046 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4047 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4048 %wide.load = load <4 x i32>, ptr %0, align 4
4049 %1 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) ; splat is the dividend
4050 store <4 x i32> %1, ptr %0, align 4
4051 %index.next = add nuw i64 %index, 4
4052 %2 = icmp eq i64 %index.next, 1024
4053 br i1 %2, label %for.cond.cleanup, label %vector.body
4055 for.cond.cleanup: ; preds = %vector.body
4059 declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
; Fused multiply-add through @llvm.vp.fma: computes a[i] * splat(%x) + b[i]
; and stores the result back to a[i]. The splat is the second multiplicand.
; The assertions expect it to fold into vfmadd.vf (scalar in fa0) under mask
; v0.t, with the a-vector in v8 and the b-vector in v9. Two pointers are
; advanced per iteration; the loop exit compares the %b pointer (a1) against
; its end address in a3.
4061 define void @sink_splat_vp_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
4062 ; CHECK-LABEL: sink_splat_vp_fma:
4063 ; CHECK: # %bb.0: # %entry
4064 ; CHECK-NEXT: lui a3, 1
4065 ; CHECK-NEXT: add a3, a1, a3
4066 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4067 ; CHECK-NEXT: .LBB79_1: # %vector.body
4068 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4069 ; CHECK-NEXT: vle32.v v8, (a0)
4070 ; CHECK-NEXT: vle32.v v9, (a1)
4071 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
4072 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
4073 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4074 ; CHECK-NEXT: vse32.v v8, (a0)
4075 ; CHECK-NEXT: addi a1, a1, 16
4076 ; CHECK-NEXT: addi a0, a0, 16
4077 ; CHECK-NEXT: bne a1, a3, .LBB79_1
4078 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4081 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
4082 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
4083 br label %vector.body
4085 vector.body: ; preds = %vector.body, %entry
4086 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4087 %0 = getelementptr inbounds float, ptr %a, i64 %index
4088 %wide.load = load <4 x float>, ptr %0, align 4
4089 %1 = getelementptr inbounds float, ptr %b, i64 %index
4090 %wide.load12 = load <4 x float>, ptr %1, align 4
4091 %2 = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
4092 store <4 x float> %2, ptr %0, align 4
4093 %index.next = add nuw i64 %index, 4
4094 %3 = icmp eq i64 %index.next, 1024
4095 br i1 %3, label %for.cond.cleanup, label %vector.body
4097 for.cond.cleanup: ; preds = %vector.body
; Commuted form of the vp.fma case: the splat of float %x is the first
; operand of llvm.vp.fma and the value loaded from %a is the second. The
; expected loop still uses the scalar form vfmadd.vf (fa0) under mask v0.t,
; with the vector length taken from a2, and contains no vector splat.
4101 define void @sink_splat_vp_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
4102 ; CHECK-LABEL: sink_splat_vp_fma_commute:
4103 ; CHECK: # %bb.0: # %entry
4104 ; CHECK-NEXT: lui a3, 1
4105 ; CHECK-NEXT: add a3, a1, a3
4106 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4107 ; CHECK-NEXT: .LBB80_1: # %vector.body
4108 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4109 ; CHECK-NEXT: vle32.v v8, (a0)
4110 ; CHECK-NEXT: vle32.v v9, (a1)
4111 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
4112 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
4113 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4114 ; CHECK-NEXT: vse32.v v8, (a0)
4115 ; CHECK-NEXT: addi a1, a1, 16
4116 ; CHECK-NEXT: addi a0, a0, 16
4117 ; CHECK-NEXT: bne a1, a3, .LBB80_1
4118 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4121 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
4122 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
4123 br label %vector.body
4125 vector.body: ; preds = %vector.body, %entry
4126 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4127 %0 = getelementptr inbounds float, ptr %a, i64 %index
4128 %wide.load = load <4 x float>, ptr %0, align 4
4129 %1 = getelementptr inbounds float, ptr %b, i64 %index
4130 %wide.load12 = load <4 x float>, ptr %1, align 4
4131 %2 = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
4132 store <4 x float> %2, ptr %0, align 4
4133 %index.next = add nuw i64 %index, 4
4134 %3 = icmp eq i64 %index.next, 1024
4135 br i1 %3, label %for.cond.cleanup, label %vector.body
4137 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> multiply by a splat of %x, updating %a in place. The expected
; assembly runs at e64/m2 with VL 4 and folds the scalar into vmul.vx (a1)
; rather than materializing a vector splat. %index counts from 0 to 1024 in
; steps of 4; the pointer advances 32 bytes per iteration.
4142 define void @sink_splat_mul_lmul2(ptr nocapture %a, i64 signext %x) {
4143 ; CHECK-LABEL: sink_splat_mul_lmul2:
4144 ; CHECK: # %bb.0: # %entry
4145 ; CHECK-NEXT: lui a2, 2
4146 ; CHECK-NEXT: add a2, a0, a2
4147 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
4148 ; CHECK-NEXT: .LBB81_1: # %vector.body
4149 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4150 ; CHECK-NEXT: vle64.v v8, (a0)
4151 ; CHECK-NEXT: vmul.vx v8, v8, a1
4152 ; CHECK-NEXT: vse64.v v8, (a0)
4153 ; CHECK-NEXT: addi a0, a0, 32
4154 ; CHECK-NEXT: bne a0, a2, .LBB81_1
4155 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4158 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4159 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4160 br label %vector.body
4162 vector.body: ; preds = %vector.body, %entry
4163 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4164 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4165 %wide.load = load <4 x i64>, ptr %0, align 8
4166 %1 = mul <4 x i64> %wide.load, %broadcast.splat
4167 store <4 x i64> %1, ptr %0, align 8
4168 %index.next = add nuw i64 %index, 4
4169 %2 = icmp eq i64 %index.next, 1024
4170 br i1 %2, label %for.cond.cleanup, label %vector.body
4172 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> add of a splat of %x, updating %a in place. The expected
; assembly runs at e64/m2 with VL 4 and folds the scalar into vadd.vx (a1)
; rather than materializing a vector splat. %index counts from 0 to 1024 in
; steps of 4; the pointer advances 32 bytes per iteration.
4176 define void @sink_splat_add_lmul2(ptr nocapture %a, i64 signext %x) {
4177 ; CHECK-LABEL: sink_splat_add_lmul2:
4178 ; CHECK: # %bb.0: # %entry
4179 ; CHECK-NEXT: lui a2, 2
4180 ; CHECK-NEXT: add a2, a0, a2
4181 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
4182 ; CHECK-NEXT: .LBB82_1: # %vector.body
4183 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4184 ; CHECK-NEXT: vle64.v v8, (a0)
4185 ; CHECK-NEXT: vadd.vx v8, v8, a1
4186 ; CHECK-NEXT: vse64.v v8, (a0)
4187 ; CHECK-NEXT: addi a0, a0, 32
4188 ; CHECK-NEXT: bne a0, a2, .LBB82_1
4189 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4192 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4193 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4194 br label %vector.body
4196 vector.body: ; preds = %vector.body, %entry
4197 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4198 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4199 %wide.load = load <4 x i64>, ptr %0, align 8
4200 %1 = add <4 x i64> %wide.load, %broadcast.splat
4201 store <4 x i64> %1, ptr %0, align 8
4202 %index.next = add nuw i64 %index, 4
4203 %2 = icmp eq i64 %index.next, 1024
4204 br i1 %2, label %for.cond.cleanup, label %vector.body
4206 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> subtract with a splat of %x as the subtrahend (second operand),
; updating %a in place. The expected assembly runs at e64/m2 with VL 4 and
; folds the scalar into vsub.vx (a1) rather than materializing a vector
; splat. %index counts from 0 to 1024 in steps of 4.
4210 define void @sink_splat_sub_lmul2(ptr nocapture %a, i64 signext %x) {
4211 ; CHECK-LABEL: sink_splat_sub_lmul2:
4212 ; CHECK: # %bb.0: # %entry
4213 ; CHECK-NEXT: lui a2, 2
4214 ; CHECK-NEXT: add a2, a0, a2
4215 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
4216 ; CHECK-NEXT: .LBB83_1: # %vector.body
4217 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4218 ; CHECK-NEXT: vle64.v v8, (a0)
4219 ; CHECK-NEXT: vsub.vx v8, v8, a1
4220 ; CHECK-NEXT: vse64.v v8, (a0)
4221 ; CHECK-NEXT: addi a0, a0, 32
4222 ; CHECK-NEXT: bne a0, a2, .LBB83_1
4223 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4226 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4227 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4228 br label %vector.body
4230 vector.body: ; preds = %vector.body, %entry
4231 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4232 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4233 %wide.load = load <4 x i64>, ptr %0, align 8
4234 %1 = sub <4 x i64> %wide.load, %broadcast.splat
4235 store <4 x i64> %1, ptr %0, align 8
4236 %index.next = add nuw i64 %index, 4
4237 %2 = icmp eq i64 %index.next, 1024
4238 br i1 %2, label %for.cond.cleanup, label %vector.body
4240 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> subtract with a splat of %x as the minuend (first operand),
; updating %a in place. The expected assembly runs at e64/m2 with VL 4 and
; folds the scalar into the reverse-subtract form vrsub.vx (a1) rather than
; materializing a vector splat. %index counts from 0 to 1024 in steps of 4.
4244 define void @sink_splat_rsub_lmul2(ptr nocapture %a, i64 signext %x) {
4245 ; CHECK-LABEL: sink_splat_rsub_lmul2:
4246 ; CHECK: # %bb.0: # %entry
4247 ; CHECK-NEXT: lui a2, 2
4248 ; CHECK-NEXT: add a2, a0, a2
4249 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
4250 ; CHECK-NEXT: .LBB84_1: # %vector.body
4251 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4252 ; CHECK-NEXT: vle64.v v8, (a0)
4253 ; CHECK-NEXT: vrsub.vx v8, v8, a1
4254 ; CHECK-NEXT: vse64.v v8, (a0)
4255 ; CHECK-NEXT: addi a0, a0, 32
4256 ; CHECK-NEXT: bne a0, a2, .LBB84_1
4257 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4260 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4261 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4262 br label %vector.body
4264 vector.body: ; preds = %vector.body, %entry
4265 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4266 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4267 %wide.load = load <4 x i64>, ptr %0, align 8
4268 %1 = sub <4 x i64> %broadcast.splat, %wide.load
4269 store <4 x i64> %1, ptr %0, align 8
4270 %index.next = add nuw i64 %index, 4
4271 %2 = icmp eq i64 %index.next, 1024
4272 br i1 %2, label %for.cond.cleanup, label %vector.body
4274 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> bitwise AND with a splat of %x, updating %a in place. The
; expected assembly runs at e64/m2 with VL 4 and folds the scalar into
; vand.vx (a1) rather than materializing a vector splat. %index counts from
; 0 to 1024 in steps of 4; the pointer advances 32 bytes per iteration.
4278 define void @sink_splat_and_lmul2(ptr nocapture %a, i64 signext %x) {
4279 ; CHECK-LABEL: sink_splat_and_lmul2:
4280 ; CHECK: # %bb.0: # %entry
4281 ; CHECK-NEXT: lui a2, 2
4282 ; CHECK-NEXT: add a2, a0, a2
4283 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
4284 ; CHECK-NEXT: .LBB85_1: # %vector.body
4285 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4286 ; CHECK-NEXT: vle64.v v8, (a0)
4287 ; CHECK-NEXT: vand.vx v8, v8, a1
4288 ; CHECK-NEXT: vse64.v v8, (a0)
4289 ; CHECK-NEXT: addi a0, a0, 32
4290 ; CHECK-NEXT: bne a0, a2, .LBB85_1
4291 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4294 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4295 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4296 br label %vector.body
4298 vector.body: ; preds = %vector.body, %entry
4299 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4300 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4301 %wide.load = load <4 x i64>, ptr %0, align 8
4302 %1 = and <4 x i64> %wide.load, %broadcast.splat
4303 store <4 x i64> %1, ptr %0, align 8
4304 %index.next = add nuw i64 %index, 4
4305 %2 = icmp eq i64 %index.next, 1024
4306 br i1 %2, label %for.cond.cleanup, label %vector.body
4308 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> bitwise OR with a splat of %x, updating %a in place. The
; expected assembly runs at e64/m2 with VL 4 and folds the scalar into
; vor.vx (a1) rather than materializing a vector splat. %index counts from
; 0 to 1024 in steps of 4; the pointer advances 32 bytes per iteration.
4312 define void @sink_splat_or_lmul2(ptr nocapture %a, i64 signext %x) {
4313 ; CHECK-LABEL: sink_splat_or_lmul2:
4314 ; CHECK: # %bb.0: # %entry
4315 ; CHECK-NEXT: lui a2, 2
4316 ; CHECK-NEXT: add a2, a0, a2
4317 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
4318 ; CHECK-NEXT: .LBB86_1: # %vector.body
4319 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4320 ; CHECK-NEXT: vle64.v v8, (a0)
4321 ; CHECK-NEXT: vor.vx v8, v8, a1
4322 ; CHECK-NEXT: vse64.v v8, (a0)
4323 ; CHECK-NEXT: addi a0, a0, 32
4324 ; CHECK-NEXT: bne a0, a2, .LBB86_1
4325 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4328 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4329 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4330 br label %vector.body
4332 vector.body: ; preds = %vector.body, %entry
4333 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4334 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4335 %wide.load = load <4 x i64>, ptr %0, align 8
4336 %1 = or <4 x i64> %wide.load, %broadcast.splat
4337 store <4 x i64> %1, ptr %0, align 8
4338 %index.next = add nuw i64 %index, 4
4339 %2 = icmp eq i64 %index.next, 1024
4340 br i1 %2, label %for.cond.cleanup, label %vector.body
4342 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> bitwise XOR with a splat of %x, updating %a in place. The
; expected assembly runs at e64/m2 with VL 4 and folds the scalar into
; vxor.vx (a1) rather than materializing a vector splat. %index counts from
; 0 to 1024 in steps of 4; the pointer advances 32 bytes per iteration.
4346 define void @sink_splat_xor_lmul2(ptr nocapture %a, i64 signext %x) {
4347 ; CHECK-LABEL: sink_splat_xor_lmul2:
4348 ; CHECK: # %bb.0: # %entry
4349 ; CHECK-NEXT: lui a2, 2
4350 ; CHECK-NEXT: add a2, a0, a2
4351 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
4352 ; CHECK-NEXT: .LBB87_1: # %vector.body
4353 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4354 ; CHECK-NEXT: vle64.v v8, (a0)
4355 ; CHECK-NEXT: vxor.vx v8, v8, a1
4356 ; CHECK-NEXT: vse64.v v8, (a0)
4357 ; CHECK-NEXT: addi a0, a0, 32
4358 ; CHECK-NEXT: bne a0, a2, .LBB87_1
4359 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4362 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4363 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4364 br label %vector.body
4366 vector.body: ; preds = %vector.body, %entry
4367 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4368 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4369 %wide.load = load <4 x i64>, ptr %0, align 8
4370 %1 = xor <4 x i64> %wide.load, %broadcast.splat
4371 store <4 x i64> %1, ptr %0, align 8
4372 %index.next = add nuw i64 %index, 4
4373 %2 = icmp eq i64 %index.next, 1024
4374 br i1 %2, label %for.cond.cleanup, label %vector.body
4376 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> multiply by a splat of %x, updating %a in place. The expected
; assembly sets an AVL of 32 (li a3, 32) at e32/m8 and folds the scalar into
; vmul.vx (a1); no vector splat is emitted.
; NOTE(review): %index advances by 4 i32 elements (16 bytes) per iteration
; while each access is 32 elements wide, so successive accesses overlap --
; presumably irrelevant to what is being tested; confirm before changing.
4380 define void @sink_splat_mul_lmul8(ptr nocapture %a, i32 signext %x) {
4381 ; CHECK-LABEL: sink_splat_mul_lmul8:
4382 ; CHECK: # %bb.0: # %entry
4383 ; CHECK-NEXT: lui a2, 1
4384 ; CHECK-NEXT: add a2, a0, a2
4385 ; CHECK-NEXT: li a3, 32
4386 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4387 ; CHECK-NEXT: .LBB88_1: # %vector.body
4388 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4389 ; CHECK-NEXT: vle32.v v8, (a0)
4390 ; CHECK-NEXT: vmul.vx v8, v8, a1
4391 ; CHECK-NEXT: vse32.v v8, (a0)
4392 ; CHECK-NEXT: addi a0, a0, 16
4393 ; CHECK-NEXT: bne a0, a2, .LBB88_1
4394 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4397 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4398 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4399 br label %vector.body
4401 vector.body: ; preds = %vector.body, %entry
4402 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4403 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4404 %wide.load = load <32 x i32>, ptr %0, align 4
4405 %1 = mul <32 x i32> %wide.load, %broadcast.splat
4406 store <32 x i32> %1, ptr %0, align 4
4407 %index.next = add nuw i64 %index, 4
4408 %2 = icmp eq i64 %index.next, 1024
4409 br i1 %2, label %for.cond.cleanup, label %vector.body
4411 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> add of a splat of %x, updating %a in place. The expected
; assembly sets an AVL of 32 (li a3, 32) at e32/m8 and folds the scalar into
; vadd.vx (a1); no vector splat is emitted.
; NOTE(review): %index advances by 4 i32 elements (16 bytes) per iteration
; while each access is 32 elements wide, so successive accesses overlap --
; presumably irrelevant to what is being tested; confirm before changing.
4415 define void @sink_splat_add_lmul8(ptr nocapture %a, i32 signext %x) {
4416 ; CHECK-LABEL: sink_splat_add_lmul8:
4417 ; CHECK: # %bb.0: # %entry
4418 ; CHECK-NEXT: lui a2, 1
4419 ; CHECK-NEXT: add a2, a0, a2
4420 ; CHECK-NEXT: li a3, 32
4421 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4422 ; CHECK-NEXT: .LBB89_1: # %vector.body
4423 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4424 ; CHECK-NEXT: vle32.v v8, (a0)
4425 ; CHECK-NEXT: vadd.vx v8, v8, a1
4426 ; CHECK-NEXT: vse32.v v8, (a0)
4427 ; CHECK-NEXT: addi a0, a0, 16
4428 ; CHECK-NEXT: bne a0, a2, .LBB89_1
4429 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4432 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4433 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4434 br label %vector.body
4436 vector.body: ; preds = %vector.body, %entry
4437 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4438 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4439 %wide.load = load <32 x i32>, ptr %0, align 4
4440 %1 = add <32 x i32> %wide.load, %broadcast.splat
4441 store <32 x i32> %1, ptr %0, align 4
4442 %index.next = add nuw i64 %index, 4
4443 %2 = icmp eq i64 %index.next, 1024
4444 br i1 %2, label %for.cond.cleanup, label %vector.body
4446 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> subtract with a splat of %x as the subtrahend (second operand),
; updating %a in place. The expected assembly sets an AVL of 32 (li a3, 32)
; at e32/m8 and folds the scalar into vsub.vx (a1); no vector splat is emitted.
; NOTE(review): %index advances by 4 i32 elements (16 bytes) per iteration
; while each access is 32 elements wide, so successive accesses overlap --
; presumably irrelevant to what is being tested; confirm before changing.
4450 define void @sink_splat_sub_lmul8(ptr nocapture %a, i32 signext %x) {
4451 ; CHECK-LABEL: sink_splat_sub_lmul8:
4452 ; CHECK: # %bb.0: # %entry
4453 ; CHECK-NEXT: lui a2, 1
4454 ; CHECK-NEXT: add a2, a0, a2
4455 ; CHECK-NEXT: li a3, 32
4456 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4457 ; CHECK-NEXT: .LBB90_1: # %vector.body
4458 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4459 ; CHECK-NEXT: vle32.v v8, (a0)
4460 ; CHECK-NEXT: vsub.vx v8, v8, a1
4461 ; CHECK-NEXT: vse32.v v8, (a0)
4462 ; CHECK-NEXT: addi a0, a0, 16
4463 ; CHECK-NEXT: bne a0, a2, .LBB90_1
4464 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4467 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4468 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4469 br label %vector.body
4471 vector.body: ; preds = %vector.body, %entry
4472 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4473 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4474 %wide.load = load <32 x i32>, ptr %0, align 4
4475 %1 = sub <32 x i32> %wide.load, %broadcast.splat
4476 store <32 x i32> %1, ptr %0, align 4
4477 %index.next = add nuw i64 %index, 4
4478 %2 = icmp eq i64 %index.next, 1024
4479 br i1 %2, label %for.cond.cleanup, label %vector.body
4481 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> subtract with a splat of %x as the minuend (first operand),
; updating %a in place. The expected assembly sets an AVL of 32 (li a3, 32)
; at e32/m8 and folds the scalar into the reverse-subtract form vrsub.vx
; (a1); no vector splat is emitted.
; NOTE(review): %index advances by 4 i32 elements (16 bytes) per iteration
; while each access is 32 elements wide, so successive accesses overlap --
; presumably irrelevant to what is being tested; confirm before changing.
4485 define void @sink_splat_rsub_lmul8(ptr nocapture %a, i32 signext %x) {
4486 ; CHECK-LABEL: sink_splat_rsub_lmul8:
4487 ; CHECK: # %bb.0: # %entry
4488 ; CHECK-NEXT: lui a2, 1
4489 ; CHECK-NEXT: add a2, a0, a2
4490 ; CHECK-NEXT: li a3, 32
4491 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4492 ; CHECK-NEXT: .LBB91_1: # %vector.body
4493 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4494 ; CHECK-NEXT: vle32.v v8, (a0)
4495 ; CHECK-NEXT: vrsub.vx v8, v8, a1
4496 ; CHECK-NEXT: vse32.v v8, (a0)
4497 ; CHECK-NEXT: addi a0, a0, 16
4498 ; CHECK-NEXT: bne a0, a2, .LBB91_1
4499 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4502 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4503 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4504 br label %vector.body
4506 vector.body: ; preds = %vector.body, %entry
4507 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4508 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4509 %wide.load = load <32 x i32>, ptr %0, align 4
4510 %1 = sub <32 x i32> %broadcast.splat, %wide.load
4511 store <32 x i32> %1, ptr %0, align 4
4512 %index.next = add nuw i64 %index, 4
4513 %2 = icmp eq i64 %index.next, 1024
4514 br i1 %2, label %for.cond.cleanup, label %vector.body
4516 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> bitwise AND with a splat of %x, updating %a in place. The
; expected assembly sets an AVL of 32 (li a3, 32) at e32/m8 and folds the
; scalar into vand.vx (a1); no vector splat is emitted.
; NOTE(review): %index advances by 4 i32 elements (16 bytes) per iteration
; while each access is 32 elements wide, so successive accesses overlap --
; presumably irrelevant to what is being tested; confirm before changing.
4520 define void @sink_splat_and_lmul8(ptr nocapture %a, i32 signext %x) {
4521 ; CHECK-LABEL: sink_splat_and_lmul8:
4522 ; CHECK: # %bb.0: # %entry
4523 ; CHECK-NEXT: lui a2, 1
4524 ; CHECK-NEXT: add a2, a0, a2
4525 ; CHECK-NEXT: li a3, 32
4526 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4527 ; CHECK-NEXT: .LBB92_1: # %vector.body
4528 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4529 ; CHECK-NEXT: vle32.v v8, (a0)
4530 ; CHECK-NEXT: vand.vx v8, v8, a1
4531 ; CHECK-NEXT: vse32.v v8, (a0)
4532 ; CHECK-NEXT: addi a0, a0, 16
4533 ; CHECK-NEXT: bne a0, a2, .LBB92_1
4534 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4537 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4538 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4539 br label %vector.body
4541 vector.body: ; preds = %vector.body, %entry
4542 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4543 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4544 %wide.load = load <32 x i32>, ptr %0, align 4
4545 %1 = and <32 x i32> %wide.load, %broadcast.splat
4546 store <32 x i32> %1, ptr %0, align 4
4547 %index.next = add nuw i64 %index, 4
4548 %2 = icmp eq i64 %index.next, 1024
4549 br i1 %2, label %for.cond.cleanup, label %vector.body
4551 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> bitwise OR with a splat of %x, updating %a in place. The
; expected assembly sets an AVL of 32 (li a3, 32) at e32/m8 and folds the
; scalar into vor.vx (a1); no vector splat is emitted.
; NOTE(review): %index advances by 4 i32 elements (16 bytes) per iteration
; while each access is 32 elements wide, so successive accesses overlap --
; presumably irrelevant to what is being tested; confirm before changing.
4555 define void @sink_splat_or_lmul8(ptr nocapture %a, i32 signext %x) {
4556 ; CHECK-LABEL: sink_splat_or_lmul8:
4557 ; CHECK: # %bb.0: # %entry
4558 ; CHECK-NEXT: lui a2, 1
4559 ; CHECK-NEXT: add a2, a0, a2
4560 ; CHECK-NEXT: li a3, 32
4561 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4562 ; CHECK-NEXT: .LBB93_1: # %vector.body
4563 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4564 ; CHECK-NEXT: vle32.v v8, (a0)
4565 ; CHECK-NEXT: vor.vx v8, v8, a1
4566 ; CHECK-NEXT: vse32.v v8, (a0)
4567 ; CHECK-NEXT: addi a0, a0, 16
4568 ; CHECK-NEXT: bne a0, a2, .LBB93_1
4569 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4572 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4573 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4574 br label %vector.body
4576 vector.body: ; preds = %vector.body, %entry
4577 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4578 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4579 %wide.load = load <32 x i32>, ptr %0, align 4
4580 %1 = or <32 x i32> %wide.load, %broadcast.splat
4581 store <32 x i32> %1, ptr %0, align 4
4582 %index.next = add nuw i64 %index, 4
4583 %2 = icmp eq i64 %index.next, 1024
4584 br i1 %2, label %for.cond.cleanup, label %vector.body
4586 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> bitwise XOR with a splat of %x, updating %a in place. The
; expected assembly sets an AVL of 32 (li a3, 32) at e32/m8 and folds the
; scalar into vxor.vx (a1); no vector splat is emitted.
; NOTE(review): %index advances by 4 i32 elements (16 bytes) per iteration
; while each access is 32 elements wide, so successive accesses overlap --
; presumably irrelevant to what is being tested; confirm before changing.
4590 define void @sink_splat_xor_lmul8(ptr nocapture %a, i32 signext %x) {
4591 ; CHECK-LABEL: sink_splat_xor_lmul8:
4592 ; CHECK: # %bb.0: # %entry
4593 ; CHECK-NEXT: lui a2, 1
4594 ; CHECK-NEXT: add a2, a0, a2
4595 ; CHECK-NEXT: li a3, 32
4596 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4597 ; CHECK-NEXT: .LBB94_1: # %vector.body
4598 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4599 ; CHECK-NEXT: vle32.v v8, (a0)
4600 ; CHECK-NEXT: vxor.vx v8, v8, a1
4601 ; CHECK-NEXT: vse32.v v8, (a0)
4602 ; CHECK-NEXT: addi a0, a0, 16
4603 ; CHECK-NEXT: bne a0, a2, .LBB94_1
4604 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4607 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4608 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4609 br label %vector.body
4611 vector.body: ; preds = %vector.body, %entry
4612 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4613 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4614 %wide.load = load <32 x i32>, ptr %0, align 4
4615 %1 = xor <32 x i32> %wide.load, %broadcast.splat
4616 store <32 x i32> %1, ptr %0, align 4
4617 %index.next = add nuw i64 %index, 4
4618 %2 = icmp eq i64 %index.next, 1024
4619 br i1 %2, label %for.cond.cleanup, label %vector.body
4621 for.cond.cleanup: ; preds = %vector.body
; <2 x i32> multiply by a splat of %x, updating %a in place. The expected
; assembly runs at e32/mf2 with VL 2 and folds the scalar into vmul.vx (a1)
; rather than materializing a vector splat.
; The address is formed with an i64-typed getelementptr and %index steps by
; 4, hence the 32-byte pointer increment (addi a0, a0, 32).
4625 define void @sink_splat_mul_lmulmf2(ptr nocapture %a, i32 signext %x) {
4626 ; CHECK-LABEL: sink_splat_mul_lmulmf2:
4627 ; CHECK: # %bb.0: # %entry
4628 ; CHECK-NEXT: lui a2, 2
4629 ; CHECK-NEXT: add a2, a0, a2
4630 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4631 ; CHECK-NEXT: .LBB95_1: # %vector.body
4632 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4633 ; CHECK-NEXT: vle32.v v8, (a0)
4634 ; CHECK-NEXT: vmul.vx v8, v8, a1
4635 ; CHECK-NEXT: vse32.v v8, (a0)
4636 ; CHECK-NEXT: addi a0, a0, 32
4637 ; CHECK-NEXT: bne a0, a2, .LBB95_1
4638 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4641 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4642 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4643 br label %vector.body
4645 vector.body: ; preds = %vector.body, %entry
4646 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4647 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4648 %wide.load = load <2 x i32>, ptr %0, align 8
4649 %1 = mul <2 x i32> %wide.load, %broadcast.splat
4650 store <2 x i32> %1, ptr %0, align 8
4651 %index.next = add nuw i64 %index, 4
4652 %2 = icmp eq i64 %index.next, 1024
4653 br i1 %2, label %for.cond.cleanup, label %vector.body
4655 for.cond.cleanup: ; preds = %vector.body
; <2 x i32> add of a splat of %x, updating %a in place. The expected
; assembly runs at e32/mf2 with VL 2 and folds the scalar into vadd.vx (a1)
; rather than materializing a vector splat.
; The address is formed with an i64-typed getelementptr and %index steps by
; 4, hence the 32-byte pointer increment (addi a0, a0, 32).
4659 define void @sink_splat_add_lmulmf2(ptr nocapture %a, i32 signext %x) {
4660 ; CHECK-LABEL: sink_splat_add_lmulmf2:
4661 ; CHECK: # %bb.0: # %entry
4662 ; CHECK-NEXT: lui a2, 2
4663 ; CHECK-NEXT: add a2, a0, a2
4664 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4665 ; CHECK-NEXT: .LBB96_1: # %vector.body
4666 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4667 ; CHECK-NEXT: vle32.v v8, (a0)
4668 ; CHECK-NEXT: vadd.vx v8, v8, a1
4669 ; CHECK-NEXT: vse32.v v8, (a0)
4670 ; CHECK-NEXT: addi a0, a0, 32
4671 ; CHECK-NEXT: bne a0, a2, .LBB96_1
4672 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4675 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4676 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4677 br label %vector.body
4679 vector.body: ; preds = %vector.body, %entry
4680 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4681 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4682 %wide.load = load <2 x i32>, ptr %0, align 8
4683 %1 = add <2 x i32> %wide.load, %broadcast.splat
4684 store <2 x i32> %1, ptr %0, align 8
4685 %index.next = add nuw i64 %index, 4
4686 %2 = icmp eq i64 %index.next, 1024
4687 br i1 %2, label %for.cond.cleanup, label %vector.body
4689 for.cond.cleanup: ; preds = %vector.body
; <2 x i32> subtract with a splat of %x as the subtrahend (second operand),
; updating %a in place. The expected assembly runs at e32/mf2 with VL 2 and
; folds the scalar into vsub.vx (a1) rather than materializing a vector splat.
; The address is formed with an i64-typed getelementptr and %index steps by
; 4, hence the 32-byte pointer increment (addi a0, a0, 32).
4693 define void @sink_splat_sub_lmulmf2(ptr nocapture %a, i32 signext %x) {
4694 ; CHECK-LABEL: sink_splat_sub_lmulmf2:
4695 ; CHECK: # %bb.0: # %entry
4696 ; CHECK-NEXT: lui a2, 2
4697 ; CHECK-NEXT: add a2, a0, a2
4698 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4699 ; CHECK-NEXT: .LBB97_1: # %vector.body
4700 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4701 ; CHECK-NEXT: vle32.v v8, (a0)
4702 ; CHECK-NEXT: vsub.vx v8, v8, a1
4703 ; CHECK-NEXT: vse32.v v8, (a0)
4704 ; CHECK-NEXT: addi a0, a0, 32
4705 ; CHECK-NEXT: bne a0, a2, .LBB97_1
4706 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4709 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4710 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4711 br label %vector.body
4713 vector.body: ; preds = %vector.body, %entry
4714 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4715 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4716 %wide.load = load <2 x i32>, ptr %0, align 8
4717 %1 = sub <2 x i32> %wide.load, %broadcast.splat
4718 store <2 x i32> %1, ptr %0, align 8
4719 %index.next = add nuw i64 %index, 4
4720 %2 = icmp eq i64 %index.next, 1024
4721 br i1 %2, label %for.cond.cleanup, label %vector.body
4723 for.cond.cleanup: ; preds = %vector.body
; <2 x i32> subtract with a splat of %x as the minuend (first operand),
; updating %a in place. The expected assembly runs at e32/mf2 with VL 2 and
; folds the scalar into the reverse-subtract form vrsub.vx (a1) rather than
; materializing a vector splat.
; The address is formed with an i64-typed getelementptr and %index steps by
; 4, hence the 32-byte pointer increment (addi a0, a0, 32).
4727 define void @sink_splat_rsub_lmulmf2(ptr nocapture %a, i32 signext %x) {
4728 ; CHECK-LABEL: sink_splat_rsub_lmulmf2:
4729 ; CHECK: # %bb.0: # %entry
4730 ; CHECK-NEXT: lui a2, 2
4731 ; CHECK-NEXT: add a2, a0, a2
4732 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4733 ; CHECK-NEXT: .LBB98_1: # %vector.body
4734 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4735 ; CHECK-NEXT: vle32.v v8, (a0)
4736 ; CHECK-NEXT: vrsub.vx v8, v8, a1
4737 ; CHECK-NEXT: vse32.v v8, (a0)
4738 ; CHECK-NEXT: addi a0, a0, 32
4739 ; CHECK-NEXT: bne a0, a2, .LBB98_1
4740 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4743 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4744 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4745 br label %vector.body
4747 vector.body: ; preds = %vector.body, %entry
4748 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4749 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4750 %wide.load = load <2 x i32>, ptr %0, align 8
4751 %1 = sub <2 x i32> %broadcast.splat, %wide.load
4752 store <2 x i32> %1, ptr %0, align 8
4753 %index.next = add nuw i64 %index, 4
4754 %2 = icmp eq i64 %index.next, 1024
4755 br i1 %2, label %for.cond.cleanup, label %vector.body
4757 for.cond.cleanup: ; preds = %vector.body
; <2 x i32> bitwise AND with a splat of %x, updating %a in place. The
; expected assembly runs at e32/mf2 with VL 2 and folds the scalar into
; vand.vx (a1) rather than materializing a vector splat.
; The address is formed with an i64-typed getelementptr and %index steps by
; 4, hence the 32-byte pointer increment (addi a0, a0, 32).
4761 define void @sink_splat_and_lmulmf2(ptr nocapture %a, i32 signext %x) {
4762 ; CHECK-LABEL: sink_splat_and_lmulmf2:
4763 ; CHECK: # %bb.0: # %entry
4764 ; CHECK-NEXT: lui a2, 2
4765 ; CHECK-NEXT: add a2, a0, a2
4766 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4767 ; CHECK-NEXT: .LBB99_1: # %vector.body
4768 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4769 ; CHECK-NEXT: vle32.v v8, (a0)
4770 ; CHECK-NEXT: vand.vx v8, v8, a1
4771 ; CHECK-NEXT: vse32.v v8, (a0)
4772 ; CHECK-NEXT: addi a0, a0, 32
4773 ; CHECK-NEXT: bne a0, a2, .LBB99_1
4774 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4777 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4778 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4779 br label %vector.body
4781 vector.body: ; preds = %vector.body, %entry
4782 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4783 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4784 %wide.load = load <2 x i32>, ptr %0, align 8
4785 %1 = and <2 x i32> %wide.load, %broadcast.splat
4786 store <2 x i32> %1, ptr %0, align 8
4787 %index.next = add nuw i64 %index, 4
4788 %2 = icmp eq i64 %index.next, 1024
4789 br i1 %2, label %for.cond.cleanup, label %vector.body
4791 for.cond.cleanup: ; preds = %vector.body
; <2 x i32> bitwise OR with a splat of %x, updating %a in place. The
; expected assembly runs at e32/mf2 with VL 2 and folds the scalar into
; vor.vx (a1) rather than materializing a vector splat.
; The address is formed with an i64-typed getelementptr and %index steps by
; 4, hence the 32-byte pointer increment (addi a0, a0, 32).
4795 define void @sink_splat_or_lmulmf2(ptr nocapture %a, i32 signext %x) {
4796 ; CHECK-LABEL: sink_splat_or_lmulmf2:
4797 ; CHECK: # %bb.0: # %entry
4798 ; CHECK-NEXT: lui a2, 2
4799 ; CHECK-NEXT: add a2, a0, a2
4800 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4801 ; CHECK-NEXT: .LBB100_1: # %vector.body
4802 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4803 ; CHECK-NEXT: vle32.v v8, (a0)
4804 ; CHECK-NEXT: vor.vx v8, v8, a1
4805 ; CHECK-NEXT: vse32.v v8, (a0)
4806 ; CHECK-NEXT: addi a0, a0, 32
4807 ; CHECK-NEXT: bne a0, a2, .LBB100_1
4808 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4811 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4812 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4813 br label %vector.body
4815 vector.body: ; preds = %vector.body, %entry
4816 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4817 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4818 %wide.load = load <2 x i32>, ptr %0, align 8
4819 %1 = or <2 x i32> %wide.load, %broadcast.splat
4820 store <2 x i32> %1, ptr %0, align 8
4821 %index.next = add nuw i64 %index, 4
4822 %2 = icmp eq i64 %index.next, 1024
4823 br i1 %2, label %for.cond.cleanup, label %vector.body
4825 for.cond.cleanup: ; preds = %vector.body
; <2 x i32> bitwise XOR with a splat of %x, updating %a in place. The
; expected assembly runs at e32/mf2 with VL 2 and folds the scalar into
; vxor.vx (a1) rather than materializing a vector splat.
; The address is formed with an i64-typed getelementptr and %index steps by
; 4, hence the 32-byte pointer increment (addi a0, a0, 32).
4829 define void @sink_splat_xor_lmulmf2(ptr nocapture %a, i32 signext %x) {
4830 ; CHECK-LABEL: sink_splat_xor_lmulmf2:
4831 ; CHECK: # %bb.0: # %entry
4832 ; CHECK-NEXT: lui a2, 2
4833 ; CHECK-NEXT: add a2, a0, a2
4834 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4835 ; CHECK-NEXT: .LBB101_1: # %vector.body
4836 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4837 ; CHECK-NEXT: vle32.v v8, (a0)
4838 ; CHECK-NEXT: vxor.vx v8, v8, a1
4839 ; CHECK-NEXT: vse32.v v8, (a0)
4840 ; CHECK-NEXT: addi a0, a0, 32
4841 ; CHECK-NEXT: bne a0, a2, .LBB101_1
4842 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4845 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4846 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4847 br label %vector.body
4849 vector.body: ; preds = %vector.body, %entry
4850 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4851 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4852 %wide.load = load <2 x i32>, ptr %0, align 8
4853 %1 = xor <2 x i32> %wide.load, %broadcast.splat
4854 store <2 x i32> %1, ptr %0, align 8
4855 %index.next = add nuw i64 %index, 4
4856 %2 = icmp eq i64 %index.next, 1024
4857 br i1 %2, label %for.cond.cleanup, label %vector.body
4859 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated integer compare on <4 x i32>: two vector operands, a
; metadata predicate, a <4 x i1> mask and an i32 vector length; yields a
; <4 x i1> result. Called by sink_splat_vp_icmp.
4863 declare <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32>, <4 x i32>, metadata, <4 x i1>, i32)
; The splat of %y is the second operand of an "eq" llvm.vp.icmp whose result
; masks a store of zeroinitializer back to the loaded address. The expected
; loop folds the scalar into vmseq.vx (a1) under mask v0.t, with the vector
; length taken from a2. Because the compare writes v0, the incoming v0 is
; copied to v8 on entry and copied back at the top of every iteration; the
; zero vector to store is built once (vmv.v.i v9, 0) before the loop.
4865 define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i32 zeroext %vl) {
4866 ; CHECK-LABEL: sink_splat_vp_icmp:
4867 ; CHECK: # %bb.0: # %entry
4868 ; CHECK-NEXT: vmv1r.v v8, v0
4869 ; CHECK-NEXT: lui a3, 1
4870 ; CHECK-NEXT: add a3, a0, a3
4871 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4872 ; CHECK-NEXT: vmv.v.i v9, 0
4873 ; CHECK-NEXT: .LBB102_1: # %vector.body
4874 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4875 ; CHECK-NEXT: vle32.v v10, (a0)
4876 ; CHECK-NEXT: vmv1r.v v0, v8
4877 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
4878 ; CHECK-NEXT: vmseq.vx v0, v10, a1, v0.t
4879 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4880 ; CHECK-NEXT: vse32.v v9, (a0), v0.t
4881 ; CHECK-NEXT: addi a0, a0, 16
4882 ; CHECK-NEXT: bne a0, a3, .LBB102_1
4883 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4886 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %y, i32 0
4887 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4888 br label %vector.body
4890 vector.body: ; preds = %vector.body, %entry
4891 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4892 %0 = getelementptr inbounds i32, ptr %x, i64 %index
4893 %wide.load = load <4 x i32>, ptr %0, align 4
4894 %1 = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, metadata !"eq", <4 x i1> %m, i32 %vl)
4895 call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
4896 %index.next = add nuw i64 %index, 4
4897 %2 = icmp eq i64 %index.next, 1024
4898 br i1 %2, label %for.cond.cleanup, label %vector.body
4900 for.cond.cleanup: ; preds = %vector.body
4904 declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)
; Loop test: floating-point counterpart of the VP compare case. Each loaded
; <4 x float> is compared against a splat of %y (predicate "oeq", mask %m,
; explicit vector length %vl) and the result masks a store of zeros to the
; same address. The expected assembly uses the scalar-operand compare
; (vmfeq.vf with fa0).
4906 define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zeroext %vl) {
4907 ; CHECK-LABEL: sink_splat_vp_fcmp:
4908 ; CHECK: # %bb.0: # %entry
4909 ; CHECK-NEXT: vmv1r.v v8, v0
4910 ; CHECK-NEXT: lui a2, 1
4911 ; CHECK-NEXT: add a2, a0, a2
4912 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4913 ; CHECK-NEXT: vmv.v.i v9, 0
4914 ; CHECK-NEXT: .LBB103_1: # %vector.body
4915 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4916 ; CHECK-NEXT: vle32.v v10, (a0)
4917 ; CHECK-NEXT: vmv1r.v v0, v8
4918 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
4919 ; CHECK-NEXT: vmfeq.vf v0, v10, fa0, v0.t
4920 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4921 ; CHECK-NEXT: vse32.v v9, (a0), v0.t
4922 ; CHECK-NEXT: addi a0, a0, 16
4923 ; CHECK-NEXT: bne a0, a2, .LBB103_1
4924 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %y to all four lanes once, before the loop.
4927 %broadcast.splatinsert = insertelement <4 x float> poison, float %y, i32 0
4928 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
4929 br label %vector.body
4931 vector.body: ; preds = %vector.body, %entry
4932 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4933 %0 = getelementptr inbounds float, ptr %x, i64 %index
4934 %wide.load = load <4 x float>, ptr %0, align 4
; Predicated ordered-equal compare; the splat is the second operand.
4935 %1 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, metadata !"oeq", <4 x i1> %m, i32 %vl)
; Store zeros only to the lanes selected by the compare result.
4936 call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
; Advance by four float elements; exit when the index reaches 1024.
4937 %index.next = add nuw i64 %index, 4
4938 %2 = icmp eq i64 %index.next, 1024
4939 br i1 %2, label %for.cond.cleanup, label %vector.body
4941 for.cond.cleanup: ; preds = %vector.body
4945 declare <4 x i32> @llvm.vp.smin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Loop test: applies the VP signed-minimum intrinsic to each loaded <4 x i32>
; and a splat of %x (mask %m, explicit vector length %vl), storing the result
; in place. The expected assembly uses the scalar-operand form (vmin.vx with
; a1), switching VL to a2 for the VP op and back to 4 for the store.
4947 define void @sink_splat_vp_min(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
4948 ; CHECK-LABEL: sink_splat_vp_min:
4949 ; CHECK: # %bb.0: # %entry
4950 ; CHECK-NEXT: lui a3, 1
4951 ; CHECK-NEXT: add a3, a0, a3
4952 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4953 ; CHECK-NEXT: .LBB104_1: # %vector.body
4954 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4955 ; CHECK-NEXT: vle32.v v8, (a0)
4956 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
4957 ; CHECK-NEXT: vmin.vx v8, v8, a1, v0.t
4958 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4959 ; CHECK-NEXT: vse32.v v8, (a0)
4960 ; CHECK-NEXT: addi a0, a0, 16
4961 ; CHECK-NEXT: bne a0, a3, .LBB104_1
4962 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
4965 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
4966 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4967 br label %vector.body
4969 vector.body: ; preds = %vector.body, %entry
4970 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4971 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4972 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the second operand of the intrinsic.
4973 %1 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
4974 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
4975 %index.next = add nuw i64 %index, 4
4976 %2 = icmp eq i64 %index.next, 1024
4977 br i1 %2, label %for.cond.cleanup, label %vector.body
4979 for.cond.cleanup: ; preds = %vector.body
; Loop test: VP signed minimum with the operands swapped, i.e. the splat of %x
; is the FIRST operand of the intrinsic and the loaded vector the second. The
; expected assembly is still the scalar-operand form vmin.vx v8, v8, a1.
4983 define void @sink_splat_vp_min_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
4984 ; CHECK-LABEL: sink_splat_vp_min_commute:
4985 ; CHECK: # %bb.0: # %entry
4986 ; CHECK-NEXT: lui a3, 1
4987 ; CHECK-NEXT: add a3, a0, a3
4988 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4989 ; CHECK-NEXT: .LBB105_1: # %vector.body
4990 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4991 ; CHECK-NEXT: vle32.v v8, (a0)
4992 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
4993 ; CHECK-NEXT: vmin.vx v8, v8, a1, v0.t
4994 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4995 ; CHECK-NEXT: vse32.v v8, (a0)
4996 ; CHECK-NEXT: addi a0, a0, 16
4997 ; CHECK-NEXT: bne a0, a3, .LBB105_1
4998 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5001 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5002 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5003 br label %vector.body
5005 vector.body: ; preds = %vector.body, %entry
5006 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5007 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5008 %wide.load = load <4 x i32>, ptr %0, align 4
; Commuted form: splat first, loaded vector second.
5009 %1 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5010 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5011 %index.next = add nuw i64 %index, 4
5012 %2 = icmp eq i64 %index.next, 1024
5013 br i1 %2, label %for.cond.cleanup, label %vector.body
5015 for.cond.cleanup: ; preds = %vector.body
5019 declare <4 x i32> @llvm.vp.smax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Loop test: applies the VP signed-maximum intrinsic to each loaded <4 x i32>
; and a splat of %x (mask %m, explicit vector length %vl), storing the result
; in place. The expected assembly uses the scalar-operand form (vmax.vx with
; a1), switching VL to a2 for the VP op and back to 4 for the store.
5021 define void @sink_splat_vp_max(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5022 ; CHECK-LABEL: sink_splat_vp_max:
5023 ; CHECK: # %bb.0: # %entry
5024 ; CHECK-NEXT: lui a3, 1
5025 ; CHECK-NEXT: add a3, a0, a3
5026 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5027 ; CHECK-NEXT: .LBB106_1: # %vector.body
5028 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5029 ; CHECK-NEXT: vle32.v v8, (a0)
5030 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5031 ; CHECK-NEXT: vmax.vx v8, v8, a1, v0.t
5032 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5033 ; CHECK-NEXT: vse32.v v8, (a0)
5034 ; CHECK-NEXT: addi a0, a0, 16
5035 ; CHECK-NEXT: bne a0, a3, .LBB106_1
5036 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5039 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5040 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5041 br label %vector.body
5043 vector.body: ; preds = %vector.body, %entry
5044 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5045 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5046 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the second operand of the intrinsic.
5047 %1 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5048 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5049 %index.next = add nuw i64 %index, 4
5050 %2 = icmp eq i64 %index.next, 1024
5051 br i1 %2, label %for.cond.cleanup, label %vector.body
5053 for.cond.cleanup: ; preds = %vector.body
; Loop test: VP signed maximum with the operands swapped, i.e. the splat of %x
; is the FIRST operand of the intrinsic and the loaded vector the second. The
; expected assembly is still the scalar-operand form vmax.vx v8, v8, a1.
5057 define void @sink_splat_vp_max_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5058 ; CHECK-LABEL: sink_splat_vp_max_commute:
5059 ; CHECK: # %bb.0: # %entry
5060 ; CHECK-NEXT: lui a3, 1
5061 ; CHECK-NEXT: add a3, a0, a3
5062 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5063 ; CHECK-NEXT: .LBB107_1: # %vector.body
5064 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5065 ; CHECK-NEXT: vle32.v v8, (a0)
5066 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5067 ; CHECK-NEXT: vmax.vx v8, v8, a1, v0.t
5068 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5069 ; CHECK-NEXT: vse32.v v8, (a0)
5070 ; CHECK-NEXT: addi a0, a0, 16
5071 ; CHECK-NEXT: bne a0, a3, .LBB107_1
5072 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5075 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5076 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5077 br label %vector.body
5079 vector.body: ; preds = %vector.body, %entry
5080 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5081 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5082 %wide.load = load <4 x i32>, ptr %0, align 4
; Commuted form: splat first, loaded vector second.
5083 %1 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5084 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5085 %index.next = add nuw i64 %index, 4
5086 %2 = icmp eq i64 %index.next, 1024
5087 br i1 %2, label %for.cond.cleanup, label %vector.body
5089 for.cond.cleanup: ; preds = %vector.body
; Loop test: VP unsigned minimum with the splat of %x as the FIRST operand of
; the intrinsic and the loaded vector as the second. The expected assembly
; uses the scalar-operand form vminu.vx v8, v8, a1.
; NOTE(review): unlike the neighbouring VP tests, no declaration of
; @llvm.vp.umin.v4i32 and no non-commuted umin variant appear next to this
; function - confirm the intrinsic is declared (or auto-declared) elsewhere.
5093 define void @sink_splat_vp_umin_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5094 ; CHECK-LABEL: sink_splat_vp_umin_commute:
5095 ; CHECK: # %bb.0: # %entry
5096 ; CHECK-NEXT: lui a3, 1
5097 ; CHECK-NEXT: add a3, a0, a3
5098 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5099 ; CHECK-NEXT: .LBB108_1: # %vector.body
5100 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5101 ; CHECK-NEXT: vle32.v v8, (a0)
5102 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5103 ; CHECK-NEXT: vminu.vx v8, v8, a1, v0.t
5104 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5105 ; CHECK-NEXT: vse32.v v8, (a0)
5106 ; CHECK-NEXT: addi a0, a0, 16
5107 ; CHECK-NEXT: bne a0, a3, .LBB108_1
5108 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5111 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5112 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5113 br label %vector.body
5115 vector.body: ; preds = %vector.body, %entry
5116 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5117 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5118 %wide.load = load <4 x i32>, ptr %0, align 4
; Commuted form: splat first, loaded vector second.
5119 %1 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5120 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5121 %index.next = add nuw i64 %index, 4
5122 %2 = icmp eq i64 %index.next, 1024
5123 br i1 %2, label %for.cond.cleanup, label %vector.body
5125 for.cond.cleanup: ; preds = %vector.body
5129 declare <4 x i32> @llvm.vp.umax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Loop test: applies the VP unsigned-maximum intrinsic to each loaded
; <4 x i32> and a splat of %x (mask %m, explicit vector length %vl), storing
; the result in place. The expected assembly uses the scalar-operand form
; (vmaxu.vx with a1), switching VL to a2 for the VP op and back to 4 for the
; store.
5131 define void @sink_splat_vp_umax(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5132 ; CHECK-LABEL: sink_splat_vp_umax:
5133 ; CHECK: # %bb.0: # %entry
5134 ; CHECK-NEXT: lui a3, 1
5135 ; CHECK-NEXT: add a3, a0, a3
5136 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5137 ; CHECK-NEXT: .LBB109_1: # %vector.body
5138 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5139 ; CHECK-NEXT: vle32.v v8, (a0)
5140 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5141 ; CHECK-NEXT: vmaxu.vx v8, v8, a1, v0.t
5142 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5143 ; CHECK-NEXT: vse32.v v8, (a0)
5144 ; CHECK-NEXT: addi a0, a0, 16
5145 ; CHECK-NEXT: bne a0, a3, .LBB109_1
5146 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5149 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5150 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5151 br label %vector.body
5153 vector.body: ; preds = %vector.body, %entry
5154 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5155 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5156 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the second operand of the intrinsic.
5157 %1 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5158 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5159 %index.next = add nuw i64 %index, 4
5160 %2 = icmp eq i64 %index.next, 1024
5161 br i1 %2, label %for.cond.cleanup, label %vector.body
5163 for.cond.cleanup: ; preds = %vector.body
; Loop test: VP unsigned maximum with the operands swapped, i.e. the splat of
; %x is the FIRST operand of the intrinsic and the loaded vector the second.
; The expected assembly is still the scalar-operand form
; vmaxu.vx v8, v8, a1.
5167 define void @sink_splat_vp_umax_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5168 ; CHECK-LABEL: sink_splat_vp_umax_commute:
5169 ; CHECK: # %bb.0: # %entry
5170 ; CHECK-NEXT: lui a3, 1
5171 ; CHECK-NEXT: add a3, a0, a3
5172 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5173 ; CHECK-NEXT: .LBB110_1: # %vector.body
5174 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5175 ; CHECK-NEXT: vle32.v v8, (a0)
5176 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5177 ; CHECK-NEXT: vmaxu.vx v8, v8, a1, v0.t
5178 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5179 ; CHECK-NEXT: vse32.v v8, (a0)
5180 ; CHECK-NEXT: addi a0, a0, 16
5181 ; CHECK-NEXT: bne a0, a3, .LBB110_1
5182 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5185 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5186 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5187 br label %vector.body
5189 vector.body: ; preds = %vector.body, %entry
5190 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5191 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5192 %wide.load = load <4 x i32>, ptr %0, align 4
; Commuted form: splat first, loaded vector second.
5193 %1 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5194 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5195 %index.next = add nuw i64 %index, 4
5196 %2 = icmp eq i64 %index.next, 1024
5197 br i1 %2, label %for.cond.cleanup, label %vector.body
5199 for.cond.cleanup: ; preds = %vector.body
5203 declare <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Loop test: applies the VP signed saturating-add intrinsic to each loaded
; <4 x i32> and a splat of %x (mask %m, explicit vector length %vl), storing
; the result in place. The expected assembly uses the scalar-operand form
; (vsadd.vx with a1), switching VL to a2 for the VP op and back to 4 for the
; store.
5205 define void @sink_splat_vp_sadd_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5206 ; CHECK-LABEL: sink_splat_vp_sadd_sat:
5207 ; CHECK: # %bb.0: # %entry
5208 ; CHECK-NEXT: lui a3, 1
5209 ; CHECK-NEXT: add a3, a0, a3
5210 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5211 ; CHECK-NEXT: .LBB111_1: # %vector.body
5212 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5213 ; CHECK-NEXT: vle32.v v8, (a0)
5214 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5215 ; CHECK-NEXT: vsadd.vx v8, v8, a1, v0.t
5216 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5217 ; CHECK-NEXT: vse32.v v8, (a0)
5218 ; CHECK-NEXT: addi a0, a0, 16
5219 ; CHECK-NEXT: bne a0, a3, .LBB111_1
5220 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5223 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5224 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5225 br label %vector.body
5227 vector.body: ; preds = %vector.body, %entry
5228 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5229 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5230 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the second operand of the intrinsic.
5231 %1 = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5232 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5233 %index.next = add nuw i64 %index, 4
5234 %2 = icmp eq i64 %index.next, 1024
5235 br i1 %2, label %for.cond.cleanup, label %vector.body
5237 for.cond.cleanup: ; preds = %vector.body
; Loop test: VP signed saturating add with the operands swapped, i.e. the
; splat of %x is the FIRST operand of the intrinsic and the loaded vector the
; second. The expected assembly is still the scalar-operand form
; vsadd.vx v8, v8, a1.
5241 define void @sink_splat_vp_sadd_sat_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5242 ; CHECK-LABEL: sink_splat_vp_sadd_sat_commute:
5243 ; CHECK: # %bb.0: # %entry
5244 ; CHECK-NEXT: lui a3, 1
5245 ; CHECK-NEXT: add a3, a0, a3
5246 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5247 ; CHECK-NEXT: .LBB112_1: # %vector.body
5248 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5249 ; CHECK-NEXT: vle32.v v8, (a0)
5250 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5251 ; CHECK-NEXT: vsadd.vx v8, v8, a1, v0.t
5252 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5253 ; CHECK-NEXT: vse32.v v8, (a0)
5254 ; CHECK-NEXT: addi a0, a0, 16
5255 ; CHECK-NEXT: bne a0, a3, .LBB112_1
5256 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5259 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5260 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5261 br label %vector.body
5263 vector.body: ; preds = %vector.body, %entry
5264 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5265 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5266 %wide.load = load <4 x i32>, ptr %0, align 4
; Commuted form: splat first, loaded vector second.
5267 %1 = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5268 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5269 %index.next = add nuw i64 %index, 4
5270 %2 = icmp eq i64 %index.next, 1024
5271 br i1 %2, label %for.cond.cleanup, label %vector.body
5273 for.cond.cleanup: ; preds = %vector.body
5277 declare <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Loop test: applies the VP signed saturating-subtract intrinsic to each
; loaded <4 x i32> and a splat of %x (mask %m, explicit vector length %vl),
; storing the result in place. The expected assembly uses the scalar-operand
; form (vssub.vx with a1).
;
; The induction variable is advanced with "add nuw", as in the sibling
; saturating-add tests. It previously used "sub nuw" starting from 0, which
; yields poison on the first iteration and made the loop walk backwards from
; %a. The assertions below follow the loop shape of sink_splat_vp_sadd_sat;
; re-run utils/update_llc_test_checks.py to confirm them.
5279 define void @sink_splat_vp_ssub_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5280 ; CHECK-LABEL: sink_splat_vp_ssub_sat:
5281 ; CHECK: # %bb.0: # %entry
5282 ; CHECK-NEXT: lui a3, 1
5283 ; CHECK-NEXT: add a3, a0, a3
5284 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5285 ; CHECK-NEXT: .LBB113_1: # %vector.body
5286 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5287 ; CHECK-NEXT: vle32.v v8, (a0)
5288 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5289 ; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
5290 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5291 ; CHECK-NEXT: vse32.v v8, (a0)
5292 ; CHECK-NEXT: addi a0, a0, 16
5293 ; CHECK-NEXT: bne a0, a3, .LBB113_1
5294 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5297 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5298 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5299 br label %vector.body
5301 vector.body: ; preds = %vector.body, %entry
5302 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5303 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5304 %wide.load = load <4 x i32>, ptr %0, align 4
; Subtraction is not commutative: the splat must be the second operand
; (the subtrahend) for the scalar-operand form to apply.
5305 %1 = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5306 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5307 %index.next = add nuw i64 %index, 4
5308 %2 = icmp eq i64 %index.next, 1024
5309 br i1 %2, label %for.cond.cleanup, label %vector.body
5311 for.cond.cleanup: ; preds = %vector.body
5315 declare <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Loop test: applies the VP unsigned saturating-add intrinsic to each loaded
; <4 x i32> and a splat of %x (mask %m, explicit vector length %vl), storing
; the result in place. The expected assembly uses the scalar-operand form
; (vsaddu.vx with a1), switching VL to a2 for the VP op and back to 4 for the
; store.
5317 define void @sink_splat_vp_uadd_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5318 ; CHECK-LABEL: sink_splat_vp_uadd_sat:
5319 ; CHECK: # %bb.0: # %entry
5320 ; CHECK-NEXT: lui a3, 1
5321 ; CHECK-NEXT: add a3, a0, a3
5322 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5323 ; CHECK-NEXT: .LBB114_1: # %vector.body
5324 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5325 ; CHECK-NEXT: vle32.v v8, (a0)
5326 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5327 ; CHECK-NEXT: vsaddu.vx v8, v8, a1, v0.t
5328 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5329 ; CHECK-NEXT: vse32.v v8, (a0)
5330 ; CHECK-NEXT: addi a0, a0, 16
5331 ; CHECK-NEXT: bne a0, a3, .LBB114_1
5332 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5335 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5336 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5337 br label %vector.body
5339 vector.body: ; preds = %vector.body, %entry
5340 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5341 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5342 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the second operand of the intrinsic.
5343 %1 = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5344 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5345 %index.next = add nuw i64 %index, 4
5346 %2 = icmp eq i64 %index.next, 1024
5347 br i1 %2, label %for.cond.cleanup, label %vector.body
5349 for.cond.cleanup: ; preds = %vector.body
; Loop test: VP unsigned saturating add with the operands swapped, i.e. the
; splat of %x is the FIRST operand of the intrinsic and the loaded vector the
; second. The expected assembly is still the scalar-operand form
; vsaddu.vx v8, v8, a1.
5353 define void @sink_splat_vp_uadd_sat_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5354 ; CHECK-LABEL: sink_splat_vp_uadd_sat_commute:
5355 ; CHECK: # %bb.0: # %entry
5356 ; CHECK-NEXT: lui a3, 1
5357 ; CHECK-NEXT: add a3, a0, a3
5358 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5359 ; CHECK-NEXT: .LBB115_1: # %vector.body
5360 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5361 ; CHECK-NEXT: vle32.v v8, (a0)
5362 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5363 ; CHECK-NEXT: vsaddu.vx v8, v8, a1, v0.t
5364 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5365 ; CHECK-NEXT: vse32.v v8, (a0)
5366 ; CHECK-NEXT: addi a0, a0, 16
5367 ; CHECK-NEXT: bne a0, a3, .LBB115_1
5368 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5371 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5372 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5373 br label %vector.body
5375 vector.body: ; preds = %vector.body, %entry
5376 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5377 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5378 %wide.load = load <4 x i32>, ptr %0, align 4
; Commuted form: splat first, loaded vector second.
5379 %1 = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5380 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5381 %index.next = add nuw i64 %index, 4
5382 %2 = icmp eq i64 %index.next, 1024
5383 br i1 %2, label %for.cond.cleanup, label %vector.body
5385 for.cond.cleanup: ; preds = %vector.body
5389 declare <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Loop test: applies the VP unsigned saturating-subtract intrinsic to each
; loaded <4 x i32> and a splat of %x (mask %m, explicit vector length %vl),
; storing the result in place. The expected assembly uses the scalar-operand
; form (vssubu.vx with a1).
;
; The induction variable is advanced with "add nuw", as in the sibling
; saturating-add tests. It previously used "sub nuw" starting from 0, which
; yields poison on the first iteration and made the loop walk backwards from
; %a. The assertions below follow the loop shape of sink_splat_vp_uadd_sat;
; re-run utils/update_llc_test_checks.py to confirm them.
5391 define void @sink_splat_vp_usub_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5392 ; CHECK-LABEL: sink_splat_vp_usub_sat:
5393 ; CHECK: # %bb.0: # %entry
5394 ; CHECK-NEXT: lui a3, 1
5395 ; CHECK-NEXT: add a3, a0, a3
5396 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5397 ; CHECK-NEXT: .LBB116_1: # %vector.body
5398 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5399 ; CHECK-NEXT: vle32.v v8, (a0)
5400 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
5401 ; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
5402 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5403 ; CHECK-NEXT: vse32.v v8, (a0)
5404 ; CHECK-NEXT: addi a0, a0, 16
5405 ; CHECK-NEXT: bne a0, a3, .LBB116_1
5406 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5409 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5410 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5411 br label %vector.body
5413 vector.body: ; preds = %vector.body, %entry
5414 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5415 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5416 %wide.load = load <4 x i32>, ptr %0, align 4
; Subtraction is not commutative: the splat must be the second operand
; (the subtrahend) for the scalar-operand form to apply.
5417 %1 = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5418 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5419 %index.next = add nuw i64 %index, 4
5420 %2 = icmp eq i64 %index.next, 1024
5421 br i1 %2, label %for.cond.cleanup, label %vector.body
5423 for.cond.cleanup: ; preds = %vector.body
; Loop test: for each loaded <4 x i32>, lanes equal to 42 are replaced by %x
; (a select between the splat of %x and the loaded value) and the result is
; stored in place. The expected assembly materialises 42 in a3 before the
; loop, compares with vmseq.vx, and merges the scalar with vmerge.vxm using
; a1.
5427 define void @sink_splat_select(ptr nocapture %a, i32 signext %x) {
5428 ; CHECK-LABEL: sink_splat_select:
5429 ; CHECK: # %bb.0: # %entry
5430 ; CHECK-NEXT: lui a2, 1
5431 ; CHECK-NEXT: add a2, a0, a2
5432 ; CHECK-NEXT: li a3, 42
5433 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5434 ; CHECK-NEXT: .LBB117_1: # %vector.body
5435 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5436 ; CHECK-NEXT: vle32.v v8, (a0)
5437 ; CHECK-NEXT: vmseq.vx v0, v8, a3
5438 ; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0
5439 ; CHECK-NEXT: vse32.v v8, (a0)
5440 ; CHECK-NEXT: addi a0, a0, 16
5441 ; CHECK-NEXT: bne a0, a2, .LBB117_1
5442 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Broadcast %x to all four lanes once, before the loop.
5445 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5446 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5447 br label %vector.body
5449 vector.body: ; preds = %vector.body, %entry
5450 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5451 %0 = getelementptr inbounds i32, ptr %a, i64 %index
5452 %load = load <4 x i32>, ptr %0, align 4
; Per-lane condition: loaded element == 42.
5453 %cond = icmp eq <4 x i32> %load, splat (i32 42)
; The splat is the true-value of the select; unmatched lanes keep the loaded value.
5454 %1 = select <4 x i1> %cond, <4 x i32> %broadcast.splat, <4 x i32> %load
5455 store <4 x i32> %1, ptr %0, align 4
; Advance by four i32 elements; exit when the index reaches 1024.
5456 %index.next = add nuw i64 %index, 4
5457 %2 = icmp eq i64 %index.next, 1024
5458 br i1 %2, label %for.cond.cleanup, label %vector.body
5460 for.cond.cleanup: ; preds = %vector.body