1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f -target-abi=lp64f \
; Fixed-width multiply. Each %vector.body iteration loads <4 x i32> from %a,
; multiplies it by %broadcast.splat (a splat of %x defined before the loop
; label) and stores the result back in place; the loop exits once %index.next
; reaches 1024. The assertions expect the splat to be consumed as the scalar
; operand (a1) of vmul.vx inside the loop.
5 define void @sink_splat_mul(ptr nocapture %a, i32 signext %x) {
6 ; CHECK-LABEL: sink_splat_mul:
7 ; CHECK: # %bb.0: # %entry
8 ; CHECK-NEXT: li a2, 1024
9 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
10 ; CHECK-NEXT: .LBB0_1: # %vector.body
11 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
12 ; CHECK-NEXT: vle32.v v8, (a0)
13 ; CHECK-NEXT: vmul.vx v8, v8, a1
14 ; CHECK-NEXT: vse32.v v8, (a0)
15 ; CHECK-NEXT: addi a2, a2, -4
16 ; CHECK-NEXT: addi a0, a0, 16
17 ; CHECK-NEXT: bnez a2, .LBB0_1
18 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
21 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
22 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
25 vector.body: ; preds = %vector.body, %entry
26 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
27 %0 = getelementptr inbounds i32, ptr %a, i64 %index
28 %wide.load = load <4 x i32>, ptr %0, align 4
29 %1 = mul <4 x i32> %wide.load, %broadcast.splat
30 store <4 x i32> %1, ptr %0, align 4
31 %index.next = add nuw i64 %index, 4
32 %2 = icmp eq i64 %index.next, 1024
33 br i1 %2, label %for.cond.cleanup, label %vector.body
35 for.cond.cleanup: ; preds = %vector.body
; Fixed-width add. Each %vector.body iteration loads <4 x i32> from %a, adds
; %broadcast.splat (a splat of %x defined before the loop label) and stores the
; result back in place; the loop exits once %index.next reaches 1024. The
; assertions expect the splat to be consumed as the scalar operand (a1) of
; vadd.vx inside the loop.
39 define void @sink_splat_add(ptr nocapture %a, i32 signext %x) {
40 ; CHECK-LABEL: sink_splat_add:
41 ; CHECK: # %bb.0: # %entry
42 ; CHECK-NEXT: li a2, 1024
43 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
44 ; CHECK-NEXT: .LBB1_1: # %vector.body
45 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
46 ; CHECK-NEXT: vle32.v v8, (a0)
47 ; CHECK-NEXT: vadd.vx v8, v8, a1
48 ; CHECK-NEXT: vse32.v v8, (a0)
49 ; CHECK-NEXT: addi a2, a2, -4
50 ; CHECK-NEXT: addi a0, a0, 16
51 ; CHECK-NEXT: bnez a2, .LBB1_1
52 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
55 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
56 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
59 vector.body: ; preds = %vector.body, %entry
60 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
61 %0 = getelementptr inbounds i32, ptr %a, i64 %index
62 %wide.load = load <4 x i32>, ptr %0, align 4
63 %1 = add <4 x i32> %wide.load, %broadcast.splat
64 store <4 x i32> %1, ptr %0, align 4
65 %index.next = add nuw i64 %index, 4
66 %2 = icmp eq i64 %index.next, 1024
67 br i1 %2, label %for.cond.cleanup, label %vector.body
69 for.cond.cleanup: ; preds = %vector.body
; Fixed-width subtract with the splat on the right-hand side. Each %vector.body
; iteration computes %wide.load - %broadcast.splat on <4 x i32> and stores the
; result back in place; the loop exits once %index.next reaches 1024. The
; assertions expect the splat to be consumed as the scalar operand (a1) of
; vsub.vx inside the loop.
73 define void @sink_splat_sub(ptr nocapture %a, i32 signext %x) {
74 ; CHECK-LABEL: sink_splat_sub:
75 ; CHECK: # %bb.0: # %entry
76 ; CHECK-NEXT: li a2, 1024
77 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
78 ; CHECK-NEXT: .LBB2_1: # %vector.body
79 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
80 ; CHECK-NEXT: vle32.v v8, (a0)
81 ; CHECK-NEXT: vsub.vx v8, v8, a1
82 ; CHECK-NEXT: vse32.v v8, (a0)
83 ; CHECK-NEXT: addi a2, a2, -4
84 ; CHECK-NEXT: addi a0, a0, 16
85 ; CHECK-NEXT: bnez a2, .LBB2_1
86 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
89 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
90 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
93 vector.body: ; preds = %vector.body, %entry
94 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
95 %0 = getelementptr inbounds i32, ptr %a, i64 %index
96 %wide.load = load <4 x i32>, ptr %0, align 4
97 %1 = sub <4 x i32> %wide.load, %broadcast.splat
98 store <4 x i32> %1, ptr %0, align 4
99 %index.next = add nuw i64 %index, 4
100 %2 = icmp eq i64 %index.next, 1024
101 br i1 %2, label %for.cond.cleanup, label %vector.body
103 for.cond.cleanup: ; preds = %vector.body
; Fixed-width reverse subtract: the splat is the left-hand operand, so each
; %vector.body iteration computes %broadcast.splat - %wide.load on <4 x i32>
; and stores the result back in place; the loop exits once %index.next reaches
; 1024. The assertions expect vrsub.vx with the scalar in a1 inside the loop.
107 define void @sink_splat_rsub(ptr nocapture %a, i32 signext %x) {
108 ; CHECK-LABEL: sink_splat_rsub:
109 ; CHECK: # %bb.0: # %entry
110 ; CHECK-NEXT: li a2, 1024
111 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
112 ; CHECK-NEXT: .LBB3_1: # %vector.body
113 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
114 ; CHECK-NEXT: vle32.v v8, (a0)
115 ; CHECK-NEXT: vrsub.vx v8, v8, a1
116 ; CHECK-NEXT: vse32.v v8, (a0)
117 ; CHECK-NEXT: addi a2, a2, -4
118 ; CHECK-NEXT: addi a0, a0, 16
119 ; CHECK-NEXT: bnez a2, .LBB3_1
120 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
123 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
124 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
125 br label %vector.body
127 vector.body: ; preds = %vector.body, %entry
128 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
129 %0 = getelementptr inbounds i32, ptr %a, i64 %index
130 %wide.load = load <4 x i32>, ptr %0, align 4
131 %1 = sub <4 x i32> %broadcast.splat, %wide.load
132 store <4 x i32> %1, ptr %0, align 4
133 %index.next = add nuw i64 %index, 4
134 %2 = icmp eq i64 %index.next, 1024
135 br i1 %2, label %for.cond.cleanup, label %vector.body
137 for.cond.cleanup: ; preds = %vector.body
; Fixed-width bitwise AND. Each %vector.body iteration loads <4 x i32> from %a,
; ANDs it with %broadcast.splat (a splat of %x defined in the block that
; branches to the loop) and stores the result back in place; the loop exits
; once %index.next reaches 1024. The assertions expect vand.vx with the scalar
; in a1 inside the loop.
141 define void @sink_splat_and(ptr nocapture %a, i32 signext %x) {
142 ; CHECK-LABEL: sink_splat_and:
143 ; CHECK: # %bb.0: # %entry
144 ; CHECK-NEXT: li a2, 1024
145 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
146 ; CHECK-NEXT: .LBB4_1: # %vector.body
147 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
148 ; CHECK-NEXT: vle32.v v8, (a0)
149 ; CHECK-NEXT: vand.vx v8, v8, a1
150 ; CHECK-NEXT: vse32.v v8, (a0)
151 ; CHECK-NEXT: addi a2, a2, -4
152 ; CHECK-NEXT: addi a0, a0, 16
153 ; CHECK-NEXT: bnez a2, .LBB4_1
154 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
157 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
158 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
159 br label %vector.body
161 vector.body: ; preds = %vector.body, %entry
162 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
163 %0 = getelementptr inbounds i32, ptr %a, i64 %index
164 %wide.load = load <4 x i32>, ptr %0, align 4
165 %1 = and <4 x i32> %wide.load, %broadcast.splat
166 store <4 x i32> %1, ptr %0, align 4
167 %index.next = add nuw i64 %index, 4
168 %2 = icmp eq i64 %index.next, 1024
169 br i1 %2, label %for.cond.cleanup, label %vector.body
171 for.cond.cleanup: ; preds = %vector.body
; Fixed-width bitwise OR. Each %vector.body iteration loads <4 x i32> from %a,
; ORs it with %broadcast.splat (a splat of %x defined in the block that
; branches to the loop) and stores the result back in place; the loop exits
; once %index.next reaches 1024. The assertions expect vor.vx with the scalar
; in a1 inside the loop.
175 define void @sink_splat_or(ptr nocapture %a, i32 signext %x) {
176 ; CHECK-LABEL: sink_splat_or:
177 ; CHECK: # %bb.0: # %entry
178 ; CHECK-NEXT: li a2, 1024
179 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
180 ; CHECK-NEXT: .LBB5_1: # %vector.body
181 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
182 ; CHECK-NEXT: vle32.v v8, (a0)
183 ; CHECK-NEXT: vor.vx v8, v8, a1
184 ; CHECK-NEXT: vse32.v v8, (a0)
185 ; CHECK-NEXT: addi a2, a2, -4
186 ; CHECK-NEXT: addi a0, a0, 16
187 ; CHECK-NEXT: bnez a2, .LBB5_1
188 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
191 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
192 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
193 br label %vector.body
195 vector.body: ; preds = %vector.body, %entry
196 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
197 %0 = getelementptr inbounds i32, ptr %a, i64 %index
198 %wide.load = load <4 x i32>, ptr %0, align 4
199 %1 = or <4 x i32> %wide.load, %broadcast.splat
200 store <4 x i32> %1, ptr %0, align 4
201 %index.next = add nuw i64 %index, 4
202 %2 = icmp eq i64 %index.next, 1024
203 br i1 %2, label %for.cond.cleanup, label %vector.body
205 for.cond.cleanup: ; preds = %vector.body
; Fixed-width bitwise XOR. Each %vector.body iteration loads <4 x i32> from %a,
; XORs it with %broadcast.splat (a splat of %x defined in the block that
; branches to the loop) and stores the result back in place; the loop exits
; once %index.next reaches 1024. The assertions expect vxor.vx with the scalar
; in a1 inside the loop.
209 define void @sink_splat_xor(ptr nocapture %a, i32 signext %x) {
210 ; CHECK-LABEL: sink_splat_xor:
211 ; CHECK: # %bb.0: # %entry
212 ; CHECK-NEXT: li a2, 1024
213 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
214 ; CHECK-NEXT: .LBB6_1: # %vector.body
215 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
216 ; CHECK-NEXT: vle32.v v8, (a0)
217 ; CHECK-NEXT: vxor.vx v8, v8, a1
218 ; CHECK-NEXT: vse32.v v8, (a0)
219 ; CHECK-NEXT: addi a2, a2, -4
220 ; CHECK-NEXT: addi a0, a0, 16
221 ; CHECK-NEXT: bnez a2, .LBB6_1
222 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
225 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
226 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
227 br label %vector.body
229 vector.body: ; preds = %vector.body, %entry
230 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
231 %0 = getelementptr inbounds i32, ptr %a, i64 %index
232 %wide.load = load <4 x i32>, ptr %0, align 4
233 %1 = xor <4 x i32> %wide.load, %broadcast.splat
234 store <4 x i32> %1, ptr %0, align 4
235 %index.next = add nuw i64 %index, 4
236 %2 = icmp eq i64 %index.next, 1024
237 br i1 %2, label %for.cond.cleanup, label %vector.body
239 for.cond.cleanup: ; preds = %vector.body
; Scalable-vector multiply with a scalar remainder loop. %vector.body applies
; mul to <vscale x 4 x i32> chunks of %a using %broadcast.splat (a splat of %x
; defined in %vector.ph) until %index.next equals %n.vec; %for.body then
; handles one i32 per iteration until the index reaches 1024. The assertions
; expect vmul.vx with the scalar in a1 in the vector loop and a scalar mul in
; the remainder loop.
243 define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
244 ; CHECK-LABEL: sink_splat_mul_scalable:
245 ; CHECK: # %bb.0: # %entry
246 ; CHECK-NEXT: csrr a5, vlenb
247 ; CHECK-NEXT: srli a3, a5, 1
248 ; CHECK-NEXT: li a2, 1024
249 ; CHECK-NEXT: bgeu a2, a3, .LBB7_2
250 ; CHECK-NEXT: # %bb.1:
251 ; CHECK-NEXT: li a2, 0
252 ; CHECK-NEXT: j .LBB7_5
253 ; CHECK-NEXT: .LBB7_2: # %vector.ph
254 ; CHECK-NEXT: addi a2, a3, -1
255 ; CHECK-NEXT: andi a4, a2, 1024
256 ; CHECK-NEXT: xori a2, a4, 1024
257 ; CHECK-NEXT: slli a5, a5, 1
258 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
259 ; CHECK-NEXT: mv a6, a0
260 ; CHECK-NEXT: mv a7, a2
261 ; CHECK-NEXT: .LBB7_3: # %vector.body
262 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
263 ; CHECK-NEXT: vl2re32.v v8, (a6)
264 ; CHECK-NEXT: vmul.vx v8, v8, a1
265 ; CHECK-NEXT: vs2r.v v8, (a6)
266 ; CHECK-NEXT: sub a7, a7, a3
267 ; CHECK-NEXT: add a6, a6, a5
268 ; CHECK-NEXT: bnez a7, .LBB7_3
269 ; CHECK-NEXT: # %bb.4: # %middle.block
270 ; CHECK-NEXT: beqz a4, .LBB7_7
271 ; CHECK-NEXT: .LBB7_5: # %for.body.preheader
272 ; CHECK-NEXT: addi a3, a2, -1024
273 ; CHECK-NEXT: slli a2, a2, 2
274 ; CHECK-NEXT: add a0, a0, a2
275 ; CHECK-NEXT: .LBB7_6: # %for.body
276 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
277 ; CHECK-NEXT: lw a2, 0(a0)
278 ; CHECK-NEXT: mul a2, a2, a1
279 ; CHECK-NEXT: sw a2, 0(a0)
280 ; CHECK-NEXT: addi a3, a3, 1
281 ; CHECK-NEXT: addi a0, a0, 4
282 ; CHECK-NEXT: bnez a3, .LBB7_6
283 ; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
286 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is greater than the trip count 1024.
288 %min.iters.check = icmp ugt i64 %1, 1024
289 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
291 vector.ph: ; preds = %entry
292 %2 = call i64 @llvm.vscale.i64()
294 %n.mod.vf = urem i64 1024, %3
295 %n.vec = sub nsw i64 1024, %n.mod.vf
296 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
297 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
298 %4 = call i64 @llvm.vscale.i64()
300 br label %vector.body
302 vector.body: ; preds = %vector.body, %vector.ph
303 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
304 %6 = getelementptr inbounds i32, ptr %a, i64 %index
305 %wide.load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %6, align 4
306 %7 = mul <vscale x 4 x i32> %wide.load, %broadcast.splat
307 store <vscale x 4 x i32> %7, <vscale x 4 x i32>* %6, align 4
308 %index.next = add nuw i64 %index, %5
309 %8 = icmp eq i64 %index.next, %n.vec
310 br i1 %8, label %middle.block, label %vector.body
312 middle.block: ; preds = %vector.body
; The scalar loop is skipped when there is no remainder (%n.mod.vf == 0).
313 %cmp.n = icmp eq i64 %n.mod.vf, 0
314 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
316 for.body.preheader: ; preds = %entry, %middle.block
317 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
320 for.cond.cleanup: ; preds = %for.body, %middle.block
323 for.body: ; preds = %for.body.preheader, %for.body
324 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
325 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
326 %9 = load i32, ptr %arrayidx, align 4
327 %mul = mul i32 %9, %x
328 store i32 %mul, ptr %arrayidx, align 4
329 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
330 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
331 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector add with a scalar remainder loop. %vector.body applies add to
; <vscale x 4 x i32> chunks of %a using %broadcast.splat (a splat of %x defined
; in %vector.ph) until %index.next equals %n.vec; %for.body then handles one
; i32 per iteration until the index reaches 1024. The assertions expect vadd.vx
; with the scalar in a1 in the vector loop and a scalar add in the remainder
; loop.
334 define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
335 ; CHECK-LABEL: sink_splat_add_scalable:
336 ; CHECK: # %bb.0: # %entry
337 ; CHECK-NEXT: csrr a5, vlenb
338 ; CHECK-NEXT: srli a3, a5, 1
339 ; CHECK-NEXT: li a2, 1024
340 ; CHECK-NEXT: bgeu a2, a3, .LBB8_2
341 ; CHECK-NEXT: # %bb.1:
342 ; CHECK-NEXT: li a2, 0
343 ; CHECK-NEXT: j .LBB8_5
344 ; CHECK-NEXT: .LBB8_2: # %vector.ph
345 ; CHECK-NEXT: addi a2, a3, -1
346 ; CHECK-NEXT: andi a4, a2, 1024
347 ; CHECK-NEXT: xori a2, a4, 1024
348 ; CHECK-NEXT: slli a5, a5, 1
349 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
350 ; CHECK-NEXT: mv a6, a0
351 ; CHECK-NEXT: mv a7, a2
352 ; CHECK-NEXT: .LBB8_3: # %vector.body
353 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
354 ; CHECK-NEXT: vl2re32.v v8, (a6)
355 ; CHECK-NEXT: vadd.vx v8, v8, a1
356 ; CHECK-NEXT: vs2r.v v8, (a6)
357 ; CHECK-NEXT: sub a7, a7, a3
358 ; CHECK-NEXT: add a6, a6, a5
359 ; CHECK-NEXT: bnez a7, .LBB8_3
360 ; CHECK-NEXT: # %bb.4: # %middle.block
361 ; CHECK-NEXT: beqz a4, .LBB8_7
362 ; CHECK-NEXT: .LBB8_5: # %for.body.preheader
363 ; CHECK-NEXT: addi a3, a2, -1024
364 ; CHECK-NEXT: slli a2, a2, 2
365 ; CHECK-NEXT: add a0, a0, a2
366 ; CHECK-NEXT: .LBB8_6: # %for.body
367 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
368 ; CHECK-NEXT: lw a2, 0(a0)
369 ; CHECK-NEXT: add a2, a2, a1
370 ; CHECK-NEXT: sw a2, 0(a0)
371 ; CHECK-NEXT: addi a3, a3, 1
372 ; CHECK-NEXT: addi a0, a0, 4
373 ; CHECK-NEXT: bnez a3, .LBB8_6
374 ; CHECK-NEXT: .LBB8_7: # %for.cond.cleanup
377 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is greater than the trip count 1024.
379 %min.iters.check = icmp ugt i64 %1, 1024
380 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
382 vector.ph: ; preds = %entry
383 %2 = call i64 @llvm.vscale.i64()
385 %n.mod.vf = urem i64 1024, %3
386 %n.vec = sub nsw i64 1024, %n.mod.vf
387 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
388 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
389 %4 = call i64 @llvm.vscale.i64()
391 br label %vector.body
393 vector.body: ; preds = %vector.body, %vector.ph
394 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
395 %6 = getelementptr inbounds i32, ptr %a, i64 %index
396 %wide.load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %6, align 4
397 %7 = add <vscale x 4 x i32> %wide.load, %broadcast.splat
398 store <vscale x 4 x i32> %7, <vscale x 4 x i32>* %6, align 4
399 %index.next = add nuw i64 %index, %5
400 %8 = icmp eq i64 %index.next, %n.vec
401 br i1 %8, label %middle.block, label %vector.body
403 middle.block: ; preds = %vector.body
; The scalar loop is skipped when there is no remainder (%n.mod.vf == 0).
404 %cmp.n = icmp eq i64 %n.mod.vf, 0
405 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
407 for.body.preheader: ; preds = %entry, %middle.block
408 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
411 for.cond.cleanup: ; preds = %for.body, %middle.block
414 for.body: ; preds = %for.body.preheader, %for.body
415 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
416 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
417 %9 = load i32, ptr %arrayidx, align 4
418 %add = add i32 %9, %x
419 store i32 %add, ptr %arrayidx, align 4
420 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
421 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
422 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector subtract with a scalar remainder loop. %vector.body computes
; %wide.load - %broadcast.splat on <vscale x 4 x i32> chunks of %a (the splat of
; %x is defined in %vector.ph) until %index.next equals %n.vec; %for.body then
; handles one i32 per iteration until the index reaches 1024. The assertions
; expect vsub.vx with the scalar in a1 in the vector loop.
425 define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
426 ; CHECK-LABEL: sink_splat_sub_scalable:
427 ; CHECK: # %bb.0: # %entry
428 ; CHECK-NEXT: csrr a5, vlenb
429 ; CHECK-NEXT: srli a3, a5, 1
430 ; CHECK-NEXT: li a2, 1024
431 ; CHECK-NEXT: bgeu a2, a3, .LBB9_2
432 ; CHECK-NEXT: # %bb.1:
433 ; CHECK-NEXT: li a2, 0
434 ; CHECK-NEXT: j .LBB9_5
435 ; CHECK-NEXT: .LBB9_2: # %vector.ph
436 ; CHECK-NEXT: addi a2, a3, -1
437 ; CHECK-NEXT: andi a4, a2, 1024
438 ; CHECK-NEXT: xori a2, a4, 1024
439 ; CHECK-NEXT: slli a5, a5, 1
440 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
441 ; CHECK-NEXT: mv a6, a0
442 ; CHECK-NEXT: mv a7, a2
443 ; CHECK-NEXT: .LBB9_3: # %vector.body
444 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
445 ; CHECK-NEXT: vl2re32.v v8, (a6)
446 ; CHECK-NEXT: vsub.vx v8, v8, a1
447 ; CHECK-NEXT: vs2r.v v8, (a6)
448 ; CHECK-NEXT: sub a7, a7, a3
449 ; CHECK-NEXT: add a6, a6, a5
450 ; CHECK-NEXT: bnez a7, .LBB9_3
451 ; CHECK-NEXT: # %bb.4: # %middle.block
452 ; CHECK-NEXT: beqz a4, .LBB9_7
453 ; CHECK-NEXT: .LBB9_5: # %for.body.preheader
454 ; CHECK-NEXT: addi a3, a2, -1024
455 ; CHECK-NEXT: slli a2, a2, 2
456 ; CHECK-NEXT: add a0, a0, a2
457 ; CHECK-NEXT: .LBB9_6: # %for.body
458 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
459 ; CHECK-NEXT: lw a2, 0(a0)
460 ; CHECK-NEXT: add a2, a2, a1
461 ; CHECK-NEXT: sw a2, 0(a0)
462 ; CHECK-NEXT: addi a3, a3, 1
463 ; CHECK-NEXT: addi a0, a0, 4
464 ; CHECK-NEXT: bnez a3, .LBB9_6
465 ; CHECK-NEXT: .LBB9_7: # %for.cond.cleanup
468 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is greater than the trip count 1024.
470 %min.iters.check = icmp ugt i64 %1, 1024
471 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
473 vector.ph: ; preds = %entry
474 %2 = call i64 @llvm.vscale.i64()
476 %n.mod.vf = urem i64 1024, %3
477 %n.vec = sub nsw i64 1024, %n.mod.vf
478 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
479 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
480 %4 = call i64 @llvm.vscale.i64()
482 br label %vector.body
484 vector.body: ; preds = %vector.body, %vector.ph
485 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
486 %6 = getelementptr inbounds i32, ptr %a, i64 %index
487 %wide.load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %6, align 4
488 %7 = sub <vscale x 4 x i32> %wide.load, %broadcast.splat
489 store <vscale x 4 x i32> %7, <vscale x 4 x i32>* %6, align 4
490 %index.next = add nuw i64 %index, %5
491 %8 = icmp eq i64 %index.next, %n.vec
492 br i1 %8, label %middle.block, label %vector.body
494 middle.block: ; preds = %vector.body
; The scalar loop is skipped when there is no remainder (%n.mod.vf == 0).
495 %cmp.n = icmp eq i64 %n.mod.vf, 0
496 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
498 for.body.preheader: ; preds = %entry, %middle.block
499 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
502 for.cond.cleanup: ; preds = %for.body, %middle.block
505 for.body: ; preds = %for.body.preheader, %for.body
506 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
507 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
508 %9 = load i32, ptr %arrayidx, align 4
; NOTE(review): the remainder loop uses add although the vector body above uses
; sub, and the expected scalar instruction is likewise an add. This looks like
; a copy-paste from the add test; confirm the intent, and if it is changed to
; sub, regenerate the assertions with utils/update_llc_test_checks.py.
509 %add = add i32 %9, %x
510 store i32 %add, ptr %arrayidx, align 4
511 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
512 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
513 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector reverse subtract with a scalar remainder loop. The splat is
; the left-hand operand: %vector.body computes %broadcast.splat - %wide.load on
; <vscale x 4 x i32> chunks of %a until %index.next equals %n.vec; %for.body
; then computes %x - element one i32 at a time until the index reaches 1024.
; The assertions expect vrsub.vx with the scalar in a1 in the vector loop and
; subw a2, a1, a2 in the remainder loop.
516 define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
517 ; CHECK-LABEL: sink_splat_rsub_scalable:
518 ; CHECK: # %bb.0: # %entry
519 ; CHECK-NEXT: csrr a5, vlenb
520 ; CHECK-NEXT: srli a3, a5, 1
521 ; CHECK-NEXT: li a2, 1024
522 ; CHECK-NEXT: bgeu a2, a3, .LBB10_2
523 ; CHECK-NEXT: # %bb.1:
524 ; CHECK-NEXT: li a2, 0
525 ; CHECK-NEXT: j .LBB10_5
526 ; CHECK-NEXT: .LBB10_2: # %vector.ph
527 ; CHECK-NEXT: addi a2, a3, -1
528 ; CHECK-NEXT: andi a4, a2, 1024
529 ; CHECK-NEXT: xori a2, a4, 1024
530 ; CHECK-NEXT: slli a5, a5, 1
531 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
532 ; CHECK-NEXT: mv a6, a0
533 ; CHECK-NEXT: mv a7, a2
534 ; CHECK-NEXT: .LBB10_3: # %vector.body
535 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
536 ; CHECK-NEXT: vl2re32.v v8, (a6)
537 ; CHECK-NEXT: vrsub.vx v8, v8, a1
538 ; CHECK-NEXT: vs2r.v v8, (a6)
539 ; CHECK-NEXT: sub a7, a7, a3
540 ; CHECK-NEXT: add a6, a6, a5
541 ; CHECK-NEXT: bnez a7, .LBB10_3
542 ; CHECK-NEXT: # %bb.4: # %middle.block
543 ; CHECK-NEXT: beqz a4, .LBB10_7
544 ; CHECK-NEXT: .LBB10_5: # %for.body.preheader
545 ; CHECK-NEXT: addi a3, a2, -1024
546 ; CHECK-NEXT: slli a2, a2, 2
547 ; CHECK-NEXT: add a0, a0, a2
548 ; CHECK-NEXT: .LBB10_6: # %for.body
549 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
550 ; CHECK-NEXT: lw a2, 0(a0)
551 ; CHECK-NEXT: subw a2, a1, a2
552 ; CHECK-NEXT: sw a2, 0(a0)
553 ; CHECK-NEXT: addi a3, a3, 1
554 ; CHECK-NEXT: addi a0, a0, 4
555 ; CHECK-NEXT: bnez a3, .LBB10_6
556 ; CHECK-NEXT: .LBB10_7: # %for.cond.cleanup
559 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is greater than the trip count 1024.
561 %min.iters.check = icmp ugt i64 %1, 1024
562 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
564 vector.ph: ; preds = %entry
565 %2 = call i64 @llvm.vscale.i64()
567 %n.mod.vf = urem i64 1024, %3
568 %n.vec = sub nsw i64 1024, %n.mod.vf
569 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
570 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
571 %4 = call i64 @llvm.vscale.i64()
573 br label %vector.body
575 vector.body: ; preds = %vector.body, %vector.ph
576 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
577 %6 = getelementptr inbounds i32, ptr %a, i64 %index
578 %wide.load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %6, align 4
579 %7 = sub <vscale x 4 x i32> %broadcast.splat, %wide.load
580 store <vscale x 4 x i32> %7, <vscale x 4 x i32>* %6, align 4
581 %index.next = add nuw i64 %index, %5
582 %8 = icmp eq i64 %index.next, %n.vec
583 br i1 %8, label %middle.block, label %vector.body
585 middle.block: ; preds = %vector.body
; The scalar loop is skipped when there is no remainder (%n.mod.vf == 0).
586 %cmp.n = icmp eq i64 %n.mod.vf, 0
587 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
589 for.body.preheader: ; preds = %entry, %middle.block
590 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
593 for.cond.cleanup: ; preds = %for.body, %middle.block
596 for.body: ; preds = %for.body.preheader, %for.body
597 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
598 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
599 %9 = load i32, ptr %arrayidx, align 4
; Despite its name, %add holds a subtraction result (%x minus the element).
600 %add = sub i32 %x, %9
601 store i32 %add, ptr %arrayidx, align 4
602 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
603 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
604 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector bitwise AND with a scalar remainder loop. %vector.body ANDs
; <vscale x 4 x i32> chunks of %a with %broadcast.splat (a splat of %x defined
; in %vector.ph) until %index.next equals %n.vec; %for.body then handles one
; i32 per iteration until the index reaches 1024. The assertions expect vand.vx
; with the scalar in a1 in the vector loop and a scalar and in the remainder
; loop.
607 define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
608 ; CHECK-LABEL: sink_splat_and_scalable:
609 ; CHECK: # %bb.0: # %entry
610 ; CHECK-NEXT: csrr a5, vlenb
611 ; CHECK-NEXT: srli a3, a5, 1
612 ; CHECK-NEXT: li a2, 1024
613 ; CHECK-NEXT: bgeu a2, a3, .LBB11_2
614 ; CHECK-NEXT: # %bb.1:
615 ; CHECK-NEXT: li a2, 0
616 ; CHECK-NEXT: j .LBB11_5
617 ; CHECK-NEXT: .LBB11_2: # %vector.ph
618 ; CHECK-NEXT: addi a2, a3, -1
619 ; CHECK-NEXT: andi a4, a2, 1024
620 ; CHECK-NEXT: xori a2, a4, 1024
621 ; CHECK-NEXT: slli a5, a5, 1
622 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
623 ; CHECK-NEXT: mv a6, a0
624 ; CHECK-NEXT: mv a7, a2
625 ; CHECK-NEXT: .LBB11_3: # %vector.body
626 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
627 ; CHECK-NEXT: vl2re32.v v8, (a6)
628 ; CHECK-NEXT: vand.vx v8, v8, a1
629 ; CHECK-NEXT: vs2r.v v8, (a6)
630 ; CHECK-NEXT: sub a7, a7, a3
631 ; CHECK-NEXT: add a6, a6, a5
632 ; CHECK-NEXT: bnez a7, .LBB11_3
633 ; CHECK-NEXT: # %bb.4: # %middle.block
634 ; CHECK-NEXT: beqz a4, .LBB11_7
635 ; CHECK-NEXT: .LBB11_5: # %for.body.preheader
636 ; CHECK-NEXT: addi a3, a2, -1024
637 ; CHECK-NEXT: slli a2, a2, 2
638 ; CHECK-NEXT: add a0, a0, a2
639 ; CHECK-NEXT: .LBB11_6: # %for.body
640 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
641 ; CHECK-NEXT: lw a2, 0(a0)
642 ; CHECK-NEXT: and a2, a2, a1
643 ; CHECK-NEXT: sw a2, 0(a0)
644 ; CHECK-NEXT: addi a3, a3, 1
645 ; CHECK-NEXT: addi a0, a0, 4
646 ; CHECK-NEXT: bnez a3, .LBB11_6
647 ; CHECK-NEXT: .LBB11_7: # %for.cond.cleanup
650 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is greater than the trip count 1024.
652 %min.iters.check = icmp ugt i64 %1, 1024
653 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
655 vector.ph: ; preds = %entry
656 %2 = call i64 @llvm.vscale.i64()
658 %n.mod.vf = urem i64 1024, %3
659 %n.vec = sub nsw i64 1024, %n.mod.vf
660 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
661 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
662 %4 = call i64 @llvm.vscale.i64()
664 br label %vector.body
666 vector.body: ; preds = %vector.body, %vector.ph
667 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
668 %6 = getelementptr inbounds i32, ptr %a, i64 %index
669 %wide.load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %6, align 4
670 %7 = and <vscale x 4 x i32> %wide.load, %broadcast.splat
671 store <vscale x 4 x i32> %7, <vscale x 4 x i32>* %6, align 4
672 %index.next = add nuw i64 %index, %5
673 %8 = icmp eq i64 %index.next, %n.vec
674 br i1 %8, label %middle.block, label %vector.body
676 middle.block: ; preds = %vector.body
; The scalar loop is skipped when there is no remainder (%n.mod.vf == 0).
677 %cmp.n = icmp eq i64 %n.mod.vf, 0
678 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
680 for.body.preheader: ; preds = %entry, %middle.block
681 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
684 for.cond.cleanup: ; preds = %for.body, %middle.block
687 for.body: ; preds = %for.body.preheader, %for.body
688 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
689 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
690 %9 = load i32, ptr %arrayidx, align 4
691 %and = and i32 %9, %x
692 store i32 %and, ptr %arrayidx, align 4
693 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
694 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
695 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector bitwise OR with a scalar remainder loop. %vector.body ORs
; <vscale x 4 x i32> chunks of %a with %broadcast.splat (a splat of %x defined
; in %vector.ph) until %index.next equals %n.vec; %for.body then handles one
; i32 per iteration until the index reaches 1024. The assertions expect vor.vx
; with the scalar in a1 in the vector loop and a scalar or in the remainder
; loop.
698 define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
699 ; CHECK-LABEL: sink_splat_or_scalable:
700 ; CHECK: # %bb.0: # %entry
701 ; CHECK-NEXT: csrr a5, vlenb
702 ; CHECK-NEXT: srli a3, a5, 1
703 ; CHECK-NEXT: li a2, 1024
704 ; CHECK-NEXT: bgeu a2, a3, .LBB12_2
705 ; CHECK-NEXT: # %bb.1:
706 ; CHECK-NEXT: li a2, 0
707 ; CHECK-NEXT: j .LBB12_5
708 ; CHECK-NEXT: .LBB12_2: # %vector.ph
709 ; CHECK-NEXT: addi a2, a3, -1
710 ; CHECK-NEXT: andi a4, a2, 1024
711 ; CHECK-NEXT: xori a2, a4, 1024
712 ; CHECK-NEXT: slli a5, a5, 1
713 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
714 ; CHECK-NEXT: mv a6, a0
715 ; CHECK-NEXT: mv a7, a2
716 ; CHECK-NEXT: .LBB12_3: # %vector.body
717 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
718 ; CHECK-NEXT: vl2re32.v v8, (a6)
719 ; CHECK-NEXT: vor.vx v8, v8, a1
720 ; CHECK-NEXT: vs2r.v v8, (a6)
721 ; CHECK-NEXT: sub a7, a7, a3
722 ; CHECK-NEXT: add a6, a6, a5
723 ; CHECK-NEXT: bnez a7, .LBB12_3
724 ; CHECK-NEXT: # %bb.4: # %middle.block
725 ; CHECK-NEXT: beqz a4, .LBB12_7
726 ; CHECK-NEXT: .LBB12_5: # %for.body.preheader
727 ; CHECK-NEXT: addi a3, a2, -1024
728 ; CHECK-NEXT: slli a2, a2, 2
729 ; CHECK-NEXT: add a0, a0, a2
730 ; CHECK-NEXT: .LBB12_6: # %for.body
731 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
732 ; CHECK-NEXT: lw a2, 0(a0)
733 ; CHECK-NEXT: or a2, a2, a1
734 ; CHECK-NEXT: sw a2, 0(a0)
735 ; CHECK-NEXT: addi a3, a3, 1
736 ; CHECK-NEXT: addi a0, a0, 4
737 ; CHECK-NEXT: bnez a3, .LBB12_6
738 ; CHECK-NEXT: .LBB12_7: # %for.cond.cleanup
741 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is greater than the trip count 1024.
743 %min.iters.check = icmp ugt i64 %1, 1024
744 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
746 vector.ph: ; preds = %entry
747 %2 = call i64 @llvm.vscale.i64()
749 %n.mod.vf = urem i64 1024, %3
750 %n.vec = sub nsw i64 1024, %n.mod.vf
751 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
752 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
753 %4 = call i64 @llvm.vscale.i64()
755 br label %vector.body
757 vector.body: ; preds = %vector.body, %vector.ph
758 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
759 %6 = getelementptr inbounds i32, ptr %a, i64 %index
760 %wide.load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %6, align 4
761 %7 = or <vscale x 4 x i32> %wide.load, %broadcast.splat
762 store <vscale x 4 x i32> %7, <vscale x 4 x i32>* %6, align 4
763 %index.next = add nuw i64 %index, %5
764 %8 = icmp eq i64 %index.next, %n.vec
765 br i1 %8, label %middle.block, label %vector.body
767 middle.block: ; preds = %vector.body
; The scalar loop is skipped when there is no remainder (%n.mod.vf == 0).
768 %cmp.n = icmp eq i64 %n.mod.vf, 0
769 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
771 for.body.preheader: ; preds = %entry, %middle.block
772 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
775 for.cond.cleanup: ; preds = %for.body, %middle.block
778 for.body: ; preds = %for.body.preheader, %for.body
779 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
780 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
781 %9 = load i32, ptr %arrayidx, align 4
783 store i32 %or, ptr %arrayidx, align 4
784 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
785 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
786 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector bitwise XOR with a scalar remainder loop. %vector.body XORs
; <vscale x 4 x i32> chunks of %a with %broadcast.splat (a splat of %x defined
; in %vector.ph) until %index.next equals %n.vec; %for.body then handles one
; i32 per iteration until the index reaches 1024. The assertions expect vxor.vx
; with the scalar in a1 in the vector loop and a scalar xor in the remainder
; loop.
789 define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
790 ; CHECK-LABEL: sink_splat_xor_scalable:
791 ; CHECK: # %bb.0: # %entry
792 ; CHECK-NEXT: csrr a5, vlenb
793 ; CHECK-NEXT: srli a3, a5, 1
794 ; CHECK-NEXT: li a2, 1024
795 ; CHECK-NEXT: bgeu a2, a3, .LBB13_2
796 ; CHECK-NEXT: # %bb.1:
797 ; CHECK-NEXT: li a2, 0
798 ; CHECK-NEXT: j .LBB13_5
799 ; CHECK-NEXT: .LBB13_2: # %vector.ph
800 ; CHECK-NEXT: addi a2, a3, -1
801 ; CHECK-NEXT: andi a4, a2, 1024
802 ; CHECK-NEXT: xori a2, a4, 1024
803 ; CHECK-NEXT: slli a5, a5, 1
804 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
805 ; CHECK-NEXT: mv a6, a0
806 ; CHECK-NEXT: mv a7, a2
807 ; CHECK-NEXT: .LBB13_3: # %vector.body
808 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
809 ; CHECK-NEXT: vl2re32.v v8, (a6)
810 ; CHECK-NEXT: vxor.vx v8, v8, a1
811 ; CHECK-NEXT: vs2r.v v8, (a6)
812 ; CHECK-NEXT: sub a7, a7, a3
813 ; CHECK-NEXT: add a6, a6, a5
814 ; CHECK-NEXT: bnez a7, .LBB13_3
815 ; CHECK-NEXT: # %bb.4: # %middle.block
816 ; CHECK-NEXT: beqz a4, .LBB13_7
817 ; CHECK-NEXT: .LBB13_5: # %for.body.preheader
818 ; CHECK-NEXT: addi a3, a2, -1024
819 ; CHECK-NEXT: slli a2, a2, 2
820 ; CHECK-NEXT: add a0, a0, a2
821 ; CHECK-NEXT: .LBB13_6: # %for.body
822 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
823 ; CHECK-NEXT: lw a2, 0(a0)
824 ; CHECK-NEXT: xor a2, a2, a1
825 ; CHECK-NEXT: sw a2, 0(a0)
826 ; CHECK-NEXT: addi a3, a3, 1
827 ; CHECK-NEXT: addi a0, a0, 4
828 ; CHECK-NEXT: bnez a3, .LBB13_6
829 ; CHECK-NEXT: .LBB13_7: # %for.cond.cleanup
832 %0 = call i64 @llvm.vscale.i64()
; Go straight to the scalar loop when %1 is greater than the trip count 1024.
834 %min.iters.check = icmp ugt i64 %1, 1024
835 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
837 vector.ph: ; preds = %entry
838 %2 = call i64 @llvm.vscale.i64()
840 %n.mod.vf = urem i64 1024, %3
841 %n.vec = sub nsw i64 1024, %n.mod.vf
842 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
843 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
844 %4 = call i64 @llvm.vscale.i64()
846 br label %vector.body
848 vector.body: ; preds = %vector.body, %vector.ph
849 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
850 %6 = getelementptr inbounds i32, ptr %a, i64 %index
851 %wide.load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %6, align 4
852 %7 = xor <vscale x 4 x i32> %wide.load, %broadcast.splat
853 store <vscale x 4 x i32> %7, <vscale x 4 x i32>* %6, align 4
854 %index.next = add nuw i64 %index, %5
855 %8 = icmp eq i64 %index.next, %n.vec
856 br i1 %8, label %middle.block, label %vector.body
858 middle.block: ; preds = %vector.body
; The scalar loop is skipped when there is no remainder (%n.mod.vf == 0).
859 %cmp.n = icmp eq i64 %n.mod.vf, 0
860 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
862 for.body.preheader: ; preds = %entry, %middle.block
863 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
866 for.cond.cleanup: ; preds = %for.body, %middle.block
869 for.body: ; preds = %for.body.preheader, %for.body
870 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
871 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
872 %9 = load i32, ptr %arrayidx, align 4
873 %xor = xor i32 %9, %x
874 store i32 %xor, ptr %arrayidx, align 4
875 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
876 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
877 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_shl: fixed-length <4 x i32> loop that shifts 1024 i32 elements of
; %a left by %x, four lanes per iteration. The shift amount is splatted once,
; before the loop; the expected assembly uses the vector-scalar form
; (vsll.vx with a1) inside the loop and shows no separate splat instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
880 define void @sink_splat_shl(ptr nocapture %a, i32 signext %x) {
881 ; CHECK-LABEL: sink_splat_shl:
882 ; CHECK: # %bb.0: # %entry
883 ; CHECK-NEXT: li a2, 1024
884 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
885 ; CHECK-NEXT: .LBB14_1: # %vector.body
886 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
887 ; CHECK-NEXT: vle32.v v8, (a0)
888 ; CHECK-NEXT: vsll.vx v8, v8, a1
889 ; CHECK-NEXT: vse32.v v8, (a0)
890 ; CHECK-NEXT: addi a2, a2, -4
891 ; CHECK-NEXT: addi a0, a0, 16
892 ; CHECK-NEXT: bnez a2, .LBB14_1
893 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
896 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
897 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
898 br label %vector.body
; Loop body: load four lanes, shift them by the splat, store back in place.
900 vector.body: ; preds = %vector.body, %entry
901 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
902 %0 = getelementptr inbounds i32, ptr %a, i64 %index
903 %wide.load = load <4 x i32>, ptr %0, align 4
904 %1 = shl <4 x i32> %wide.load, %broadcast.splat
905 store <4 x i32> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
906 %index.next = add nuw i64 %index, 4
907 %2 = icmp eq i64 %index.next, 1024
908 br i1 %2, label %for.cond.cleanup, label %vector.body
910 for.cond.cleanup: ; preds = %vector.body
; sink_splat_lshr: fixed-length <4 x i32> loop that logically shifts 1024 i32
; elements of %a right by %x, four lanes per iteration. The shift amount is
; splatted once, before the loop; the expected assembly uses the vector-scalar
; form (vsrl.vx with a1) inside the loop and shows no separate splat
; instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
914 define void @sink_splat_lshr(ptr nocapture %a, i32 signext %x) {
915 ; CHECK-LABEL: sink_splat_lshr:
916 ; CHECK: # %bb.0: # %entry
917 ; CHECK-NEXT: li a2, 1024
918 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
919 ; CHECK-NEXT: .LBB15_1: # %vector.body
920 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
921 ; CHECK-NEXT: vle32.v v8, (a0)
922 ; CHECK-NEXT: vsrl.vx v8, v8, a1
923 ; CHECK-NEXT: vse32.v v8, (a0)
924 ; CHECK-NEXT: addi a2, a2, -4
925 ; CHECK-NEXT: addi a0, a0, 16
926 ; CHECK-NEXT: bnez a2, .LBB15_1
927 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
930 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
931 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
932 br label %vector.body
; Loop body: load four lanes, shift them by the splat, store back in place.
934 vector.body: ; preds = %vector.body, %entry
935 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
936 %0 = getelementptr inbounds i32, ptr %a, i64 %index
937 %wide.load = load <4 x i32>, ptr %0, align 4
938 %1 = lshr <4 x i32> %wide.load, %broadcast.splat
939 store <4 x i32> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
940 %index.next = add nuw i64 %index, 4
941 %2 = icmp eq i64 %index.next, 1024
942 br i1 %2, label %for.cond.cleanup, label %vector.body
944 for.cond.cleanup: ; preds = %vector.body
; sink_splat_ashr: fixed-length <4 x i32> loop that arithmetically shifts 1024
; i32 elements of %a right by %x, four lanes per iteration. The shift amount is
; splatted once, before the loop; the expected assembly uses the vector-scalar
; form (vsra.vx with a1) inside the loop and shows no separate splat
; instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
948 define void @sink_splat_ashr(ptr nocapture %a, i32 signext %x) {
949 ; CHECK-LABEL: sink_splat_ashr:
950 ; CHECK: # %bb.0: # %entry
951 ; CHECK-NEXT: li a2, 1024
952 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
953 ; CHECK-NEXT: .LBB16_1: # %vector.body
954 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
955 ; CHECK-NEXT: vle32.v v8, (a0)
956 ; CHECK-NEXT: vsra.vx v8, v8, a1
957 ; CHECK-NEXT: vse32.v v8, (a0)
958 ; CHECK-NEXT: addi a2, a2, -4
959 ; CHECK-NEXT: addi a0, a0, 16
960 ; CHECK-NEXT: bnez a2, .LBB16_1
961 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
964 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
965 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
966 br label %vector.body
; Loop body: load four lanes, shift them by the splat, store back in place.
968 vector.body: ; preds = %vector.body, %entry
969 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
970 %0 = getelementptr inbounds i32, ptr %a, i64 %index
971 %wide.load = load <4 x i32>, ptr %0, align 4
972 %1 = ashr <4 x i32> %wide.load, %broadcast.splat
973 store <4 x i32> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
974 %index.next = add nuw i64 %index, 4
975 %2 = icmp eq i64 %index.next, 1024
976 br i1 %2, label %for.cond.cleanup, label %vector.body
978 for.cond.cleanup: ; preds = %vector.body
; sink_splat_shl_scalable: scalable-vector (<vscale x 4 x i32>) variant of the
; shl test over 1024 i32 elements of %a, with a scalar remainder loop.
;   entry        - goes straight to the scalar loop when %1 is greater than 1024.
;   vector.ph    - computes %n.vec = 1024 - (1024 urem %3) and builds the splat
;                  of %x once, outside the vector loop.
;   vector.body  - whole-vector load, shl by the splat, store; the index steps
;                  by %5 until it equals %n.vec.
;   middle.block - finishes when %n.mod.vf is 0, otherwise runs the scalar loop.
;   for.body     - scalar shl for indices from %indvars.iv.ph up to 1024.
; Expected codegen keeps the shift amount in a1: vsll.vx in the vector loop and
; sllw in the scalar loop.
; Vector memory accesses are written with the opaque `ptr` type so they agree
; with the GEP and the scalar load/store in this function.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
982 define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
983 ; CHECK-LABEL: sink_splat_shl_scalable:
984 ; CHECK: # %bb.0: # %entry
985 ; CHECK-NEXT: csrr a5, vlenb
986 ; CHECK-NEXT: srli a3, a5, 1
987 ; CHECK-NEXT: li a2, 1024
988 ; CHECK-NEXT: bgeu a2, a3, .LBB17_2
989 ; CHECK-NEXT: # %bb.1:
990 ; CHECK-NEXT: li a2, 0
991 ; CHECK-NEXT: j .LBB17_5
992 ; CHECK-NEXT: .LBB17_2: # %vector.ph
993 ; CHECK-NEXT: addi a2, a3, -1
994 ; CHECK-NEXT: andi a4, a2, 1024
995 ; CHECK-NEXT: xori a2, a4, 1024
996 ; CHECK-NEXT: slli a5, a5, 1
997 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
998 ; CHECK-NEXT: mv a6, a0
999 ; CHECK-NEXT: mv a7, a2
1000 ; CHECK-NEXT: .LBB17_3: # %vector.body
1001 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1002 ; CHECK-NEXT: vl2re32.v v8, (a6)
1003 ; CHECK-NEXT: vsll.vx v8, v8, a1
1004 ; CHECK-NEXT: vs2r.v v8, (a6)
1005 ; CHECK-NEXT: sub a7, a7, a3
1006 ; CHECK-NEXT: add a6, a6, a5
1007 ; CHECK-NEXT: bnez a7, .LBB17_3
1008 ; CHECK-NEXT: # %bb.4: # %middle.block
1009 ; CHECK-NEXT: beqz a4, .LBB17_7
1010 ; CHECK-NEXT: .LBB17_5: # %for.body.preheader
1011 ; CHECK-NEXT: addi a3, a2, -1024
1012 ; CHECK-NEXT: slli a2, a2, 2
1013 ; CHECK-NEXT: add a0, a0, a2
1014 ; CHECK-NEXT: .LBB17_6: # %for.body
1015 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1016 ; CHECK-NEXT: lw a2, 0(a0)
1017 ; CHECK-NEXT: sllw a2, a2, a1
1018 ; CHECK-NEXT: sw a2, 0(a0)
1019 ; CHECK-NEXT: addi a3, a3, 1
1020 ; CHECK-NEXT: addi a0, a0, 4
1021 ; CHECK-NEXT: bnez a3, .LBB17_6
1022 ; CHECK-NEXT: .LBB17_7: # %for.cond.cleanup
1025 %0 = call i64 @llvm.vscale.i64()
1027 %min.iters.check = icmp ugt i64 %1, 1024
1028 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1030 vector.ph: ; preds = %entry
1031 %2 = call i64 @llvm.vscale.i64()
1033 %n.mod.vf = urem i64 1024, %3
1034 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, defined outside the vector loop.
1035 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
1036 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1037 %4 = call i64 @llvm.vscale.i64()
1039 br label %vector.body
1041 vector.body: ; preds = %vector.body, %vector.ph
1042 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1043 %6 = getelementptr inbounds i32, ptr %a, i64 %index
1044 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1045 %7 = shl <vscale x 4 x i32> %wide.load, %broadcast.splat
1046 store <vscale x 4 x i32> %7, ptr %6, align 4
1047 %index.next = add nuw i64 %index, %5
1048 %8 = icmp eq i64 %index.next, %n.vec
1049 br i1 %8, label %middle.block, label %vector.body
1051 middle.block: ; preds = %vector.body
1052 %cmp.n = icmp eq i64 %n.mod.vf, 0
1053 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1055 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop start: 0 when the vector loop was skipped, otherwise %n.vec.
1056 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1059 for.cond.cleanup: ; preds = %for.body, %middle.block
1062 for.body: ; preds = %for.body.preheader, %for.body
1063 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1064 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1065 %9 = load i32, ptr %arrayidx, align 4
1066 %shl = shl i32 %9, %x
1067 store i32 %shl, ptr %arrayidx, align 4
1068 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1069 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1070 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_lshr_scalable: scalable-vector (<vscale x 4 x i32>) variant of the
; lshr test over 1024 i32 elements of %a, with a scalar remainder loop.
;   entry        - goes straight to the scalar loop when %1 is greater than 1024.
;   vector.ph    - computes %n.vec = 1024 - (1024 urem %3) and builds the splat
;                  of %x once, outside the vector loop.
;   vector.body  - whole-vector load, lshr by the splat, store; the index steps
;                  by %5 until it equals %n.vec.
;   middle.block - finishes when %n.mod.vf is 0, otherwise runs the scalar loop.
;   for.body     - scalar lshr for indices from %indvars.iv.ph up to 1024.
; Expected codegen keeps the shift amount in a1: vsrl.vx in the vector loop and
; srlw in the scalar loop.
; Vector memory accesses are written with the opaque `ptr` type so they agree
; with the GEP and the scalar load/store in this function.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1073 define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
1074 ; CHECK-LABEL: sink_splat_lshr_scalable:
1075 ; CHECK: # %bb.0: # %entry
1076 ; CHECK-NEXT: csrr a5, vlenb
1077 ; CHECK-NEXT: srli a3, a5, 1
1078 ; CHECK-NEXT: li a2, 1024
1079 ; CHECK-NEXT: bgeu a2, a3, .LBB18_2
1080 ; CHECK-NEXT: # %bb.1:
1081 ; CHECK-NEXT: li a2, 0
1082 ; CHECK-NEXT: j .LBB18_5
1083 ; CHECK-NEXT: .LBB18_2: # %vector.ph
1084 ; CHECK-NEXT: addi a2, a3, -1
1085 ; CHECK-NEXT: andi a4, a2, 1024
1086 ; CHECK-NEXT: xori a2, a4, 1024
1087 ; CHECK-NEXT: slli a5, a5, 1
1088 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
1089 ; CHECK-NEXT: mv a6, a0
1090 ; CHECK-NEXT: mv a7, a2
1091 ; CHECK-NEXT: .LBB18_3: # %vector.body
1092 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1093 ; CHECK-NEXT: vl2re32.v v8, (a6)
1094 ; CHECK-NEXT: vsrl.vx v8, v8, a1
1095 ; CHECK-NEXT: vs2r.v v8, (a6)
1096 ; CHECK-NEXT: sub a7, a7, a3
1097 ; CHECK-NEXT: add a6, a6, a5
1098 ; CHECK-NEXT: bnez a7, .LBB18_3
1099 ; CHECK-NEXT: # %bb.4: # %middle.block
1100 ; CHECK-NEXT: beqz a4, .LBB18_7
1101 ; CHECK-NEXT: .LBB18_5: # %for.body.preheader
1102 ; CHECK-NEXT: addi a3, a2, -1024
1103 ; CHECK-NEXT: slli a2, a2, 2
1104 ; CHECK-NEXT: add a0, a0, a2
1105 ; CHECK-NEXT: .LBB18_6: # %for.body
1106 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1107 ; CHECK-NEXT: lw a2, 0(a0)
1108 ; CHECK-NEXT: srlw a2, a2, a1
1109 ; CHECK-NEXT: sw a2, 0(a0)
1110 ; CHECK-NEXT: addi a3, a3, 1
1111 ; CHECK-NEXT: addi a0, a0, 4
1112 ; CHECK-NEXT: bnez a3, .LBB18_6
1113 ; CHECK-NEXT: .LBB18_7: # %for.cond.cleanup
1116 %0 = call i64 @llvm.vscale.i64()
1118 %min.iters.check = icmp ugt i64 %1, 1024
1119 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1121 vector.ph: ; preds = %entry
1122 %2 = call i64 @llvm.vscale.i64()
1124 %n.mod.vf = urem i64 1024, %3
1125 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, defined outside the vector loop.
1126 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
1127 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1128 %4 = call i64 @llvm.vscale.i64()
1130 br label %vector.body
1132 vector.body: ; preds = %vector.body, %vector.ph
1133 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1134 %6 = getelementptr inbounds i32, ptr %a, i64 %index
1135 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1136 %7 = lshr <vscale x 4 x i32> %wide.load, %broadcast.splat
1137 store <vscale x 4 x i32> %7, ptr %6, align 4
1138 %index.next = add nuw i64 %index, %5
1139 %8 = icmp eq i64 %index.next, %n.vec
1140 br i1 %8, label %middle.block, label %vector.body
1142 middle.block: ; preds = %vector.body
1143 %cmp.n = icmp eq i64 %n.mod.vf, 0
1144 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1146 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop start: 0 when the vector loop was skipped, otherwise %n.vec.
1147 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1150 for.cond.cleanup: ; preds = %for.body, %middle.block
1153 for.body: ; preds = %for.body.preheader, %for.body
1154 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1155 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1156 %9 = load i32, ptr %arrayidx, align 4
1157 %lshr = lshr i32 %9, %x
1158 store i32 %lshr, ptr %arrayidx, align 4
1159 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1160 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1161 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_ashr_scalable: scalable-vector (<vscale x 4 x i32>) ashr test over
; 1024 i32 elements of %a, with a scalar remainder loop. This function takes no
; scalar operand: the shift amount is the constant 2, splatted outside the
; vector loop.
;   entry        - goes straight to the scalar loop when %1 is greater than 1024.
;   vector.ph    - computes %n.vec = 1024 - (1024 urem %3) and builds the splat
;                  of the constant 2.
;   vector.body  - whole-vector load, ashr by the splat, store; the index steps
;                  by %5 until it equals %n.vec.
;   middle.block - finishes when %n.mod.vf is 0, otherwise runs the scalar loop.
;   for.body     - scalar ashr by 2 for indices from %indvars.iv.ph up to 1024.
; Expected codegen uses the immediate form vsra.vi ..., 2 in the vector loop.
; NOTE(review): the scalar loop is checked as lw / srli / sw rather than an
; arithmetic shift; this presumably relies on lw sign-extending the value and
; sw storing only the low 32 bits - confirm against the RISC-V ISA manual.
; Vector memory accesses are written with the opaque `ptr` type so they agree
; with the GEP and the scalar load/store in this function.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1164 define void @sink_splat_ashr_scalable(ptr nocapture %a) {
1165 ; CHECK-LABEL: sink_splat_ashr_scalable:
1166 ; CHECK: # %bb.0: # %entry
1167 ; CHECK-NEXT: csrr a4, vlenb
1168 ; CHECK-NEXT: srli a2, a4, 1
1169 ; CHECK-NEXT: li a1, 1024
1170 ; CHECK-NEXT: bgeu a1, a2, .LBB19_2
1171 ; CHECK-NEXT: # %bb.1:
1172 ; CHECK-NEXT: li a1, 0
1173 ; CHECK-NEXT: j .LBB19_5
1174 ; CHECK-NEXT: .LBB19_2: # %vector.ph
1175 ; CHECK-NEXT: addi a1, a2, -1
1176 ; CHECK-NEXT: andi a3, a1, 1024
1177 ; CHECK-NEXT: xori a1, a3, 1024
1178 ; CHECK-NEXT: slli a4, a4, 1
1179 ; CHECK-NEXT: vsetvli a5, zero, e32, m2, ta, ma
1180 ; CHECK-NEXT: mv a5, a0
1181 ; CHECK-NEXT: mv a6, a1
1182 ; CHECK-NEXT: .LBB19_3: # %vector.body
1183 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1184 ; CHECK-NEXT: vl2re32.v v8, (a5)
1185 ; CHECK-NEXT: vsra.vi v8, v8, 2
1186 ; CHECK-NEXT: vs2r.v v8, (a5)
1187 ; CHECK-NEXT: sub a6, a6, a2
1188 ; CHECK-NEXT: add a5, a5, a4
1189 ; CHECK-NEXT: bnez a6, .LBB19_3
1190 ; CHECK-NEXT: # %bb.4: # %middle.block
1191 ; CHECK-NEXT: beqz a3, .LBB19_7
1192 ; CHECK-NEXT: .LBB19_5: # %for.body.preheader
1193 ; CHECK-NEXT: addi a2, a1, -1024
1194 ; CHECK-NEXT: slli a1, a1, 2
1195 ; CHECK-NEXT: add a0, a0, a1
1196 ; CHECK-NEXT: .LBB19_6: # %for.body
1197 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1198 ; CHECK-NEXT: lw a1, 0(a0)
1199 ; CHECK-NEXT: srli a1, a1, 2
1200 ; CHECK-NEXT: sw a1, 0(a0)
1201 ; CHECK-NEXT: addi a2, a2, 1
1202 ; CHECK-NEXT: addi a0, a0, 4
1203 ; CHECK-NEXT: bnez a2, .LBB19_6
1204 ; CHECK-NEXT: .LBB19_7: # %for.cond.cleanup
1207 %0 = call i64 @llvm.vscale.i64()
1209 %min.iters.check = icmp ugt i64 %1, 1024
1210 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1212 vector.ph: ; preds = %entry
1213 %2 = call i64 @llvm.vscale.i64()
1215 %n.mod.vf = urem i64 1024, %3
1216 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of the constant shift amount 2.
1217 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 2, i32 0
1218 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1219 %4 = call i64 @llvm.vscale.i64()
1221 br label %vector.body
1223 vector.body: ; preds = %vector.body, %vector.ph
1224 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1225 %6 = getelementptr inbounds i32, ptr %a, i64 %index
1226 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1227 %7 = ashr <vscale x 4 x i32> %wide.load, %broadcast.splat
1228 store <vscale x 4 x i32> %7, ptr %6, align 4
1229 %index.next = add nuw i64 %index, %5
1230 %8 = icmp eq i64 %index.next, %n.vec
1231 br i1 %8, label %middle.block, label %vector.body
1233 middle.block: ; preds = %vector.body
1234 %cmp.n = icmp eq i64 %n.mod.vf, 0
1235 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1237 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop start: 0 when the vector loop was skipped, otherwise %n.vec.
1238 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1241 for.cond.cleanup: ; preds = %for.body, %middle.block
1244 for.body: ; preds = %for.body.preheader, %for.body
1245 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1246 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1247 %9 = load i32, ptr %arrayidx, align 4
1248 %ashr = ashr i32 %9, 2
1249 store i32 %ashr, ptr %arrayidx, align 4
1250 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1251 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1252 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_fmul: fixed-length <4 x float> loop that multiplies 1024 float
; elements of %a by %x, four lanes per iteration. %x is splatted once, before
; the loop; the expected assembly uses the vector-scalar form (vfmul.vf with
; fa0) inside the loop and shows no separate splat instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1255 define void @sink_splat_fmul(ptr nocapture %a, float %x) {
1256 ; CHECK-LABEL: sink_splat_fmul:
1257 ; CHECK: # %bb.0: # %entry
1258 ; CHECK-NEXT: li a1, 1024
1259 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1260 ; CHECK-NEXT: .LBB20_1: # %vector.body
1261 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1262 ; CHECK-NEXT: vle32.v v8, (a0)
1263 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
1264 ; CHECK-NEXT: vse32.v v8, (a0)
1265 ; CHECK-NEXT: addi a1, a1, -4
1266 ; CHECK-NEXT: addi a0, a0, 16
1267 ; CHECK-NEXT: bnez a1, .LBB20_1
1268 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
1271 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1272 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1273 br label %vector.body
; Loop body: load four lanes, multiply by the splat, store back in place.
1275 vector.body: ; preds = %vector.body, %entry
1276 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1277 %0 = getelementptr inbounds float, ptr %a, i64 %index
1278 %wide.load = load <4 x float>, ptr %0, align 4
1279 %1 = fmul <4 x float> %wide.load, %broadcast.splat
1280 store <4 x float> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
1281 %index.next = add nuw i64 %index, 4
1282 %2 = icmp eq i64 %index.next, 1024
1283 br i1 %2, label %for.cond.cleanup, label %vector.body
1285 for.cond.cleanup: ; preds = %vector.body
; sink_splat_fdiv: fixed-length <4 x float> loop that divides 1024 float
; elements of %a by %x, four lanes per iteration. %x is splatted once, before
; the loop, and is the divisor; the expected assembly uses the vector-scalar
; form (vfdiv.vf with fa0) inside the loop and shows no separate splat
; instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1289 define void @sink_splat_fdiv(ptr nocapture %a, float %x) {
1290 ; CHECK-LABEL: sink_splat_fdiv:
1291 ; CHECK: # %bb.0: # %entry
1292 ; CHECK-NEXT: li a1, 1024
1293 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1294 ; CHECK-NEXT: .LBB21_1: # %vector.body
1295 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1296 ; CHECK-NEXT: vle32.v v8, (a0)
1297 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
1298 ; CHECK-NEXT: vse32.v v8, (a0)
1299 ; CHECK-NEXT: addi a1, a1, -4
1300 ; CHECK-NEXT: addi a0, a0, 16
1301 ; CHECK-NEXT: bnez a1, .LBB21_1
1302 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
1305 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1306 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1307 br label %vector.body
; Loop body: load four lanes, divide them by the splat, store back in place.
1309 vector.body: ; preds = %vector.body, %entry
1310 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1311 %0 = getelementptr inbounds float, ptr %a, i64 %index
1312 %wide.load = load <4 x float>, ptr %0, align 4
1313 %1 = fdiv <4 x float> %wide.load, %broadcast.splat
1314 store <4 x float> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
1315 %index.next = add nuw i64 %index, 4
1316 %2 = icmp eq i64 %index.next, 1024
1317 br i1 %2, label %for.cond.cleanup, label %vector.body
1319 for.cond.cleanup: ; preds = %vector.body
; sink_splat_frdiv: fixed-length <4 x float> loop that replaces each of 1024
; float elements of %a with %x divided by that element, four lanes per
; iteration. %x is splatted once, before the loop, and is the dividend (first
; fdiv operand); the expected assembly uses the reversed vector-scalar form
; (vfrdiv.vf with fa0) inside the loop and shows no separate splat instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1323 define void @sink_splat_frdiv(ptr nocapture %a, float %x) {
1324 ; CHECK-LABEL: sink_splat_frdiv:
1325 ; CHECK: # %bb.0: # %entry
1326 ; CHECK-NEXT: li a1, 1024
1327 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1328 ; CHECK-NEXT: .LBB22_1: # %vector.body
1329 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1330 ; CHECK-NEXT: vle32.v v8, (a0)
1331 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
1332 ; CHECK-NEXT: vse32.v v8, (a0)
1333 ; CHECK-NEXT: addi a1, a1, -4
1334 ; CHECK-NEXT: addi a0, a0, 16
1335 ; CHECK-NEXT: bnez a1, .LBB22_1
1336 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
1339 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1340 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1341 br label %vector.body
; Loop body: load four lanes, divide the splat by them, store back in place.
1343 vector.body: ; preds = %vector.body, %entry
1344 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1345 %0 = getelementptr inbounds float, ptr %a, i64 %index
1346 %wide.load = load <4 x float>, ptr %0, align 4
1347 %1 = fdiv <4 x float> %broadcast.splat, %wide.load
1348 store <4 x float> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
1349 %index.next = add nuw i64 %index, 4
1350 %2 = icmp eq i64 %index.next, 1024
1351 br i1 %2, label %for.cond.cleanup, label %vector.body
1353 for.cond.cleanup: ; preds = %vector.body
; sink_splat_fadd: fixed-length <4 x float> loop that adds %x to 1024 float
; elements of %a, four lanes per iteration. %x is splatted once, before the
; loop; the expected assembly uses the vector-scalar form (vfadd.vf with fa0)
; inside the loop and shows no separate splat instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1357 define void @sink_splat_fadd(ptr nocapture %a, float %x) {
1358 ; CHECK-LABEL: sink_splat_fadd:
1359 ; CHECK: # %bb.0: # %entry
1360 ; CHECK-NEXT: li a1, 1024
1361 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1362 ; CHECK-NEXT: .LBB23_1: # %vector.body
1363 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1364 ; CHECK-NEXT: vle32.v v8, (a0)
1365 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
1366 ; CHECK-NEXT: vse32.v v8, (a0)
1367 ; CHECK-NEXT: addi a1, a1, -4
1368 ; CHECK-NEXT: addi a0, a0, 16
1369 ; CHECK-NEXT: bnez a1, .LBB23_1
1370 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
1373 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1374 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1375 br label %vector.body
; Loop body: load four lanes, add the splat, store back in place.
1377 vector.body: ; preds = %vector.body, %entry
1378 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1379 %0 = getelementptr inbounds float, ptr %a, i64 %index
1380 %wide.load = load <4 x float>, ptr %0, align 4
1381 %1 = fadd <4 x float> %wide.load, %broadcast.splat
1382 store <4 x float> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
1383 %index.next = add nuw i64 %index, 4
1384 %2 = icmp eq i64 %index.next, 1024
1385 br i1 %2, label %for.cond.cleanup, label %vector.body
1387 for.cond.cleanup: ; preds = %vector.body
; sink_splat_fsub: fixed-length <4 x float> loop that subtracts %x from 1024
; float elements of %a, four lanes per iteration. %x is splatted once, before
; the loop, and is the subtrahend; the expected assembly uses the vector-scalar
; form (vfsub.vf with fa0) inside the loop and shows no separate splat
; instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1391 define void @sink_splat_fsub(ptr nocapture %a, float %x) {
1392 ; CHECK-LABEL: sink_splat_fsub:
1393 ; CHECK: # %bb.0: # %entry
1394 ; CHECK-NEXT: li a1, 1024
1395 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1396 ; CHECK-NEXT: .LBB24_1: # %vector.body
1397 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1398 ; CHECK-NEXT: vle32.v v8, (a0)
1399 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1400 ; CHECK-NEXT: vse32.v v8, (a0)
1401 ; CHECK-NEXT: addi a1, a1, -4
1402 ; CHECK-NEXT: addi a0, a0, 16
1403 ; CHECK-NEXT: bnez a1, .LBB24_1
1404 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
1407 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1408 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1409 br label %vector.body
; Loop body: load four lanes, subtract the splat, store back in place.
1411 vector.body: ; preds = %vector.body, %entry
1412 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1413 %0 = getelementptr inbounds float, ptr %a, i64 %index
1414 %wide.load = load <4 x float>, ptr %0, align 4
1415 %1 = fsub <4 x float> %wide.load, %broadcast.splat
1416 store <4 x float> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
1417 %index.next = add nuw i64 %index, 4
1418 %2 = icmp eq i64 %index.next, 1024
1419 br i1 %2, label %for.cond.cleanup, label %vector.body
1421 for.cond.cleanup: ; preds = %vector.body
; sink_splat_frsub: fixed-length <4 x float> loop that replaces each of 1024
; float elements of %a with %x minus that element, four lanes per iteration.
; %x is splatted once, before the loop, and is the minuend (first fsub
; operand); the expected assembly uses the reversed vector-scalar form
; (vfrsub.vf with fa0) inside the loop and shows no separate splat instruction.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1425 define void @sink_splat_frsub(ptr nocapture %a, float %x) {
1426 ; CHECK-LABEL: sink_splat_frsub:
1427 ; CHECK: # %bb.0: # %entry
1428 ; CHECK-NEXT: li a1, 1024
1429 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1430 ; CHECK-NEXT: .LBB25_1: # %vector.body
1431 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1432 ; CHECK-NEXT: vle32.v v8, (a0)
1433 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0
1434 ; CHECK-NEXT: vse32.v v8, (a0)
1435 ; CHECK-NEXT: addi a1, a1, -4
1436 ; CHECK-NEXT: addi a0, a0, 16
1437 ; CHECK-NEXT: bnez a1, .LBB25_1
1438 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x: insert into lane 0, then broadcast to all lanes.
1441 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1442 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1443 br label %vector.body
; Loop body: load four lanes, subtract them from the splat, store back in place.
1445 vector.body: ; preds = %vector.body, %entry
1446 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1447 %0 = getelementptr inbounds float, ptr %a, i64 %index
1448 %wide.load = load <4 x float>, ptr %0, align 4
1449 %1 = fsub <4 x float> %broadcast.splat, %wide.load
1450 store <4 x float> %1, ptr %0, align 4
; Advance by 4 elements; leave the loop once the index reaches 1024.
1451 %index.next = add nuw i64 %index, 4
1452 %2 = icmp eq i64 %index.next, 1024
1453 br i1 %2, label %for.cond.cleanup, label %vector.body
1455 for.cond.cleanup: ; preds = %vector.body
; sink_splat_fmul_scalable: scalable-vector (<vscale x 2 x float>) variant of
; the fmul test over 1024 float elements of %a, with a scalar remainder loop.
;   entry        - goes straight to the scalar loop when %1 is greater than 1024.
;   vector.ph    - computes %n.vec = 1024 - (1024 urem %3) and builds the splat
;                  of %x once, outside the vector loop.
;   vector.body  - whole-vector load, fmul by the splat, store; the index steps
;                  by %5 until it equals %n.vec.
;   middle.block - finishes when %n.mod.vf is 0, otherwise runs the scalar loop.
;   for.body     - scalar fmul for indices from %indvars.iv.ph up to 1024.
; Expected codegen keeps the multiplier in fa0: vfmul.vf in the vector loop and
; fmul.s in the scalar loop.
; Vector memory accesses are written with the opaque `ptr` type so they agree
; with the GEP and the scalar load/store in this function.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1459 define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
1460 ; CHECK-LABEL: sink_splat_fmul_scalable:
1461 ; CHECK: # %bb.0: # %entry
1462 ; CHECK-NEXT: csrr a1, vlenb
1463 ; CHECK-NEXT: srli a3, a1, 2
1464 ; CHECK-NEXT: li a2, 1024
1465 ; CHECK-NEXT: bgeu a2, a3, .LBB26_2
1466 ; CHECK-NEXT: # %bb.1:
1467 ; CHECK-NEXT: li a2, 0
1468 ; CHECK-NEXT: j .LBB26_5
1469 ; CHECK-NEXT: .LBB26_2: # %vector.ph
1470 ; CHECK-NEXT: addi a2, a3, -1
1471 ; CHECK-NEXT: andi a4, a2, 1024
1472 ; CHECK-NEXT: xori a2, a4, 1024
1473 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
1474 ; CHECK-NEXT: mv a5, a0
1475 ; CHECK-NEXT: mv a6, a2
1476 ; CHECK-NEXT: .LBB26_3: # %vector.body
1477 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1478 ; CHECK-NEXT: vl1re32.v v8, (a5)
1479 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
1480 ; CHECK-NEXT: vs1r.v v8, (a5)
1481 ; CHECK-NEXT: sub a6, a6, a3
1482 ; CHECK-NEXT: add a5, a5, a1
1483 ; CHECK-NEXT: bnez a6, .LBB26_3
1484 ; CHECK-NEXT: # %bb.4: # %middle.block
1485 ; CHECK-NEXT: beqz a4, .LBB26_7
1486 ; CHECK-NEXT: .LBB26_5: # %for.body.preheader
1487 ; CHECK-NEXT: addi a1, a2, -1024
1488 ; CHECK-NEXT: slli a2, a2, 2
1489 ; CHECK-NEXT: add a0, a0, a2
1490 ; CHECK-NEXT: .LBB26_6: # %for.body
1491 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1492 ; CHECK-NEXT: flw fa5, 0(a0)
1493 ; CHECK-NEXT: fmul.s fa5, fa5, fa0
1494 ; CHECK-NEXT: fsw fa5, 0(a0)
1495 ; CHECK-NEXT: addi a1, a1, 1
1496 ; CHECK-NEXT: addi a0, a0, 4
1497 ; CHECK-NEXT: bnez a1, .LBB26_6
1498 ; CHECK-NEXT: .LBB26_7: # %for.cond.cleanup
1501 %0 = call i64 @llvm.vscale.i64()
1503 %min.iters.check = icmp ugt i64 %1, 1024
1504 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1506 vector.ph: ; preds = %entry
1507 %2 = call i64 @llvm.vscale.i64()
1509 %n.mod.vf = urem i64 1024, %3
1510 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, defined outside the vector loop.
1511 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1512 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1513 %4 = call i64 @llvm.vscale.i64()
1515 br label %vector.body
1517 vector.body: ; preds = %vector.body, %vector.ph
1518 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1519 %6 = getelementptr inbounds float, ptr %a, i64 %index
1520 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1521 %7 = fmul <vscale x 2 x float> %wide.load, %broadcast.splat
1522 store <vscale x 2 x float> %7, ptr %6, align 4
1523 %index.next = add nuw i64 %index, %5
1524 %8 = icmp eq i64 %index.next, %n.vec
1525 br i1 %8, label %middle.block, label %vector.body
1527 middle.block: ; preds = %vector.body
1528 %cmp.n = icmp eq i64 %n.mod.vf, 0
1529 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1531 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop start: 0 when the vector loop was skipped, otherwise %n.vec.
1532 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1535 for.cond.cleanup: ; preds = %for.body, %middle.block
1538 for.body: ; preds = %for.body.preheader, %for.body
1539 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1540 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1541 %9 = load float, ptr %arrayidx, align 4
1542 %mul = fmul float %9, %x
1543 store float %mul, ptr %arrayidx, align 4
1544 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1545 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1546 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; sink_splat_fdiv_scalable: scalable-vector (<vscale x 2 x float>) variant of
; the fdiv test over 1024 float elements of %a, with a scalar remainder loop.
;   entry        - goes straight to the scalar loop when %1 is greater than 1024.
;   vector.ph    - computes %n.vec = 1024 - (1024 urem %3) and builds the splat
;                  of %x once, outside the vector loop.
;   vector.body  - whole-vector load, fdiv by the splat, store; the index steps
;                  by %5 until it equals %n.vec.
;   middle.block - finishes when %n.mod.vf is 0, otherwise runs the scalar loop.
;   for.body     - scalar fdiv for indices from %indvars.iv.ph up to 1024.
; Expected codegen keeps the divisor in fa0: vfdiv.vf in the vector loop and
; fdiv.s in the scalar loop.
; Vector memory accesses are written with the opaque `ptr` type so they agree
; with the GEP and the scalar load/store in this function, and the scalar
; quotient is named %div to match the operation that produces it.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1549 define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
1550 ; CHECK-LABEL: sink_splat_fdiv_scalable:
1551 ; CHECK: # %bb.0: # %entry
1552 ; CHECK-NEXT: csrr a1, vlenb
1553 ; CHECK-NEXT: srli a3, a1, 2
1554 ; CHECK-NEXT: li a2, 1024
1555 ; CHECK-NEXT: bgeu a2, a3, .LBB27_2
1556 ; CHECK-NEXT: # %bb.1:
1557 ; CHECK-NEXT: li a2, 0
1558 ; CHECK-NEXT: j .LBB27_5
1559 ; CHECK-NEXT: .LBB27_2: # %vector.ph
1560 ; CHECK-NEXT: addi a2, a3, -1
1561 ; CHECK-NEXT: andi a4, a2, 1024
1562 ; CHECK-NEXT: xori a2, a4, 1024
1563 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
1564 ; CHECK-NEXT: mv a5, a0
1565 ; CHECK-NEXT: mv a6, a2
1566 ; CHECK-NEXT: .LBB27_3: # %vector.body
1567 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1568 ; CHECK-NEXT: vl1re32.v v8, (a5)
1569 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
1570 ; CHECK-NEXT: vs1r.v v8, (a5)
1571 ; CHECK-NEXT: sub a6, a6, a3
1572 ; CHECK-NEXT: add a5, a5, a1
1573 ; CHECK-NEXT: bnez a6, .LBB27_3
1574 ; CHECK-NEXT: # %bb.4: # %middle.block
1575 ; CHECK-NEXT: beqz a4, .LBB27_7
1576 ; CHECK-NEXT: .LBB27_5: # %for.body.preheader
1577 ; CHECK-NEXT: addi a1, a2, -1024
1578 ; CHECK-NEXT: slli a2, a2, 2
1579 ; CHECK-NEXT: add a0, a0, a2
1580 ; CHECK-NEXT: .LBB27_6: # %for.body
1581 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1582 ; CHECK-NEXT: flw fa5, 0(a0)
1583 ; CHECK-NEXT: fdiv.s fa5, fa5, fa0
1584 ; CHECK-NEXT: fsw fa5, 0(a0)
1585 ; CHECK-NEXT: addi a1, a1, 1
1586 ; CHECK-NEXT: addi a0, a0, 4
1587 ; CHECK-NEXT: bnez a1, .LBB27_6
1588 ; CHECK-NEXT: .LBB27_7: # %for.cond.cleanup
1591 %0 = call i64 @llvm.vscale.i64()
1593 %min.iters.check = icmp ugt i64 %1, 1024
1594 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1596 vector.ph: ; preds = %entry
1597 %2 = call i64 @llvm.vscale.i64()
1599 %n.mod.vf = urem i64 1024, %3
1600 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, defined outside the vector loop.
1601 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1602 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1603 %4 = call i64 @llvm.vscale.i64()
1605 br label %vector.body
1607 vector.body: ; preds = %vector.body, %vector.ph
1608 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1609 %6 = getelementptr inbounds float, ptr %a, i64 %index
1610 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1611 %7 = fdiv <vscale x 2 x float> %wide.load, %broadcast.splat
1612 store <vscale x 2 x float> %7, ptr %6, align 4
1613 %index.next = add nuw i64 %index, %5
1614 %8 = icmp eq i64 %index.next, %n.vec
1615 br i1 %8, label %middle.block, label %vector.body
1617 middle.block: ; preds = %vector.body
1618 %cmp.n = icmp eq i64 %n.mod.vf, 0
1619 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1621 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop start: 0 when the vector loop was skipped, otherwise %n.vec.
1622 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1625 for.cond.cleanup: ; preds = %for.body, %middle.block
1628 for.body: ; preds = %for.body.preheader, %for.body
1629 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1630 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1631 %9 = load float, ptr %arrayidx, align 4
1632 %div = fdiv float %9, %x
1633 store float %div, ptr %arrayidx, align 4
1634 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1635 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1636 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector loop over 1024 floats computing a[i] = x / a[i].
; The splat of %x is built in %vector.ph, outside the loop that uses it as the
; dividend. The assertions expect it to be folded into the scalar operand of
; vfrdiv.vf (fa0) in the vector loop, followed by a scalar fdiv.s tail loop.
; NOTE(review): %1, %3 and %5 are used below without a visible definition - confirm.
1639 define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
1640 ; CHECK-LABEL: sink_splat_frdiv_scalable:
1641 ; CHECK: # %bb.0: # %entry
1642 ; CHECK-NEXT: csrr a1, vlenb
1643 ; CHECK-NEXT: srli a3, a1, 2
1644 ; CHECK-NEXT: li a2, 1024
1645 ; CHECK-NEXT: bgeu a2, a3, .LBB28_2
1646 ; CHECK-NEXT: # %bb.1:
1647 ; CHECK-NEXT: li a2, 0
1648 ; CHECK-NEXT: j .LBB28_5
1649 ; CHECK-NEXT: .LBB28_2: # %vector.ph
1650 ; CHECK-NEXT: addi a2, a3, -1
1651 ; CHECK-NEXT: andi a4, a2, 1024
1652 ; CHECK-NEXT: xori a2, a4, 1024
1653 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
1654 ; CHECK-NEXT: mv a5, a0
1655 ; CHECK-NEXT: mv a6, a2
1656 ; CHECK-NEXT: .LBB28_3: # %vector.body
1657 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1658 ; CHECK-NEXT: vl1re32.v v8, (a5)
1659 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
1660 ; CHECK-NEXT: vs1r.v v8, (a5)
1661 ; CHECK-NEXT: sub a6, a6, a3
1662 ; CHECK-NEXT: add a5, a5, a1
1663 ; CHECK-NEXT: bnez a6, .LBB28_3
1664 ; CHECK-NEXT: # %bb.4: # %middle.block
1665 ; CHECK-NEXT: beqz a4, .LBB28_7
1666 ; CHECK-NEXT: .LBB28_5: # %for.body.preheader
1667 ; CHECK-NEXT: addi a1, a2, -1024
1668 ; CHECK-NEXT: slli a2, a2, 2
1669 ; CHECK-NEXT: add a0, a0, a2
1670 ; CHECK-NEXT: .LBB28_6: # %for.body
1671 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1672 ; CHECK-NEXT: flw fa5, 0(a0)
1673 ; CHECK-NEXT: fdiv.s fa5, fa0, fa5
1674 ; CHECK-NEXT: fsw fa5, 0(a0)
1675 ; CHECK-NEXT: addi a1, a1, 1
1676 ; CHECK-NEXT: addi a0, a0, 4
1677 ; CHECK-NEXT: bnez a1, .LBB28_6
1678 ; CHECK-NEXT: .LBB28_7: # %for.cond.cleanup
1681 %0 = call i64 @llvm.vscale.i64()
; Skip the vector loop entirely when %1 exceeds the 1024-element trip count.
1683 %min.iters.check = icmp ugt i64 %1, 1024
1684 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1686 vector.ph: ; preds = %entry
1687 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 rounded down to a multiple of %3; the remainder goes to %for.body.
1689 %n.mod.vf = urem i64 1024, %3
1690 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, deliberately defined outside %vector.body.
1691 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1692 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1693 %4 = call i64 @llvm.vscale.i64()
1695 br label %vector.body
1697 vector.body: ; preds = %vector.body, %vector.ph
1698 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1699 %6 = getelementptr inbounds float, ptr %a, i64 %index
; Opaque `ptr` operands, matching the rest of the function (no typed pointers).
1700 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
; Splat is the dividend: reversed division, x / a[i].
1701 %7 = fdiv <vscale x 2 x float> %broadcast.splat, %wide.load
1702 store <vscale x 2 x float> %7, ptr %6, align 4
1703 %index.next = add nuw i64 %index, %5
1704 %8 = icmp eq i64 %index.next, %n.vec
1705 br i1 %8, label %middle.block, label %vector.body
1707 middle.block: ; preds = %vector.body
; No scalar tail needed when 1024 is an exact multiple of the vector step.
1708 %cmp.n = icmp eq i64 %n.mod.vf, 0
1709 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1711 for.body.preheader: ; preds = %entry, %middle.block
1712 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1715 for.cond.cleanup: ; preds = %for.body, %middle.block
1718 for.body: ; preds = %for.body.preheader, %for.body
1719 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1720 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1721 %9 = load float, ptr %arrayidx, align 4
1722 %mul = fdiv float %x, %9
1723 store float %mul, ptr %arrayidx, align 4
1724 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1725 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1726 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector loop over 1024 floats computing a[i] = a[i] + x.
; The splat of %x is built in %vector.ph, outside the loop that uses it. The
; assertions expect it to be folded into the scalar operand of vfadd.vf (fa0)
; in the vector loop, followed by a scalar fadd.s tail loop.
; NOTE(review): %1, %3 and %5 are used below without a visible definition - confirm.
1729 define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
1730 ; CHECK-LABEL: sink_splat_fadd_scalable:
1731 ; CHECK: # %bb.0: # %entry
1732 ; CHECK-NEXT: csrr a1, vlenb
1733 ; CHECK-NEXT: srli a3, a1, 2
1734 ; CHECK-NEXT: li a2, 1024
1735 ; CHECK-NEXT: bgeu a2, a3, .LBB29_2
1736 ; CHECK-NEXT: # %bb.1:
1737 ; CHECK-NEXT: li a2, 0
1738 ; CHECK-NEXT: j .LBB29_5
1739 ; CHECK-NEXT: .LBB29_2: # %vector.ph
1740 ; CHECK-NEXT: addi a2, a3, -1
1741 ; CHECK-NEXT: andi a4, a2, 1024
1742 ; CHECK-NEXT: xori a2, a4, 1024
1743 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
1744 ; CHECK-NEXT: mv a5, a0
1745 ; CHECK-NEXT: mv a6, a2
1746 ; CHECK-NEXT: .LBB29_3: # %vector.body
1747 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1748 ; CHECK-NEXT: vl1re32.v v8, (a5)
1749 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
1750 ; CHECK-NEXT: vs1r.v v8, (a5)
1751 ; CHECK-NEXT: sub a6, a6, a3
1752 ; CHECK-NEXT: add a5, a5, a1
1753 ; CHECK-NEXT: bnez a6, .LBB29_3
1754 ; CHECK-NEXT: # %bb.4: # %middle.block
1755 ; CHECK-NEXT: beqz a4, .LBB29_7
1756 ; CHECK-NEXT: .LBB29_5: # %for.body.preheader
1757 ; CHECK-NEXT: addi a1, a2, -1024
1758 ; CHECK-NEXT: slli a2, a2, 2
1759 ; CHECK-NEXT: add a0, a0, a2
1760 ; CHECK-NEXT: .LBB29_6: # %for.body
1761 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1762 ; CHECK-NEXT: flw fa5, 0(a0)
1763 ; CHECK-NEXT: fadd.s fa5, fa5, fa0
1764 ; CHECK-NEXT: fsw fa5, 0(a0)
1765 ; CHECK-NEXT: addi a1, a1, 1
1766 ; CHECK-NEXT: addi a0, a0, 4
1767 ; CHECK-NEXT: bnez a1, .LBB29_6
1768 ; CHECK-NEXT: .LBB29_7: # %for.cond.cleanup
1771 %0 = call i64 @llvm.vscale.i64()
; Skip the vector loop entirely when %1 exceeds the 1024-element trip count.
1773 %min.iters.check = icmp ugt i64 %1, 1024
1774 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1776 vector.ph: ; preds = %entry
1777 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 rounded down to a multiple of %3; the remainder goes to %for.body.
1779 %n.mod.vf = urem i64 1024, %3
1780 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, deliberately defined outside %vector.body.
1781 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1782 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1783 %4 = call i64 @llvm.vscale.i64()
1785 br label %vector.body
1787 vector.body: ; preds = %vector.body, %vector.ph
1788 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1789 %6 = getelementptr inbounds float, ptr %a, i64 %index
; Opaque `ptr` operands, matching the rest of the function (no typed pointers).
1790 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1791 %7 = fadd <vscale x 2 x float> %wide.load, %broadcast.splat
1792 store <vscale x 2 x float> %7, ptr %6, align 4
1793 %index.next = add nuw i64 %index, %5
1794 %8 = icmp eq i64 %index.next, %n.vec
1795 br i1 %8, label %middle.block, label %vector.body
1797 middle.block: ; preds = %vector.body
; No scalar tail needed when 1024 is an exact multiple of the vector step.
1798 %cmp.n = icmp eq i64 %n.mod.vf, 0
1799 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1801 for.body.preheader: ; preds = %entry, %middle.block
1802 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1805 for.cond.cleanup: ; preds = %for.body, %middle.block
1808 for.body: ; preds = %for.body.preheader, %for.body
1809 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1810 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1811 %9 = load float, ptr %arrayidx, align 4
1812 %mul = fadd float %9, %x
1813 store float %mul, ptr %arrayidx, align 4
1814 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1815 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1816 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector loop over 1024 floats computing a[i] = a[i] - x.
; The splat of %x is built in %vector.ph, outside the loop that uses it as the
; subtrahend. The assertions expect it to be folded into the scalar operand of
; vfsub.vf (fa0) in the vector loop, followed by a scalar fsub.s tail loop.
; NOTE(review): %1, %3 and %5 are used below without a visible definition - confirm.
1819 define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
1820 ; CHECK-LABEL: sink_splat_fsub_scalable:
1821 ; CHECK: # %bb.0: # %entry
1822 ; CHECK-NEXT: csrr a1, vlenb
1823 ; CHECK-NEXT: srli a3, a1, 2
1824 ; CHECK-NEXT: li a2, 1024
1825 ; CHECK-NEXT: bgeu a2, a3, .LBB30_2
1826 ; CHECK-NEXT: # %bb.1:
1827 ; CHECK-NEXT: li a2, 0
1828 ; CHECK-NEXT: j .LBB30_5
1829 ; CHECK-NEXT: .LBB30_2: # %vector.ph
1830 ; CHECK-NEXT: addi a2, a3, -1
1831 ; CHECK-NEXT: andi a4, a2, 1024
1832 ; CHECK-NEXT: xori a2, a4, 1024
1833 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
1834 ; CHECK-NEXT: mv a5, a0
1835 ; CHECK-NEXT: mv a6, a2
1836 ; CHECK-NEXT: .LBB30_3: # %vector.body
1837 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1838 ; CHECK-NEXT: vl1re32.v v8, (a5)
1839 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1840 ; CHECK-NEXT: vs1r.v v8, (a5)
1841 ; CHECK-NEXT: sub a6, a6, a3
1842 ; CHECK-NEXT: add a5, a5, a1
1843 ; CHECK-NEXT: bnez a6, .LBB30_3
1844 ; CHECK-NEXT: # %bb.4: # %middle.block
1845 ; CHECK-NEXT: beqz a4, .LBB30_7
1846 ; CHECK-NEXT: .LBB30_5: # %for.body.preheader
1847 ; CHECK-NEXT: addi a1, a2, -1024
1848 ; CHECK-NEXT: slli a2, a2, 2
1849 ; CHECK-NEXT: add a0, a0, a2
1850 ; CHECK-NEXT: .LBB30_6: # %for.body
1851 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1852 ; CHECK-NEXT: flw fa5, 0(a0)
1853 ; CHECK-NEXT: fsub.s fa5, fa5, fa0
1854 ; CHECK-NEXT: fsw fa5, 0(a0)
1855 ; CHECK-NEXT: addi a1, a1, 1
1856 ; CHECK-NEXT: addi a0, a0, 4
1857 ; CHECK-NEXT: bnez a1, .LBB30_6
1858 ; CHECK-NEXT: .LBB30_7: # %for.cond.cleanup
1861 %0 = call i64 @llvm.vscale.i64()
; Skip the vector loop entirely when %1 exceeds the 1024-element trip count.
1863 %min.iters.check = icmp ugt i64 %1, 1024
1864 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1866 vector.ph: ; preds = %entry
1867 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 rounded down to a multiple of %3; the remainder goes to %for.body.
1869 %n.mod.vf = urem i64 1024, %3
1870 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, deliberately defined outside %vector.body.
1871 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1872 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1873 %4 = call i64 @llvm.vscale.i64()
1875 br label %vector.body
1877 vector.body: ; preds = %vector.body, %vector.ph
1878 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1879 %6 = getelementptr inbounds float, ptr %a, i64 %index
; Opaque `ptr` operands, matching the rest of the function (no typed pointers).
1880 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1881 %7 = fsub <vscale x 2 x float> %wide.load, %broadcast.splat
1882 store <vscale x 2 x float> %7, ptr %6, align 4
1883 %index.next = add nuw i64 %index, %5
1884 %8 = icmp eq i64 %index.next, %n.vec
1885 br i1 %8, label %middle.block, label %vector.body
1887 middle.block: ; preds = %vector.body
; No scalar tail needed when 1024 is an exact multiple of the vector step.
1888 %cmp.n = icmp eq i64 %n.mod.vf, 0
1889 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1891 for.body.preheader: ; preds = %entry, %middle.block
1892 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1895 for.cond.cleanup: ; preds = %for.body, %middle.block
1898 for.body: ; preds = %for.body.preheader, %for.body
1899 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1900 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1901 %9 = load float, ptr %arrayidx, align 4
1902 %mul = fsub float %9, %x
1903 store float %mul, ptr %arrayidx, align 4
1904 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1905 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1906 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector loop over 1024 floats computing a[i] = x - a[i].
; The splat of %x is built in %vector.ph, outside the loop that uses it as the
; minuend. The assertions expect it to be folded into the scalar operand of
; vfrsub.vf (fa0) in the vector loop, followed by a scalar fsub.s tail loop.
; NOTE(review): %1, %3 and %5 are used below without a visible definition - confirm.
1909 define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
1910 ; CHECK-LABEL: sink_splat_frsub_scalable:
1911 ; CHECK: # %bb.0: # %entry
1912 ; CHECK-NEXT: csrr a1, vlenb
1913 ; CHECK-NEXT: srli a3, a1, 2
1914 ; CHECK-NEXT: li a2, 1024
1915 ; CHECK-NEXT: bgeu a2, a3, .LBB31_2
1916 ; CHECK-NEXT: # %bb.1:
1917 ; CHECK-NEXT: li a2, 0
1918 ; CHECK-NEXT: j .LBB31_5
1919 ; CHECK-NEXT: .LBB31_2: # %vector.ph
1920 ; CHECK-NEXT: addi a2, a3, -1
1921 ; CHECK-NEXT: andi a4, a2, 1024
1922 ; CHECK-NEXT: xori a2, a4, 1024
1923 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
1924 ; CHECK-NEXT: mv a5, a0
1925 ; CHECK-NEXT: mv a6, a2
1926 ; CHECK-NEXT: .LBB31_3: # %vector.body
1927 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1928 ; CHECK-NEXT: vl1re32.v v8, (a5)
1929 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0
1930 ; CHECK-NEXT: vs1r.v v8, (a5)
1931 ; CHECK-NEXT: sub a6, a6, a3
1932 ; CHECK-NEXT: add a5, a5, a1
1933 ; CHECK-NEXT: bnez a6, .LBB31_3
1934 ; CHECK-NEXT: # %bb.4: # %middle.block
1935 ; CHECK-NEXT: beqz a4, .LBB31_7
1936 ; CHECK-NEXT: .LBB31_5: # %for.body.preheader
1937 ; CHECK-NEXT: addi a1, a2, -1024
1938 ; CHECK-NEXT: slli a2, a2, 2
1939 ; CHECK-NEXT: add a0, a0, a2
1940 ; CHECK-NEXT: .LBB31_6: # %for.body
1941 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1942 ; CHECK-NEXT: flw fa5, 0(a0)
1943 ; CHECK-NEXT: fsub.s fa5, fa0, fa5
1944 ; CHECK-NEXT: fsw fa5, 0(a0)
1945 ; CHECK-NEXT: addi a1, a1, 1
1946 ; CHECK-NEXT: addi a0, a0, 4
1947 ; CHECK-NEXT: bnez a1, .LBB31_6
1948 ; CHECK-NEXT: .LBB31_7: # %for.cond.cleanup
1951 %0 = call i64 @llvm.vscale.i64()
; Skip the vector loop entirely when %1 exceeds the 1024-element trip count.
1953 %min.iters.check = icmp ugt i64 %1, 1024
1954 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1956 vector.ph: ; preds = %entry
1957 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 rounded down to a multiple of %3; the remainder goes to %for.body.
1959 %n.mod.vf = urem i64 1024, %3
1960 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, deliberately defined outside %vector.body.
1961 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1962 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1963 %4 = call i64 @llvm.vscale.i64()
1965 br label %vector.body
1967 vector.body: ; preds = %vector.body, %vector.ph
1968 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1969 %6 = getelementptr inbounds float, ptr %a, i64 %index
; Opaque `ptr` operands, matching the rest of the function (no typed pointers).
1970 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
; Splat is the minuend: reversed subtraction, x - a[i].
1971 %7 = fsub <vscale x 2 x float> %broadcast.splat, %wide.load
1972 store <vscale x 2 x float> %7, ptr %6, align 4
1973 %index.next = add nuw i64 %index, %5
1974 %8 = icmp eq i64 %index.next, %n.vec
1975 br i1 %8, label %middle.block, label %vector.body
1977 middle.block: ; preds = %vector.body
; No scalar tail needed when 1024 is an exact multiple of the vector step.
1978 %cmp.n = icmp eq i64 %n.mod.vf, 0
1979 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1981 for.body.preheader: ; preds = %entry, %middle.block
1982 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1985 for.cond.cleanup: ; preds = %for.body, %middle.block
1988 for.body: ; preds = %for.body.preheader, %for.body
1989 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1990 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1991 %9 = load float, ptr %arrayidx, align 4
1992 %mul = fsub float %x, %9
1993 store float %mul, ptr %arrayidx, align 4
1994 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1995 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1996 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Fixed-width (<4 x float>) loop over 1024 elements computing
; a[i] = fma(a[i], x, b[i]), with the splat of %x as the SECOND multiplicand.
; The splat is defined before the loop; the assertions expect it to be folded
; into the scalar operand of vfmacc.vf (fa0) rather than kept as a vector.
1999 define void @sink_splat_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
2000 ; CHECK-LABEL: sink_splat_fma:
2001 ; CHECK: # %bb.0: # %entry
2002 ; CHECK-NEXT: li a2, 1024
2003 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2004 ; CHECK-NEXT: .LBB32_1: # %vector.body
2005 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2006 ; CHECK-NEXT: vle32.v v8, (a0)
2007 ; CHECK-NEXT: vle32.v v9, (a1)
2008 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
2009 ; CHECK-NEXT: vse32.v v9, (a0)
2010 ; CHECK-NEXT: addi a2, a2, -4
2011 ; CHECK-NEXT: addi a1, a1, 16
2012 ; CHECK-NEXT: addi a0, a0, 16
2013 ; CHECK-NEXT: bnez a2, .LBB32_1
2014 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x, defined outside %vector.body.
2017 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
2018 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2019 br label %vector.body
2021 vector.body: ; preds = %vector.body, %entry
2022 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2023 %0 = getelementptr inbounds float, ptr %a, i64 %index
2024 %wide.load = load <4 x float>, ptr %0, align 4
2025 %1 = getelementptr inbounds float, ptr %b, i64 %index
2026 %wide.load12 = load <4 x float>, ptr %1, align 4
; a * splat(x) + b; the result is written back over the a[] chunk.
2027 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12)
2028 store <4 x float> %2, ptr %0, align 4
2029 %index.next = add nuw i64 %index, 4
2030 %3 = icmp eq i64 %index.next, 1024
2031 br i1 %3, label %for.cond.cleanup, label %vector.body
2033 for.cond.cleanup: ; preds = %vector.body
; Fixed-width (<4 x float>) loop over 1024 elements computing
; a[i] = fma(x, a[i], b[i]), with the splat of %x as the FIRST multiplicand
; (operands commuted relative to @sink_splat_fma). The assertions expect the
; same vfmacc.vf with fa0 as the scalar operand.
2037 define void @sink_splat_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
2038 ; CHECK-LABEL: sink_splat_fma_commute:
2039 ; CHECK: # %bb.0: # %entry
2040 ; CHECK-NEXT: li a2, 1024
2041 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2042 ; CHECK-NEXT: .LBB33_1: # %vector.body
2043 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2044 ; CHECK-NEXT: vle32.v v8, (a0)
2045 ; CHECK-NEXT: vle32.v v9, (a1)
2046 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
2047 ; CHECK-NEXT: vse32.v v9, (a0)
2048 ; CHECK-NEXT: addi a2, a2, -4
2049 ; CHECK-NEXT: addi a1, a1, 16
2050 ; CHECK-NEXT: addi a0, a0, 16
2051 ; CHECK-NEXT: bnez a2, .LBB33_1
2052 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x, defined outside %vector.body.
2055 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
2056 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2057 br label %vector.body
2059 vector.body: ; preds = %vector.body, %entry
2060 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2061 %0 = getelementptr inbounds float, ptr %a, i64 %index
2062 %wide.load = load <4 x float>, ptr %0, align 4
2063 %1 = getelementptr inbounds float, ptr %b, i64 %index
2064 %wide.load12 = load <4 x float>, ptr %1, align 4
; splat(x) * a + b; the result is written back over the a[] chunk.
2065 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
2066 store <4 x float> %2, ptr %0, align 4
2067 %index.next = add nuw i64 %index, 4
2068 %3 = icmp eq i64 %index.next, 1024
2069 br i1 %3, label %for.cond.cleanup, label %vector.body
2071 for.cond.cleanup: ; preds = %vector.body
; Scalable-vector loop over 1024 floats computing a[i] = fma(a[i], x, b[i]),
; with the splat of %x as the second multiplicand. The splat is built in
; %vector.ph, outside the loop that uses it. The assertions expect it to be
; folded into the scalar operand of vfmacc.vf (fa0) in the vector loop,
; followed by a scalar fmadd.s tail loop.
; NOTE(review): %1, %3 and %5 are used below without a visible definition - confirm.
2075 define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
2076 ; CHECK-LABEL: sink_splat_fma_scalable:
2077 ; CHECK: # %bb.0: # %entry
2078 ; CHECK-NEXT: csrr a2, vlenb
2079 ; CHECK-NEXT: srli a3, a2, 2
2080 ; CHECK-NEXT: li a4, 1024
2081 ; CHECK-NEXT: bgeu a4, a3, .LBB34_2
2082 ; CHECK-NEXT: # %bb.1:
2083 ; CHECK-NEXT: li a4, 0
2084 ; CHECK-NEXT: j .LBB34_5
2085 ; CHECK-NEXT: .LBB34_2: # %vector.ph
2086 ; CHECK-NEXT: addi a4, a3, -1
2087 ; CHECK-NEXT: andi a5, a4, 1024
2088 ; CHECK-NEXT: xori a4, a5, 1024
2089 ; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
2090 ; CHECK-NEXT: mv a6, a0
2091 ; CHECK-NEXT: mv a7, a1
2092 ; CHECK-NEXT: mv t0, a4
2093 ; CHECK-NEXT: .LBB34_3: # %vector.body
2094 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2095 ; CHECK-NEXT: vl1re32.v v8, (a6)
2096 ; CHECK-NEXT: vl1re32.v v9, (a7)
2097 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
2098 ; CHECK-NEXT: vs1r.v v9, (a6)
2099 ; CHECK-NEXT: sub t0, t0, a3
2100 ; CHECK-NEXT: add a7, a7, a2
2101 ; CHECK-NEXT: add a6, a6, a2
2102 ; CHECK-NEXT: bnez t0, .LBB34_3
2103 ; CHECK-NEXT: # %bb.4: # %middle.block
2104 ; CHECK-NEXT: beqz a5, .LBB34_7
2105 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader
2106 ; CHECK-NEXT: addi a2, a4, -1024
2107 ; CHECK-NEXT: slli a4, a4, 2
2108 ; CHECK-NEXT: add a1, a1, a4
2109 ; CHECK-NEXT: add a0, a0, a4
2110 ; CHECK-NEXT: .LBB34_6: # %for.body
2111 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2112 ; CHECK-NEXT: flw fa5, 0(a0)
2113 ; CHECK-NEXT: flw fa4, 0(a1)
2114 ; CHECK-NEXT: fmadd.s fa5, fa5, fa0, fa4
2115 ; CHECK-NEXT: fsw fa5, 0(a0)
2116 ; CHECK-NEXT: addi a2, a2, 1
2117 ; CHECK-NEXT: addi a1, a1, 4
2118 ; CHECK-NEXT: addi a0, a0, 4
2119 ; CHECK-NEXT: bnez a2, .LBB34_6
2120 ; CHECK-NEXT: .LBB34_7: # %for.cond.cleanup
2123 %0 = call i64 @llvm.vscale.i64()
; Skip the vector loop entirely when %1 exceeds the 1024-element trip count.
2125 %min.iters.check = icmp ugt i64 %1, 1024
2126 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2128 vector.ph: ; preds = %entry
2129 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 rounded down to a multiple of %3; the remainder goes to %for.body.
2131 %n.mod.vf = urem i64 1024, %3
2132 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, deliberately defined outside %vector.body.
2133 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
2134 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
2135 %4 = call i64 @llvm.vscale.i64()
2137 br label %vector.body
2139 vector.body: ; preds = %vector.body, %vector.ph
2140 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2141 %6 = getelementptr inbounds float, ptr %a, i64 %index
; Opaque `ptr` operands, matching the rest of the function (no typed pointers).
2142 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
2143 %7 = getelementptr inbounds float, ptr %b, i64 %index
2144 %wide.load12 = load <vscale x 2 x float>, ptr %7, align 4
; a * splat(x) + b; the result is written back over the a[] chunk.
2145 %8 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %wide.load, <vscale x 2 x float> %broadcast.splat, <vscale x 2 x float> %wide.load12)
2146 store <vscale x 2 x float> %8, ptr %6, align 4
2147 %index.next = add nuw i64 %index, %5
2148 %9 = icmp eq i64 %index.next, %n.vec
2149 br i1 %9, label %middle.block, label %vector.body
2151 middle.block: ; preds = %vector.body
; No scalar tail needed when 1024 is an exact multiple of the vector step.
2152 %cmp.n = icmp eq i64 %n.mod.vf, 0
2153 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2155 for.body.preheader: ; preds = %entry, %middle.block
2156 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2159 for.cond.cleanup: ; preds = %for.body, %middle.block
2162 for.body: ; preds = %for.body.preheader, %for.body
2163 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2164 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
2165 %10 = load float, ptr %arrayidx, align 4
2166 %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
2167 %11 = load float, ptr %arrayidx2, align 4
2168 %12 = tail call float @llvm.fma.f32(float %10, float %x, float %11)
2169 store float %12, ptr %arrayidx, align 4
2170 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2171 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2172 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector loop over 1024 floats computing a[i] = fma(x, a[i], b[i]),
; with the splat of %x as the first multiplicand (operands commuted relative
; to @sink_splat_fma_scalable). The splat is built in %vector.ph, outside the
; loop that uses it. The assertions expect vfmacc.vf with fa0 as the scalar
; operand in the vector loop, followed by a scalar fmadd.s tail loop.
; NOTE(review): %1, %3 and %5 are used below without a visible definition - confirm.
2175 define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
2176 ; CHECK-LABEL: sink_splat_fma_commute_scalable:
2177 ; CHECK: # %bb.0: # %entry
2178 ; CHECK-NEXT: csrr a2, vlenb
2179 ; CHECK-NEXT: srli a3, a2, 2
2180 ; CHECK-NEXT: li a4, 1024
2181 ; CHECK-NEXT: bgeu a4, a3, .LBB35_2
2182 ; CHECK-NEXT: # %bb.1:
2183 ; CHECK-NEXT: li a4, 0
2184 ; CHECK-NEXT: j .LBB35_5
2185 ; CHECK-NEXT: .LBB35_2: # %vector.ph
2186 ; CHECK-NEXT: addi a4, a3, -1
2187 ; CHECK-NEXT: andi a5, a4, 1024
2188 ; CHECK-NEXT: xori a4, a5, 1024
2189 ; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
2190 ; CHECK-NEXT: mv a6, a0
2191 ; CHECK-NEXT: mv a7, a1
2192 ; CHECK-NEXT: mv t0, a4
2193 ; CHECK-NEXT: .LBB35_3: # %vector.body
2194 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2195 ; CHECK-NEXT: vl1re32.v v8, (a6)
2196 ; CHECK-NEXT: vl1re32.v v9, (a7)
2197 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
2198 ; CHECK-NEXT: vs1r.v v9, (a6)
2199 ; CHECK-NEXT: sub t0, t0, a3
2200 ; CHECK-NEXT: add a7, a7, a2
2201 ; CHECK-NEXT: add a6, a6, a2
2202 ; CHECK-NEXT: bnez t0, .LBB35_3
2203 ; CHECK-NEXT: # %bb.4: # %middle.block
2204 ; CHECK-NEXT: beqz a5, .LBB35_7
2205 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader
2206 ; CHECK-NEXT: addi a2, a4, -1024
2207 ; CHECK-NEXT: slli a4, a4, 2
2208 ; CHECK-NEXT: add a1, a1, a4
2209 ; CHECK-NEXT: add a0, a0, a4
2210 ; CHECK-NEXT: .LBB35_6: # %for.body
2211 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2212 ; CHECK-NEXT: flw fa5, 0(a0)
2213 ; CHECK-NEXT: flw fa4, 0(a1)
2214 ; CHECK-NEXT: fmadd.s fa5, fa0, fa5, fa4
2215 ; CHECK-NEXT: fsw fa5, 0(a0)
2216 ; CHECK-NEXT: addi a2, a2, 1
2217 ; CHECK-NEXT: addi a1, a1, 4
2218 ; CHECK-NEXT: addi a0, a0, 4
2219 ; CHECK-NEXT: bnez a2, .LBB35_6
2220 ; CHECK-NEXT: .LBB35_7: # %for.cond.cleanup
2223 %0 = call i64 @llvm.vscale.i64()
; Skip the vector loop entirely when %1 exceeds the 1024-element trip count.
2225 %min.iters.check = icmp ugt i64 %1, 1024
2226 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2228 vector.ph: ; preds = %entry
2229 %2 = call i64 @llvm.vscale.i64()
; %n.vec = 1024 rounded down to a multiple of %3; the remainder goes to %for.body.
2231 %n.mod.vf = urem i64 1024, %3
2232 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, deliberately defined outside %vector.body.
2233 %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
2234 %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
2235 %4 = call i64 @llvm.vscale.i64()
2237 br label %vector.body
2239 vector.body: ; preds = %vector.body, %vector.ph
2240 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2241 %6 = getelementptr inbounds float, ptr %a, i64 %index
; Opaque `ptr` operands, matching the rest of the function (no typed pointers).
2242 %wide.load = load <vscale x 2 x float>, ptr %6, align 4
2243 %7 = getelementptr inbounds float, ptr %b, i64 %index
2244 %wide.load12 = load <vscale x 2 x float>, ptr %7, align 4
; splat(x) * a + b; the result is written back over the a[] chunk.
2245 %8 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %broadcast.splat, <vscale x 2 x float> %wide.load, <vscale x 2 x float> %wide.load12)
2246 store <vscale x 2 x float> %8, ptr %6, align 4
2247 %index.next = add nuw i64 %index, %5
2248 %9 = icmp eq i64 %index.next, %n.vec
2249 br i1 %9, label %middle.block, label %vector.body
2251 middle.block: ; preds = %vector.body
; No scalar tail needed when 1024 is an exact multiple of the vector step.
2252 %cmp.n = icmp eq i64 %n.mod.vf, 0
2253 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2255 for.body.preheader: ; preds = %entry, %middle.block
2256 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2259 for.cond.cleanup: ; preds = %for.body, %middle.block
2262 for.body: ; preds = %for.body.preheader, %for.body
2263 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2264 %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
2265 %10 = load float, ptr %arrayidx, align 4
2266 %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
2267 %11 = load float, ptr %arrayidx2, align 4
2268 %12 = tail call float @llvm.fma.f32(float %x, float %10, float %11)
2269 store float %12, ptr %arrayidx, align 4
2270 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2271 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2272 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2275 declare i64 @llvm.vscale.i64()
2276 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
2277 declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
2278 declare float @llvm.fma.f32(float, float, float)
; Fixed-width (<4 x i32>) loop over 1024 elements: each chunk of x[] is
; compared for equality against a splat of %y, and zeros are masked-stored
; into the lanes that compared equal. The splat is defined before the loop;
; the assertions expect it to be folded into the scalar operand of vmseq.vx
; (a1), with the resulting mask in v0 driving a masked vse32.v.
2280 define void @sink_splat_icmp(ptr nocapture %x, i32 signext %y) {
2281 ; CHECK-LABEL: sink_splat_icmp:
2282 ; CHECK: # %bb.0: # %entry
2283 ; CHECK-NEXT: li a2, 1024
2284 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2285 ; CHECK-NEXT: vmv.v.i v8, 0
2286 ; CHECK-NEXT: .LBB36_1: # %vector.body
2287 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2288 ; CHECK-NEXT: vle32.v v9, (a0)
2289 ; CHECK-NEXT: vmseq.vx v0, v9, a1
2290 ; CHECK-NEXT: vse32.v v8, (a0), v0.t
2291 ; CHECK-NEXT: addi a2, a2, -4
2292 ; CHECK-NEXT: addi a0, a0, 16
2293 ; CHECK-NEXT: bnez a2, .LBB36_1
2294 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %y, defined outside %vector.body.
2297 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %y, i32 0
2298 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2299 br label %vector.body
2301 vector.body: ; preds = %vector.body, %entry
2302 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2303 %0 = getelementptr inbounds i32, ptr %x, i64 %index
2304 %wide.load = load <4 x i32>, ptr %0, align 4
2305 %1 = icmp eq <4 x i32> %wide.load, %broadcast.splat
; Store zero only to the lanes selected by the compare mask %1.
2306 call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
2307 %index.next = add nuw i64 %index, 4
2308 %2 = icmp eq i64 %index.next, 1024
2309 br i1 %2, label %for.cond.cleanup, label %vector.body
2311 for.cond.cleanup: ; preds = %vector.body
2314 declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
; Fixed-width (<4 x float>) loop over 1024 elements: each chunk of x[] is
; compared (fcmp fast oeq) against a splat of %y, and zeros are masked-stored
; into the lanes that compared equal. The splat is defined before the loop;
; the assertions expect it to be folded into the scalar operand of vmfeq.vf
; (fa0), with the resulting mask in v0 driving a masked vse32.v.
2316 define void @sink_splat_fcmp(ptr nocapture %x, float %y) {
2317 ; CHECK-LABEL: sink_splat_fcmp:
2318 ; CHECK: # %bb.0: # %entry
2319 ; CHECK-NEXT: li a1, 1024
2320 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2321 ; CHECK-NEXT: vmv.v.i v8, 0
2322 ; CHECK-NEXT: .LBB37_1: # %vector.body
2323 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2324 ; CHECK-NEXT: vle32.v v9, (a0)
2325 ; CHECK-NEXT: vmfeq.vf v0, v9, fa0
2326 ; CHECK-NEXT: vse32.v v8, (a0), v0.t
2327 ; CHECK-NEXT: addi a1, a1, -4
2328 ; CHECK-NEXT: addi a0, a0, 16
2329 ; CHECK-NEXT: bnez a1, .LBB37_1
2330 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %y, defined outside %vector.body.
2333 %broadcast.splatinsert = insertelement <4 x float> poison, float %y, i32 0
2334 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2335 br label %vector.body
2337 vector.body: ; preds = %vector.body, %entry
2338 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2339 %0 = getelementptr inbounds float, ptr %x, i64 %index
2340 %wide.load = load <4 x float>, ptr %0, align 4
2341 %1 = fcmp fast oeq <4 x float> %wide.load, %broadcast.splat
; Store zero only to the lanes selected by the compare mask %1.
2342 call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
2343 %index.next = add nuw i64 %index, 4
2344 %2 = icmp eq i64 %index.next, 1024
2345 br i1 %2, label %for.cond.cleanup, label %vector.body
2347 for.cond.cleanup: ; preds = %vector.body
2350 declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
; Fixed-width (<4 x i32>) loop over 1024 elements computing
; a[i] = a[i] udiv x. The splat of %x is defined before the loop; the
; assertions expect it to be folded into the scalar operand of vdivu.vx (a1).
2352 define void @sink_splat_udiv(ptr nocapture %a, i32 signext %x) {
2353 ; CHECK-LABEL: sink_splat_udiv:
2354 ; CHECK: # %bb.0: # %entry
2355 ; CHECK-NEXT: li a2, 1024
2356 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2357 ; CHECK-NEXT: .LBB38_1: # %vector.body
2358 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2359 ; CHECK-NEXT: vle32.v v8, (a0)
2360 ; CHECK-NEXT: vdivu.vx v8, v8, a1
2361 ; CHECK-NEXT: vse32.v v8, (a0)
2362 ; CHECK-NEXT: addi a2, a2, -4
2363 ; CHECK-NEXT: addi a0, a0, 16
2364 ; CHECK-NEXT: bnez a2, .LBB38_1
2365 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x, defined outside %vector.body.
2368 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2369 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2370 br label %vector.body
2372 vector.body: ; preds = %vector.body, %entry
2373 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2374 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2375 %wide.load = load <4 x i32>, ptr %0, align 4
; Splat is the divisor (right-hand operand).
2376 %1 = udiv <4 x i32> %wide.load, %broadcast.splat
2377 store <4 x i32> %1, ptr %0, align 4
2378 %index.next = add nuw i64 %index, 4
2379 %2 = icmp eq i64 %index.next, 1024
2380 br i1 %2, label %for.cond.cleanup, label %vector.body
2382 for.cond.cleanup: ; preds = %vector.body
; Fixed-width (<4 x i32>) loop over 1024 elements computing
; a[i] = a[i] sdiv x. The splat of %x is defined before the loop; the
; assertions expect it to be folded into the scalar operand of vdiv.vx (a1).
2386 define void @sink_splat_sdiv(ptr nocapture %a, i32 signext %x) {
2387 ; CHECK-LABEL: sink_splat_sdiv:
2388 ; CHECK: # %bb.0: # %entry
2389 ; CHECK-NEXT: li a2, 1024
2390 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2391 ; CHECK-NEXT: .LBB39_1: # %vector.body
2392 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2393 ; CHECK-NEXT: vle32.v v8, (a0)
2394 ; CHECK-NEXT: vdiv.vx v8, v8, a1
2395 ; CHECK-NEXT: vse32.v v8, (a0)
2396 ; CHECK-NEXT: addi a2, a2, -4
2397 ; CHECK-NEXT: addi a0, a0, 16
2398 ; CHECK-NEXT: bnez a2, .LBB39_1
2399 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x, defined outside %vector.body.
2402 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2403 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2404 br label %vector.body
2406 vector.body: ; preds = %vector.body, %entry
2407 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2408 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2409 %wide.load = load <4 x i32>, ptr %0, align 4
; Splat is the divisor (right-hand operand).
2410 %1 = sdiv <4 x i32> %wide.load, %broadcast.splat
2411 store <4 x i32> %1, ptr %0, align 4
2412 %index.next = add nuw i64 %index, 4
2413 %2 = icmp eq i64 %index.next, 1024
2414 br i1 %2, label %for.cond.cleanup, label %vector.body
2416 for.cond.cleanup: ; preds = %vector.body
; Fixed-width (<4 x i32>) loop over 1024 elements computing
; a[i] = a[i] urem x. The splat of %x is defined before the loop; the
; assertions expect it to be folded into the scalar operand of vremu.vx (a1).
2420 define void @sink_splat_urem(ptr nocapture %a, i32 signext %x) {
2421 ; CHECK-LABEL: sink_splat_urem:
2422 ; CHECK: # %bb.0: # %entry
2423 ; CHECK-NEXT: li a2, 1024
2424 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2425 ; CHECK-NEXT: .LBB40_1: # %vector.body
2426 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2427 ; CHECK-NEXT: vle32.v v8, (a0)
2428 ; CHECK-NEXT: vremu.vx v8, v8, a1
2429 ; CHECK-NEXT: vse32.v v8, (a0)
2430 ; CHECK-NEXT: addi a2, a2, -4
2431 ; CHECK-NEXT: addi a0, a0, 16
2432 ; CHECK-NEXT: bnez a2, .LBB40_1
2433 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x, defined outside %vector.body.
2436 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2437 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2438 br label %vector.body
2440 vector.body: ; preds = %vector.body, %entry
2441 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2442 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2443 %wide.load = load <4 x i32>, ptr %0, align 4
; Splat is the divisor (right-hand operand).
2444 %1 = urem <4 x i32> %wide.load, %broadcast.splat
2445 store <4 x i32> %1, ptr %0, align 4
2446 %index.next = add nuw i64 %index, 4
2447 %2 = icmp eq i64 %index.next, 1024
2448 br i1 %2, label %for.cond.cleanup, label %vector.body
2450 for.cond.cleanup: ; preds = %vector.body
; Fixed-width (<4 x i32>) loop over 1024 elements computing
; a[i] = a[i] srem x. The splat of %x is defined before the loop; the
; assertions expect it to be folded into the scalar operand of vrem.vx (a1).
2454 define void @sink_splat_srem(ptr nocapture %a, i32 signext %x) {
2455 ; CHECK-LABEL: sink_splat_srem:
2456 ; CHECK: # %bb.0: # %entry
2457 ; CHECK-NEXT: li a2, 1024
2458 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2459 ; CHECK-NEXT: .LBB41_1: # %vector.body
2460 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2461 ; CHECK-NEXT: vle32.v v8, (a0)
2462 ; CHECK-NEXT: vrem.vx v8, v8, a1
2463 ; CHECK-NEXT: vse32.v v8, (a0)
2464 ; CHECK-NEXT: addi a2, a2, -4
2465 ; CHECK-NEXT: addi a0, a0, 16
2466 ; CHECK-NEXT: bnez a2, .LBB41_1
2467 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Loop-invariant splat of %x, defined outside %vector.body.
2470 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2471 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2472 br label %vector.body
2474 vector.body: ; preds = %vector.body, %entry
2475 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2476 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2477 %wide.load = load <4 x i32>, ptr %0, align 4
; Splat is the divisor (right-hand operand).
2478 %1 = srem <4 x i32> %wide.load, %broadcast.splat
2479 store <4 x i32> %1, ptr %0, align 4
2480 %index.next = add nuw i64 %index, 4
2481 %2 = icmp eq i64 %index.next, 1024
2482 br i1 %2, label %for.cond.cleanup, label %vector.body
2484 for.cond.cleanup: ; preds = %vector.body
; Scalable-vector unsigned division by a splat, with a scalar remainder loop.
; The entry block branches straight to the scalar loop when the vscale-derived
; step exceeds 1024; otherwise %vector.ph splats %x into <vscale x 4 x i32> and
; %vector.body divides whole vectors of %a until %n.vec elements are done.
; %middle.block skips the scalar loop when 1024 is a multiple of the step;
; otherwise %for.body finishes the remaining elements one i32 at a time.
; The expected assembly uses vdivu.vx in the vector loop and divuw in the
; scalar loop.
2488 define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
2489 ; CHECK-LABEL: sink_splat_udiv_scalable:
2490 ; CHECK: # %bb.0: # %entry
2491 ; CHECK-NEXT: csrr a5, vlenb
2492 ; CHECK-NEXT: srli a3, a5, 1
2493 ; CHECK-NEXT: li a2, 1024
2494 ; CHECK-NEXT: bgeu a2, a3, .LBB42_2
2495 ; CHECK-NEXT: # %bb.1:
2496 ; CHECK-NEXT: li a2, 0
2497 ; CHECK-NEXT: j .LBB42_5
2498 ; CHECK-NEXT: .LBB42_2: # %vector.ph
2499 ; CHECK-NEXT: addi a2, a3, -1
2500 ; CHECK-NEXT: andi a4, a2, 1024
2501 ; CHECK-NEXT: xori a2, a4, 1024
2502 ; CHECK-NEXT: slli a5, a5, 1
2503 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
2504 ; CHECK-NEXT: mv a6, a0
2505 ; CHECK-NEXT: mv a7, a2
2506 ; CHECK-NEXT: .LBB42_3: # %vector.body
2507 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2508 ; CHECK-NEXT: vl2re32.v v8, (a6)
2509 ; CHECK-NEXT: vdivu.vx v8, v8, a1
2510 ; CHECK-NEXT: vs2r.v v8, (a6)
2511 ; CHECK-NEXT: sub a7, a7, a3
2512 ; CHECK-NEXT: add a6, a6, a5
2513 ; CHECK-NEXT: bnez a7, .LBB42_3
2514 ; CHECK-NEXT: # %bb.4: # %middle.block
2515 ; CHECK-NEXT: beqz a4, .LBB42_7
2516 ; CHECK-NEXT: .LBB42_5: # %for.body.preheader
2517 ; CHECK-NEXT: addi a3, a2, -1024
2518 ; CHECK-NEXT: slli a2, a2, 2
2519 ; CHECK-NEXT: add a0, a0, a2
2520 ; CHECK-NEXT: .LBB42_6: # %for.body
2521 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2522 ; CHECK-NEXT: lw a2, 0(a0)
2523 ; CHECK-NEXT: divuw a2, a2, a1
2524 ; CHECK-NEXT: sw a2, 0(a0)
2525 ; CHECK-NEXT: addi a3, a3, 1
2526 ; CHECK-NEXT: addi a0, a0, 4
2527 ; CHECK-NEXT: bnez a3, .LBB42_6
2528 ; CHECK-NEXT: .LBB42_7: # %for.cond.cleanup
2531 %0 = call i64 @llvm.vscale.i64()
2533 %min.iters.check = icmp ugt i64 %1, 1024
2534 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2536 vector.ph: ; preds = %entry
2537 %2 = call i64 @llvm.vscale.i64()
; %n.vec is the largest multiple of the vector step that fits in 1024.
2539 %n.mod.vf = urem i64 1024, %3
2540 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, built outside the vector loop.
2541 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2542 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2543 %4 = call i64 @llvm.vscale.i64()
2545 br label %vector.body
2547 vector.body: ; preds = %vector.body, %vector.ph
2548 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2549 %6 = getelementptr inbounds i32, ptr %a, i64 %index
2550 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the divisor (second operand).
2551 %7 = udiv <vscale x 4 x i32> %wide.load, %broadcast.splat
2552 store <vscale x 4 x i32> %7, ptr %6, align 4
2553 %index.next = add nuw i64 %index, %5
2554 %8 = icmp eq i64 %index.next, %n.vec
2555 br i1 %8, label %middle.block, label %vector.body
2557 middle.block: ; preds = %vector.body
2558 %cmp.n = icmp eq i64 %n.mod.vf, 0
2559 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2561 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop starts at 0 when the vector loop was skipped, else at %n.vec.
2562 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2565 for.cond.cleanup: ; preds = %for.body, %middle.block
2568 for.body: ; preds = %for.body.preheader, %for.body
2569 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2570 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2571 %9 = load i32, ptr %arrayidx, align 4
2572 %div = udiv i32 %9, %x
2573 store i32 %div, ptr %arrayidx, align 4
2574 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2575 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2576 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector signed division by a splat, with a scalar remainder loop.
; The entry block branches straight to the scalar loop when the vscale-derived
; step exceeds 1024; otherwise %vector.ph splats %x into <vscale x 4 x i32> and
; %vector.body divides whole vectors of %a until %n.vec elements are done.
; %middle.block skips the scalar loop when 1024 is a multiple of the step;
; otherwise %for.body finishes the remaining elements one i32 at a time.
; The expected assembly uses vdiv.vx in the vector loop and divw in the
; scalar loop.
2579 define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
2580 ; CHECK-LABEL: sink_splat_sdiv_scalable:
2581 ; CHECK: # %bb.0: # %entry
2582 ; CHECK-NEXT: csrr a5, vlenb
2583 ; CHECK-NEXT: srli a3, a5, 1
2584 ; CHECK-NEXT: li a2, 1024
2585 ; CHECK-NEXT: bgeu a2, a3, .LBB43_2
2586 ; CHECK-NEXT: # %bb.1:
2587 ; CHECK-NEXT: li a2, 0
2588 ; CHECK-NEXT: j .LBB43_5
2589 ; CHECK-NEXT: .LBB43_2: # %vector.ph
2590 ; CHECK-NEXT: addi a2, a3, -1
2591 ; CHECK-NEXT: andi a4, a2, 1024
2592 ; CHECK-NEXT: xori a2, a4, 1024
2593 ; CHECK-NEXT: slli a5, a5, 1
2594 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
2595 ; CHECK-NEXT: mv a6, a0
2596 ; CHECK-NEXT: mv a7, a2
2597 ; CHECK-NEXT: .LBB43_3: # %vector.body
2598 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2599 ; CHECK-NEXT: vl2re32.v v8, (a6)
2600 ; CHECK-NEXT: vdiv.vx v8, v8, a1
2601 ; CHECK-NEXT: vs2r.v v8, (a6)
2602 ; CHECK-NEXT: sub a7, a7, a3
2603 ; CHECK-NEXT: add a6, a6, a5
2604 ; CHECK-NEXT: bnez a7, .LBB43_3
2605 ; CHECK-NEXT: # %bb.4: # %middle.block
2606 ; CHECK-NEXT: beqz a4, .LBB43_7
2607 ; CHECK-NEXT: .LBB43_5: # %for.body.preheader
2608 ; CHECK-NEXT: addi a3, a2, -1024
2609 ; CHECK-NEXT: slli a2, a2, 2
2610 ; CHECK-NEXT: add a0, a0, a2
2611 ; CHECK-NEXT: .LBB43_6: # %for.body
2612 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2613 ; CHECK-NEXT: lw a2, 0(a0)
2614 ; CHECK-NEXT: divw a2, a2, a1
2615 ; CHECK-NEXT: sw a2, 0(a0)
2616 ; CHECK-NEXT: addi a3, a3, 1
2617 ; CHECK-NEXT: addi a0, a0, 4
2618 ; CHECK-NEXT: bnez a3, .LBB43_6
2619 ; CHECK-NEXT: .LBB43_7: # %for.cond.cleanup
2622 %0 = call i64 @llvm.vscale.i64()
2624 %min.iters.check = icmp ugt i64 %1, 1024
2625 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2627 vector.ph: ; preds = %entry
2628 %2 = call i64 @llvm.vscale.i64()
; %n.vec is the largest multiple of the vector step that fits in 1024.
2630 %n.mod.vf = urem i64 1024, %3
2631 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, built outside the vector loop.
2632 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2633 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2634 %4 = call i64 @llvm.vscale.i64()
2636 br label %vector.body
2638 vector.body: ; preds = %vector.body, %vector.ph
2639 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2640 %6 = getelementptr inbounds i32, ptr %a, i64 %index
2641 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the divisor (second operand).
2642 %7 = sdiv <vscale x 4 x i32> %wide.load, %broadcast.splat
2643 store <vscale x 4 x i32> %7, ptr %6, align 4
2644 %index.next = add nuw i64 %index, %5
2645 %8 = icmp eq i64 %index.next, %n.vec
2646 br i1 %8, label %middle.block, label %vector.body
2648 middle.block: ; preds = %vector.body
2649 %cmp.n = icmp eq i64 %n.mod.vf, 0
2650 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2652 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop starts at 0 when the vector loop was skipped, else at %n.vec.
2653 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2656 for.cond.cleanup: ; preds = %for.body, %middle.block
2659 for.body: ; preds = %for.body.preheader, %for.body
2660 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2661 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2662 %9 = load i32, ptr %arrayidx, align 4
2663 %div = sdiv i32 %9, %x
2664 store i32 %div, ptr %arrayidx, align 4
2665 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2666 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2667 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector unsigned remainder by a splat, with a scalar remainder loop.
; The entry block branches straight to the scalar loop when the vscale-derived
; step exceeds 1024; otherwise %vector.ph splats %x into <vscale x 4 x i32> and
; %vector.body processes whole vectors of %a until %n.vec elements are done.
; %middle.block skips the scalar loop when 1024 is a multiple of the step;
; otherwise %for.body finishes the remaining elements one i32 at a time.
; The expected assembly uses vremu.vx in the vector loop and remuw in the
; scalar loop.
2670 define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
2671 ; CHECK-LABEL: sink_splat_urem_scalable:
2672 ; CHECK: # %bb.0: # %entry
2673 ; CHECK-NEXT: csrr a5, vlenb
2674 ; CHECK-NEXT: srli a3, a5, 1
2675 ; CHECK-NEXT: li a2, 1024
2676 ; CHECK-NEXT: bgeu a2, a3, .LBB44_2
2677 ; CHECK-NEXT: # %bb.1:
2678 ; CHECK-NEXT: li a2, 0
2679 ; CHECK-NEXT: j .LBB44_5
2680 ; CHECK-NEXT: .LBB44_2: # %vector.ph
2681 ; CHECK-NEXT: addi a2, a3, -1
2682 ; CHECK-NEXT: andi a4, a2, 1024
2683 ; CHECK-NEXT: xori a2, a4, 1024
2684 ; CHECK-NEXT: slli a5, a5, 1
2685 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
2686 ; CHECK-NEXT: mv a6, a0
2687 ; CHECK-NEXT: mv a7, a2
2688 ; CHECK-NEXT: .LBB44_3: # %vector.body
2689 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2690 ; CHECK-NEXT: vl2re32.v v8, (a6)
2691 ; CHECK-NEXT: vremu.vx v8, v8, a1
2692 ; CHECK-NEXT: vs2r.v v8, (a6)
2693 ; CHECK-NEXT: sub a7, a7, a3
2694 ; CHECK-NEXT: add a6, a6, a5
2695 ; CHECK-NEXT: bnez a7, .LBB44_3
2696 ; CHECK-NEXT: # %bb.4: # %middle.block
2697 ; CHECK-NEXT: beqz a4, .LBB44_7
2698 ; CHECK-NEXT: .LBB44_5: # %for.body.preheader
2699 ; CHECK-NEXT: addi a3, a2, -1024
2700 ; CHECK-NEXT: slli a2, a2, 2
2701 ; CHECK-NEXT: add a0, a0, a2
2702 ; CHECK-NEXT: .LBB44_6: # %for.body
2703 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2704 ; CHECK-NEXT: lw a2, 0(a0)
2705 ; CHECK-NEXT: remuw a2, a2, a1
2706 ; CHECK-NEXT: sw a2, 0(a0)
2707 ; CHECK-NEXT: addi a3, a3, 1
2708 ; CHECK-NEXT: addi a0, a0, 4
2709 ; CHECK-NEXT: bnez a3, .LBB44_6
2710 ; CHECK-NEXT: .LBB44_7: # %for.cond.cleanup
2713 %0 = call i64 @llvm.vscale.i64()
2715 %min.iters.check = icmp ugt i64 %1, 1024
2716 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2718 vector.ph: ; preds = %entry
2719 %2 = call i64 @llvm.vscale.i64()
; %n.vec is the largest multiple of the vector step that fits in 1024.
2721 %n.mod.vf = urem i64 1024, %3
2722 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, built outside the vector loop.
2723 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2724 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2725 %4 = call i64 @llvm.vscale.i64()
2727 br label %vector.body
2729 vector.body: ; preds = %vector.body, %vector.ph
2730 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2731 %6 = getelementptr inbounds i32, ptr %a, i64 %index
2732 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the divisor (second operand).
2733 %7 = urem <vscale x 4 x i32> %wide.load, %broadcast.splat
2734 store <vscale x 4 x i32> %7, ptr %6, align 4
2735 %index.next = add nuw i64 %index, %5
2736 %8 = icmp eq i64 %index.next, %n.vec
2737 br i1 %8, label %middle.block, label %vector.body
2739 middle.block: ; preds = %vector.body
2740 %cmp.n = icmp eq i64 %n.mod.vf, 0
2741 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2743 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop starts at 0 when the vector loop was skipped, else at %n.vec.
2744 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2747 for.cond.cleanup: ; preds = %for.body, %middle.block
2750 for.body: ; preds = %for.body.preheader, %for.body
2751 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2752 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2753 %9 = load i32, ptr %arrayidx, align 4
2754 %rem = urem i32 %9, %x
2755 store i32 %rem, ptr %arrayidx, align 4
2756 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2757 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2758 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Scalable-vector signed remainder by a splat, with a scalar remainder loop.
; The entry block branches straight to the scalar loop when the vscale-derived
; step exceeds 1024; otherwise %vector.ph splats %x into <vscale x 4 x i32> and
; %vector.body processes whole vectors of %a until %n.vec elements are done.
; %middle.block skips the scalar loop when 1024 is a multiple of the step;
; otherwise %for.body finishes the remaining elements one i32 at a time.
; The expected assembly uses vrem.vx in the vector loop and remw in the
; scalar loop.
2761 define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
2762 ; CHECK-LABEL: sink_splat_srem_scalable:
2763 ; CHECK: # %bb.0: # %entry
2764 ; CHECK-NEXT: csrr a5, vlenb
2765 ; CHECK-NEXT: srli a3, a5, 1
2766 ; CHECK-NEXT: li a2, 1024
2767 ; CHECK-NEXT: bgeu a2, a3, .LBB45_2
2768 ; CHECK-NEXT: # %bb.1:
2769 ; CHECK-NEXT: li a2, 0
2770 ; CHECK-NEXT: j .LBB45_5
2771 ; CHECK-NEXT: .LBB45_2: # %vector.ph
2772 ; CHECK-NEXT: addi a2, a3, -1
2773 ; CHECK-NEXT: andi a4, a2, 1024
2774 ; CHECK-NEXT: xori a2, a4, 1024
2775 ; CHECK-NEXT: slli a5, a5, 1
2776 ; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
2777 ; CHECK-NEXT: mv a6, a0
2778 ; CHECK-NEXT: mv a7, a2
2779 ; CHECK-NEXT: .LBB45_3: # %vector.body
2780 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2781 ; CHECK-NEXT: vl2re32.v v8, (a6)
2782 ; CHECK-NEXT: vrem.vx v8, v8, a1
2783 ; CHECK-NEXT: vs2r.v v8, (a6)
2784 ; CHECK-NEXT: sub a7, a7, a3
2785 ; CHECK-NEXT: add a6, a6, a5
2786 ; CHECK-NEXT: bnez a7, .LBB45_3
2787 ; CHECK-NEXT: # %bb.4: # %middle.block
2788 ; CHECK-NEXT: beqz a4, .LBB45_7
2789 ; CHECK-NEXT: .LBB45_5: # %for.body.preheader
2790 ; CHECK-NEXT: addi a3, a2, -1024
2791 ; CHECK-NEXT: slli a2, a2, 2
2792 ; CHECK-NEXT: add a0, a0, a2
2793 ; CHECK-NEXT: .LBB45_6: # %for.body
2794 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2795 ; CHECK-NEXT: lw a2, 0(a0)
2796 ; CHECK-NEXT: remw a2, a2, a1
2797 ; CHECK-NEXT: sw a2, 0(a0)
2798 ; CHECK-NEXT: addi a3, a3, 1
2799 ; CHECK-NEXT: addi a0, a0, 4
2800 ; CHECK-NEXT: bnez a3, .LBB45_6
2801 ; CHECK-NEXT: .LBB45_7: # %for.cond.cleanup
2804 %0 = call i64 @llvm.vscale.i64()
2806 %min.iters.check = icmp ugt i64 %1, 1024
2807 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2809 vector.ph: ; preds = %entry
2810 %2 = call i64 @llvm.vscale.i64()
; %n.vec is the largest multiple of the vector step that fits in 1024.
2812 %n.mod.vf = urem i64 1024, %3
2813 %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, built outside the vector loop.
2814 %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2815 %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2816 %4 = call i64 @llvm.vscale.i64()
2818 br label %vector.body
2820 vector.body: ; preds = %vector.body, %vector.ph
2821 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2822 %6 = getelementptr inbounds i32, ptr %a, i64 %index
2823 %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
; The splat is the divisor (second operand).
2824 %7 = srem <vscale x 4 x i32> %wide.load, %broadcast.splat
2825 store <vscale x 4 x i32> %7, ptr %6, align 4
2826 %index.next = add nuw i64 %index, %5
2827 %8 = icmp eq i64 %index.next, %n.vec
2828 br i1 %8, label %middle.block, label %vector.body
2830 middle.block: ; preds = %vector.body
2831 %cmp.n = icmp eq i64 %n.mod.vf, 0
2832 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2834 for.body.preheader: ; preds = %entry, %middle.block
; Scalar loop starts at 0 when the vector loop was skipped, else at %n.vec.
2835 %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2838 for.cond.cleanup: ; preds = %for.body, %middle.block
2841 for.body: ; preds = %for.body.preheader, %for.body
2842 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2843 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2844 %9 = load i32, ptr %arrayidx, align 4
2845 %rem = srem i32 %9, %x
2846 store i32 %rem, ptr %arrayidx, align 4
2847 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2848 %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2849 br i1 %cmp.not, label %for.cond.cleanup, label %for.body
; Vector-predicated multiply on <4 x i32>: two vector operands, a <4 x i1>
; mask and an i32 vector-length operand.
2852 declare <4 x i32> @llvm.vp.mul.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Vector-predicated multiply by a splat.
; %x is broadcast to <4 x i32> before the loop; each iteration loads four i32
; values from %a, calls llvm.vp.mul with the splat, mask %m and length %vl, and
; stores the result back. The expected assembly uses the masked scalar form
; vmul.vx ... v0.t, switching VL to a2 for the operation and back to 4 for the
; store.
2854 define void @sink_splat_vp_mul(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
2855 ; CHECK-LABEL: sink_splat_vp_mul:
2856 ; CHECK: # %bb.0: # %entry
2857 ; CHECK-NEXT: li a3, 1024
2858 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2859 ; CHECK-NEXT: .LBB46_1: # %vector.body
2860 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2861 ; CHECK-NEXT: vle32.v v8, (a0)
2862 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
2863 ; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t
2864 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2865 ; CHECK-NEXT: vse32.v v8, (a0)
2866 ; CHECK-NEXT: addi a3, a3, -4
2867 ; CHECK-NEXT: addi a0, a0, 16
2868 ; CHECK-NEXT: bnez a3, .LBB46_1
2869 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
2872 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2873 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2874 br label %vector.body
2876 vector.body: ; preds = %vector.body, %entry
2877 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2878 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2879 %wide.load = load <4 x i32>, ptr %0, align 4
; The splat is the second operand of the predicated multiply.
2880 %1 = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
2881 store <4 x i32> %1, ptr %0, align 4
2882 %index.next = add nuw i64 %index, 4
2883 %2 = icmp eq i64 %index.next, 1024
2884 br i1 %2, label %for.cond.cleanup, label %vector.body
2886 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated add on <4 x i32>: two vector operands, a <4 x i1> mask and
; an i32 vector-length operand.
2890 declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Vector-predicated add of a splat.
; %x is broadcast to <4 x i32> before the loop; each iteration loads four i32
; values from %a, calls llvm.vp.add with the splat as the second operand, mask
; %m and length %vl, and stores the result back. The expected assembly uses the
; masked scalar form vadd.vx ... v0.t.
2892 define void @sink_splat_vp_add(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
2893 ; CHECK-LABEL: sink_splat_vp_add:
2894 ; CHECK: # %bb.0: # %entry
2895 ; CHECK-NEXT: li a3, 1024
2896 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2897 ; CHECK-NEXT: .LBB47_1: # %vector.body
2898 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2899 ; CHECK-NEXT: vle32.v v8, (a0)
2900 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
2901 ; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t
2902 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2903 ; CHECK-NEXT: vse32.v v8, (a0)
2904 ; CHECK-NEXT: addi a3, a3, -4
2905 ; CHECK-NEXT: addi a0, a0, 16
2906 ; CHECK-NEXT: bnez a3, .LBB47_1
2907 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
2910 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2911 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2912 br label %vector.body
2914 vector.body: ; preds = %vector.body, %entry
2915 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2916 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2917 %wide.load = load <4 x i32>, ptr %0, align 4
2918 %1 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
2919 store <4 x i32> %1, ptr %0, align 4
2920 %index.next = add nuw i64 %index, 4
2921 %2 = icmp eq i64 %index.next, 1024
2922 br i1 %2, label %for.cond.cleanup, label %vector.body
2924 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated add of a splat, with the operands swapped.
; Same loop as the non-commuted vp.add case, except the splat is passed as the
; FIRST operand of llvm.vp.add. The expected assembly is still the masked
; scalar form vadd.vx ... v0.t.
2928 define void @sink_splat_vp_add_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
2929 ; CHECK-LABEL: sink_splat_vp_add_commute:
2930 ; CHECK: # %bb.0: # %entry
2931 ; CHECK-NEXT: li a3, 1024
2932 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2933 ; CHECK-NEXT: .LBB48_1: # %vector.body
2934 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2935 ; CHECK-NEXT: vle32.v v8, (a0)
2936 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
2937 ; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t
2938 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2939 ; CHECK-NEXT: vse32.v v8, (a0)
2940 ; CHECK-NEXT: addi a3, a3, -4
2941 ; CHECK-NEXT: addi a0, a0, 16
2942 ; CHECK-NEXT: bnez a3, .LBB48_1
2943 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
2946 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2947 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2948 br label %vector.body
2950 vector.body: ; preds = %vector.body, %entry
2951 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2952 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2953 %wide.load = load <4 x i32>, ptr %0, align 4
; Splat first, loaded vector second.
2954 %1 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
2955 store <4 x i32> %1, ptr %0, align 4
2956 %index.next = add nuw i64 %index, 4
2957 %2 = icmp eq i64 %index.next, 1024
2958 br i1 %2, label %for.cond.cleanup, label %vector.body
2960 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated subtract on <4 x i32>: two vector operands, a <4 x i1>
; mask and an i32 vector-length operand.
2964 declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Vector-predicated subtract of a splat (vector - splat).
; %x is broadcast to <4 x i32> before the loop; each iteration loads four i32
; values from %a, calls llvm.vp.sub with the splat as the subtrahend, mask %m
; and length %vl, and stores the result back. The expected assembly uses the
; masked scalar form vsub.vx ... v0.t.
2966 define void @sink_splat_vp_sub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
2967 ; CHECK-LABEL: sink_splat_vp_sub:
2968 ; CHECK: # %bb.0: # %entry
2969 ; CHECK-NEXT: li a3, 1024
2970 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2971 ; CHECK-NEXT: .LBB49_1: # %vector.body
2972 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2973 ; CHECK-NEXT: vle32.v v8, (a0)
2974 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
2975 ; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
2976 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2977 ; CHECK-NEXT: vse32.v v8, (a0)
2978 ; CHECK-NEXT: addi a3, a3, -4
2979 ; CHECK-NEXT: addi a0, a0, 16
2980 ; CHECK-NEXT: bnez a3, .LBB49_1
2981 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
2984 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2985 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2986 br label %vector.body
2988 vector.body: ; preds = %vector.body, %entry
2989 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2990 %0 = getelementptr inbounds i32, ptr %a, i64 %index
2991 %wide.load = load <4 x i32>, ptr %0, align 4
2992 %1 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
2993 store <4 x i32> %1, ptr %0, align 4
2994 %index.next = add nuw i64 %index, 4
2995 %2 = icmp eq i64 %index.next, 1024
2996 br i1 %2, label %for.cond.cleanup, label %vector.body
2998 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated reverse subtract (splat - vector).
; Same loop as the vp.sub case, except the splat is the FIRST operand of
; llvm.vp.sub, i.e. the minuend. The expected assembly uses the masked
; reverse-subtract scalar form vrsub.vx ... v0.t.
3002 define void @sink_splat_vp_rsub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3003 ; CHECK-LABEL: sink_splat_vp_rsub:
3004 ; CHECK: # %bb.0: # %entry
3005 ; CHECK-NEXT: li a3, 1024
3006 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3007 ; CHECK-NEXT: .LBB50_1: # %vector.body
3008 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3009 ; CHECK-NEXT: vle32.v v8, (a0)
3010 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3011 ; CHECK-NEXT: vrsub.vx v8, v8, a1, v0.t
3012 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3013 ; CHECK-NEXT: vse32.v v8, (a0)
3014 ; CHECK-NEXT: addi a3, a3, -4
3015 ; CHECK-NEXT: addi a0, a0, 16
3016 ; CHECK-NEXT: bnez a3, .LBB50_1
3017 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
3020 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3021 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3022 br label %vector.body
3024 vector.body: ; preds = %vector.body, %entry
3025 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3026 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3027 %wide.load = load <4 x i32>, ptr %0, align 4
; Splat first, loaded vector second.
3028 %1 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
3029 store <4 x i32> %1, ptr %0, align 4
3030 %index.next = add nuw i64 %index, 4
3031 %2 = icmp eq i64 %index.next, 1024
3032 br i1 %2, label %for.cond.cleanup, label %vector.body
3034 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated shift left on <4 x i32>: two vector operands, a <4 x i1>
; mask and an i32 vector-length operand.
3038 declare <4 x i32> @llvm.vp.shl.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Vector-predicated shift left by a splat amount.
; %x is broadcast to <4 x i32> before the loop; each iteration loads four i32
; values from %a, calls llvm.vp.shl with the splat as the shift amount, mask %m
; and length %vl, and stores the result back. The expected assembly uses the
; masked scalar form vsll.vx ... v0.t.
3040 define void @sink_splat_vp_shl(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3041 ; CHECK-LABEL: sink_splat_vp_shl:
3042 ; CHECK: # %bb.0: # %entry
3043 ; CHECK-NEXT: li a3, 1024
3044 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3045 ; CHECK-NEXT: .LBB51_1: # %vector.body
3046 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3047 ; CHECK-NEXT: vle32.v v8, (a0)
3048 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3049 ; CHECK-NEXT: vsll.vx v8, v8, a1, v0.t
3050 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3051 ; CHECK-NEXT: vse32.v v8, (a0)
3052 ; CHECK-NEXT: addi a3, a3, -4
3053 ; CHECK-NEXT: addi a0, a0, 16
3054 ; CHECK-NEXT: bnez a3, .LBB51_1
3055 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
3058 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3059 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3060 br label %vector.body
3062 vector.body: ; preds = %vector.body, %entry
3063 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3064 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3065 %wide.load = load <4 x i32>, ptr %0, align 4
3066 %1 = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3067 store <4 x i32> %1, ptr %0, align 4
3068 %index.next = add nuw i64 %index, 4
3069 %2 = icmp eq i64 %index.next, 1024
3070 br i1 %2, label %for.cond.cleanup, label %vector.body
3072 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated logical shift right on <4 x i32>: two vector operands, a
; <4 x i1> mask and an i32 vector-length operand.
3076 declare <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Vector-predicated logical shift right by a splat amount.
; %x is broadcast to <4 x i32> before the loop; each iteration loads four i32
; values from %a, calls llvm.vp.lshr with the splat as the shift amount, mask
; %m and length %vl, and stores the result back. The expected assembly uses the
; masked scalar form vsrl.vx ... v0.t.
3078 define void @sink_splat_vp_lshr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3079 ; CHECK-LABEL: sink_splat_vp_lshr:
3080 ; CHECK: # %bb.0: # %entry
3081 ; CHECK-NEXT: li a3, 1024
3082 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3083 ; CHECK-NEXT: .LBB52_1: # %vector.body
3084 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3085 ; CHECK-NEXT: vle32.v v8, (a0)
3086 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3087 ; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
3088 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3089 ; CHECK-NEXT: vse32.v v8, (a0)
3090 ; CHECK-NEXT: addi a3, a3, -4
3091 ; CHECK-NEXT: addi a0, a0, 16
3092 ; CHECK-NEXT: bnez a3, .LBB52_1
3093 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
3096 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3097 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3098 br label %vector.body
3100 vector.body: ; preds = %vector.body, %entry
3101 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3102 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3103 %wide.load = load <4 x i32>, ptr %0, align 4
3104 %1 = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3105 store <4 x i32> %1, ptr %0, align 4
3106 %index.next = add nuw i64 %index, 4
3107 %2 = icmp eq i64 %index.next, 1024
3108 br i1 %2, label %for.cond.cleanup, label %vector.body
3110 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated arithmetic shift right on <4 x i32>: two vector operands,
; a <4 x i1> mask and an i32 vector-length operand.
3114 declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Vector-predicated arithmetic shift right by a splat amount.
; %x is broadcast to <4 x i32> before the loop; each iteration loads four i32
; values from %a, calls llvm.vp.ashr with the splat as the shift amount, mask
; %m and length %vl, and stores the result back. The expected assembly uses the
; masked scalar form vsra.vx ... v0.t.
3116 define void @sink_splat_vp_ashr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3117 ; CHECK-LABEL: sink_splat_vp_ashr:
3118 ; CHECK: # %bb.0: # %entry
3119 ; CHECK-NEXT: li a3, 1024
3120 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3121 ; CHECK-NEXT: .LBB53_1: # %vector.body
3122 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3123 ; CHECK-NEXT: vle32.v v8, (a0)
3124 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3125 ; CHECK-NEXT: vsra.vx v8, v8, a1, v0.t
3126 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3127 ; CHECK-NEXT: vse32.v v8, (a0)
3128 ; CHECK-NEXT: addi a3, a3, -4
3129 ; CHECK-NEXT: addi a0, a0, 16
3130 ; CHECK-NEXT: bnez a3, .LBB53_1
3131 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
3134 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3135 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3136 br label %vector.body
3138 vector.body: ; preds = %vector.body, %entry
3139 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3140 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3141 %wide.load = load <4 x i32>, ptr %0, align 4
3142 %1 = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3143 store <4 x i32> %1, ptr %0, align 4
3144 %index.next = add nuw i64 %index, 4
3145 %2 = icmp eq i64 %index.next, 1024
3146 br i1 %2, label %for.cond.cleanup, label %vector.body
3148 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated floating-point multiply on <4 x float>: two vector
; operands, a <4 x i1> mask and an i32 vector-length operand. The overload
; suffix is v4f32 to match the <4 x float> operand type.
3152 declare <4 x float> @llvm.vp.fmul.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
; Vector-predicated floating-point multiply by a splat.
; %x is broadcast to <4 x float> before the loop; each iteration loads four
; floats from %a, calls llvm.vp.fmul with the splat as the second operand, mask
; %m and length %vl, and stores the result back. The expected assembly uses the
; masked scalar form vfmul.vf ... fa0, v0.t, switching VL to a1 for the
; operation and back to 4 for the store.
3154 define void @sink_splat_vp_fmul(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3155 ; CHECK-LABEL: sink_splat_vp_fmul:
3156 ; CHECK: # %bb.0: # %entry
3157 ; CHECK-NEXT: li a2, 1024
3158 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3159 ; CHECK-NEXT: .LBB54_1: # %vector.body
3160 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3161 ; CHECK-NEXT: vle32.v v8, (a0)
3162 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3163 ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t
3164 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3165 ; CHECK-NEXT: vse32.v v8, (a0)
3166 ; CHECK-NEXT: addi a2, a2, -4
3167 ; CHECK-NEXT: addi a0, a0, 16
3168 ; CHECK-NEXT: bnez a2, .LBB54_1
3169 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
3172 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3173 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3174 br label %vector.body
3176 vector.body: ; preds = %vector.body, %entry
3177 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3178 %0 = getelementptr inbounds float, ptr %a, i64 %index
3179 %wide.load = load <4 x float>, ptr %0, align 4
3180 %1 = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3181 store <4 x float> %1, ptr %0, align 4
3182 %index.next = add nuw i64 %index, 4
3183 %2 = icmp eq i64 %index.next, 1024
3184 br i1 %2, label %for.cond.cleanup, label %vector.body
3186 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated floating-point divide on <4 x float>: two vector operands,
; a <4 x i1> mask and an i32 vector-length operand. The overload suffix is
; v4f32 to match the <4 x float> operand type.
3190 declare <4 x float> @llvm.vp.fdiv.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
; Vector-predicated floating-point divide by a splat (vector / splat).
; %x is broadcast to <4 x float> before the loop; each iteration loads four
; floats from %a, calls llvm.vp.fdiv with the splat as the divisor, mask %m and
; length %vl, and stores the result back. The expected assembly uses the masked
; scalar form vfdiv.vf ... fa0, v0.t.
3192 define void @sink_splat_vp_fdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3193 ; CHECK-LABEL: sink_splat_vp_fdiv:
3194 ; CHECK: # %bb.0: # %entry
3195 ; CHECK-NEXT: li a2, 1024
3196 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3197 ; CHECK-NEXT: .LBB55_1: # %vector.body
3198 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3199 ; CHECK-NEXT: vle32.v v8, (a0)
3200 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3201 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t
3202 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3203 ; CHECK-NEXT: vse32.v v8, (a0)
3204 ; CHECK-NEXT: addi a2, a2, -4
3205 ; CHECK-NEXT: addi a0, a0, 16
3206 ; CHECK-NEXT: bnez a2, .LBB55_1
3207 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
3210 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3211 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3212 br label %vector.body
3214 vector.body: ; preds = %vector.body, %entry
3215 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3216 %0 = getelementptr inbounds float, ptr %a, i64 %index
3217 %wide.load = load <4 x float>, ptr %0, align 4
3218 %1 = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3219 store <4 x float> %1, ptr %0, align 4
3220 %index.next = add nuw i64 %index, 4
3221 %2 = icmp eq i64 %index.next, 1024
3222 br i1 %2, label %for.cond.cleanup, label %vector.body
3224 for.cond.cleanup: ; preds = %vector.body
; Vector-predicated reverse floating-point divide (splat / vector).
; Same loop as the vp.fdiv case, except the splat is the FIRST operand of
; llvm.vp.fdiv, i.e. the dividend. The expected assembly uses the masked
; reverse-divide scalar form vfrdiv.vf ... fa0, v0.t.
3228 define void @sink_splat_vp_frdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3229 ; CHECK-LABEL: sink_splat_vp_frdiv:
3230 ; CHECK: # %bb.0: # %entry
3231 ; CHECK-NEXT: li a2, 1024
3232 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3233 ; CHECK-NEXT: .LBB56_1: # %vector.body
3234 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3235 ; CHECK-NEXT: vle32.v v8, (a0)
3236 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3237 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t
3238 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3239 ; CHECK-NEXT: vse32.v v8, (a0)
3240 ; CHECK-NEXT: addi a2, a2, -4
3241 ; CHECK-NEXT: addi a0, a0, 16
3242 ; CHECK-NEXT: bnez a2, .LBB56_1
3243 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; Build the loop-invariant splat of %x (insert into lane 0, then broadcast).
3246 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3247 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3248 br label %vector.body
3250 vector.body: ; preds = %vector.body, %entry
3251 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3252 %0 = getelementptr inbounds float, ptr %a, i64 %index
3253 %wide.load = load <4 x float>, ptr %0, align 4
; Splat first, loaded vector second.
3254 %1 = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl)
3255 store <4 x float> %1, ptr %0, align 4
3256 %index.next = add nuw i64 %index, 4
3257 %2 = icmp eq i64 %index.next, 1024
3258 br i1 %2, label %for.cond.cleanup, label %vector.body
3260 for.cond.cleanup: ; preds = %vector.body
; NOTE(review): the intrinsic name carries a .v4i32 suffix although every
; vector operand is <4 x float>; confirm the suffix is intentional.
3264 declare <4 x float> @llvm.vp.fadd.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
; Masked vp.fadd of the loaded vector with the splat of %x inside a loop.
; Expected output: no vector splat instruction appears; the loop body uses a
; masked vfadd.vf that reads the scalar from fa0, preceded by a vsetvli on a1
; and followed by a vsetivli back to 4 elements for the store.
3266 define void @sink_splat_vp_fadd(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3267 ; CHECK-LABEL: sink_splat_vp_fadd:
3268 ; CHECK: # %bb.0: # %entry
3269 ; CHECK-NEXT: li a2, 1024
3270 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3271 ; CHECK-NEXT: .LBB57_1: # %vector.body
3272 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3273 ; CHECK-NEXT: vle32.v v8, (a0)
3274 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3275 ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t
3276 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3277 ; CHECK-NEXT: vse32.v v8, (a0)
3278 ; CHECK-NEXT: addi a2, a2, -4
3279 ; CHECK-NEXT: addi a0, a0, 16
3280 ; CHECK-NEXT: bnez a2, .LBB57_1
3281 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3284 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3285 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3286 br label %vector.body
3288 vector.body: ; preds = %vector.body, %entry
3289 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3290 %0 = getelementptr inbounds float, ptr %a, i64 %index
3291 %wide.load = load <4 x float>, ptr %0, align 4
3292 %1 = call <4 x float> @llvm.vp.fadd.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3293 store <4 x float> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3294 %index.next = add nuw i64 %index, 4
3295 %2 = icmp eq i64 %index.next, 1024
3296 br i1 %2, label %for.cond.cleanup, label %vector.body
3298 for.cond.cleanup: ; preds = %vector.body
; NOTE(review): the intrinsic name carries a .v4i32 suffix although every
; vector operand is <4 x float>; confirm the suffix is intentional.
3302 declare <4 x float> @llvm.vp.fsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
; Masked vp.fsub in a loop: loaded vector minus the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses a
; masked vfsub.vf that reads the scalar from fa0, preceded by a vsetvli on a1
; and followed by a vsetivli back to 4 elements for the store.
3304 define void @sink_splat_vp_fsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3305 ; CHECK-LABEL: sink_splat_vp_fsub:
3306 ; CHECK: # %bb.0: # %entry
3307 ; CHECK-NEXT: li a2, 1024
3308 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3309 ; CHECK-NEXT: .LBB58_1: # %vector.body
3310 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3311 ; CHECK-NEXT: vle32.v v8, (a0)
3312 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3313 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
3314 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3315 ; CHECK-NEXT: vse32.v v8, (a0)
3316 ; CHECK-NEXT: addi a2, a2, -4
3317 ; CHECK-NEXT: addi a0, a0, 16
3318 ; CHECK-NEXT: bnez a2, .LBB58_1
3319 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3322 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3323 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3324 br label %vector.body
3326 vector.body: ; preds = %vector.body, %entry
3327 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3328 %0 = getelementptr inbounds float, ptr %a, i64 %index
3329 %wide.load = load <4 x float>, ptr %0, align 4
3330 %1 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3331 store <4 x float> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3332 %index.next = add nuw i64 %index, 4
3333 %2 = icmp eq i64 %index.next, 1024
3334 br i1 %2, label %for.cond.cleanup, label %vector.body
3336 for.cond.cleanup: ; preds = %vector.body
; NOTE(review): the function below never calls @llvm.vp.frsub; it calls
; @llvm.vp.fsub with the operands swapped. This declaration looks unused from
; here; confirm against the rest of the file before removing it.
3340 declare <4 x float> @llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
; Masked vp.fsub in a loop where the splat of %x is the first operand and the
; loaded vector is subtracted from it.
; Expected output: no vector splat instruction appears; the loop body uses a
; masked vfrsub.vf that reads the scalar from fa0, preceded by a vsetvli on a1
; and followed by a vsetivli back to 4 elements for the store.
3342 define void @sink_splat_vp_frsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3343 ; CHECK-LABEL: sink_splat_vp_frsub:
3344 ; CHECK: # %bb.0: # %entry
3345 ; CHECK-NEXT: li a2, 1024
3346 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3347 ; CHECK-NEXT: .LBB59_1: # %vector.body
3348 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3349 ; CHECK-NEXT: vle32.v v8, (a0)
3350 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
3351 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t
3352 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3353 ; CHECK-NEXT: vse32.v v8, (a0)
3354 ; CHECK-NEXT: addi a2, a2, -4
3355 ; CHECK-NEXT: addi a0, a0, 16
3356 ; CHECK-NEXT: bnez a2, .LBB59_1
3357 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3360 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3361 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3362 br label %vector.body
3364 vector.body: ; preds = %vector.body, %entry
3365 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3366 %0 = getelementptr inbounds float, ptr %a, i64 %index
3367 %wide.load = load <4 x float>, ptr %0, align 4
; Splat is operand 0 here: the result is splat - load.
3368 %1 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl)
3369 store <4 x float> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3370 %index.next = add nuw i64 %index, 4
3371 %2 = icmp eq i64 %index.next, 1024
3372 br i1 %2, label %for.cond.cleanup, label %vector.body
3374 for.cond.cleanup: ; preds = %vector.body
3378 declare <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked vp.udiv in a loop: loaded vector divided by the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses a
; masked vdivu.vx that reads the scalar from a1, preceded by a vsetvli on a2
; and followed by a vsetivli back to 4 elements for the store.
3380 define void @sink_splat_vp_udiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3381 ; CHECK-LABEL: sink_splat_vp_udiv:
3382 ; CHECK: # %bb.0: # %entry
3383 ; CHECK-NEXT: li a3, 1024
3384 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3385 ; CHECK-NEXT: .LBB60_1: # %vector.body
3386 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3387 ; CHECK-NEXT: vle32.v v8, (a0)
3388 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3389 ; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t
3390 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3391 ; CHECK-NEXT: vse32.v v8, (a0)
3392 ; CHECK-NEXT: addi a3, a3, -4
3393 ; CHECK-NEXT: addi a0, a0, 16
3394 ; CHECK-NEXT: bnez a3, .LBB60_1
3395 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3398 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3399 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3400 br label %vector.body
3402 vector.body: ; preds = %vector.body, %entry
3403 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3404 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3405 %wide.load = load <4 x i32>, ptr %0, align 4
3406 %1 = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3407 store <4 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3408 %index.next = add nuw i64 %index, 4
3409 %2 = icmp eq i64 %index.next, 1024
3410 br i1 %2, label %for.cond.cleanup, label %vector.body
3412 for.cond.cleanup: ; preds = %vector.body
3416 declare <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked vp.sdiv in a loop: loaded vector divided by the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses a
; masked vdiv.vx that reads the scalar from a1, preceded by a vsetvli on a2
; and followed by a vsetivli back to 4 elements for the store.
3418 define void @sink_splat_vp_sdiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3419 ; CHECK-LABEL: sink_splat_vp_sdiv:
3420 ; CHECK: # %bb.0: # %entry
3421 ; CHECK-NEXT: li a3, 1024
3422 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3423 ; CHECK-NEXT: .LBB61_1: # %vector.body
3424 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3425 ; CHECK-NEXT: vle32.v v8, (a0)
3426 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3427 ; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
3428 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3429 ; CHECK-NEXT: vse32.v v8, (a0)
3430 ; CHECK-NEXT: addi a3, a3, -4
3431 ; CHECK-NEXT: addi a0, a0, 16
3432 ; CHECK-NEXT: bnez a3, .LBB61_1
3433 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3436 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3437 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3438 br label %vector.body
3440 vector.body: ; preds = %vector.body, %entry
3441 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3442 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3443 %wide.load = load <4 x i32>, ptr %0, align 4
3444 %1 = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3445 store <4 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3446 %index.next = add nuw i64 %index, 4
3447 %2 = icmp eq i64 %index.next, 1024
3448 br i1 %2, label %for.cond.cleanup, label %vector.body
3450 for.cond.cleanup: ; preds = %vector.body
3454 declare <4 x i32> @llvm.vp.urem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked vp.urem in a loop: loaded vector modulo the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses a
; masked vremu.vx that reads the scalar from a1, preceded by a vsetvli on a2
; and followed by a vsetivli back to 4 elements for the store.
3456 define void @sink_splat_vp_urem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3457 ; CHECK-LABEL: sink_splat_vp_urem:
3458 ; CHECK: # %bb.0: # %entry
3459 ; CHECK-NEXT: li a3, 1024
3460 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3461 ; CHECK-NEXT: .LBB62_1: # %vector.body
3462 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3463 ; CHECK-NEXT: vle32.v v8, (a0)
3464 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3465 ; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t
3466 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3467 ; CHECK-NEXT: vse32.v v8, (a0)
3468 ; CHECK-NEXT: addi a3, a3, -4
3469 ; CHECK-NEXT: addi a0, a0, 16
3470 ; CHECK-NEXT: bnez a3, .LBB62_1
3471 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3474 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3475 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3476 br label %vector.body
3478 vector.body: ; preds = %vector.body, %entry
3479 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3480 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3481 %wide.load = load <4 x i32>, ptr %0, align 4
3482 %1 = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3483 store <4 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3484 %index.next = add nuw i64 %index, 4
3485 %2 = icmp eq i64 %index.next, 1024
3486 br i1 %2, label %for.cond.cleanup, label %vector.body
3488 for.cond.cleanup: ; preds = %vector.body
3492 declare <4 x i32> @llvm.vp.srem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
; Masked vp.srem in a loop: loaded vector modulo the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses a
; masked vrem.vx that reads the scalar from a1, preceded by a vsetvli on a2
; and followed by a vsetivli back to 4 elements for the store.
3494 define void @sink_splat_vp_srem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3495 ; CHECK-LABEL: sink_splat_vp_srem:
3496 ; CHECK: # %bb.0: # %entry
3497 ; CHECK-NEXT: li a3, 1024
3498 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3499 ; CHECK-NEXT: .LBB63_1: # %vector.body
3500 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3501 ; CHECK-NEXT: vle32.v v8, (a0)
3502 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3503 ; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t
3504 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3505 ; CHECK-NEXT: vse32.v v8, (a0)
3506 ; CHECK-NEXT: addi a3, a3, -4
3507 ; CHECK-NEXT: addi a0, a0, 16
3508 ; CHECK-NEXT: bnez a3, .LBB63_1
3509 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3512 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3513 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3514 br label %vector.body
3516 vector.body: ; preds = %vector.body, %entry
3517 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3518 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3519 %wide.load = load <4 x i32>, ptr %0, align 4
3520 %1 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3521 store <4 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3522 %index.next = add nuw i64 %index, 4
3523 %2 = icmp eq i64 %index.next, 1024
3524 br i1 %2, label %for.cond.cleanup, label %vector.body
3526 for.cond.cleanup: ; preds = %vector.body
3530 ; Check that we don't sink a splat operand that has no chance of being folded.
; Here the splat of %x is the FIRST operand of vp.srem (the dividend), unlike
; the test where it is the second operand.
; Expected output: the splat stays a vector, materialized once before the loop
; with vmv.v.x into v8, and the loop body uses the vector-vector form vrem.vv
; rather than a .vx form.
3532 define void @sink_splat_vp_srem_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3533 ; CHECK-LABEL: sink_splat_vp_srem_commute:
3534 ; CHECK: # %bb.0: # %entry
3535 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3536 ; CHECK-NEXT: vmv.v.x v8, a1
3537 ; CHECK-NEXT: li a1, 1024
3538 ; CHECK-NEXT: .LBB64_1: # %vector.body
3539 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3540 ; CHECK-NEXT: vle32.v v9, (a0)
3541 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3542 ; CHECK-NEXT: vrem.vv v9, v8, v9, v0.t
3543 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3544 ; CHECK-NEXT: vse32.v v9, (a0)
3545 ; CHECK-NEXT: addi a1, a1, -4
3546 ; CHECK-NEXT: addi a0, a0, 16
3547 ; CHECK-NEXT: bnez a1, .LBB64_1
3548 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3551 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3552 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3553 br label %vector.body
3555 vector.body: ; preds = %vector.body, %entry
3556 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3557 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3558 %wide.load = load <4 x i32>, ptr %0, align 4
; Splat is operand 0 here: the result is splat srem load.
3559 %1 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
3560 store <4 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3561 %index.next = add nuw i64 %index, 4
3562 %2 = icmp eq i64 %index.next, 1024
3563 br i1 %2, label %for.cond.cleanup, label %vector.body
3565 for.cond.cleanup: ; preds = %vector.body
3569 declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
; Masked vp.fma in a loop: (load from %a) * splat(%x) + (load from %b), with
; the splat as the second multiplicand. The result is stored back to %a.
; Expected output: no vector splat instruction appears; the loop body uses a
; masked vfmadd.vf that reads the scalar from fa0, preceded by a vsetvli on a2
; and followed by a vsetivli back to 4 elements for the store.
3571 define void @sink_splat_vp_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
3572 ; CHECK-LABEL: sink_splat_vp_fma:
3573 ; CHECK: # %bb.0: # %entry
3574 ; CHECK-NEXT: li a3, 1024
3575 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3576 ; CHECK-NEXT: .LBB65_1: # %vector.body
3577 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3578 ; CHECK-NEXT: vle32.v v8, (a0)
3579 ; CHECK-NEXT: vle32.v v9, (a1)
3580 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3581 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
3582 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3583 ; CHECK-NEXT: vse32.v v8, (a0)
3584 ; CHECK-NEXT: addi a3, a3, -4
3585 ; CHECK-NEXT: addi a1, a1, 16
3586 ; CHECK-NEXT: addi a0, a0, 16
3587 ; CHECK-NEXT: bnez a3, .LBB65_1
3588 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3591 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3592 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3593 br label %vector.body
3595 vector.body: ; preds = %vector.body, %entry
3596 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3597 %0 = getelementptr inbounds float, ptr %a, i64 %index
3598 %wide.load = load <4 x float>, ptr %0, align 4
3599 %1 = getelementptr inbounds float, ptr %b, i64 %index
3600 %wide.load12 = load <4 x float>, ptr %1, align 4
3601 %2 = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
3602 store <4 x float> %2, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3603 %index.next = add nuw i64 %index, 4
3604 %3 = icmp eq i64 %index.next, 1024
3605 br i1 %3, label %for.cond.cleanup, label %vector.body
3607 for.cond.cleanup: ; preds = %vector.body
; Masked vp.fma in a loop with the multiplicands swapped: splat(%x) is the
; FIRST operand, the load from %a the second, and the load from %b the addend.
; Expected output: the same shape as when the splat is the second multiplicand;
; no vector splat instruction appears and the loop body uses a masked
; vfmadd.vf that reads the scalar from fa0.
3611 define void @sink_splat_vp_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
3612 ; CHECK-LABEL: sink_splat_vp_fma_commute:
3613 ; CHECK: # %bb.0: # %entry
3614 ; CHECK-NEXT: li a3, 1024
3615 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3616 ; CHECK-NEXT: .LBB66_1: # %vector.body
3617 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3618 ; CHECK-NEXT: vle32.v v8, (a0)
3619 ; CHECK-NEXT: vle32.v v9, (a1)
3620 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
3621 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
3622 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3623 ; CHECK-NEXT: vse32.v v8, (a0)
3624 ; CHECK-NEXT: addi a3, a3, -4
3625 ; CHECK-NEXT: addi a1, a1, 16
3626 ; CHECK-NEXT: addi a0, a0, 16
3627 ; CHECK-NEXT: bnez a3, .LBB66_1
3628 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3631 %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3632 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3633 br label %vector.body
3635 vector.body: ; preds = %vector.body, %entry
3636 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3637 %0 = getelementptr inbounds float, ptr %a, i64 %index
3638 %wide.load = load <4 x float>, ptr %0, align 4
3639 %1 = getelementptr inbounds float, ptr %b, i64 %index
3640 %wide.load12 = load <4 x float>, ptr %1, align 4
; Splat is operand 0 of the multiply here.
3641 %2 = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
3642 store <4 x float> %2, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3643 %index.next = add nuw i64 %index, 4
3644 %3 = icmp eq i64 %index.next, 1024
3645 br i1 %3, label %for.cond.cleanup, label %vector.body
3647 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> variant (e64, m2 in the expected output) of the splat-operand mul
; loop: each iteration multiplies the loaded vector by the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses
; vmul.vx with the scalar in a1 and bumps the pointer by 32 bytes.
3652 define void @sink_splat_mul_lmul2(ptr nocapture %a, i64 signext %x) {
3653 ; CHECK-LABEL: sink_splat_mul_lmul2:
3654 ; CHECK: # %bb.0: # %entry
3655 ; CHECK-NEXT: li a2, 1024
3656 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3657 ; CHECK-NEXT: .LBB67_1: # %vector.body
3658 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3659 ; CHECK-NEXT: vle64.v v8, (a0)
3660 ; CHECK-NEXT: vmul.vx v8, v8, a1
3661 ; CHECK-NEXT: vse64.v v8, (a0)
3662 ; CHECK-NEXT: addi a2, a2, -4
3663 ; CHECK-NEXT: addi a0, a0, 32
3664 ; CHECK-NEXT: bnez a2, .LBB67_1
3665 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3668 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
3669 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
3670 br label %vector.body
3672 vector.body: ; preds = %vector.body, %entry
3673 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3674 %0 = getelementptr inbounds i64, ptr %a, i64 %index
3675 %wide.load = load <4 x i64>, ptr %0, align 8
3676 %1 = mul <4 x i64> %wide.load, %broadcast.splat
3677 store <4 x i64> %1, ptr %0, align 8
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3678 %index.next = add nuw i64 %index, 4
3679 %2 = icmp eq i64 %index.next, 1024
3680 br i1 %2, label %for.cond.cleanup, label %vector.body
3682 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> variant (e64, m2 in the expected output) of the splat-operand add
; loop: each iteration adds the splat of %x to the loaded vector.
; Expected output: no vector splat instruction appears; the loop body uses
; vadd.vx with the scalar in a1 and bumps the pointer by 32 bytes.
3686 define void @sink_splat_add_lmul2(ptr nocapture %a, i64 signext %x) {
3687 ; CHECK-LABEL: sink_splat_add_lmul2:
3688 ; CHECK: # %bb.0: # %entry
3689 ; CHECK-NEXT: li a2, 1024
3690 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3691 ; CHECK-NEXT: .LBB68_1: # %vector.body
3692 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3693 ; CHECK-NEXT: vle64.v v8, (a0)
3694 ; CHECK-NEXT: vadd.vx v8, v8, a1
3695 ; CHECK-NEXT: vse64.v v8, (a0)
3696 ; CHECK-NEXT: addi a2, a2, -4
3697 ; CHECK-NEXT: addi a0, a0, 32
3698 ; CHECK-NEXT: bnez a2, .LBB68_1
3699 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3702 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
3703 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
3704 br label %vector.body
3706 vector.body: ; preds = %vector.body, %entry
3707 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3708 %0 = getelementptr inbounds i64, ptr %a, i64 %index
3709 %wide.load = load <4 x i64>, ptr %0, align 8
3710 %1 = add <4 x i64> %wide.load, %broadcast.splat
3711 store <4 x i64> %1, ptr %0, align 8
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3712 %index.next = add nuw i64 %index, 4
3713 %2 = icmp eq i64 %index.next, 1024
3714 br i1 %2, label %for.cond.cleanup, label %vector.body
3716 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> variant (e64, m2 in the expected output) of the splat-operand sub
; loop: each iteration subtracts the splat of %x from the loaded vector.
; Expected output: no vector splat instruction appears; the loop body uses
; vsub.vx with the scalar in a1 and bumps the pointer by 32 bytes.
3720 define void @sink_splat_sub_lmul2(ptr nocapture %a, i64 signext %x) {
3721 ; CHECK-LABEL: sink_splat_sub_lmul2:
3722 ; CHECK: # %bb.0: # %entry
3723 ; CHECK-NEXT: li a2, 1024
3724 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3725 ; CHECK-NEXT: .LBB69_1: # %vector.body
3726 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3727 ; CHECK-NEXT: vle64.v v8, (a0)
3728 ; CHECK-NEXT: vsub.vx v8, v8, a1
3729 ; CHECK-NEXT: vse64.v v8, (a0)
3730 ; CHECK-NEXT: addi a2, a2, -4
3731 ; CHECK-NEXT: addi a0, a0, 32
3732 ; CHECK-NEXT: bnez a2, .LBB69_1
3733 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3736 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
3737 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
3738 br label %vector.body
3740 vector.body: ; preds = %vector.body, %entry
3741 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3742 %0 = getelementptr inbounds i64, ptr %a, i64 %index
3743 %wide.load = load <4 x i64>, ptr %0, align 8
3744 %1 = sub <4 x i64> %wide.load, %broadcast.splat
3745 store <4 x i64> %1, ptr %0, align 8
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3746 %index.next = add nuw i64 %index, 4
3747 %2 = icmp eq i64 %index.next, 1024
3748 br i1 %2, label %for.cond.cleanup, label %vector.body
3750 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> variant (e64, m2 in the expected output) of the reversed sub loop:
; each iteration computes splat(%x) minus the loaded vector.
; Expected output: no vector splat instruction appears; the loop body uses
; vrsub.vx with the scalar in a1 and bumps the pointer by 32 bytes.
3754 define void @sink_splat_rsub_lmul2(ptr nocapture %a, i64 signext %x) {
3755 ; CHECK-LABEL: sink_splat_rsub_lmul2:
3756 ; CHECK: # %bb.0: # %entry
3757 ; CHECK-NEXT: li a2, 1024
3758 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3759 ; CHECK-NEXT: .LBB70_1: # %vector.body
3760 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3761 ; CHECK-NEXT: vle64.v v8, (a0)
3762 ; CHECK-NEXT: vrsub.vx v8, v8, a1
3763 ; CHECK-NEXT: vse64.v v8, (a0)
3764 ; CHECK-NEXT: addi a2, a2, -4
3765 ; CHECK-NEXT: addi a0, a0, 32
3766 ; CHECK-NEXT: bnez a2, .LBB70_1
3767 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3770 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
3771 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
3772 br label %vector.body
3774 vector.body: ; preds = %vector.body, %entry
3775 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3776 %0 = getelementptr inbounds i64, ptr %a, i64 %index
3777 %wide.load = load <4 x i64>, ptr %0, align 8
; Splat is the left-hand operand here: the result is splat - load.
3778 %1 = sub <4 x i64> %broadcast.splat, %wide.load
3779 store <4 x i64> %1, ptr %0, align 8
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3780 %index.next = add nuw i64 %index, 4
3781 %2 = icmp eq i64 %index.next, 1024
3782 br i1 %2, label %for.cond.cleanup, label %vector.body
3784 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> variant (e64, m2 in the expected output) of the splat-operand and
; loop: each iteration ANDs the loaded vector with the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses
; vand.vx with the scalar in a1 and bumps the pointer by 32 bytes.
3788 define void @sink_splat_and_lmul2(ptr nocapture %a, i64 signext %x) {
3789 ; CHECK-LABEL: sink_splat_and_lmul2:
3790 ; CHECK: # %bb.0: # %entry
3791 ; CHECK-NEXT: li a2, 1024
3792 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3793 ; CHECK-NEXT: .LBB71_1: # %vector.body
3794 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3795 ; CHECK-NEXT: vle64.v v8, (a0)
3796 ; CHECK-NEXT: vand.vx v8, v8, a1
3797 ; CHECK-NEXT: vse64.v v8, (a0)
3798 ; CHECK-NEXT: addi a2, a2, -4
3799 ; CHECK-NEXT: addi a0, a0, 32
3800 ; CHECK-NEXT: bnez a2, .LBB71_1
3801 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3804 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
3805 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
3806 br label %vector.body
3808 vector.body: ; preds = %vector.body, %entry
3809 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3810 %0 = getelementptr inbounds i64, ptr %a, i64 %index
3811 %wide.load = load <4 x i64>, ptr %0, align 8
3812 %1 = and <4 x i64> %wide.load, %broadcast.splat
3813 store <4 x i64> %1, ptr %0, align 8
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3814 %index.next = add nuw i64 %index, 4
3815 %2 = icmp eq i64 %index.next, 1024
3816 br i1 %2, label %for.cond.cleanup, label %vector.body
3818 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> variant (e64, m2 in the expected output) of the splat-operand or
; loop: each iteration ORs the loaded vector with the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses
; vor.vx with the scalar in a1 and bumps the pointer by 32 bytes.
3822 define void @sink_splat_or_lmul2(ptr nocapture %a, i64 signext %x) {
3823 ; CHECK-LABEL: sink_splat_or_lmul2:
3824 ; CHECK: # %bb.0: # %entry
3825 ; CHECK-NEXT: li a2, 1024
3826 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3827 ; CHECK-NEXT: .LBB72_1: # %vector.body
3828 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3829 ; CHECK-NEXT: vle64.v v8, (a0)
3830 ; CHECK-NEXT: vor.vx v8, v8, a1
3831 ; CHECK-NEXT: vse64.v v8, (a0)
3832 ; CHECK-NEXT: addi a2, a2, -4
3833 ; CHECK-NEXT: addi a0, a0, 32
3834 ; CHECK-NEXT: bnez a2, .LBB72_1
3835 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3838 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
3839 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
3840 br label %vector.body
3842 vector.body: ; preds = %vector.body, %entry
3843 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3844 %0 = getelementptr inbounds i64, ptr %a, i64 %index
3845 %wide.load = load <4 x i64>, ptr %0, align 8
3846 %1 = or <4 x i64> %wide.load, %broadcast.splat
3847 store <4 x i64> %1, ptr %0, align 8
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3848 %index.next = add nuw i64 %index, 4
3849 %2 = icmp eq i64 %index.next, 1024
3850 br i1 %2, label %for.cond.cleanup, label %vector.body
3852 for.cond.cleanup: ; preds = %vector.body
; <4 x i64> variant (e64, m2 in the expected output) of the splat-operand xor
; loop: each iteration XORs the loaded vector with the splat of %x.
; Expected output: no vector splat instruction appears; the loop body uses
; vxor.vx with the scalar in a1 and bumps the pointer by 32 bytes.
3856 define void @sink_splat_xor_lmul2(ptr nocapture %a, i64 signext %x) {
3857 ; CHECK-LABEL: sink_splat_xor_lmul2:
3858 ; CHECK: # %bb.0: # %entry
3859 ; CHECK-NEXT: li a2, 1024
3860 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3861 ; CHECK-NEXT: .LBB73_1: # %vector.body
3862 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3863 ; CHECK-NEXT: vle64.v v8, (a0)
3864 ; CHECK-NEXT: vxor.vx v8, v8, a1
3865 ; CHECK-NEXT: vse64.v v8, (a0)
3866 ; CHECK-NEXT: addi a2, a2, -4
3867 ; CHECK-NEXT: addi a0, a0, 32
3868 ; CHECK-NEXT: bnez a2, .LBB73_1
3869 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3872 %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
3873 %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
3874 br label %vector.body
3876 vector.body: ; preds = %vector.body, %entry
3877 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3878 %0 = getelementptr inbounds i64, ptr %a, i64 %index
3879 %wide.load = load <4 x i64>, ptr %0, align 8
3880 %1 = xor <4 x i64> %wide.load, %broadcast.splat
3881 store <4 x i64> %1, ptr %0, align 8
; Loop runs until %index reaches 1024, advancing 4 elements per iteration.
3882 %index.next = add nuw i64 %index, 4
3883 %2 = icmp eq i64 %index.next, 1024
3884 br i1 %2, label %for.cond.cleanup, label %vector.body
3886 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> variant (e32, m8 in the expected output) of the splat-operand mul
; loop: each iteration multiplies the loaded vector by the splat of %x.
; Expected output: no vector splat instruction appears; the element count 32 is
; loaded into a3 before the loop, the vsetvli sits after the loop label, and
; the loop body uses vmul.vx with the scalar in a1.
; NOTE(review): each iteration accesses 32 elements but %index advances by only
; 4 (a 16-byte pointer bump), so consecutive accesses overlap; presumably
; irrelevant to what is being tested, confirm.
3890 define void @sink_splat_mul_lmul8(ptr nocapture %a, i32 signext %x) {
3891 ; CHECK-LABEL: sink_splat_mul_lmul8:
3892 ; CHECK: # %bb.0: # %entry
3893 ; CHECK-NEXT: li a2, 1024
3894 ; CHECK-NEXT: li a3, 32
3895 ; CHECK-NEXT: .LBB74_1: # %vector.body
3896 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3897 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
3898 ; CHECK-NEXT: vle32.v v8, (a0)
3899 ; CHECK-NEXT: vmul.vx v8, v8, a1
3900 ; CHECK-NEXT: vse32.v v8, (a0)
3901 ; CHECK-NEXT: addi a2, a2, -4
3902 ; CHECK-NEXT: addi a0, a0, 16
3903 ; CHECK-NEXT: bnez a2, .LBB74_1
3904 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3907 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
3908 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
3909 br label %vector.body
3911 vector.body: ; preds = %vector.body, %entry
3912 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3913 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3914 %wide.load = load <32 x i32>, ptr %0, align 4
3915 %1 = mul <32 x i32> %wide.load, %broadcast.splat
3916 store <32 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing by 4 per iteration.
3917 %index.next = add nuw i64 %index, 4
3918 %2 = icmp eq i64 %index.next, 1024
3919 br i1 %2, label %for.cond.cleanup, label %vector.body
3921 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> variant (e32, m8 in the expected output) of the splat-operand add
; loop: each iteration adds the splat of %x to the loaded vector.
; Expected output: no vector splat instruction appears; the element count 32 is
; loaded into a3 before the loop, the vsetvli sits after the loop label, and
; the loop body uses vadd.vx with the scalar in a1.
; NOTE(review): each iteration accesses 32 elements but %index advances by only
; 4 (a 16-byte pointer bump), so consecutive accesses overlap; presumably
; irrelevant to what is being tested, confirm.
3925 define void @sink_splat_add_lmul8(ptr nocapture %a, i32 signext %x) {
3926 ; CHECK-LABEL: sink_splat_add_lmul8:
3927 ; CHECK: # %bb.0: # %entry
3928 ; CHECK-NEXT: li a2, 1024
3929 ; CHECK-NEXT: li a3, 32
3930 ; CHECK-NEXT: .LBB75_1: # %vector.body
3931 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3932 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
3933 ; CHECK-NEXT: vle32.v v8, (a0)
3934 ; CHECK-NEXT: vadd.vx v8, v8, a1
3935 ; CHECK-NEXT: vse32.v v8, (a0)
3936 ; CHECK-NEXT: addi a2, a2, -4
3937 ; CHECK-NEXT: addi a0, a0, 16
3938 ; CHECK-NEXT: bnez a2, .LBB75_1
3939 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3942 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
3943 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
3944 br label %vector.body
3946 vector.body: ; preds = %vector.body, %entry
3947 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3948 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3949 %wide.load = load <32 x i32>, ptr %0, align 4
3950 %1 = add <32 x i32> %wide.load, %broadcast.splat
3951 store <32 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing by 4 per iteration.
3952 %index.next = add nuw i64 %index, 4
3953 %2 = icmp eq i64 %index.next, 1024
3954 br i1 %2, label %for.cond.cleanup, label %vector.body
3956 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> variant (e32, m8 in the expected output) of the splat-operand sub
; loop: each iteration subtracts the splat of %x from the loaded vector.
; Expected output: no vector splat instruction appears; the element count 32 is
; loaded into a3 before the loop, the vsetvli sits after the loop label, and
; the loop body uses vsub.vx with the scalar in a1.
; NOTE(review): each iteration accesses 32 elements but %index advances by only
; 4 (a 16-byte pointer bump), so consecutive accesses overlap; presumably
; irrelevant to what is being tested, confirm.
3960 define void @sink_splat_sub_lmul8(ptr nocapture %a, i32 signext %x) {
3961 ; CHECK-LABEL: sink_splat_sub_lmul8:
3962 ; CHECK: # %bb.0: # %entry
3963 ; CHECK-NEXT: li a2, 1024
3964 ; CHECK-NEXT: li a3, 32
3965 ; CHECK-NEXT: .LBB76_1: # %vector.body
3966 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3967 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
3968 ; CHECK-NEXT: vle32.v v8, (a0)
3969 ; CHECK-NEXT: vsub.vx v8, v8, a1
3970 ; CHECK-NEXT: vse32.v v8, (a0)
3971 ; CHECK-NEXT: addi a2, a2, -4
3972 ; CHECK-NEXT: addi a0, a0, 16
3973 ; CHECK-NEXT: bnez a2, .LBB76_1
3974 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
3977 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
3978 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
3979 br label %vector.body
3981 vector.body: ; preds = %vector.body, %entry
3982 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3983 %0 = getelementptr inbounds i32, ptr %a, i64 %index
3984 %wide.load = load <32 x i32>, ptr %0, align 4
3985 %1 = sub <32 x i32> %wide.load, %broadcast.splat
3986 store <32 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing by 4 per iteration.
3987 %index.next = add nuw i64 %index, 4
3988 %2 = icmp eq i64 %index.next, 1024
3989 br i1 %2, label %for.cond.cleanup, label %vector.body
3991 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> variant (e32, m8 in the expected output) of the reversed sub loop:
; each iteration computes splat(%x) minus the loaded vector.
; Expected output: no vector splat instruction appears; the element count 32 is
; loaded into a3 before the loop, the vsetvli sits after the loop label, and
; the loop body uses vrsub.vx with the scalar in a1.
; NOTE(review): each iteration accesses 32 elements but %index advances by only
; 4 (a 16-byte pointer bump), so consecutive accesses overlap; presumably
; irrelevant to what is being tested, confirm.
3995 define void @sink_splat_rsub_lmul8(ptr nocapture %a, i32 signext %x) {
3996 ; CHECK-LABEL: sink_splat_rsub_lmul8:
3997 ; CHECK: # %bb.0: # %entry
3998 ; CHECK-NEXT: li a2, 1024
3999 ; CHECK-NEXT: li a3, 32
4000 ; CHECK-NEXT: .LBB77_1: # %vector.body
4001 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4002 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4003 ; CHECK-NEXT: vle32.v v8, (a0)
4004 ; CHECK-NEXT: vrsub.vx v8, v8, a1
4005 ; CHECK-NEXT: vse32.v v8, (a0)
4006 ; CHECK-NEXT: addi a2, a2, -4
4007 ; CHECK-NEXT: addi a0, a0, 16
4008 ; CHECK-NEXT: bnez a2, .LBB77_1
4009 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
4012 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4013 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4014 br label %vector.body
4016 vector.body: ; preds = %vector.body, %entry
4017 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4018 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4019 %wide.load = load <32 x i32>, ptr %0, align 4
; Splat is the left-hand operand here: the result is splat - load.
4020 %1 = sub <32 x i32> %broadcast.splat, %wide.load
4021 store <32 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing by 4 per iteration.
4022 %index.next = add nuw i64 %index, 4
4023 %2 = icmp eq i64 %index.next, 1024
4024 br i1 %2, label %for.cond.cleanup, label %vector.body
4026 for.cond.cleanup: ; preds = %vector.body
; <32 x i32> variant (e32, m8 in the expected output) of the splat-operand and
; loop: each iteration ANDs the loaded vector with the splat of %x.
; Expected output: no vector splat instruction appears; the element count 32 is
; loaded into a3 before the loop, the vsetvli sits after the loop label, and
; the loop body uses vand.vx with the scalar in a1.
; NOTE(review): each iteration accesses 32 elements but %index advances by only
; 4 (a 16-byte pointer bump), so consecutive accesses overlap; presumably
; irrelevant to what is being tested, confirm.
4030 define void @sink_splat_and_lmul8(ptr nocapture %a, i32 signext %x) {
4031 ; CHECK-LABEL: sink_splat_and_lmul8:
4032 ; CHECK: # %bb.0: # %entry
4033 ; CHECK-NEXT: li a2, 1024
4034 ; CHECK-NEXT: li a3, 32
4035 ; CHECK-NEXT: .LBB78_1: # %vector.body
4036 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4037 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4038 ; CHECK-NEXT: vle32.v v8, (a0)
4039 ; CHECK-NEXT: vand.vx v8, v8, a1
4040 ; CHECK-NEXT: vse32.v v8, (a0)
4041 ; CHECK-NEXT: addi a2, a2, -4
4042 ; CHECK-NEXT: addi a0, a0, 16
4043 ; CHECK-NEXT: bnez a2, .LBB78_1
4044 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; The splat is built outside the loop body (insertelement + shufflevector).
4047 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4048 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4049 br label %vector.body
4051 vector.body: ; preds = %vector.body, %entry
4052 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4053 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4054 %wide.load = load <32 x i32>, ptr %0, align 4
4055 %1 = and <32 x i32> %wide.load, %broadcast.splat
4056 store <32 x i32> %1, ptr %0, align 4
; Loop runs until %index reaches 1024, advancing by 4 per iteration.
4057 %index.next = add nuw i64 %index, 4
4058 %2 = icmp eq i64 %index.next, 1024
4059 br i1 %2, label %for.cond.cleanup, label %vector.body
4061 for.cond.cleanup: ; preds = %vector.body
; Bitwise OR with a splatted scalar on <32 x i32>: the splat of %x is built in
; the entry block and used as the second operand of `or` inside the loop. The
; expected assembly performs the operation with vor.vx taking the scalar
; straight from a1, under an e32/m8 vsetvli (AVL 32 held in a3) inside the
; loop body; no separate vector splat instruction appears in the expected
; output.
; The loop counts %index from 0 to 1024 in steps of 4.
; Review note: %index advances by 4 i32 elements per iteration (16 bytes, see
; `addi a0, a0, 16`) while each iteration loads and stores 32 elements, so
; consecutive iterations touch overlapping memory. Presumably inherited from
; the <4 x i32> variants of this test -- confirm before relying on the loop's
; memory access pattern.
4065 define void @sink_splat_or_lmul8(ptr nocapture %a, i32 signext %x) {
4066 ; CHECK-LABEL: sink_splat_or_lmul8:
4067 ; CHECK: # %bb.0: # %entry
4068 ; CHECK-NEXT: li a2, 1024
4069 ; CHECK-NEXT: li a3, 32
4070 ; CHECK-NEXT: .LBB79_1: # %vector.body
4071 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4072 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4073 ; CHECK-NEXT: vle32.v v8, (a0)
4074 ; CHECK-NEXT: vor.vx v8, v8, a1
4075 ; CHECK-NEXT: vse32.v v8, (a0)
4076 ; CHECK-NEXT: addi a2, a2, -4
4077 ; CHECK-NEXT: addi a0, a0, 16
4078 ; CHECK-NEXT: bnez a2, .LBB79_1
4079 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4082 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4083 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4084 br label %vector.body
4086 vector.body: ; preds = %vector.body, %entry
4087 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4088 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4089 %wide.load = load <32 x i32>, ptr %0, align 4
4090 %1 = or <32 x i32> %wide.load, %broadcast.splat
4091 store <32 x i32> %1, ptr %0, align 4
4092 %index.next = add nuw i64 %index, 4
4093 %2 = icmp eq i64 %index.next, 1024
4094 br i1 %2, label %for.cond.cleanup, label %vector.body
4096 for.cond.cleanup: ; preds = %vector.body
; Bitwise XOR with a splatted scalar on <32 x i32>: the splat of %x is built in
; the entry block and used as the second operand of `xor` inside the loop. The
; expected assembly performs the operation with vxor.vx taking the scalar
; straight from a1, under an e32/m8 vsetvli (AVL 32 held in a3) inside the
; loop body; no separate vector splat instruction appears in the expected
; output.
; The loop counts %index from 0 to 1024 in steps of 4.
; Review note: %index advances by 4 i32 elements per iteration (16 bytes, see
; `addi a0, a0, 16`) while each iteration loads and stores 32 elements, so
; consecutive iterations touch overlapping memory. Presumably inherited from
; the <4 x i32> variants of this test -- confirm before relying on the loop's
; memory access pattern.
4100 define void @sink_splat_xor_lmul8(ptr nocapture %a, i32 signext %x) {
4101 ; CHECK-LABEL: sink_splat_xor_lmul8:
4102 ; CHECK: # %bb.0: # %entry
4103 ; CHECK-NEXT: li a2, 1024
4104 ; CHECK-NEXT: li a3, 32
4105 ; CHECK-NEXT: .LBB80_1: # %vector.body
4106 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4107 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
4108 ; CHECK-NEXT: vle32.v v8, (a0)
4109 ; CHECK-NEXT: vxor.vx v8, v8, a1
4110 ; CHECK-NEXT: vse32.v v8, (a0)
4111 ; CHECK-NEXT: addi a2, a2, -4
4112 ; CHECK-NEXT: addi a0, a0, 16
4113 ; CHECK-NEXT: bnez a2, .LBB80_1
4114 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4117 %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4118 %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4119 br label %vector.body
4121 vector.body: ; preds = %vector.body, %entry
4122 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4123 %0 = getelementptr inbounds i32, ptr %a, i64 %index
4124 %wide.load = load <32 x i32>, ptr %0, align 4
4125 %1 = xor <32 x i32> %wide.load, %broadcast.splat
4126 store <32 x i32> %1, ptr %0, align 4
4127 %index.next = add nuw i64 %index, 4
4128 %2 = icmp eq i64 %index.next, 1024
4129 br i1 %2, label %for.cond.cleanup, label %vector.body
4131 for.cond.cleanup: ; preds = %vector.body
; Multiply by a splatted scalar on <2 x i32>: the splat of %x is built in the
; entry block and used as the second operand of `mul` inside the loop. The
; expected assembly performs the operation with vmul.vx taking the scalar
; straight from a1; a single `vsetivli zero, 2, e32, mf2` is placed before the
; loop label and no separate vector splat instruction appears in the expected
; output.
; The loop counts %index from 0 to 1024 in steps of 4. The getelementptr is
; over i64, so the pointer moves 32 bytes per iteration (`addi a0, a0, 32`)
; while each iteration loads and stores one 8-byte <2 x i32>.
4135 define void @sink_splat_mul_lmulmf2(ptr nocapture %a, i32 signext %x) {
4136 ; CHECK-LABEL: sink_splat_mul_lmulmf2:
4137 ; CHECK: # %bb.0: # %entry
4138 ; CHECK-NEXT: li a2, 1024
4139 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4140 ; CHECK-NEXT: .LBB81_1: # %vector.body
4141 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4142 ; CHECK-NEXT: vle32.v v8, (a0)
4143 ; CHECK-NEXT: vmul.vx v8, v8, a1
4144 ; CHECK-NEXT: vse32.v v8, (a0)
4145 ; CHECK-NEXT: addi a2, a2, -4
4146 ; CHECK-NEXT: addi a0, a0, 32
4147 ; CHECK-NEXT: bnez a2, .LBB81_1
4148 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4151 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4152 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4153 br label %vector.body
4155 vector.body: ; preds = %vector.body, %entry
4156 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4157 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4158 %wide.load = load <2 x i32>, ptr %0, align 8
4159 %1 = mul <2 x i32> %wide.load, %broadcast.splat
4160 store <2 x i32> %1, ptr %0, align 8
4161 %index.next = add nuw i64 %index, 4
4162 %2 = icmp eq i64 %index.next, 1024
4163 br i1 %2, label %for.cond.cleanup, label %vector.body
4165 for.cond.cleanup: ; preds = %vector.body
; Add a splatted scalar on <2 x i32>: the splat of %x is built in the entry
; block and used as the second operand of `add` inside the loop. The expected
; assembly performs the operation with vadd.vx taking the scalar straight from
; a1; a single `vsetivli zero, 2, e32, mf2` is placed before the loop label and
; no separate vector splat instruction appears in the expected output.
; The loop counts %index from 0 to 1024 in steps of 4. The getelementptr is
; over i64, so the pointer moves 32 bytes per iteration (`addi a0, a0, 32`)
; while each iteration loads and stores one 8-byte <2 x i32>.
4169 define void @sink_splat_add_lmulmf2(ptr nocapture %a, i32 signext %x) {
4170 ; CHECK-LABEL: sink_splat_add_lmulmf2:
4171 ; CHECK: # %bb.0: # %entry
4172 ; CHECK-NEXT: li a2, 1024
4173 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4174 ; CHECK-NEXT: .LBB82_1: # %vector.body
4175 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4176 ; CHECK-NEXT: vle32.v v8, (a0)
4177 ; CHECK-NEXT: vadd.vx v8, v8, a1
4178 ; CHECK-NEXT: vse32.v v8, (a0)
4179 ; CHECK-NEXT: addi a2, a2, -4
4180 ; CHECK-NEXT: addi a0, a0, 32
4181 ; CHECK-NEXT: bnez a2, .LBB82_1
4182 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4185 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4186 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4187 br label %vector.body
4189 vector.body: ; preds = %vector.body, %entry
4190 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4191 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4192 %wide.load = load <2 x i32>, ptr %0, align 8
4193 %1 = add <2 x i32> %wide.load, %broadcast.splat
4194 store <2 x i32> %1, ptr %0, align 8
4195 %index.next = add nuw i64 %index, 4
4196 %2 = icmp eq i64 %index.next, 1024
4197 br i1 %2, label %for.cond.cleanup, label %vector.body
4199 for.cond.cleanup: ; preds = %vector.body
; Subtract a splatted scalar on <2 x i32>: the splat of %x is built in the
; entry block and used as the second operand of `sub` inside the loop. The
; expected assembly performs the operation with vsub.vx taking the scalar
; straight from a1; a single `vsetivli zero, 2, e32, mf2` is placed before the
; loop label and no separate vector splat instruction appears in the expected
; output.
; The loop counts %index from 0 to 1024 in steps of 4. The getelementptr is
; over i64, so the pointer moves 32 bytes per iteration (`addi a0, a0, 32`)
; while each iteration loads and stores one 8-byte <2 x i32>.
4203 define void @sink_splat_sub_lmulmf2(ptr nocapture %a, i32 signext %x) {
4204 ; CHECK-LABEL: sink_splat_sub_lmulmf2:
4205 ; CHECK: # %bb.0: # %entry
4206 ; CHECK-NEXT: li a2, 1024
4207 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4208 ; CHECK-NEXT: .LBB83_1: # %vector.body
4209 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4210 ; CHECK-NEXT: vle32.v v8, (a0)
4211 ; CHECK-NEXT: vsub.vx v8, v8, a1
4212 ; CHECK-NEXT: vse32.v v8, (a0)
4213 ; CHECK-NEXT: addi a2, a2, -4
4214 ; CHECK-NEXT: addi a0, a0, 32
4215 ; CHECK-NEXT: bnez a2, .LBB83_1
4216 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4219 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4220 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4221 br label %vector.body
4223 vector.body: ; preds = %vector.body, %entry
4224 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4225 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4226 %wide.load = load <2 x i32>, ptr %0, align 8
4227 %1 = sub <2 x i32> %wide.load, %broadcast.splat
4228 store <2 x i32> %1, ptr %0, align 8
4229 %index.next = add nuw i64 %index, 4
4230 %2 = icmp eq i64 %index.next, 1024
4231 br i1 %2, label %for.cond.cleanup, label %vector.body
4233 for.cond.cleanup: ; preds = %vector.body
; Reverse subtract with a splatted scalar on <2 x i32>: the splat of %x is the
; first operand of the `sub` inside the loop. The expected assembly performs
; the operation with vrsub.vx taking the scalar straight from a1; a single
; `vsetivli zero, 2, e32, mf2` is placed before the loop label and no separate
; vector splat instruction appears in the expected output.
; The loop counts %index from 0 to 1024 in steps of 4. The getelementptr is
; over i64, so the pointer moves 32 bytes per iteration (`addi a0, a0, 32`)
; while each iteration loads and stores one 8-byte <2 x i32>.
4237 define void @sink_splat_rsub_lmulmf2(ptr nocapture %a, i32 signext %x) {
4238 ; CHECK-LABEL: sink_splat_rsub_lmulmf2:
4239 ; CHECK: # %bb.0: # %entry
4240 ; CHECK-NEXT: li a2, 1024
4241 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4242 ; CHECK-NEXT: .LBB84_1: # %vector.body
4243 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4244 ; CHECK-NEXT: vle32.v v8, (a0)
4245 ; CHECK-NEXT: vrsub.vx v8, v8, a1
4246 ; CHECK-NEXT: vse32.v v8, (a0)
4247 ; CHECK-NEXT: addi a2, a2, -4
4248 ; CHECK-NEXT: addi a0, a0, 32
4249 ; CHECK-NEXT: bnez a2, .LBB84_1
4250 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4253 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4254 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4255 br label %vector.body
4257 vector.body: ; preds = %vector.body, %entry
4258 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4259 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4260 %wide.load = load <2 x i32>, ptr %0, align 8
4261 %1 = sub <2 x i32> %broadcast.splat, %wide.load
4262 store <2 x i32> %1, ptr %0, align 8
4263 %index.next = add nuw i64 %index, 4
4264 %2 = icmp eq i64 %index.next, 1024
4265 br i1 %2, label %for.cond.cleanup, label %vector.body
4267 for.cond.cleanup: ; preds = %vector.body
; Bitwise AND with a splatted scalar on <2 x i32>: the splat of %x is built in
; the entry block and used as the second operand of `and` inside the loop. The
; expected assembly performs the operation with vand.vx taking the scalar
; straight from a1; a single `vsetivli zero, 2, e32, mf2` is placed before the
; loop label and no separate vector splat instruction appears in the expected
; output.
; The loop counts %index from 0 to 1024 in steps of 4. The getelementptr is
; over i64, so the pointer moves 32 bytes per iteration (`addi a0, a0, 32`)
; while each iteration loads and stores one 8-byte <2 x i32>.
4271 define void @sink_splat_and_lmulmf2(ptr nocapture %a, i32 signext %x) {
4272 ; CHECK-LABEL: sink_splat_and_lmulmf2:
4273 ; CHECK: # %bb.0: # %entry
4274 ; CHECK-NEXT: li a2, 1024
4275 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4276 ; CHECK-NEXT: .LBB85_1: # %vector.body
4277 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4278 ; CHECK-NEXT: vle32.v v8, (a0)
4279 ; CHECK-NEXT: vand.vx v8, v8, a1
4280 ; CHECK-NEXT: vse32.v v8, (a0)
4281 ; CHECK-NEXT: addi a2, a2, -4
4282 ; CHECK-NEXT: addi a0, a0, 32
4283 ; CHECK-NEXT: bnez a2, .LBB85_1
4284 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4287 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4288 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4289 br label %vector.body
4291 vector.body: ; preds = %vector.body, %entry
4292 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4293 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4294 %wide.load = load <2 x i32>, ptr %0, align 8
4295 %1 = and <2 x i32> %wide.load, %broadcast.splat
4296 store <2 x i32> %1, ptr %0, align 8
4297 %index.next = add nuw i64 %index, 4
4298 %2 = icmp eq i64 %index.next, 1024
4299 br i1 %2, label %for.cond.cleanup, label %vector.body
4301 for.cond.cleanup: ; preds = %vector.body
; Bitwise OR with a splatted scalar on <2 x i32>: the splat of %x is built in
; the entry block and used as the second operand of `or` inside the loop. The
; expected assembly performs the operation with vor.vx taking the scalar
; straight from a1; a single `vsetivli zero, 2, e32, mf2` is placed before the
; loop label and no separate vector splat instruction appears in the expected
; output.
; The loop counts %index from 0 to 1024 in steps of 4. The getelementptr is
; over i64, so the pointer moves 32 bytes per iteration (`addi a0, a0, 32`)
; while each iteration loads and stores one 8-byte <2 x i32>.
4305 define void @sink_splat_or_lmulmf2(ptr nocapture %a, i32 signext %x) {
4306 ; CHECK-LABEL: sink_splat_or_lmulmf2:
4307 ; CHECK: # %bb.0: # %entry
4308 ; CHECK-NEXT: li a2, 1024
4309 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4310 ; CHECK-NEXT: .LBB86_1: # %vector.body
4311 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4312 ; CHECK-NEXT: vle32.v v8, (a0)
4313 ; CHECK-NEXT: vor.vx v8, v8, a1
4314 ; CHECK-NEXT: vse32.v v8, (a0)
4315 ; CHECK-NEXT: addi a2, a2, -4
4316 ; CHECK-NEXT: addi a0, a0, 32
4317 ; CHECK-NEXT: bnez a2, .LBB86_1
4318 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4321 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4322 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4323 br label %vector.body
4325 vector.body: ; preds = %vector.body, %entry
4326 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4327 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4328 %wide.load = load <2 x i32>, ptr %0, align 8
4329 %1 = or <2 x i32> %wide.load, %broadcast.splat
4330 store <2 x i32> %1, ptr %0, align 8
4331 %index.next = add nuw i64 %index, 4
4332 %2 = icmp eq i64 %index.next, 1024
4333 br i1 %2, label %for.cond.cleanup, label %vector.body
4335 for.cond.cleanup: ; preds = %vector.body
; Bitwise XOR with a splatted scalar on <2 x i32>: the splat of %x is built in
; the entry block and used as the second operand of `xor` inside the loop. The
; expected assembly performs the operation with vxor.vx taking the scalar
; straight from a1; a single `vsetivli zero, 2, e32, mf2` is placed before the
; loop label and no separate vector splat instruction appears in the expected
; output.
; The loop counts %index from 0 to 1024 in steps of 4. The getelementptr is
; over i64, so the pointer moves 32 bytes per iteration (`addi a0, a0, 32`)
; while each iteration loads and stores one 8-byte <2 x i32>.
4339 define void @sink_splat_xor_lmulmf2(ptr nocapture %a, i32 signext %x) {
4340 ; CHECK-LABEL: sink_splat_xor_lmulmf2:
4341 ; CHECK: # %bb.0: # %entry
4342 ; CHECK-NEXT: li a2, 1024
4343 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4344 ; CHECK-NEXT: .LBB87_1: # %vector.body
4345 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4346 ; CHECK-NEXT: vle32.v v8, (a0)
4347 ; CHECK-NEXT: vxor.vx v8, v8, a1
4348 ; CHECK-NEXT: vse32.v v8, (a0)
4349 ; CHECK-NEXT: addi a2, a2, -4
4350 ; CHECK-NEXT: addi a0, a0, 32
4351 ; CHECK-NEXT: bnez a2, .LBB87_1
4352 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4355 %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4356 %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4357 br label %vector.body
4359 vector.body: ; preds = %vector.body, %entry
4360 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4361 %0 = getelementptr inbounds i64, ptr %a, i64 %index
4362 %wide.load = load <2 x i32>, ptr %0, align 8
4363 %1 = xor <2 x i32> %wide.load, %broadcast.splat
4364 store <2 x i32> %1, ptr %0, align 8
4365 %index.next = add nuw i64 %index, 4
4366 %2 = icmp eq i64 %index.next, 1024
4367 br i1 %2, label %for.cond.cleanup, label %vector.body
4369 for.cond.cleanup: ; preds = %vector.body
; Splatted scalar as an operand of a vector-predicated integer compare.
; The splat of %y is built in the entry block and passed as the second vector
; operand of llvm.vp.icmp (predicate "eq") together with mask %m and explicit
; vector length %vl; the resulting <4 x i1> masks a store of zeros back to the
; address that was just loaded.
; Expected assembly: the compare is vmseq.vx taking the scalar straight from
; a1 and masked by v0.t, under `vsetvli zero, a2` (a2 presumably holds %vl, the
; third integer argument -- confirm against the calling convention). The load
; and the masked store run under `vsetivli zero, 4`. The incoming v0 is copied
; to v8 before the loop and copied back into v0 on every iteration, since the
; compare writes its result into v0. The zero vector stored by the loop is
; materialized once in v9 before the loop label.
; The loop counts %index from 0 to 1024 in steps of 4 i32 elements.
4373 declare <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32>, <4 x i32>, metadata, <4 x i1>, i32)
4375 define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i32 zeroext %vl) {
4376 ; CHECK-LABEL: sink_splat_vp_icmp:
4377 ; CHECK: # %bb.0: # %entry
4378 ; CHECK-NEXT: vmv1r.v v8, v0
4379 ; CHECK-NEXT: li a3, 1024
4380 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4381 ; CHECK-NEXT: vmv.v.i v9, 0
4382 ; CHECK-NEXT: .LBB88_1: # %vector.body
4383 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4384 ; CHECK-NEXT: vle32.v v10, (a0)
4385 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
4386 ; CHECK-NEXT: vmv1r.v v0, v8
4387 ; CHECK-NEXT: vmseq.vx v0, v10, a1, v0.t
4388 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4389 ; CHECK-NEXT: vse32.v v9, (a0), v0.t
4390 ; CHECK-NEXT: addi a3, a3, -4
4391 ; CHECK-NEXT: addi a0, a0, 16
4392 ; CHECK-NEXT: bnez a3, .LBB88_1
4393 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4396 %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %y, i32 0
4397 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4398 br label %vector.body
4400 vector.body: ; preds = %vector.body, %entry
4401 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4402 %0 = getelementptr inbounds i32, ptr %x, i64 %index
4403 %wide.load = load <4 x i32>, ptr %0, align 4
4404 %1 = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, metadata !"eq", <4 x i1> %m, i32 %vl)
4405 call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
4406 %index.next = add nuw i64 %index, 4
4407 %2 = icmp eq i64 %index.next, 1024
4408 br i1 %2, label %for.cond.cleanup, label %vector.body
4410 for.cond.cleanup: ; preds = %vector.body
; Splatted scalar as an operand of a vector-predicated floating-point compare.
; The splat of the float %y is built in the entry block and passed as the
; second vector operand of llvm.vp.fcmp (predicate "oeq") together with mask %m
; and explicit vector length %vl; the resulting <4 x i1> masks a store of zeros
; back to the address that was just loaded.
; Expected assembly: the compare is vmfeq.vf taking the scalar straight from
; fa0 and masked by v0.t, under `vsetvli zero, a1` (a1 presumably holds %vl,
; the second integer argument -- confirm against the calling convention). The
; load and the masked store run under `vsetivli zero, 4`. The incoming v0 is
; copied to v8 before the loop and copied back into v0 on every iteration,
; since the compare writes its result into v0. The zero vector stored by the
; loop is materialized once in v9 before the loop label.
; The loop counts %index from 0 to 1024 in steps of 4 float elements.
4414 declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)
4416 define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zeroext %vl) {
4417 ; CHECK-LABEL: sink_splat_vp_fcmp:
4418 ; CHECK: # %bb.0: # %entry
4419 ; CHECK-NEXT: vmv1r.v v8, v0
4420 ; CHECK-NEXT: li a2, 1024
4421 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4422 ; CHECK-NEXT: vmv.v.i v9, 0
4423 ; CHECK-NEXT: .LBB89_1: # %vector.body
4424 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4425 ; CHECK-NEXT: vle32.v v10, (a0)
4426 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
4427 ; CHECK-NEXT: vmv1r.v v0, v8
4428 ; CHECK-NEXT: vmfeq.vf v0, v10, fa0, v0.t
4429 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4430 ; CHECK-NEXT: vse32.v v9, (a0), v0.t
4431 ; CHECK-NEXT: addi a2, a2, -4
4432 ; CHECK-NEXT: addi a0, a0, 16
4433 ; CHECK-NEXT: bnez a2, .LBB89_1
4434 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
4437 %broadcast.splatinsert = insertelement <4 x float> poison, float %y, i32 0
4438 %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
4439 br label %vector.body
4441 vector.body: ; preds = %vector.body, %entry
4442 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4443 %0 = getelementptr inbounds float, ptr %x, i64 %index
4444 %wide.load = load <4 x float>, ptr %0, align 4
4445 %1 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, metadata !"oeq", <4 x i1> %m, i32 %vl)
4446 call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
4447 %index.next = add nuw i64 %index, 4
4448 %2 = icmp eq i64 %index.next, 1024
4449 br i1 %2, label %for.cond.cleanup, label %vector.body
4451 for.cond.cleanup: ; preds = %vector.body