1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s
; All functions in this file are compiled for the AArch64 Linux (GNU) target.
6 target triple = "aarch64-unknown-linux-gnu"
; Test: AND reduction of the <4 x i8> argument %a through
; @llvm.vector.reduce.and.v4i8.
; Expected code: ptrue over 4 .h lanes (vl4), ANDV across z0.h, then fmov of
; the reduced value into w0. Note the reduction is expected on .h lanes, not .b.
; NOTE(review): presumably the i8 elements are widened during legalization - confirm.
12 define i8 @andv_v4i8(<4 x i8> %a) {
13 ; CHECK-LABEL: andv_v4i8:
15 ; CHECK-NEXT: ptrue p0.h, vl4
16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
17 ; CHECK-NEXT: andv h0, p0, z0.h
18 ; CHECK-NEXT: fmov w0, s0
20 %res = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a)
; Test: AND reduction of the <8 x i8> argument %a through
; @llvm.vector.reduce.and.v8i8.
; Expected code: ptrue over 8 .b lanes (vl8), ANDV across z0.b, then fmov of
; the reduced value into w0.
24 define i8 @andv_v8i8(<8 x i8> %a) {
25 ; CHECK-LABEL: andv_v8i8:
27 ; CHECK-NEXT: ptrue p0.b, vl8
28 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
29 ; CHECK-NEXT: andv b0, p0, z0.b
30 ; CHECK-NEXT: fmov w0, s0
32 %res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
; Test: AND reduction of the <16 x i8> argument %a through
; @llvm.vector.reduce.and.v16i8.
; Expected code: ptrue over 16 .b lanes (vl16), ANDV across z0.b, then fmov of
; the reduced value into w0.
36 define i8 @andv_v16i8(<16 x i8> %a) {
37 ; CHECK-LABEL: andv_v16i8:
39 ; CHECK-NEXT: ptrue p0.b, vl16
40 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
41 ; CHECK-NEXT: andv b0, p0, z0.b
42 ; CHECK-NEXT: fmov w0, s0
44 %res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
; Test: AND reduction of a <32 x i8> value loaded from pointer %a through
; @llvm.vector.reduce.and.v32i8.
; Expected code: one ldp fetches the value as q1/q0, a vector AND merges the
; two registers, and a single ANDV over 16 .b lanes (vl16) reduces the result
; before fmov moves it into w0.
48 define i8 @andv_v32i8(ptr %a) {
49 ; CHECK-LABEL: andv_v32i8:
51 ; CHECK-NEXT: ptrue p0.b, vl16
52 ; CHECK-NEXT: ldp q1, q0, [x0]
53 ; CHECK-NEXT: and z0.d, z1.d, z0.d
54 ; CHECK-NEXT: andv b0, p0, z0.b
55 ; CHECK-NEXT: fmov w0, s0
57 %op = load <32 x i8>, ptr %a
58 %res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op)
; Test: AND reduction of the <2 x i16> argument %a through
; @llvm.vector.reduce.and.v2i16.
; Expected code: ptrue over 2 .s lanes (vl2), ANDV across z0.s, then fmov of
; the reduced value into w0. Note the reduction is expected on .s lanes, not .h.
; NOTE(review): presumably the i16 elements are widened during legalization - confirm.
62 define i16 @andv_v2i16(<2 x i16> %a) {
63 ; CHECK-LABEL: andv_v2i16:
65 ; CHECK-NEXT: ptrue p0.s, vl2
66 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
67 ; CHECK-NEXT: andv s0, p0, z0.s
68 ; CHECK-NEXT: fmov w0, s0
70 %res = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a)
; Test: AND reduction of the <4 x i16> argument %a through
; @llvm.vector.reduce.and.v4i16.
; Expected code: ptrue over 4 .h lanes (vl4), ANDV across z0.h, then fmov of
; the reduced value into w0.
74 define i16 @andv_v4i16(<4 x i16> %a) {
75 ; CHECK-LABEL: andv_v4i16:
77 ; CHECK-NEXT: ptrue p0.h, vl4
78 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
79 ; CHECK-NEXT: andv h0, p0, z0.h
80 ; CHECK-NEXT: fmov w0, s0
82 %res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)
; Test: AND reduction of the <8 x i16> argument %a through
; @llvm.vector.reduce.and.v8i16.
; Expected code: ptrue over 8 .h lanes (vl8), ANDV across z0.h, then fmov of
; the reduced value into w0.
86 define i16 @andv_v8i16(<8 x i16> %a) {
87 ; CHECK-LABEL: andv_v8i16:
89 ; CHECK-NEXT: ptrue p0.h, vl8
90 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
91 ; CHECK-NEXT: andv h0, p0, z0.h
92 ; CHECK-NEXT: fmov w0, s0
94 %res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)
; Test: AND reduction of a <16 x i16> value loaded from pointer %a through
; @llvm.vector.reduce.and.v16i16.
; Expected code: one ldp fetches the value as q1/q0, a vector AND merges the
; two registers, and a single ANDV over 8 .h lanes (vl8) reduces the result
; before fmov moves it into w0.
98 define i16 @andv_v16i16(ptr %a) {
99 ; CHECK-LABEL: andv_v16i16:
101 ; CHECK-NEXT: ptrue p0.h, vl8
102 ; CHECK-NEXT: ldp q1, q0, [x0]
103 ; CHECK-NEXT: and z0.d, z1.d, z0.d
104 ; CHECK-NEXT: andv h0, p0, z0.h
105 ; CHECK-NEXT: fmov w0, s0
107 %op = load <16 x i16>, ptr %a
108 %res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op)
; Test: AND reduction of the <2 x i32> argument %a through
; @llvm.vector.reduce.and.v2i32.
; Expected code: ptrue over 2 .s lanes (vl2), ANDV across z0.s, then fmov of
; the reduced value into w0.
112 define i32 @andv_v2i32(<2 x i32> %a) {
113 ; CHECK-LABEL: andv_v2i32:
115 ; CHECK-NEXT: ptrue p0.s, vl2
116 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
117 ; CHECK-NEXT: andv s0, p0, z0.s
118 ; CHECK-NEXT: fmov w0, s0
120 %res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)
; Test: AND reduction of the <4 x i32> argument %a through
; @llvm.vector.reduce.and.v4i32.
; Expected code: ptrue over 4 .s lanes (vl4), ANDV across z0.s, then fmov of
; the reduced value into w0.
124 define i32 @andv_v4i32(<4 x i32> %a) {
125 ; CHECK-LABEL: andv_v4i32:
127 ; CHECK-NEXT: ptrue p0.s, vl4
128 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
129 ; CHECK-NEXT: andv s0, p0, z0.s
130 ; CHECK-NEXT: fmov w0, s0
132 %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
; Test: AND reduction of a <8 x i32> value loaded from pointer %a through
; @llvm.vector.reduce.and.v8i32.
; Expected code: one ldp fetches the value as q1/q0, a vector AND merges the
; two registers, and a single ANDV over 4 .s lanes (vl4) reduces the result
; before fmov moves it into w0.
136 define i32 @andv_v8i32(ptr %a) {
137 ; CHECK-LABEL: andv_v8i32:
139 ; CHECK-NEXT: ptrue p0.s, vl4
140 ; CHECK-NEXT: ldp q1, q0, [x0]
141 ; CHECK-NEXT: and z0.d, z1.d, z0.d
142 ; CHECK-NEXT: andv s0, p0, z0.s
143 ; CHECK-NEXT: fmov w0, s0
145 %op = load <8 x i32>, ptr %a
146 %res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op)
; Test: AND reduction of the <2 x i64> argument %a through
; @llvm.vector.reduce.and.v2i64.
; Expected code: ptrue over 2 .d lanes (vl2), ANDV across z0.d, then fmov of
; the reduced value from d0 into x0.
150 define i64 @andv_v2i64(<2 x i64> %a) {
151 ; CHECK-LABEL: andv_v2i64:
153 ; CHECK-NEXT: ptrue p0.d, vl2
154 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
155 ; CHECK-NEXT: andv d0, p0, z0.d
156 ; CHECK-NEXT: fmov x0, d0
158 %res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
; Test: AND reduction of a <4 x i64> value loaded from pointer %a through
; @llvm.vector.reduce.and.v4i64.
; Expected code: one ldp fetches the value as q1/q0, a vector AND merges the
; two registers, and a single ANDV over 2 .d lanes (vl2) reduces the result
; before fmov moves it from d0 into x0.
162 define i64 @andv_v4i64(ptr %a) {
163 ; CHECK-LABEL: andv_v4i64:
165 ; CHECK-NEXT: ptrue p0.d, vl2
166 ; CHECK-NEXT: ldp q1, q0, [x0]
167 ; CHECK-NEXT: and z0.d, z1.d, z0.d
168 ; CHECK-NEXT: andv d0, p0, z0.d
169 ; CHECK-NEXT: fmov x0, d0
171 %op = load <4 x i64>, ptr %a
172 %res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op)
; Test: XOR reduction of the <4 x i8> argument %a through
; @llvm.vector.reduce.xor.v4i8.
; Expected code: ptrue over 4 .h lanes (vl4), EORV across z0.h, then fmov of
; the reduced value into w0. Note the reduction is expected on .h lanes, not .b.
; NOTE(review): presumably the i8 elements are widened during legalization - confirm.
180 define i8 @eorv_v4i8(<4 x i8> %a) {
181 ; CHECK-LABEL: eorv_v4i8:
183 ; CHECK-NEXT: ptrue p0.h, vl4
184 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
185 ; CHECK-NEXT: eorv h0, p0, z0.h
186 ; CHECK-NEXT: fmov w0, s0
188 %res = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a)
; Test: XOR reduction of the <8 x i8> argument %a through
; @llvm.vector.reduce.xor.v8i8.
; Expected code: ptrue over 8 .b lanes (vl8), EORV across z0.b, then fmov of
; the reduced value into w0.
192 define i8 @eorv_v8i8(<8 x i8> %a) {
193 ; CHECK-LABEL: eorv_v8i8:
195 ; CHECK-NEXT: ptrue p0.b, vl8
196 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
197 ; CHECK-NEXT: eorv b0, p0, z0.b
198 ; CHECK-NEXT: fmov w0, s0
200 %res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
; Test: XOR reduction of the <16 x i8> argument %a through
; @llvm.vector.reduce.xor.v16i8.
; Expected code: ptrue over 16 .b lanes (vl16), EORV across z0.b, then fmov of
; the reduced value into w0.
204 define i8 @eorv_v16i8(<16 x i8> %a) {
205 ; CHECK-LABEL: eorv_v16i8:
207 ; CHECK-NEXT: ptrue p0.b, vl16
208 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
209 ; CHECK-NEXT: eorv b0, p0, z0.b
210 ; CHECK-NEXT: fmov w0, s0
212 %res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
; Test: XOR reduction of a <32 x i8> value loaded from pointer %a through
; @llvm.vector.reduce.xor.v32i8.
; Expected code: one ldp fetches the value as q1/q0, a vector EOR merges the
; two registers, and a single EORV over 16 .b lanes (vl16) reduces the result
; before fmov moves it into w0.
216 define i8 @eorv_v32i8(ptr %a) {
217 ; CHECK-LABEL: eorv_v32i8:
219 ; CHECK-NEXT: ptrue p0.b, vl16
220 ; CHECK-NEXT: ldp q1, q0, [x0]
221 ; CHECK-NEXT: eor z0.d, z1.d, z0.d
222 ; CHECK-NEXT: eorv b0, p0, z0.b
223 ; CHECK-NEXT: fmov w0, s0
225 %op = load <32 x i8>, ptr %a
226 %res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op)
; Test: XOR reduction of the <2 x i16> argument %a through
; @llvm.vector.reduce.xor.v2i16.
; Expected code: ptrue over 2 .s lanes (vl2), EORV across z0.s, then fmov of
; the reduced value into w0. Note the reduction is expected on .s lanes, not .h.
; NOTE(review): presumably the i16 elements are widened during legalization - confirm.
230 define i16 @eorv_v2i16(<2 x i16> %a) {
231 ; CHECK-LABEL: eorv_v2i16:
233 ; CHECK-NEXT: ptrue p0.s, vl2
234 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
235 ; CHECK-NEXT: eorv s0, p0, z0.s
236 ; CHECK-NEXT: fmov w0, s0
238 %res = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %a)
; Test: XOR reduction of the <4 x i16> argument %a through
; @llvm.vector.reduce.xor.v4i16.
; Expected code: ptrue over 4 .h lanes (vl4), EORV across z0.h, then fmov of
; the reduced value into w0.
242 define i16 @eorv_v4i16(<4 x i16> %a) {
243 ; CHECK-LABEL: eorv_v4i16:
245 ; CHECK-NEXT: ptrue p0.h, vl4
246 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
247 ; CHECK-NEXT: eorv h0, p0, z0.h
248 ; CHECK-NEXT: fmov w0, s0
250 %res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a)
; Test: XOR reduction of the <8 x i16> argument %a through
; @llvm.vector.reduce.xor.v8i16.
; Expected code: ptrue over 8 .h lanes (vl8), EORV across z0.h, then fmov of
; the reduced value into w0.
254 define i16 @eorv_v8i16(<8 x i16> %a) {
255 ; CHECK-LABEL: eorv_v8i16:
257 ; CHECK-NEXT: ptrue p0.h, vl8
258 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
259 ; CHECK-NEXT: eorv h0, p0, z0.h
260 ; CHECK-NEXT: fmov w0, s0
262 %res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a)
; Test: XOR reduction of a <16 x i16> value loaded from pointer %a through
; @llvm.vector.reduce.xor.v16i16.
; Expected code: one ldp fetches the value as q1/q0, a vector EOR merges the
; two registers, and a single EORV over 8 .h lanes (vl8) reduces the result
; before fmov moves it into w0.
266 define i16 @eorv_v16i16(ptr %a) {
267 ; CHECK-LABEL: eorv_v16i16:
269 ; CHECK-NEXT: ptrue p0.h, vl8
270 ; CHECK-NEXT: ldp q1, q0, [x0]
271 ; CHECK-NEXT: eor z0.d, z1.d, z0.d
272 ; CHECK-NEXT: eorv h0, p0, z0.h
273 ; CHECK-NEXT: fmov w0, s0
275 %op = load <16 x i16>, ptr %a
276 %res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op)
; Test: XOR reduction of the <2 x i32> argument %a through
; @llvm.vector.reduce.xor.v2i32.
; Expected code: ptrue over 2 .s lanes (vl2), EORV across z0.s, then fmov of
; the reduced value into w0.
280 define i32 @eorv_v2i32(<2 x i32> %a) {
281 ; CHECK-LABEL: eorv_v2i32:
283 ; CHECK-NEXT: ptrue p0.s, vl2
284 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
285 ; CHECK-NEXT: eorv s0, p0, z0.s
286 ; CHECK-NEXT: fmov w0, s0
288 %res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)
; Test: XOR reduction of the <4 x i32> argument %a through
; @llvm.vector.reduce.xor.v4i32.
; Expected code: ptrue over 4 .s lanes (vl4), EORV across z0.s, then fmov of
; the reduced value into w0.
292 define i32 @eorv_v4i32(<4 x i32> %a) {
293 ; CHECK-LABEL: eorv_v4i32:
295 ; CHECK-NEXT: ptrue p0.s, vl4
296 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
297 ; CHECK-NEXT: eorv s0, p0, z0.s
298 ; CHECK-NEXT: fmov w0, s0
300 %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
; Test: XOR reduction of a <8 x i32> value loaded from pointer %a through
; @llvm.vector.reduce.xor.v8i32.
; Expected code: one ldp fetches the value as q1/q0, a vector EOR merges the
; two registers, and a single EORV over 4 .s lanes (vl4) reduces the result
; before fmov moves it into w0.
304 define i32 @eorv_v8i32(ptr %a) {
305 ; CHECK-LABEL: eorv_v8i32:
307 ; CHECK-NEXT: ptrue p0.s, vl4
308 ; CHECK-NEXT: ldp q1, q0, [x0]
309 ; CHECK-NEXT: eor z0.d, z1.d, z0.d
310 ; CHECK-NEXT: eorv s0, p0, z0.s
311 ; CHECK-NEXT: fmov w0, s0
313 %op = load <8 x i32>, ptr %a
314 %res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op)
; Test: XOR reduction of the <2 x i64> argument %a through
; @llvm.vector.reduce.xor.v2i64.
; Expected code: ptrue over 2 .d lanes (vl2), EORV across z0.d, then fmov of
; the reduced value from d0 into x0.
318 define i64 @eorv_v2i64(<2 x i64> %a) {
319 ; CHECK-LABEL: eorv_v2i64:
321 ; CHECK-NEXT: ptrue p0.d, vl2
322 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
323 ; CHECK-NEXT: eorv d0, p0, z0.d
324 ; CHECK-NEXT: fmov x0, d0
326 %res = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a)
; Test: XOR reduction of a <4 x i64> value loaded from pointer %a through
; @llvm.vector.reduce.xor.v4i64.
; Expected code: one ldp fetches the value as q1/q0, a vector EOR merges the
; two registers, and a single EORV over 2 .d lanes (vl2) reduces the result
; before fmov moves it from d0 into x0.
330 define i64 @eorv_v4i64(ptr %a) {
331 ; CHECK-LABEL: eorv_v4i64:
333 ; CHECK-NEXT: ptrue p0.d, vl2
334 ; CHECK-NEXT: ldp q1, q0, [x0]
335 ; CHECK-NEXT: eor z0.d, z1.d, z0.d
336 ; CHECK-NEXT: eorv d0, p0, z0.d
337 ; CHECK-NEXT: fmov x0, d0
339 %op = load <4 x i64>, ptr %a
340 %res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op)
; Test: OR reduction of the <4 x i8> argument %a through
; @llvm.vector.reduce.or.v4i8.
; Expected code: ptrue over 4 .h lanes (vl4), ORV across z0.h, then fmov of
; the reduced value into w0. Note the reduction is expected on .h lanes, not .b.
; NOTE(review): presumably the i8 elements are widened during legalization - confirm.
348 define i8 @orv_v4i8(<4 x i8> %a) {
349 ; CHECK-LABEL: orv_v4i8:
351 ; CHECK-NEXT: ptrue p0.h, vl4
352 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
353 ; CHECK-NEXT: orv h0, p0, z0.h
354 ; CHECK-NEXT: fmov w0, s0
356 %res = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a)
; Test: OR reduction of the <8 x i8> argument %a through
; @llvm.vector.reduce.or.v8i8.
; Expected code: ptrue over 8 .b lanes (vl8), ORV across z0.b, then fmov of
; the reduced value into w0.
360 define i8 @orv_v8i8(<8 x i8> %a) {
361 ; CHECK-LABEL: orv_v8i8:
363 ; CHECK-NEXT: ptrue p0.b, vl8
364 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
365 ; CHECK-NEXT: orv b0, p0, z0.b
366 ; CHECK-NEXT: fmov w0, s0
368 %res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)
; Test: OR reduction of the <16 x i8> argument %a through
; @llvm.vector.reduce.or.v16i8.
; Expected code: ptrue over 16 .b lanes (vl16), ORV across z0.b, then fmov of
; the reduced value into w0.
372 define i8 @orv_v16i8(<16 x i8> %a) {
373 ; CHECK-LABEL: orv_v16i8:
375 ; CHECK-NEXT: ptrue p0.b, vl16
376 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
377 ; CHECK-NEXT: orv b0, p0, z0.b
378 ; CHECK-NEXT: fmov w0, s0
380 %res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)
; Test: OR reduction of a <32 x i8> value loaded from pointer %a through
; @llvm.vector.reduce.or.v32i8.
; Expected code: one ldp fetches the value as q1/q0, a vector ORR merges the
; two registers, and a single ORV over 16 .b lanes (vl16) reduces the result
; before fmov moves it into w0.
384 define i8 @orv_v32i8(ptr %a) {
385 ; CHECK-LABEL: orv_v32i8:
387 ; CHECK-NEXT: ptrue p0.b, vl16
388 ; CHECK-NEXT: ldp q1, q0, [x0]
389 ; CHECK-NEXT: orr z0.d, z1.d, z0.d
390 ; CHECK-NEXT: orv b0, p0, z0.b
391 ; CHECK-NEXT: fmov w0, s0
393 %op = load <32 x i8>, ptr %a
394 %res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op)
; Test: OR reduction of the <2 x i16> argument %a through
; @llvm.vector.reduce.or.v2i16.
; Expected code: ptrue over 2 .s lanes (vl2), ORV across z0.s, then fmov of
; the reduced value into w0. Note the reduction is expected on .s lanes, not .h.
; NOTE(review): presumably the i16 elements are widened during legalization - confirm.
398 define i16 @orv_v2i16(<2 x i16> %a) {
399 ; CHECK-LABEL: orv_v2i16:
401 ; CHECK-NEXT: ptrue p0.s, vl2
402 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
403 ; CHECK-NEXT: orv s0, p0, z0.s
404 ; CHECK-NEXT: fmov w0, s0
406 %res = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a)
; Test: OR reduction of the <4 x i16> argument %a through
; @llvm.vector.reduce.or.v4i16.
; Expected code: ptrue over 4 .h lanes (vl4), ORV across z0.h, then fmov of
; the reduced value into w0.
410 define i16 @orv_v4i16(<4 x i16> %a) {
411 ; CHECK-LABEL: orv_v4i16:
413 ; CHECK-NEXT: ptrue p0.h, vl4
414 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
415 ; CHECK-NEXT: orv h0, p0, z0.h
416 ; CHECK-NEXT: fmov w0, s0
418 %res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a)
; Test: OR reduction of the <8 x i16> argument %a through
; @llvm.vector.reduce.or.v8i16.
; Expected code: ptrue over 8 .h lanes (vl8), ORV across z0.h, then fmov of
; the reduced value into w0.
422 define i16 @orv_v8i16(<8 x i16> %a) {
423 ; CHECK-LABEL: orv_v8i16:
425 ; CHECK-NEXT: ptrue p0.h, vl8
426 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
427 ; CHECK-NEXT: orv h0, p0, z0.h
428 ; CHECK-NEXT: fmov w0, s0
430 %res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a)
; Test: OR reduction of a <16 x i16> value loaded from pointer %a through
; @llvm.vector.reduce.or.v16i16.
; Expected code: one ldp fetches the value as q1/q0, a vector ORR merges the
; two registers, and a single ORV over 8 .h lanes (vl8) reduces the result
; before fmov moves it into w0.
434 define i16 @orv_v16i16(ptr %a) {
435 ; CHECK-LABEL: orv_v16i16:
437 ; CHECK-NEXT: ptrue p0.h, vl8
438 ; CHECK-NEXT: ldp q1, q0, [x0]
439 ; CHECK-NEXT: orr z0.d, z1.d, z0.d
440 ; CHECK-NEXT: orv h0, p0, z0.h
441 ; CHECK-NEXT: fmov w0, s0
443 %op = load <16 x i16>, ptr %a
444 %res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op)
; Test: OR reduction of the <2 x i32> argument %a through
; @llvm.vector.reduce.or.v2i32.
; Expected code: ptrue over 2 .s lanes (vl2), ORV across z0.s, then fmov of
; the reduced value into w0.
448 define i32 @orv_v2i32(<2 x i32> %a) {
449 ; CHECK-LABEL: orv_v2i32:
451 ; CHECK-NEXT: ptrue p0.s, vl2
452 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
453 ; CHECK-NEXT: orv s0, p0, z0.s
454 ; CHECK-NEXT: fmov w0, s0
456 %res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)
; Test: OR reduction of the <4 x i32> argument %a through
; @llvm.vector.reduce.or.v4i32.
; Expected code: ptrue over 4 .s lanes (vl4), ORV across z0.s, then fmov of
; the reduced value into w0.
460 define i32 @orv_v4i32(<4 x i32> %a) {
461 ; CHECK-LABEL: orv_v4i32:
463 ; CHECK-NEXT: ptrue p0.s, vl4
464 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
465 ; CHECK-NEXT: orv s0, p0, z0.s
466 ; CHECK-NEXT: fmov w0, s0
468 %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
; Test: OR reduction of a <8 x i32> value loaded from pointer %a through
; @llvm.vector.reduce.or.v8i32.
; Expected code: one ldp fetches the value as q1/q0, a vector ORR merges the
; two registers, and a single ORV over 4 .s lanes (vl4) reduces the result
; before fmov moves it into w0.
472 define i32 @orv_v8i32(ptr %a) {
473 ; CHECK-LABEL: orv_v8i32:
475 ; CHECK-NEXT: ptrue p0.s, vl4
476 ; CHECK-NEXT: ldp q1, q0, [x0]
477 ; CHECK-NEXT: orr z0.d, z1.d, z0.d
478 ; CHECK-NEXT: orv s0, p0, z0.s
479 ; CHECK-NEXT: fmov w0, s0
481 %op = load <8 x i32>, ptr %a
482 %res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op)
; Test: OR reduction of the <2 x i64> argument %a through
; @llvm.vector.reduce.or.v2i64.
; Expected code: ptrue over 2 .d lanes (vl2), ORV across z0.d, then fmov of
; the reduced value from d0 into x0.
486 define i64 @orv_v2i64(<2 x i64> %a) {
487 ; CHECK-LABEL: orv_v2i64:
489 ; CHECK-NEXT: ptrue p0.d, vl2
490 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
491 ; CHECK-NEXT: orv d0, p0, z0.d
492 ; CHECK-NEXT: fmov x0, d0
494 %res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)
; Test: OR reduction of a <4 x i64> value loaded from pointer %a through
; @llvm.vector.reduce.or.v4i64.
; Expected code: one ldp fetches the value as q1/q0, a vector ORR merges the
; two registers, and a single ORV over 2 .d lanes (vl2) reduces the result
; before fmov moves it from d0 into x0.
498 define i64 @orv_v4i64(ptr %a) {
499 ; CHECK-LABEL: orv_v4i64:
501 ; CHECK-NEXT: ptrue p0.d, vl2
502 ; CHECK-NEXT: ldp q1, q0, [x0]
503 ; CHECK-NEXT: orr z0.d, z1.d, z0.d
504 ; CHECK-NEXT: orv d0, p0, z0.d
505 ; CHECK-NEXT: fmov x0, d0
507 %op = load <4 x i64>, ptr %a
508 %res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op)
; Declarations of the AND reduction intrinsics called by the andv_* functions
; in this file: one per vector type, each returning a scalar of the element type.
512 declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
513 declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
514 declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
515 declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
517 declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>)
518 declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
519 declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
520 declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
522 declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
523 declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
524 declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
526 declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
527 declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
; Declarations of the OR reduction intrinsics called by the orv_* functions
; in this file: one per vector type, each returning a scalar of the element type.
529 declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
530 declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
531 declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
532 declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
534 declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
535 declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
536 declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
537 declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
539 declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
540 declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
541 declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
543 declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
544 declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
; Declarations of the XOR reduction intrinsics called by the eorv_* functions
; in this file: one per vector type, each returning a scalar of the element type.
546 declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>)
547 declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
548 declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
549 declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
551 declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>)
552 declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
553 declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
554 declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
556 declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
557 declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
558 declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
560 declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
561 declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)