1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
3 ; RUN: llc -mattr=+sve2 -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE2
4 ; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE2
6 target triple = "aarch64-unknown-linux-gnu"
; add_v4i8: element-wise add of two <4 x i8> by-value arguments.
; Expected under the shared CHECK prefix (all three RUN lines): one
; unpredicated `add` on .h lanes rather than .b lanes.
11 define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
12 ; CHECK-LABEL: add_v4i8:
14 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
15 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
16 ; CHECK-NEXT: add z0.h, z0.h, z1.h
17 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
19 %res = add <4 x i8> %op1, %op2
; add_v8i8: element-wise add of two <8 x i8> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .b lanes,
; with the d-register arguments re-labelled as z registers (kill comments).
23 define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
24 ; CHECK-LABEL: add_v8i8:
26 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
27 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
28 ; CHECK-NEXT: add z0.b, z0.b, z1.b
29 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
31 %res = add <8 x i8> %op1, %op2
; add_v16i8: element-wise add of two <16 x i8> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .b lanes,
; with the q-register arguments re-labelled as z registers (kill comments).
35 define <16 x i8> @add_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
36 ; CHECK-LABEL: add_v16i8:
38 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
39 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
40 ; CHECK-NEXT: add z0.b, z0.b, z1.b
41 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
43 %res = add <16 x i8> %op1, %op2
; add_v32i8: loads <32 x i8> from %a and from %b, adds them element-wise and
; stores the result back through %a.
; Expected under the shared CHECK prefix: two `ldp` q-register pair loads,
; two unpredicated .b `add`s (one per half) and a single `stp` to [x0].
47 define void @add_v32i8(ptr %a, ptr %b) {
48 ; CHECK-LABEL: add_v32i8:
50 ; CHECK-NEXT: ldp q0, q3, [x1]
51 ; CHECK-NEXT: ldp q1, q2, [x0]
52 ; CHECK-NEXT: add z0.b, z1.b, z0.b
53 ; CHECK-NEXT: add z1.b, z2.b, z3.b
54 ; CHECK-NEXT: stp q0, q1, [x0]
56 %op1 = load <32 x i8>, ptr %a
57 %op2 = load <32 x i8>, ptr %b
58 %res = add <32 x i8> %op1, %op2
59 store <32 x i8> %res, ptr %a
; add_v2i16: element-wise add of two <2 x i16> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .s lanes
; rather than .h lanes.
63 define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
64 ; CHECK-LABEL: add_v2i16:
66 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
67 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
68 ; CHECK-NEXT: add z0.s, z0.s, z1.s
69 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
71 %res = add <2 x i16> %op1, %op2
; add_v4i16: element-wise add of two <4 x i16> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .h lanes.
75 define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
76 ; CHECK-LABEL: add_v4i16:
78 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
79 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
80 ; CHECK-NEXT: add z0.h, z0.h, z1.h
81 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
83 %res = add <4 x i16> %op1, %op2
; add_v8i16: element-wise add of two <8 x i16> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .h lanes
; operating on the q-register arguments viewed as z registers.
87 define <8 x i16> @add_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
88 ; CHECK-LABEL: add_v8i16:
90 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
91 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
92 ; CHECK-NEXT: add z0.h, z0.h, z1.h
93 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
95 %res = add <8 x i16> %op1, %op2
; add_v16i16: loads <16 x i16> from %a and from %b, adds them element-wise
; and stores the result back through %a.
; Expected under the shared CHECK prefix: two `ldp` pair loads, two
; unpredicated .h `add`s (one per half) and a single `stp` to [x0].
99 define void @add_v16i16(ptr %a, ptr %b) {
100 ; CHECK-LABEL: add_v16i16:
102 ; CHECK-NEXT: ldp q0, q3, [x1]
103 ; CHECK-NEXT: ldp q1, q2, [x0]
104 ; CHECK-NEXT: add z0.h, z1.h, z0.h
105 ; CHECK-NEXT: add z1.h, z2.h, z3.h
106 ; CHECK-NEXT: stp q0, q1, [x0]
108 %op1 = load <16 x i16>, ptr %a
109 %op2 = load <16 x i16>, ptr %b
110 %res = add <16 x i16> %op1, %op2
111 store <16 x i16> %res, ptr %a
; add_v2i32: element-wise add of two <2 x i32> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .s lanes.
115 define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
116 ; CHECK-LABEL: add_v2i32:
118 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
119 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
120 ; CHECK-NEXT: add z0.s, z0.s, z1.s
121 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
123 %res = add <2 x i32> %op1, %op2
; add_v4i32: element-wise add of two <4 x i32> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .s lanes
; operating on the q-register arguments viewed as z registers.
127 define <4 x i32> @add_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
128 ; CHECK-LABEL: add_v4i32:
130 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
131 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
132 ; CHECK-NEXT: add z0.s, z0.s, z1.s
133 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
135 %res = add <4 x i32> %op1, %op2
; add_v8i32: loads <8 x i32> from %a and from %b, adds them element-wise and
; stores the result back through %a.
; Expected under the shared CHECK prefix: two `ldp` pair loads, two
; unpredicated .s `add`s (one per half) and a single `stp` to [x0].
139 define void @add_v8i32(ptr %a, ptr %b) {
140 ; CHECK-LABEL: add_v8i32:
142 ; CHECK-NEXT: ldp q0, q3, [x1]
143 ; CHECK-NEXT: ldp q1, q2, [x0]
144 ; CHECK-NEXT: add z0.s, z1.s, z0.s
145 ; CHECK-NEXT: add z1.s, z2.s, z3.s
146 ; CHECK-NEXT: stp q0, q1, [x0]
148 %op1 = load <8 x i32>, ptr %a
149 %op2 = load <8 x i32>, ptr %b
150 %res = add <8 x i32> %op1, %op2
151 store <8 x i32> %res, ptr %a
; add_v1i64: add of two single-element <1 x i64> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .d lanes.
155 define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
156 ; CHECK-LABEL: add_v1i64:
158 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
159 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
160 ; CHECK-NEXT: add z0.d, z0.d, z1.d
161 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
163 %res = add <1 x i64> %op1, %op2
; add_v2i64: element-wise add of two <2 x i64> by-value arguments.
; Expected under the shared CHECK prefix: one unpredicated `add` on .d lanes
; operating on the q-register arguments viewed as z registers.
167 define <2 x i64> @add_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
168 ; CHECK-LABEL: add_v2i64:
170 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
171 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
172 ; CHECK-NEXT: add z0.d, z0.d, z1.d
173 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
175 %res = add <2 x i64> %op1, %op2
; add_v4i64: loads <4 x i64> from %a and from %b, adds them element-wise and
; stores the result back through %a.
; Expected under the shared CHECK prefix: two `ldp` pair loads, two
; unpredicated .d `add`s (one per half) and a single `stp` to [x0].
179 define void @add_v4i64(ptr %a, ptr %b) {
180 ; CHECK-LABEL: add_v4i64:
182 ; CHECK-NEXT: ldp q0, q3, [x1]
183 ; CHECK-NEXT: ldp q1, q2, [x0]
184 ; CHECK-NEXT: add z0.d, z1.d, z0.d
185 ; CHECK-NEXT: add z1.d, z2.d, z3.d
186 ; CHECK-NEXT: stp q0, q1, [x0]
188 %op1 = load <4 x i64>, ptr %a
189 %op2 = load <4 x i64>, ptr %b
190 %res = add <4 x i64> %op1, %op2
191 store <4 x i64> %res, ptr %a
; mul_v4i8: element-wise multiply of two <4 x i8> by-value arguments.
; The expectations split by prefix. With the SVE prefix (the +sve RUN line)
; a `ptrue p0.h, vl4` governs a predicated `mul` on .h lanes; with the SVE2
; prefix (the +sve2 and +sme RUN lines) the `mul` is unpredicated.
199 define <4 x i8> @mul_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
200 ; SVE-LABEL: mul_v4i8:
202 ; SVE-NEXT: ptrue p0.h, vl4
203 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
204 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
205 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
206 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
209 ; SVE2-LABEL: mul_v4i8:
211 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
212 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
213 ; SVE2-NEXT: mul z0.h, z0.h, z1.h
214 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
216 %res = mul <4 x i8> %op1, %op2
; mul_v8i8: element-wise multiply of two <8 x i8> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.b, vl8` governs a predicated .b
; `mul`; with the SVE2 prefix (+sve2, +sme) the .b `mul` is unpredicated.
220 define <8 x i8> @mul_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
221 ; SVE-LABEL: mul_v8i8:
223 ; SVE-NEXT: ptrue p0.b, vl8
224 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
225 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
226 ; SVE-NEXT: mul z0.b, p0/m, z0.b, z1.b
227 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
230 ; SVE2-LABEL: mul_v8i8:
232 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
233 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
234 ; SVE2-NEXT: mul z0.b, z0.b, z1.b
235 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
237 %res = mul <8 x i8> %op1, %op2
; mul_v16i8: element-wise multiply of two <16 x i8> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.b, vl16` governs a predicated .b
; `mul`; with the SVE2 prefix (+sve2, +sme) the .b `mul` is unpredicated.
241 define <16 x i8> @mul_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
242 ; SVE-LABEL: mul_v16i8:
244 ; SVE-NEXT: ptrue p0.b, vl16
245 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
246 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
247 ; SVE-NEXT: mul z0.b, p0/m, z0.b, z1.b
248 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
251 ; SVE2-LABEL: mul_v16i8:
253 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
254 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
255 ; SVE2-NEXT: mul z0.b, z0.b, z1.b
256 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
258 %res = mul <16 x i8> %op1, %op2
; mul_v32i8: loads <32 x i8> from %a and from %b, multiplies them
; element-wise and stores the result back through %a.
; With the SVE prefix (+sve) one `ptrue p0.b, vl16` is shared by two
; predicated .b `mul`s, the second preceded by `movprfx z1, z2`; with the
; SVE2 prefix (+sve2, +sme) two unpredicated .b `mul`s are expected and no
; ptrue or movprfx appears.
262 define void @mul_v32i8(ptr %a, ptr %b) {
263 ; SVE-LABEL: mul_v32i8:
265 ; SVE-NEXT: ptrue p0.b, vl16
266 ; SVE-NEXT: ldp q0, q3, [x1]
267 ; SVE-NEXT: ldp q1, q2, [x0]
268 ; SVE-NEXT: mul z0.b, p0/m, z0.b, z1.b
269 ; SVE-NEXT: movprfx z1, z2
270 ; SVE-NEXT: mul z1.b, p0/m, z1.b, z3.b
271 ; SVE-NEXT: stp q0, q1, [x0]
274 ; SVE2-LABEL: mul_v32i8:
276 ; SVE2-NEXT: ldp q0, q3, [x1]
277 ; SVE2-NEXT: ldp q1, q2, [x0]
278 ; SVE2-NEXT: mul z0.b, z1.b, z0.b
279 ; SVE2-NEXT: mul z1.b, z2.b, z3.b
280 ; SVE2-NEXT: stp q0, q1, [x0]
282 %op1 = load <32 x i8>, ptr %a
283 %op2 = load <32 x i8>, ptr %b
284 %res = mul <32 x i8> %op1, %op2
285 store <32 x i8> %res, ptr %a
; mul_v2i16: element-wise multiply of two <2 x i16> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.s, vl2` governs a predicated `mul`
; on .s lanes (not .h); with the SVE2 prefix (+sve2, +sme) the .s `mul` is
; unpredicated.
289 define <2 x i16> @mul_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
290 ; SVE-LABEL: mul_v2i16:
292 ; SVE-NEXT: ptrue p0.s, vl2
293 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
294 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
295 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
296 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
299 ; SVE2-LABEL: mul_v2i16:
301 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
302 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
303 ; SVE2-NEXT: mul z0.s, z0.s, z1.s
304 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
306 %res = mul <2 x i16> %op1, %op2
; mul_v4i16: element-wise multiply of two <4 x i16> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.h, vl4` governs a predicated .h
; `mul`; with the SVE2 prefix (+sve2, +sme) the .h `mul` is unpredicated.
310 define <4 x i16> @mul_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
311 ; SVE-LABEL: mul_v4i16:
313 ; SVE-NEXT: ptrue p0.h, vl4
314 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
315 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
316 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
317 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
320 ; SVE2-LABEL: mul_v4i16:
322 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
323 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
324 ; SVE2-NEXT: mul z0.h, z0.h, z1.h
325 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
327 %res = mul <4 x i16> %op1, %op2
; mul_v8i16: element-wise multiply of two <8 x i16> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.h, vl8` governs a predicated .h
; `mul`; with the SVE2 prefix (+sve2, +sme) the .h `mul` is unpredicated.
331 define <8 x i16> @mul_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
332 ; SVE-LABEL: mul_v8i16:
334 ; SVE-NEXT: ptrue p0.h, vl8
335 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
336 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
337 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
338 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
341 ; SVE2-LABEL: mul_v8i16:
343 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
344 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
345 ; SVE2-NEXT: mul z0.h, z0.h, z1.h
346 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
348 %res = mul <8 x i16> %op1, %op2
; mul_v16i16: loads <16 x i16> from %a and from %b, multiplies them
; element-wise and stores the result back through %a.
; With the SVE prefix (+sve) one `ptrue p0.h, vl8` is shared by two
; predicated .h `mul`s, the second preceded by `movprfx z1, z2`; with the
; SVE2 prefix (+sve2, +sme) two unpredicated .h `mul`s are expected.
352 define void @mul_v16i16(ptr %a, ptr %b) {
353 ; SVE-LABEL: mul_v16i16:
355 ; SVE-NEXT: ptrue p0.h, vl8
356 ; SVE-NEXT: ldp q0, q3, [x1]
357 ; SVE-NEXT: ldp q1, q2, [x0]
358 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
359 ; SVE-NEXT: movprfx z1, z2
360 ; SVE-NEXT: mul z1.h, p0/m, z1.h, z3.h
361 ; SVE-NEXT: stp q0, q1, [x0]
364 ; SVE2-LABEL: mul_v16i16:
366 ; SVE2-NEXT: ldp q0, q3, [x1]
367 ; SVE2-NEXT: ldp q1, q2, [x0]
368 ; SVE2-NEXT: mul z0.h, z1.h, z0.h
369 ; SVE2-NEXT: mul z1.h, z2.h, z3.h
370 ; SVE2-NEXT: stp q0, q1, [x0]
372 %op1 = load <16 x i16>, ptr %a
373 %op2 = load <16 x i16>, ptr %b
374 %res = mul <16 x i16> %op1, %op2
375 store <16 x i16> %res, ptr %a
; mul_v2i32: element-wise multiply of two <2 x i32> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.s, vl2` governs a predicated .s
; `mul`; with the SVE2 prefix (+sve2, +sme) the .s `mul` is unpredicated.
379 define <2 x i32> @mul_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
380 ; SVE-LABEL: mul_v2i32:
382 ; SVE-NEXT: ptrue p0.s, vl2
383 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
384 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
385 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
386 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
389 ; SVE2-LABEL: mul_v2i32:
391 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
392 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
393 ; SVE2-NEXT: mul z0.s, z0.s, z1.s
394 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
396 %res = mul <2 x i32> %op1, %op2
; mul_v4i32: element-wise multiply of two <4 x i32> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.s, vl4` governs a predicated .s
; `mul`; with the SVE2 prefix (+sve2, +sme) the .s `mul` is unpredicated.
400 define <4 x i32> @mul_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
401 ; SVE-LABEL: mul_v4i32:
403 ; SVE-NEXT: ptrue p0.s, vl4
404 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
405 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
406 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
407 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
410 ; SVE2-LABEL: mul_v4i32:
412 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
413 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
414 ; SVE2-NEXT: mul z0.s, z0.s, z1.s
415 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
417 %res = mul <4 x i32> %op1, %op2
; mul_v8i32: loads <8 x i32> from %a and from %b, multiplies them
; element-wise and stores the result back through %a.
; With the SVE prefix (+sve) one `ptrue p0.s, vl4` is shared by two
; predicated .s `mul`s, the second preceded by `movprfx z1, z2`; with the
; SVE2 prefix (+sve2, +sme) two unpredicated .s `mul`s are expected.
421 define void @mul_v8i32(ptr %a, ptr %b) {
422 ; SVE-LABEL: mul_v8i32:
424 ; SVE-NEXT: ptrue p0.s, vl4
425 ; SVE-NEXT: ldp q0, q3, [x1]
426 ; SVE-NEXT: ldp q1, q2, [x0]
427 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
428 ; SVE-NEXT: movprfx z1, z2
429 ; SVE-NEXT: mul z1.s, p0/m, z1.s, z3.s
430 ; SVE-NEXT: stp q0, q1, [x0]
433 ; SVE2-LABEL: mul_v8i32:
435 ; SVE2-NEXT: ldp q0, q3, [x1]
436 ; SVE2-NEXT: ldp q1, q2, [x0]
437 ; SVE2-NEXT: mul z0.s, z1.s, z0.s
438 ; SVE2-NEXT: mul z1.s, z2.s, z3.s
439 ; SVE2-NEXT: stp q0, q1, [x0]
441 %op1 = load <8 x i32>, ptr %a
442 %op2 = load <8 x i32>, ptr %b
443 %res = mul <8 x i32> %op1, %op2
444 store <8 x i32> %res, ptr %a
; mul_v1i64: multiply of two single-element <1 x i64> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.d, vl1` governs a predicated .d
; `mul`; with the SVE2 prefix (+sve2, +sme) the .d `mul` is unpredicated.
448 define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
449 ; SVE-LABEL: mul_v1i64:
451 ; SVE-NEXT: ptrue p0.d, vl1
452 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
453 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
454 ; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
455 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
458 ; SVE2-LABEL: mul_v1i64:
460 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
461 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
462 ; SVE2-NEXT: mul z0.d, z0.d, z1.d
463 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
465 %res = mul <1 x i64> %op1, %op2
; mul_v2i64: element-wise multiply of two <2 x i64> by-value arguments.
; With the SVE prefix (+sve) a `ptrue p0.d, vl2` governs a predicated .d
; `mul`; with the SVE2 prefix (+sve2, +sme) the .d `mul` is unpredicated.
469 define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
470 ; SVE-LABEL: mul_v2i64:
472 ; SVE-NEXT: ptrue p0.d, vl2
473 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
474 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
475 ; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
476 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
479 ; SVE2-LABEL: mul_v2i64:
481 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
482 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
483 ; SVE2-NEXT: mul z0.d, z0.d, z1.d
484 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
486 %res = mul <2 x i64> %op1, %op2
; mul_v4i64: loads <4 x i64> from %a and from %b, multiplies them
; element-wise and stores the result back through %a.
; With the SVE prefix (+sve) one `ptrue p0.d, vl2` is shared by two
; predicated .d `mul`s, the second preceded by `movprfx z1, z2`; with the
; SVE2 prefix (+sve2, +sme) two unpredicated .d `mul`s are expected.
490 define void @mul_v4i64(ptr %a, ptr %b) {
491 ; SVE-LABEL: mul_v4i64:
493 ; SVE-NEXT: ptrue p0.d, vl2
494 ; SVE-NEXT: ldp q0, q3, [x1]
495 ; SVE-NEXT: ldp q1, q2, [x0]
496 ; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
497 ; SVE-NEXT: movprfx z1, z2
498 ; SVE-NEXT: mul z1.d, p0/m, z1.d, z3.d
499 ; SVE-NEXT: stp q0, q1, [x0]
502 ; SVE2-LABEL: mul_v4i64:
504 ; SVE2-NEXT: ldp q0, q3, [x1]
505 ; SVE2-NEXT: ldp q1, q2, [x0]
506 ; SVE2-NEXT: mul z0.d, z1.d, z0.d
507 ; SVE2-NEXT: mul z1.d, z2.d, z3.d
508 ; SVE2-NEXT: stp q0, q1, [x0]
510 %op1 = load <4 x i64>, ptr %a
511 %op2 = load <4 x i64>, ptr %b
512 %res = mul <4 x i64> %op1, %op2
513 store <4 x i64> %res, ptr %a
; sub_v4i8: element-wise subtract (%op1 - %op2) of two <4 x i8> arguments.
; Expected under the shared CHECK prefix (all three RUN lines): one
; unpredicated `sub` on .h lanes rather than .b lanes.
521 define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
522 ; CHECK-LABEL: sub_v4i8:
524 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
525 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
526 ; CHECK-NEXT: sub z0.h, z0.h, z1.h
527 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
529 %res = sub <4 x i8> %op1, %op2
; sub_v8i8: element-wise subtract (%op1 - %op2) of two <8 x i8> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .b lanes.
533 define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
534 ; CHECK-LABEL: sub_v8i8:
536 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
537 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
538 ; CHECK-NEXT: sub z0.b, z0.b, z1.b
539 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
541 %res = sub <8 x i8> %op1, %op2
; sub_v16i8: element-wise subtract (%op1 - %op2) of two <16 x i8> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .b lanes
; operating on the q-register arguments viewed as z registers.
545 define <16 x i8> @sub_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
546 ; CHECK-LABEL: sub_v16i8:
548 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
549 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
550 ; CHECK-NEXT: sub z0.b, z0.b, z1.b
551 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
553 %res = sub <16 x i8> %op1, %op2
; sub_v32i8: loads <32 x i8> from %a and from %b, computes (%a value) minus
; (%b value) element-wise and stores the result back through %a.
; Expected under the shared CHECK prefix: two `ldp` pair loads, two
; unpredicated .b `sub`s whose minuend comes from [x0], and one `stp`.
557 define void @sub_v32i8(ptr %a, ptr %b) {
558 ; CHECK-LABEL: sub_v32i8:
560 ; CHECK-NEXT: ldp q0, q3, [x1]
561 ; CHECK-NEXT: ldp q1, q2, [x0]
562 ; CHECK-NEXT: sub z0.b, z1.b, z0.b
563 ; CHECK-NEXT: sub z1.b, z2.b, z3.b
564 ; CHECK-NEXT: stp q0, q1, [x0]
566 %op1 = load <32 x i8>, ptr %a
567 %op2 = load <32 x i8>, ptr %b
568 %res = sub <32 x i8> %op1, %op2
569 store <32 x i8> %res, ptr %a
; sub_v2i16: element-wise subtract (%op1 - %op2) of two <2 x i16> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .s lanes
; rather than .h lanes.
573 define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
574 ; CHECK-LABEL: sub_v2i16:
576 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
577 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
578 ; CHECK-NEXT: sub z0.s, z0.s, z1.s
579 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
581 %res = sub <2 x i16> %op1, %op2
; sub_v4i16: element-wise subtract (%op1 - %op2) of two <4 x i16> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .h lanes.
585 define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
586 ; CHECK-LABEL: sub_v4i16:
588 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
589 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
590 ; CHECK-NEXT: sub z0.h, z0.h, z1.h
591 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
593 %res = sub <4 x i16> %op1, %op2
; sub_v8i16: element-wise subtract (%op1 - %op2) of two <8 x i16> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .h lanes
; operating on the q-register arguments viewed as z registers.
597 define <8 x i16> @sub_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
598 ; CHECK-LABEL: sub_v8i16:
600 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
601 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
602 ; CHECK-NEXT: sub z0.h, z0.h, z1.h
603 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
605 %res = sub <8 x i16> %op1, %op2
; sub_v16i16: loads <16 x i16> from %a and from %b, computes (%a value)
; minus (%b value) element-wise and stores the result back through %a.
; Expected under the shared CHECK prefix: two `ldp` pair loads, two
; unpredicated .h `sub`s whose minuend comes from [x0], and one `stp`.
609 define void @sub_v16i16(ptr %a, ptr %b) {
610 ; CHECK-LABEL: sub_v16i16:
612 ; CHECK-NEXT: ldp q0, q3, [x1]
613 ; CHECK-NEXT: ldp q1, q2, [x0]
614 ; CHECK-NEXT: sub z0.h, z1.h, z0.h
615 ; CHECK-NEXT: sub z1.h, z2.h, z3.h
616 ; CHECK-NEXT: stp q0, q1, [x0]
618 %op1 = load <16 x i16>, ptr %a
619 %op2 = load <16 x i16>, ptr %b
620 %res = sub <16 x i16> %op1, %op2
621 store <16 x i16> %res, ptr %a
; sub_v2i32: element-wise subtract (%op1 - %op2) of two <2 x i32> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .s lanes.
625 define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
626 ; CHECK-LABEL: sub_v2i32:
628 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
629 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
630 ; CHECK-NEXT: sub z0.s, z0.s, z1.s
631 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
633 %res = sub <2 x i32> %op1, %op2
; sub_v4i32: element-wise subtract (%op1 - %op2) of two <4 x i32> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .s lanes
; operating on the q-register arguments viewed as z registers.
637 define <4 x i32> @sub_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
638 ; CHECK-LABEL: sub_v4i32:
640 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
641 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
642 ; CHECK-NEXT: sub z0.s, z0.s, z1.s
643 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
645 %res = sub <4 x i32> %op1, %op2
; sub_v8i32: loads <8 x i32> from %a and from %b, computes (%a value) minus
; (%b value) element-wise and stores the result back through %a.
; Expected under the shared CHECK prefix: two `ldp` pair loads, two
; unpredicated .s `sub`s whose minuend comes from [x0], and one `stp`.
649 define void @sub_v8i32(ptr %a, ptr %b) {
650 ; CHECK-LABEL: sub_v8i32:
652 ; CHECK-NEXT: ldp q0, q3, [x1]
653 ; CHECK-NEXT: ldp q1, q2, [x0]
654 ; CHECK-NEXT: sub z0.s, z1.s, z0.s
655 ; CHECK-NEXT: sub z1.s, z2.s, z3.s
656 ; CHECK-NEXT: stp q0, q1, [x0]
658 %op1 = load <8 x i32>, ptr %a
659 %op2 = load <8 x i32>, ptr %b
660 %res = sub <8 x i32> %op1, %op2
661 store <8 x i32> %res, ptr %a
; sub_v1i64: subtract (%op1 - %op2) of two single-element <1 x i64> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .d lanes.
665 define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
666 ; CHECK-LABEL: sub_v1i64:
668 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
669 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
670 ; CHECK-NEXT: sub z0.d, z0.d, z1.d
671 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
673 %res = sub <1 x i64> %op1, %op2
; sub_v2i64: element-wise subtract (%op1 - %op2) of two <2 x i64> arguments.
; Expected under the shared CHECK prefix: one unpredicated `sub` on .d lanes
; operating on the q-register arguments viewed as z registers.
677 define <2 x i64> @sub_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
678 ; CHECK-LABEL: sub_v2i64:
680 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
681 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
682 ; CHECK-NEXT: sub z0.d, z0.d, z1.d
683 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
685 %res = sub <2 x i64> %op1, %op2
; sub_v4i64: loads <4 x i64> from %a and from %b, computes (%a value) minus
; (%b value) element-wise and stores the result back through %a.
; Expected under the shared CHECK prefix: two `ldp` pair loads, two
; unpredicated .d `sub`s whose minuend comes from [x0], and one `stp`.
689 define void @sub_v4i64(ptr %a, ptr %b) {
690 ; CHECK-LABEL: sub_v4i64:
692 ; CHECK-NEXT: ldp q0, q3, [x1]
693 ; CHECK-NEXT: ldp q1, q2, [x0]
694 ; CHECK-NEXT: sub z0.d, z1.d, z0.d
695 ; CHECK-NEXT: sub z1.d, z2.d, z3.d
696 ; CHECK-NEXT: stp q0, q1, [x0]
698 %op1 = load <4 x i64>, ptr %a
699 %op2 = load <4 x i64>, ptr %b
700 %res = sub <4 x i64> %op1, %op2
701 store <4 x i64> %res, ptr %a
; abs_v4i8: calls llvm.abs.v4i8 on a <4 x i8> argument with the i1 operand
; set to false.
; Expected under the shared CHECK prefix (all three RUN lines): a
; `ptrue p0.h, vl4`, then a predicated `sxtb` on .h lanes before the
; predicated .h `abs` (the v8i8 variant has no such extend).
709 define <4 x i8> @abs_v4i8(<4 x i8> %op1) {
710 ; CHECK-LABEL: abs_v4i8:
712 ; CHECK-NEXT: ptrue p0.h, vl4
713 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
714 ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
715 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
716 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
718 %res = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %op1, i1 false)
; abs_v8i8: calls llvm.abs.v8i8 on a <8 x i8> argument with the i1 operand
; set to false.
; Expected under the shared CHECK prefix: `ptrue p0.b, vl8` followed by a
; predicated .b `abs`.
722 define <8 x i8> @abs_v8i8(<8 x i8> %op1) {
723 ; CHECK-LABEL: abs_v8i8:
725 ; CHECK-NEXT: ptrue p0.b, vl8
726 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
727 ; CHECK-NEXT: abs z0.b, p0/m, z0.b
728 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
730 %res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %op1, i1 false)
; abs_v16i8: calls llvm.abs.v16i8 on a <16 x i8> argument with the i1
; operand set to false.
; Expected under the shared CHECK prefix: `ptrue p0.b, vl16` followed by a
; predicated .b `abs`.
734 define <16 x i8> @abs_v16i8(<16 x i8> %op1) {
735 ; CHECK-LABEL: abs_v16i8:
737 ; CHECK-NEXT: ptrue p0.b, vl16
738 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
739 ; CHECK-NEXT: abs z0.b, p0/m, z0.b
740 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
742 %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %op1, i1 false)
; abs_v32i8: loads <32 x i8> from %a, applies llvm.abs.v32i8 (i1 operand
; false) and stores the result back through %a.
; Expected under the shared CHECK prefix: one `ptrue p0.b, vl16` shared by
; two predicated .b `abs` instructions, between an `ldp` and an `stp`.
746 define void @abs_v32i8(ptr %a) {
747 ; CHECK-LABEL: abs_v32i8:
749 ; CHECK-NEXT: ptrue p0.b, vl16
750 ; CHECK-NEXT: ldp q0, q1, [x0]
751 ; CHECK-NEXT: abs z0.b, p0/m, z0.b
752 ; CHECK-NEXT: abs z1.b, p0/m, z1.b
753 ; CHECK-NEXT: stp q0, q1, [x0]
755 %op1 = load <32 x i8>, ptr %a
756 %res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %op1, i1 false)
757 store <32 x i8> %res, ptr %a
; abs_v2i16: calls llvm.abs.v2i16 on a <2 x i16> argument with the i1
; operand set to false.
; Expected under the shared CHECK prefix: a `ptrue p0.s, vl2`, then a
; predicated `sxth` on .s lanes before the predicated .s `abs` (the v4i16
; variant has no such extend).
761 define <2 x i16> @abs_v2i16(<2 x i16> %op1) {
762 ; CHECK-LABEL: abs_v2i16:
764 ; CHECK-NEXT: ptrue p0.s, vl2
765 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
766 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s
767 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
768 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
770 %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %op1, i1 false)
; abs_v4i16: calls llvm.abs.v4i16 on a <4 x i16> argument with the i1
; operand set to false.
; Expected under the shared CHECK prefix: `ptrue p0.h, vl4` followed by a
; predicated .h `abs`.
774 define <4 x i16> @abs_v4i16(<4 x i16> %op1) {
775 ; CHECK-LABEL: abs_v4i16:
777 ; CHECK-NEXT: ptrue p0.h, vl4
778 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
779 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
780 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
782 %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %op1, i1 false)
; abs_v8i16: calls llvm.abs.v8i16 on a <8 x i16> argument with the i1
; operand set to false.
; Expected under the shared CHECK prefix: `ptrue p0.h, vl8` followed by a
; predicated .h `abs`.
786 define <8 x i16> @abs_v8i16(<8 x i16> %op1) {
787 ; CHECK-LABEL: abs_v8i16:
789 ; CHECK-NEXT: ptrue p0.h, vl8
790 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
791 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
792 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
794 %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %op1, i1 false)
; abs_v16i16: loads <16 x i16> from %a, applies llvm.abs.v16i16 (i1 operand
; false) and stores the result back through %a.
; Expected under the shared CHECK prefix: one `ptrue p0.h, vl8` shared by
; two predicated .h `abs` instructions, between an `ldp` and an `stp`.
798 define void @abs_v16i16(ptr %a) {
799 ; CHECK-LABEL: abs_v16i16:
801 ; CHECK-NEXT: ptrue p0.h, vl8
802 ; CHECK-NEXT: ldp q0, q1, [x0]
803 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
804 ; CHECK-NEXT: abs z1.h, p0/m, z1.h
805 ; CHECK-NEXT: stp q0, q1, [x0]
807 %op1 = load <16 x i16>, ptr %a
808 %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %op1, i1 false)
809 store <16 x i16> %res, ptr %a
; abs_v2i32: calls llvm.abs.v2i32 on a <2 x i32> argument with the i1
; operand set to false.
; Expected under the shared CHECK prefix: `ptrue p0.s, vl2` followed by a
; predicated .s `abs`.
813 define <2 x i32> @abs_v2i32(<2 x i32> %op1) {
814 ; CHECK-LABEL: abs_v2i32:
816 ; CHECK-NEXT: ptrue p0.s, vl2
817 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
818 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
819 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
821 %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false)
; abs_v4i32: calls llvm.abs.v4i32 on a <4 x i32> argument with the i1
; operand set to false.
; Expected under the shared CHECK prefix: `ptrue p0.s, vl4` followed by a
; predicated .s `abs`.
825 define <4 x i32> @abs_v4i32(<4 x i32> %op1) {
826 ; CHECK-LABEL: abs_v4i32:
828 ; CHECK-NEXT: ptrue p0.s, vl4
829 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
830 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
831 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
833 %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false)
; abs_v8i32: loads <8 x i32> from %a, applies llvm.abs.v8i32 (i1 operand
; false) and stores the result back through %a.
; Expected under the shared CHECK prefix: one `ptrue p0.s, vl4` shared by
; two predicated .s `abs` instructions, between an `ldp` and an `stp`.
837 define void @abs_v8i32(ptr %a) {
838 ; CHECK-LABEL: abs_v8i32:
840 ; CHECK-NEXT: ptrue p0.s, vl4
841 ; CHECK-NEXT: ldp q0, q1, [x0]
842 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
843 ; CHECK-NEXT: abs z1.s, p0/m, z1.s
844 ; CHECK-NEXT: stp q0, q1, [x0]
846 %op1 = load <8 x i32>, ptr %a
847 %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false)
848 store <8 x i32> %res, ptr %a
; abs_v1i64: calls llvm.abs.v1i64 on a single-element <1 x i64> argument
; with the i1 operand set to false.
; Expected under the shared CHECK prefix: `ptrue p0.d, vl1` followed by a
; predicated .d `abs`.
852 define <1 x i64> @abs_v1i64(<1 x i64> %op1) {
853 ; CHECK-LABEL: abs_v1i64:
855 ; CHECK-NEXT: ptrue p0.d, vl1
856 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
857 ; CHECK-NEXT: abs z0.d, p0/m, z0.d
858 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
860 %res = call <1 x i64> @llvm.abs.v1i64(<1 x i64> %op1, i1 false)
; abs_v2i64: calls llvm.abs.v2i64 on a <2 x i64> argument with the i1
; operand set to false.
; Expected under the shared CHECK prefix: `ptrue p0.d, vl2` followed by a
; predicated .d `abs`.
864 define <2 x i64> @abs_v2i64(<2 x i64> %op1) {
865 ; CHECK-LABEL: abs_v2i64:
867 ; CHECK-NEXT: ptrue p0.d, vl2
868 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
869 ; CHECK-NEXT: abs z0.d, p0/m, z0.d
870 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
872 %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false)
; abs_v4i64: loads <4 x i64> from %a, applies llvm.abs.v4i64 (i1 operand
; false) and stores the result back through %a.
; Expected under the shared CHECK prefix: one `ptrue p0.d, vl2` shared by
; two predicated .d `abs` instructions, between an `ldp` and an `stp`.
876 define void @abs_v4i64(ptr %a) {
877 ; CHECK-LABEL: abs_v4i64:
879 ; CHECK-NEXT: ptrue p0.d, vl2
880 ; CHECK-NEXT: ldp q0, q1, [x0]
881 ; CHECK-NEXT: abs z0.d, p0/m, z0.d
882 ; CHECK-NEXT: abs z1.d, p0/m, z1.d
883 ; CHECK-NEXT: stp q0, q1, [x0]
885 %op1 = load <4 x i64>, ptr %a
886 %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false)
887 store <4 x i64> %res, ptr %a
; Declarations of the llvm.abs intrinsic overloads called by the abs_* tests
; in this file, one per vector type exercised (i8, i16, i32 and i64 element
; types). Each takes the vector plus an i1 operand; every call site in this
; file passes `i1 false` for it.
891 declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
892 declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
893 declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
894 declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
895 declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
896 declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
897 declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
898 declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
899 declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
900 declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
901 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
902 declare <1 x i64> @llvm.abs.v1i64(<1 x i64>, i1)
903 declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
904 declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)