1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
6 ; Although SVE immediate packing should be fully tested using scalable vectors,
7 ; these tests protect against the possibility that scalable nodes, resulting
8 ; from lowering fixed length vector operations, trigger different isel patterns.
; add of a <64 x i8> vector loaded from %a with a splat of 7; the sum is stored back to %a.
; The expected assembly uses the immediate form of SVE `add` under a vl64 byte predicate.
14 define void @add_v64i8(ptr %a) #0 {
15 ; CHECK-LABEL: add_v64i8:
17 ; CHECK-NEXT: ptrue p0.b, vl64
18 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
19 ; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
20 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
22 %op1 = load <64 x i8>, ptr %a
23 %ins = insertelement <64 x i8> undef, i8 7, i64 0
24 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
25 %res = add <64 x i8> %op1, %op2
26 store <64 x i8> %res, ptr %a
; add of a <32 x i16> vector loaded from %a with a splat of 15; the sum is stored back to %a.
; The expected assembly uses the immediate form of SVE `add` under a vl32 halfword predicate.
30 define void @add_v32i16(ptr %a) #0 {
31 ; CHECK-LABEL: add_v32i16:
33 ; CHECK-NEXT: ptrue p0.h, vl32
34 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
35 ; CHECK-NEXT: add z0.h, z0.h, #15 // =0xf
36 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
38 %op1 = load <32 x i16>, ptr %a
39 %ins = insertelement <32 x i16> undef, i16 15, i64 0
40 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
41 %res = add <32 x i16> %op1, %op2
42 store <32 x i16> %res, ptr %a
; add of a <16 x i32> vector loaded from %a with a splat of 31; the sum is stored back to %a.
; The expected assembly uses the immediate form of SVE `add` under a vl16 word predicate.
46 define void @add_v16i32(ptr %a) #0 {
47 ; CHECK-LABEL: add_v16i32:
49 ; CHECK-NEXT: ptrue p0.s, vl16
50 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
51 ; CHECK-NEXT: add z0.s, z0.s, #31 // =0x1f
52 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
54 %op1 = load <16 x i32>, ptr %a
55 %ins = insertelement <16 x i32> undef, i32 31, i64 0
56 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
57 %res = add <16 x i32> %op1, %op2
58 store <16 x i32> %res, ptr %a
; add of an <8 x i64> vector loaded from %a with a splat of 63; the sum is stored back to %a.
; The expected assembly uses the immediate form of SVE `add` under a vl8 doubleword predicate.
62 define void @add_v8i64(ptr %a) #0 {
63 ; CHECK-LABEL: add_v8i64:
65 ; CHECK-NEXT: ptrue p0.d, vl8
66 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
67 ; CHECK-NEXT: add z0.d, z0.d, #63 // =0x3f
68 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
70 %op1 = load <8 x i64>, ptr %a
71 %ins = insertelement <8 x i64> undef, i64 63, i64 0
72 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
73 %res = add <8 x i64> %op1, %op2
74 store <8 x i64> %res, ptr %a
; Bitwise and of a <64 x i8> vector loaded from %a with a splat of 7; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `and` (#0x7).
82 define void @and_v64i8(ptr %a) #0 {
83 ; CHECK-LABEL: and_v64i8:
85 ; CHECK-NEXT: ptrue p0.b, vl64
86 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
87 ; CHECK-NEXT: and z0.b, z0.b, #0x7
88 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
90 %op1 = load <64 x i8>, ptr %a
91 %ins = insertelement <64 x i8> undef, i8 7, i64 0
92 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
93 %res = and <64 x i8> %op1, %op2
94 store <64 x i8> %res, ptr %a
; Bitwise and of a <32 x i16> vector loaded from %a with a splat of 15; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `and` (#0xf).
98 define void @and_v32i16(ptr %a) #0 {
99 ; CHECK-LABEL: and_v32i16:
101 ; CHECK-NEXT: ptrue p0.h, vl32
102 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
103 ; CHECK-NEXT: and z0.h, z0.h, #0xf
104 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
106 %op1 = load <32 x i16>, ptr %a
107 %ins = insertelement <32 x i16> undef, i16 15, i64 0
108 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
109 %res = and <32 x i16> %op1, %op2
110 store <32 x i16> %res, ptr %a
; Bitwise and of a <16 x i32> vector loaded from %a with a splat of 31; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `and` (#0x1f).
114 define void @and_v16i32(ptr %a) #0 {
115 ; CHECK-LABEL: and_v16i32:
117 ; CHECK-NEXT: ptrue p0.s, vl16
118 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
119 ; CHECK-NEXT: and z0.s, z0.s, #0x1f
120 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
122 %op1 = load <16 x i32>, ptr %a
123 %ins = insertelement <16 x i32> undef, i32 31, i64 0
124 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
125 %res = and <16 x i32> %op1, %op2
126 store <16 x i32> %res, ptr %a
; Bitwise and of an <8 x i64> vector loaded from %a with a splat of 63; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `and` (#0x3f).
130 define void @and_v8i64(ptr %a) #0 {
131 ; CHECK-LABEL: and_v8i64:
133 ; CHECK-NEXT: ptrue p0.d, vl8
134 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
135 ; CHECK-NEXT: and z0.d, z0.d, #0x3f
136 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
138 %op1 = load <8 x i64>, ptr %a
139 %ins = insertelement <8 x i64> undef, i64 63, i64 0
140 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
141 %res = and <8 x i64> %op1, %op2
142 store <8 x i64> %res, ptr %a
; Arithmetic shift right of a <64 x i8> vector loaded from %a by a splat of 7; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `asr` (#7).
150 define void @ashr_v64i8(ptr %a) #0 {
151 ; CHECK-LABEL: ashr_v64i8:
153 ; CHECK-NEXT: ptrue p0.b, vl64
154 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
155 ; CHECK-NEXT: asr z0.b, z0.b, #7
156 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
158 %op1 = load <64 x i8>, ptr %a
159 %ins = insertelement <64 x i8> undef, i8 7, i64 0
160 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
161 %res = ashr <64 x i8> %op1, %op2
162 store <64 x i8> %res, ptr %a
; Arithmetic shift right of a <32 x i16> vector loaded from %a by a splat of 15; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `asr` (#15).
166 define void @ashr_v32i16(ptr %a) #0 {
167 ; CHECK-LABEL: ashr_v32i16:
169 ; CHECK-NEXT: ptrue p0.h, vl32
170 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
171 ; CHECK-NEXT: asr z0.h, z0.h, #15
172 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
174 %op1 = load <32 x i16>, ptr %a
175 %ins = insertelement <32 x i16> undef, i16 15, i64 0
176 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
177 %res = ashr <32 x i16> %op1, %op2
178 store <32 x i16> %res, ptr %a
; Arithmetic shift right of a <16 x i32> vector loaded from %a by a splat of 31; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `asr` (#31).
182 define void @ashr_v16i32(ptr %a) #0 {
183 ; CHECK-LABEL: ashr_v16i32:
185 ; CHECK-NEXT: ptrue p0.s, vl16
186 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
187 ; CHECK-NEXT: asr z0.s, z0.s, #31
188 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
190 %op1 = load <16 x i32>, ptr %a
191 %ins = insertelement <16 x i32> undef, i32 31, i64 0
192 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
193 %res = ashr <16 x i32> %op1, %op2
194 store <16 x i32> %res, ptr %a
; Arithmetic shift right of an <8 x i64> vector loaded from %a by a splat of 63; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `asr` (#63).
198 define void @ashr_v8i64(ptr %a) #0 {
199 ; CHECK-LABEL: ashr_v8i64:
201 ; CHECK-NEXT: ptrue p0.d, vl8
202 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
203 ; CHECK-NEXT: asr z0.d, z0.d, #63
204 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
206 %op1 = load <8 x i64>, ptr %a
207 %ins = insertelement <8 x i64> undef, i64 63, i64 0
208 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
209 %res = ashr <8 x i64> %op1, %op2
210 store <8 x i64> %res, ptr %a
; icmp eq of a <64 x i8> vector loaded from %a against a splat of 7; the i1 mask is sign-extended
; to i8 lanes and stored back to %a. The expected assembly uses `cmpeq` with immediate #7,
; then a zeroing predicated move of -1 to materialise the mask.
218 define void @icmp_eq_v64i8(ptr %a) #0 {
219 ; CHECK-LABEL: icmp_eq_v64i8:
221 ; CHECK-NEXT: ptrue p0.b, vl64
222 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
223 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #7
224 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
225 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
227 %op1 = load <64 x i8>, ptr %a
228 %ins = insertelement <64 x i8> undef, i8 7, i64 0
229 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
230 %cmp = icmp eq <64 x i8> %op1, %op2
231 %res = sext <64 x i1> %cmp to <64 x i8>
232 store <64 x i8> %res, ptr %a
; icmp sge of a <32 x i16> vector loaded from %a against a splat of 15; the i1 mask is sign-extended
; to i16 lanes and stored back to %a. The expected assembly uses `cmpge` with immediate #15,
; then a zeroing predicated move of -1 to materialise the mask.
236 define void @icmp_sge_v32i16(ptr %a) #0 {
237 ; CHECK-LABEL: icmp_sge_v32i16:
239 ; CHECK-NEXT: ptrue p0.h, vl32
240 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
241 ; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, #15
242 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
243 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
245 %op1 = load <32 x i16>, ptr %a
246 %ins = insertelement <32 x i16> undef, i16 15, i64 0
247 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
248 %cmp = icmp sge <32 x i16> %op1, %op2
249 %res = sext <32 x i1> %cmp to <32 x i16>
250 store <32 x i16> %res, ptr %a
; icmp sgt of a <16 x i32> vector loaded from %a against a splat of -16 (a negative immediate);
; the i1 mask is sign-extended to i32 lanes and stored back to %a. The expected assembly uses
; `cmpgt` with immediate #-16, then a zeroing predicated move of -1 to materialise the mask.
254 define void @icmp_sgt_v16i32(ptr %a) #0 {
255 ; CHECK-LABEL: icmp_sgt_v16i32:
257 ; CHECK-NEXT: ptrue p0.s, vl16
258 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
259 ; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, #-16
260 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
261 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
263 %op1 = load <16 x i32>, ptr %a
264 %ins = insertelement <16 x i32> undef, i32 -16, i64 0
265 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
266 %cmp = icmp sgt <16 x i32> %op1, %op2
267 %res = sext <16 x i1> %cmp to <16 x i32>
268 store <16 x i32> %res, ptr %a
; icmp ult (unsigned) of an <8 x i64> vector loaded from %a against a splat of 63; the i1 mask is
; sign-extended to i64 lanes and stored back to %a. The expected assembly uses `cmplo` with
; immediate #63, then a zeroing predicated move of -1 to materialise the mask.
272 define void @icmp_ult_v8i64(ptr %a) #0 {
273 ; CHECK-LABEL: icmp_ult_v8i64:
275 ; CHECK-NEXT: ptrue p0.d, vl8
276 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
277 ; CHECK-NEXT: cmplo p1.d, p0/z, z0.d, #63
278 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
279 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
281 %op1 = load <8 x i64>, ptr %a
282 %ins = insertelement <8 x i64> undef, i64 63, i64 0
283 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
284 %cmp = icmp ult <8 x i64> %op1, %op2
285 %res = sext <8 x i1> %cmp to <8 x i64>
286 store <8 x i64> %res, ptr %a
; Logical shift right of a <64 x i8> vector loaded from %a by a splat of 7; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `lsr` (#7).
294 define void @lshr_v64i8(ptr %a) #0 {
295 ; CHECK-LABEL: lshr_v64i8:
297 ; CHECK-NEXT: ptrue p0.b, vl64
298 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
299 ; CHECK-NEXT: lsr z0.b, z0.b, #7
300 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
302 %op1 = load <64 x i8>, ptr %a
303 %ins = insertelement <64 x i8> undef, i8 7, i64 0
304 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
305 %res = lshr <64 x i8> %op1, %op2
306 store <64 x i8> %res, ptr %a
; Logical shift right of a <32 x i16> vector loaded from %a by a splat of 15; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `lsr` (#15).
310 define void @lshr_v32i16(ptr %a) #0 {
311 ; CHECK-LABEL: lshr_v32i16:
313 ; CHECK-NEXT: ptrue p0.h, vl32
314 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
315 ; CHECK-NEXT: lsr z0.h, z0.h, #15
316 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
318 %op1 = load <32 x i16>, ptr %a
319 %ins = insertelement <32 x i16> undef, i16 15, i64 0
320 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
321 %res = lshr <32 x i16> %op1, %op2
322 store <32 x i16> %res, ptr %a
; Logical shift right of a <16 x i32> vector loaded from %a by a splat of 31; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `lsr` (#31).
326 define void @lshr_v16i32(ptr %a) #0 {
327 ; CHECK-LABEL: lshr_v16i32:
329 ; CHECK-NEXT: ptrue p0.s, vl16
330 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
331 ; CHECK-NEXT: lsr z0.s, z0.s, #31
332 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
334 %op1 = load <16 x i32>, ptr %a
335 %ins = insertelement <16 x i32> undef, i32 31, i64 0
336 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
337 %res = lshr <16 x i32> %op1, %op2
338 store <16 x i32> %res, ptr %a
; Logical shift right of an <8 x i64> vector loaded from %a by a splat of 63; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `lsr` (#63).
342 define void @lshr_v8i64(ptr %a) #0 {
343 ; CHECK-LABEL: lshr_v8i64:
345 ; CHECK-NEXT: ptrue p0.d, vl8
346 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
347 ; CHECK-NEXT: lsr z0.d, z0.d, #63
348 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
350 %op1 = load <8 x i64>, ptr %a
351 %ins = insertelement <8 x i64> undef, i64 63, i64 0
352 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
353 %res = lshr <8 x i64> %op1, %op2
354 store <8 x i64> %res, ptr %a
; mul of a <64 x i8> vector loaded from %a by a splat of 7; the product is stored back to %a.
; The expected assembly uses the immediate form of SVE `mul` (#7).
362 define void @mul_v64i8(ptr %a) #0 {
363 ; CHECK-LABEL: mul_v64i8:
365 ; CHECK-NEXT: ptrue p0.b, vl64
366 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
367 ; CHECK-NEXT: mul z0.b, z0.b, #7
368 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
370 %op1 = load <64 x i8>, ptr %a
371 %ins = insertelement <64 x i8> undef, i8 7, i64 0
372 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
373 %res = mul <64 x i8> %op1, %op2
374 store <64 x i8> %res, ptr %a
; mul of a <32 x i16> vector loaded from %a by a splat of 15; the product is stored back to %a.
; The expected assembly uses the immediate form of SVE `mul` (#15).
378 define void @mul_v32i16(ptr %a) #0 {
379 ; CHECK-LABEL: mul_v32i16:
381 ; CHECK-NEXT: ptrue p0.h, vl32
382 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
383 ; CHECK-NEXT: mul z0.h, z0.h, #15
384 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
386 %op1 = load <32 x i16>, ptr %a
387 %ins = insertelement <32 x i16> undef, i16 15, i64 0
388 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
389 %res = mul <32 x i16> %op1, %op2
390 store <32 x i16> %res, ptr %a
; mul of a <16 x i32> vector loaded from %a by a splat of 31; the product is stored back to %a.
; The expected assembly uses the immediate form of SVE `mul` (#31).
394 define void @mul_v16i32(ptr %a) #0 {
395 ; CHECK-LABEL: mul_v16i32:
397 ; CHECK-NEXT: ptrue p0.s, vl16
398 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
399 ; CHECK-NEXT: mul z0.s, z0.s, #31
400 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
402 %op1 = load <16 x i32>, ptr %a
403 %ins = insertelement <16 x i32> undef, i32 31, i64 0
404 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
405 %res = mul <16 x i32> %op1, %op2
406 store <16 x i32> %res, ptr %a
; mul of an <8 x i64> vector loaded from %a by a splat of 63; the product is stored back to %a.
; The expected assembly uses the immediate form of SVE `mul` (#63).
410 define void @mul_v8i64(ptr %a) #0 {
411 ; CHECK-LABEL: mul_v8i64:
413 ; CHECK-NEXT: ptrue p0.d, vl8
414 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
415 ; CHECK-NEXT: mul z0.d, z0.d, #63
416 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
418 %op1 = load <8 x i64>, ptr %a
419 %ins = insertelement <8 x i64> undef, i64 63, i64 0
420 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
421 %res = mul <8 x i64> %op1, %op2
422 store <8 x i64> %res, ptr %a
; Bitwise or of a <64 x i8> vector loaded from %a with a splat of 7; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `orr` (#0x7).
430 define void @or_v64i8(ptr %a) #0 {
431 ; CHECK-LABEL: or_v64i8:
433 ; CHECK-NEXT: ptrue p0.b, vl64
434 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
435 ; CHECK-NEXT: orr z0.b, z0.b, #0x7
436 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
438 %op1 = load <64 x i8>, ptr %a
439 %ins = insertelement <64 x i8> undef, i8 7, i64 0
440 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
441 %res = or <64 x i8> %op1, %op2
442 store <64 x i8> %res, ptr %a
; Bitwise or of a <32 x i16> vector loaded from %a with a splat of 15; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `orr` (#0xf).
446 define void @or_v32i16(ptr %a) #0 {
447 ; CHECK-LABEL: or_v32i16:
449 ; CHECK-NEXT: ptrue p0.h, vl32
450 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
451 ; CHECK-NEXT: orr z0.h, z0.h, #0xf
452 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
454 %op1 = load <32 x i16>, ptr %a
455 %ins = insertelement <32 x i16> undef, i16 15, i64 0
456 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
457 %res = or <32 x i16> %op1, %op2
458 store <32 x i16> %res, ptr %a
; Bitwise or of a <16 x i32> vector loaded from %a with a splat of 31; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `orr` (#0x1f).
462 define void @or_v16i32(ptr %a) #0 {
463 ; CHECK-LABEL: or_v16i32:
465 ; CHECK-NEXT: ptrue p0.s, vl16
466 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
467 ; CHECK-NEXT: orr z0.s, z0.s, #0x1f
468 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
470 %op1 = load <16 x i32>, ptr %a
471 %ins = insertelement <16 x i32> undef, i32 31, i64 0
472 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
473 %res = or <16 x i32> %op1, %op2
474 store <16 x i32> %res, ptr %a
; Bitwise or of an <8 x i64> vector loaded from %a with a splat of 63; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `orr` (#0x3f).
478 define void @or_v8i64(ptr %a) #0 {
479 ; CHECK-LABEL: or_v8i64:
481 ; CHECK-NEXT: ptrue p0.d, vl8
482 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
483 ; CHECK-NEXT: orr z0.d, z0.d, #0x3f
484 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
486 %op1 = load <8 x i64>, ptr %a
487 %ins = insertelement <8 x i64> undef, i64 63, i64 0
488 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
489 %res = or <8 x i64> %op1, %op2
490 store <8 x i64> %res, ptr %a
; Shift left of a <64 x i8> vector loaded from %a by a splat of 7; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `lsl` (#7).
498 define void @shl_v64i8(ptr %a) #0 {
499 ; CHECK-LABEL: shl_v64i8:
501 ; CHECK-NEXT: ptrue p0.b, vl64
502 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
503 ; CHECK-NEXT: lsl z0.b, z0.b, #7
504 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
506 %op1 = load <64 x i8>, ptr %a
507 %ins = insertelement <64 x i8> undef, i8 7, i64 0
508 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
509 %res = shl <64 x i8> %op1, %op2
510 store <64 x i8> %res, ptr %a
; Shift left of a <32 x i16> vector loaded from %a by a splat of 15; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `lsl` (#15).
514 define void @shl_v32i16(ptr %a) #0 {
515 ; CHECK-LABEL: shl_v32i16:
517 ; CHECK-NEXT: ptrue p0.h, vl32
518 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
519 ; CHECK-NEXT: lsl z0.h, z0.h, #15
520 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
522 %op1 = load <32 x i16>, ptr %a
523 %ins = insertelement <32 x i16> undef, i16 15, i64 0
524 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
525 %res = shl <32 x i16> %op1, %op2
526 store <32 x i16> %res, ptr %a
; Shift left of a <16 x i32> vector loaded from %a by a splat of 31; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `lsl` (#31).
530 define void @shl_v16i32(ptr %a) #0 {
531 ; CHECK-LABEL: shl_v16i32:
533 ; CHECK-NEXT: ptrue p0.s, vl16
534 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
535 ; CHECK-NEXT: lsl z0.s, z0.s, #31
536 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
538 %op1 = load <16 x i32>, ptr %a
539 %ins = insertelement <16 x i32> undef, i32 31, i64 0
540 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
541 %res = shl <16 x i32> %op1, %op2
542 store <16 x i32> %res, ptr %a
; Shift left of an <8 x i64> vector loaded from %a by a splat of 63; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `lsl` (#63).
546 define void @shl_v8i64(ptr %a) #0 {
547 ; CHECK-LABEL: shl_v8i64:
549 ; CHECK-NEXT: ptrue p0.d, vl8
550 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
551 ; CHECK-NEXT: lsl z0.d, z0.d, #63
552 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
554 %op1 = load <8 x i64>, ptr %a
555 %ins = insertelement <8 x i64> undef, i64 63, i64 0
556 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
557 %res = shl <8 x i64> %op1, %op2
558 store <8 x i64> %res, ptr %a
; llvm.smax of a <64 x i8> vector loaded from %a and a splat of 7; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `smax` (#7).
566 define void @smax_v64i8(ptr %a) #0 {
567 ; CHECK-LABEL: smax_v64i8:
569 ; CHECK-NEXT: ptrue p0.b, vl64
570 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
571 ; CHECK-NEXT: smax z0.b, z0.b, #7
572 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
574 %op1 = load <64 x i8>, ptr %a
575 %ins = insertelement <64 x i8> undef, i8 7, i64 0
576 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
577 %res = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
578 store <64 x i8> %res, ptr %a
; llvm.smax of a <32 x i16> vector loaded from %a and a splat of 15; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `smax` (#15).
582 define void @smax_v32i16(ptr %a) #0 {
583 ; CHECK-LABEL: smax_v32i16:
585 ; CHECK-NEXT: ptrue p0.h, vl32
586 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
587 ; CHECK-NEXT: smax z0.h, z0.h, #15
588 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
590 %op1 = load <32 x i16>, ptr %a
591 %ins = insertelement <32 x i16> undef, i16 15, i64 0
592 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
593 %res = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
594 store <32 x i16> %res, ptr %a
; llvm.smax of a <16 x i32> vector loaded from %a and a splat of 31; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `smax` (#31).
598 define void @smax_v16i32(ptr %a) #0 {
599 ; CHECK-LABEL: smax_v16i32:
601 ; CHECK-NEXT: ptrue p0.s, vl16
602 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
603 ; CHECK-NEXT: smax z0.s, z0.s, #31
604 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
606 %op1 = load <16 x i32>, ptr %a
607 %ins = insertelement <16 x i32> undef, i32 31, i64 0
608 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
609 %res = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
610 store <16 x i32> %res, ptr %a
; llvm.smax of an <8 x i64> vector loaded from %a and a splat of 63; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `smax` (#63).
614 define void @smax_v8i64(ptr %a) #0 {
615 ; CHECK-LABEL: smax_v8i64:
617 ; CHECK-NEXT: ptrue p0.d, vl8
618 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
619 ; CHECK-NEXT: smax z0.d, z0.d, #63
620 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
622 %op1 = load <8 x i64>, ptr %a
623 %ins = insertelement <8 x i64> undef, i64 63, i64 0
624 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
625 %res = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
626 store <8 x i64> %res, ptr %a
; llvm.smin of a <64 x i8> vector loaded from %a and a splat of 7; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `smin` (#7).
634 define void @smin_v64i8(ptr %a) #0 {
635 ; CHECK-LABEL: smin_v64i8:
637 ; CHECK-NEXT: ptrue p0.b, vl64
638 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
639 ; CHECK-NEXT: smin z0.b, z0.b, #7
640 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
642 %op1 = load <64 x i8>, ptr %a
643 %ins = insertelement <64 x i8> undef, i8 7, i64 0
644 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
645 %res = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
646 store <64 x i8> %res, ptr %a
; llvm.smin of a <32 x i16> vector loaded from %a and a splat of 15; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `smin` (#15).
650 define void @smin_v32i16(ptr %a) #0 {
651 ; CHECK-LABEL: smin_v32i16:
653 ; CHECK-NEXT: ptrue p0.h, vl32
654 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
655 ; CHECK-NEXT: smin z0.h, z0.h, #15
656 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
658 %op1 = load <32 x i16>, ptr %a
659 %ins = insertelement <32 x i16> undef, i16 15, i64 0
660 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
661 %res = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
662 store <32 x i16> %res, ptr %a
; llvm.smin of a <16 x i32> vector loaded from %a and a splat of 31; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `smin` (#31).
666 define void @smin_v16i32(ptr %a) #0 {
667 ; CHECK-LABEL: smin_v16i32:
669 ; CHECK-NEXT: ptrue p0.s, vl16
670 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
671 ; CHECK-NEXT: smin z0.s, z0.s, #31
672 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
674 %op1 = load <16 x i32>, ptr %a
675 %ins = insertelement <16 x i32> undef, i32 31, i64 0
676 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
677 %res = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
678 store <16 x i32> %res, ptr %a
; llvm.smin of an <8 x i64> vector loaded from %a and a splat of 63; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `smin` (#63).
682 define void @smin_v8i64(ptr %a) #0 {
683 ; CHECK-LABEL: smin_v8i64:
685 ; CHECK-NEXT: ptrue p0.d, vl8
686 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
687 ; CHECK-NEXT: smin z0.d, z0.d, #63
688 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
690 %op1 = load <8 x i64>, ptr %a
691 %ins = insertelement <8 x i64> undef, i64 63, i64 0
692 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
693 %res = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
694 store <8 x i64> %res, ptr %a
; sub of a splat of 7 from a <64 x i8> vector loaded from %a; the difference is stored back to %a.
; The expected assembly uses the immediate form of SVE `sub` (#7).
702 define void @sub_v64i8(ptr %a) #0 {
703 ; CHECK-LABEL: sub_v64i8:
705 ; CHECK-NEXT: ptrue p0.b, vl64
706 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
707 ; CHECK-NEXT: sub z0.b, z0.b, #7 // =0x7
708 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
710 %op1 = load <64 x i8>, ptr %a
711 %ins = insertelement <64 x i8> undef, i8 7, i64 0
712 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
713 %res = sub <64 x i8> %op1, %op2
714 store <64 x i8> %res, ptr %a
; sub of a splat of 15 from a <32 x i16> vector loaded from %a; the difference is stored back to %a.
; The expected assembly uses the immediate form of SVE `sub` (#15).
718 define void @sub_v32i16(ptr %a) #0 {
719 ; CHECK-LABEL: sub_v32i16:
721 ; CHECK-NEXT: ptrue p0.h, vl32
722 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
723 ; CHECK-NEXT: sub z0.h, z0.h, #15 // =0xf
724 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
726 %op1 = load <32 x i16>, ptr %a
727 %ins = insertelement <32 x i16> undef, i16 15, i64 0
728 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
729 %res = sub <32 x i16> %op1, %op2
730 store <32 x i16> %res, ptr %a
; sub of a splat of 31 from a <16 x i32> vector loaded from %a; the difference is stored back to %a.
; The expected assembly uses the immediate form of SVE `sub` (#31).
734 define void @sub_v16i32(ptr %a) #0 {
735 ; CHECK-LABEL: sub_v16i32:
737 ; CHECK-NEXT: ptrue p0.s, vl16
738 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
739 ; CHECK-NEXT: sub z0.s, z0.s, #31 // =0x1f
740 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
742 %op1 = load <16 x i32>, ptr %a
743 %ins = insertelement <16 x i32> undef, i32 31, i64 0
744 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
745 %res = sub <16 x i32> %op1, %op2
746 store <16 x i32> %res, ptr %a
; sub of a splat of 63 from an <8 x i64> vector loaded from %a; the difference is stored back to %a.
; The expected assembly uses the immediate form of SVE `sub` (#63).
750 define void @sub_v8i64(ptr %a) #0 {
751 ; CHECK-LABEL: sub_v8i64:
753 ; CHECK-NEXT: ptrue p0.d, vl8
754 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
755 ; CHECK-NEXT: sub z0.d, z0.d, #63 // =0x3f
756 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
758 %op1 = load <8 x i64>, ptr %a
759 %ins = insertelement <8 x i64> undef, i64 63, i64 0
760 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
761 %res = sub <8 x i64> %op1, %op2
762 store <8 x i64> %res, ptr %a
; llvm.umax of a <64 x i8> vector loaded from %a and a splat of 7; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `umax` (#7).
770 define void @umax_v64i8(ptr %a) #0 {
771 ; CHECK-LABEL: umax_v64i8:
773 ; CHECK-NEXT: ptrue p0.b, vl64
774 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
775 ; CHECK-NEXT: umax z0.b, z0.b, #7
776 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
778 %op1 = load <64 x i8>, ptr %a
779 %ins = insertelement <64 x i8> undef, i8 7, i64 0
780 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
781 %res = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
782 store <64 x i8> %res, ptr %a
; llvm.umax of a <32 x i16> vector loaded from %a and a splat of 15; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `umax` (#15).
786 define void @umax_v32i16(ptr %a) #0 {
787 ; CHECK-LABEL: umax_v32i16:
789 ; CHECK-NEXT: ptrue p0.h, vl32
790 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
791 ; CHECK-NEXT: umax z0.h, z0.h, #15
792 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
794 %op1 = load <32 x i16>, ptr %a
795 %ins = insertelement <32 x i16> undef, i16 15, i64 0
796 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
797 %res = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
798 store <32 x i16> %res, ptr %a
; llvm.umax of a <16 x i32> vector loaded from %a and a splat of 31; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `umax` (#31).
802 define void @umax_v16i32(ptr %a) #0 {
803 ; CHECK-LABEL: umax_v16i32:
805 ; CHECK-NEXT: ptrue p0.s, vl16
806 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
807 ; CHECK-NEXT: umax z0.s, z0.s, #31
808 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
810 %op1 = load <16 x i32>, ptr %a
811 %ins = insertelement <16 x i32> undef, i32 31, i64 0
812 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
813 %res = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
814 store <16 x i32> %res, ptr %a
; llvm.umax of an <8 x i64> vector loaded from %a and a splat of 63; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `umax` (#63).
818 define void @umax_v8i64(ptr %a) #0 {
819 ; CHECK-LABEL: umax_v8i64:
821 ; CHECK-NEXT: ptrue p0.d, vl8
822 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
823 ; CHECK-NEXT: umax z0.d, z0.d, #63
824 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
826 %op1 = load <8 x i64>, ptr %a
827 %ins = insertelement <8 x i64> undef, i64 63, i64 0
828 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
829 %res = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
830 store <8 x i64> %res, ptr %a
; llvm.umin of a <64 x i8> vector loaded from %a and a splat of 7; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `umin` (#7).
838 define void @umin_v64i8(ptr %a) #0 {
839 ; CHECK-LABEL: umin_v64i8:
841 ; CHECK-NEXT: ptrue p0.b, vl64
842 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
843 ; CHECK-NEXT: umin z0.b, z0.b, #7
844 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
846 %op1 = load <64 x i8>, ptr %a
847 %ins = insertelement <64 x i8> undef, i8 7, i64 0
848 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
849 %res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
850 store <64 x i8> %res, ptr %a
; llvm.umin of a <32 x i16> vector loaded from %a and a splat of 15; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `umin` (#15).
854 define void @umin_v32i16(ptr %a) #0 {
855 ; CHECK-LABEL: umin_v32i16:
857 ; CHECK-NEXT: ptrue p0.h, vl32
858 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
859 ; CHECK-NEXT: umin z0.h, z0.h, #15
860 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
862 %op1 = load <32 x i16>, ptr %a
863 %ins = insertelement <32 x i16> undef, i16 15, i64 0
864 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
865 %res = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
866 store <32 x i16> %res, ptr %a
; llvm.umin of a <16 x i32> vector loaded from %a and a splat of 31; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `umin` (#31).
870 define void @umin_v16i32(ptr %a) #0 {
871 ; CHECK-LABEL: umin_v16i32:
873 ; CHECK-NEXT: ptrue p0.s, vl16
874 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
875 ; CHECK-NEXT: umin z0.s, z0.s, #31
876 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
878 %op1 = load <16 x i32>, ptr %a
879 %ins = insertelement <16 x i32> undef, i32 31, i64 0
880 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
881 %res = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
882 store <16 x i32> %res, ptr %a
; llvm.umin of an <8 x i64> vector loaded from %a and a splat of 63; the result is stored back to %a.
; The expected assembly uses the immediate form of SVE `umin` (#63).
886 define void @umin_v8i64(ptr %a) #0 {
887 ; CHECK-LABEL: umin_v8i64:
889 ; CHECK-NEXT: ptrue p0.d, vl8
890 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
891 ; CHECK-NEXT: umin z0.d, z0.d, #63
892 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
894 %op1 = load <8 x i64>, ptr %a
895 %ins = insertelement <8 x i64> undef, i64 63, i64 0
896 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
897 %res = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
898 store <8 x i64> %res, ptr %a
; Bitwise xor of a <64 x i8> vector loaded from %a with a splat of 7; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `eor` (#0x7).
906 define void @xor_v64i8(ptr %a) #0 {
907 ; CHECK-LABEL: xor_v64i8:
909 ; CHECK-NEXT: ptrue p0.b, vl64
910 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
911 ; CHECK-NEXT: eor z0.b, z0.b, #0x7
912 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
914 %op1 = load <64 x i8>, ptr %a
915 %ins = insertelement <64 x i8> undef, i8 7, i64 0
916 %op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
917 %res = xor <64 x i8> %op1, %op2
918 store <64 x i8> %res, ptr %a
; Bitwise xor of a <32 x i16> vector loaded from %a with a splat of 15; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `eor` (#0xf).
922 define void @xor_v32i16(ptr %a) #0 {
923 ; CHECK-LABEL: xor_v32i16:
925 ; CHECK-NEXT: ptrue p0.h, vl32
926 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
927 ; CHECK-NEXT: eor z0.h, z0.h, #0xf
928 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
930 %op1 = load <32 x i16>, ptr %a
931 %ins = insertelement <32 x i16> undef, i16 15, i64 0
932 %op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
933 %res = xor <32 x i16> %op1, %op2
934 store <32 x i16> %res, ptr %a
; Bitwise xor of a <16 x i32> vector loaded from %a with a splat of 31; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `eor` (#0x1f).
938 define void @xor_v16i32(ptr %a) #0 {
939 ; CHECK-LABEL: xor_v16i32:
941 ; CHECK-NEXT: ptrue p0.s, vl16
942 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
943 ; CHECK-NEXT: eor z0.s, z0.s, #0x1f
944 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
946 %op1 = load <16 x i32>, ptr %a
947 %ins = insertelement <16 x i32> undef, i32 31, i64 0
948 %op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
949 %res = xor <16 x i32> %op1, %op2
950 store <16 x i32> %res, ptr %a
; Bitwise xor of an <8 x i64> vector loaded from %a with a splat of 63; the result is stored back to %a.
; The expected assembly uses the bitmask-immediate form of SVE `eor` (#0x3f).
954 define void @xor_v8i64(ptr %a) #0 {
955 ; CHECK-LABEL: xor_v8i64:
957 ; CHECK-NEXT: ptrue p0.d, vl8
958 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
959 ; CHECK-NEXT: eor z0.d, z0.d, #0x3f
960 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
962 %op1 = load <8 x i64>, ptr %a
963 %ins = insertelement <8 x i64> undef, i64 63, i64 0
964 %op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
965 %res = xor <8 x i64> %op1, %op2
966 store <8 x i64> %res, ptr %a
; Declarations of the llvm.smax / llvm.smin / llvm.umax / llvm.umin vector intrinsics
; called by the smax_*, smin_*, umax_* and umin_* tests, one per element width
; (v64i8, v32i16, v16i32, v8i64).
970 declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>)
971 declare <32 x i16> @llvm.smax.v32i16(<32 x i16>, <32 x i16>)
972 declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
973 declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>)
975 declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>)
976 declare <32 x i16> @llvm.smin.v32i16(<32 x i16>, <32 x i16>)
977 declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
978 declare <8 x i64> @llvm.smin.v8i64(<8 x i64>, <8 x i64>)
980 declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>)
981 declare <32 x i16> @llvm.umax.v32i16(<32 x i16>, <32 x i16>)
982 declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
983 declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>)
985 declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>)
986 declare <32 x i16> @llvm.umin.v32i16(<32 x i16>, <32 x i16>)
987 declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
988 declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>)
; Attribute group #0, attached to every test function in this file: enables the SVE target feature.
990 attributes #0 = { "target-features"="+sve" }