1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s
5 target triple = "aarch64-unknown-linux-gnu"
7 ; Although SVE immediate packing should be fully tested using scalable vectors,
8 ; these tests protect against the possibility that scalable nodes, resulting
9 ; from lowering fixed length vector operations, trigger different isel patterns.
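11 ; NOTE(review): the FIXME that stood here (these instructions should have the immediate form) looks stale, as every expected sequence below already uses the immediate form; confirm.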
; add of a <32 x i8> loaded from %a with a splat of 7 (7 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "add z.b, z.b, #7" on both loaded registers.
17 define void @add_v32i8(ptr %a) {
18 ; CHECK-LABEL: add_v32i8:
20 ; CHECK-NEXT: ldp q0, q1, [x0]
21 ; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
22 ; CHECK-NEXT: add z1.b, z1.b, #7 // =0x7
23 ; CHECK-NEXT: stp q0, q1, [x0]
25 %op1 = load <32 x i8>, ptr %a
26 %ins = insertelement <32 x i8> undef, i8 7, i64 0
27 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
28 %res = add <32 x i8> %op1, %op2
29 store <32 x i8> %res, ptr %a
; add of a <16 x i16> loaded from %a with a splat of 15 (15 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "add z.h, z.h, #15" on both loaded registers.
33 define void @add_v16i16(ptr %a) {
34 ; CHECK-LABEL: add_v16i16:
36 ; CHECK-NEXT: ldp q0, q1, [x0]
37 ; CHECK-NEXT: add z0.h, z0.h, #15 // =0xf
38 ; CHECK-NEXT: add z1.h, z1.h, #15 // =0xf
39 ; CHECK-NEXT: stp q0, q1, [x0]
41 %op1 = load <16 x i16>, ptr %a
42 %ins = insertelement <16 x i16> undef, i16 15, i64 0
43 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
44 %res = add <16 x i16> %op1, %op2
45 store <16 x i16> %res, ptr %a
; add of a <8 x i32> loaded from %a with a splat of 31 (31 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "add z.s, z.s, #31" on both loaded registers.
49 define void @add_v8i32(ptr %a) {
50 ; CHECK-LABEL: add_v8i32:
52 ; CHECK-NEXT: ldp q0, q1, [x0]
53 ; CHECK-NEXT: add z0.s, z0.s, #31 // =0x1f
54 ; CHECK-NEXT: add z1.s, z1.s, #31 // =0x1f
55 ; CHECK-NEXT: stp q0, q1, [x0]
57 %op1 = load <8 x i32>, ptr %a
58 %ins = insertelement <8 x i32> undef, i32 31, i64 0
59 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
60 %res = add <8 x i32> %op1, %op2
61 store <8 x i32> %res, ptr %a
; add of a <4 x i64> loaded from %a with a splat of 63 (63 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "add z.d, z.d, #63" on both loaded registers.
65 define void @add_v4i64(ptr %a) {
66 ; CHECK-LABEL: add_v4i64:
68 ; CHECK-NEXT: ldp q0, q1, [x0]
69 ; CHECK-NEXT: add z0.d, z0.d, #63 // =0x3f
70 ; CHECK-NEXT: add z1.d, z1.d, #63 // =0x3f
71 ; CHECK-NEXT: stp q0, q1, [x0]
73 %op1 = load <4 x i64>, ptr %a
74 %ins = insertelement <4 x i64> undef, i64 63, i64 0
75 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
76 %res = add <4 x i64> %op1, %op2
77 store <4 x i64> %res, ptr %a
; Bitwise and of a <32 x i8> loaded from %a with a splat of 7 (7 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "and z.b, z.b, #0x7" on both loaded registers.
85 define void @and_v32i8(ptr %a) {
86 ; CHECK-LABEL: and_v32i8:
88 ; CHECK-NEXT: ldp q0, q1, [x0]
89 ; CHECK-NEXT: and z0.b, z0.b, #0x7
90 ; CHECK-NEXT: and z1.b, z1.b, #0x7
91 ; CHECK-NEXT: stp q0, q1, [x0]
93 %op1 = load <32 x i8>, ptr %a
94 %ins = insertelement <32 x i8> undef, i8 7, i64 0
95 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
96 %res = and <32 x i8> %op1, %op2
97 store <32 x i8> %res, ptr %a
; Bitwise and of a <16 x i16> loaded from %a with a splat of 15 (15 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "and z.h, z.h, #0xf" on both loaded registers.
101 define void @and_v16i16(ptr %a) {
102 ; CHECK-LABEL: and_v16i16:
104 ; CHECK-NEXT: ldp q0, q1, [x0]
105 ; CHECK-NEXT: and z0.h, z0.h, #0xf
106 ; CHECK-NEXT: and z1.h, z1.h, #0xf
107 ; CHECK-NEXT: stp q0, q1, [x0]
109 %op1 = load <16 x i16>, ptr %a
110 %ins = insertelement <16 x i16> undef, i16 15, i64 0
111 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
112 %res = and <16 x i16> %op1, %op2
113 store <16 x i16> %res, ptr %a
; Bitwise and of a <8 x i32> loaded from %a with a splat of 31 (31 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "and z.s, z.s, #0x1f" on both loaded registers.
117 define void @and_v8i32(ptr %a) {
118 ; CHECK-LABEL: and_v8i32:
120 ; CHECK-NEXT: ldp q0, q1, [x0]
121 ; CHECK-NEXT: and z0.s, z0.s, #0x1f
122 ; CHECK-NEXT: and z1.s, z1.s, #0x1f
123 ; CHECK-NEXT: stp q0, q1, [x0]
125 %op1 = load <8 x i32>, ptr %a
126 %ins = insertelement <8 x i32> undef, i32 31, i64 0
127 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
128 %res = and <8 x i32> %op1, %op2
129 store <8 x i32> %res, ptr %a
; Bitwise and of a <4 x i64> loaded from %a with a splat of 63 (63 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "and z.d, z.d, #0x3f" on both loaded registers.
133 define void @and_v4i64(ptr %a) {
134 ; CHECK-LABEL: and_v4i64:
136 ; CHECK-NEXT: ldp q0, q1, [x0]
137 ; CHECK-NEXT: and z0.d, z0.d, #0x3f
138 ; CHECK-NEXT: and z1.d, z1.d, #0x3f
139 ; CHECK-NEXT: stp q0, q1, [x0]
141 %op1 = load <4 x i64>, ptr %a
142 %ins = insertelement <4 x i64> undef, i64 63, i64 0
143 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
144 %res = and <4 x i64> %op1, %op2
145 store <4 x i64> %res, ptr %a
; Arithmetic shift right of a <32 x i8> loaded from %a by a splat of 7 (7 inserted
; into lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "asr z.b, z.b, #7" on both loaded registers.
153 define void @ashr_v32i8(ptr %a) {
154 ; CHECK-LABEL: ashr_v32i8:
156 ; CHECK-NEXT: ldp q0, q1, [x0]
157 ; CHECK-NEXT: asr z0.b, z0.b, #7
158 ; CHECK-NEXT: asr z1.b, z1.b, #7
159 ; CHECK-NEXT: stp q0, q1, [x0]
161 %op1 = load <32 x i8>, ptr %a
162 %ins = insertelement <32 x i8> undef, i8 7, i64 0
163 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
164 %res = ashr <32 x i8> %op1, %op2
165 store <32 x i8> %res, ptr %a
; Arithmetic shift right of a <16 x i16> loaded from %a by a splat of 15 (15 inserted
; into lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "asr z.h, z.h, #15" on both loaded registers.
169 define void @ashr_v16i16(ptr %a) {
170 ; CHECK-LABEL: ashr_v16i16:
172 ; CHECK-NEXT: ldp q0, q1, [x0]
173 ; CHECK-NEXT: asr z0.h, z0.h, #15
174 ; CHECK-NEXT: asr z1.h, z1.h, #15
175 ; CHECK-NEXT: stp q0, q1, [x0]
177 %op1 = load <16 x i16>, ptr %a
178 %ins = insertelement <16 x i16> undef, i16 15, i64 0
179 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
180 %res = ashr <16 x i16> %op1, %op2
181 store <16 x i16> %res, ptr %a
; Arithmetic shift right of a <8 x i32> loaded from %a by a splat of 31 (31 inserted
; into lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "asr z.s, z.s, #31" on both loaded registers.
185 define void @ashr_v8i32(ptr %a) {
186 ; CHECK-LABEL: ashr_v8i32:
188 ; CHECK-NEXT: ldp q0, q1, [x0]
189 ; CHECK-NEXT: asr z0.s, z0.s, #31
190 ; CHECK-NEXT: asr z1.s, z1.s, #31
191 ; CHECK-NEXT: stp q0, q1, [x0]
193 %op1 = load <8 x i32>, ptr %a
194 %ins = insertelement <8 x i32> undef, i32 31, i64 0
195 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
196 %res = ashr <8 x i32> %op1, %op2
197 store <8 x i32> %res, ptr %a
; Arithmetic shift right of a <4 x i64> loaded from %a by a splat of 63 (63 inserted
; into lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "asr z.d, z.d, #63" on both loaded registers.
201 define void @ashr_v4i64(ptr %a) {
202 ; CHECK-LABEL: ashr_v4i64:
204 ; CHECK-NEXT: ldp q0, q1, [x0]
205 ; CHECK-NEXT: asr z0.d, z0.d, #63
206 ; CHECK-NEXT: asr z1.d, z1.d, #63
207 ; CHECK-NEXT: stp q0, q1, [x0]
209 %op1 = load <4 x i64>, ptr %a
210 %ins = insertelement <4 x i64> undef, i64 63, i64 0
211 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
212 %res = ashr <4 x i64> %op1, %op2
213 store <4 x i64> %res, ptr %a
; Compares a <32 x i8> loaded from %a against a splat of 7 with icmp eq,
; sign-extends the <32 x i1> result to <32 x i8> and stores it back to %a.
; Expected output uses the immediate-form compare "cmpeq ..., #7" for each loaded
; register, followed by a p/z-predicated "mov ..., #-1" that produces the stored value.
221 define void @icmp_eq_v32i8(ptr %a) {
222 ; CHECK-LABEL: icmp_eq_v32i8:
224 ; CHECK-NEXT: ptrue p0.b, vl16
225 ; CHECK-NEXT: ldp q0, q1, [x0]
226 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #7
227 ; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, #7
228 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
229 ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
230 ; CHECK-NEXT: stp q0, q1, [x0]
232 %op1 = load <32 x i8>, ptr %a
233 %ins = insertelement <32 x i8> undef, i8 7, i64 0
234 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
235 %cmp = icmp eq <32 x i8> %op1, %op2
236 %res = sext <32 x i1> %cmp to <32 x i8>
237 store <32 x i8> %res, ptr %a
; Compares a <16 x i16> loaded from %a against a splat of 15 with icmp sge,
; sign-extends the <16 x i1> result to <16 x i16> and stores it back to %a.
; Expected output uses the immediate-form compare "cmpge ..., #15" for each loaded
; register, followed by a p/z-predicated "mov ..., #-1" that produces the stored value.
241 define void @icmp_sge_v16i16(ptr %a) {
242 ; CHECK-LABEL: icmp_sge_v16i16:
244 ; CHECK-NEXT: ptrue p0.h, vl8
245 ; CHECK-NEXT: ldp q0, q1, [x0]
246 ; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, #15
247 ; CHECK-NEXT: cmpge p0.h, p0/z, z1.h, #15
248 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
249 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
250 ; CHECK-NEXT: stp q0, q1, [x0]
252 %op1 = load <16 x i16>, ptr %a
253 %ins = insertelement <16 x i16> undef, i16 15, i64 0
254 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
255 %cmp = icmp sge <16 x i16> %op1, %op2
256 %res = sext <16 x i1> %cmp to <16 x i16>
257 store <16 x i16> %res, ptr %a
; Compares a <8 x i32> loaded from %a against a splat of -8 with icmp sgt,
; sign-extends the <8 x i1> result to <8 x i32> and stores it back to %a.
; Expected output uses the immediate-form compare "cmpgt ..., #-8" for each loaded
; register, followed by a p/z-predicated "mov ..., #-1" that produces the stored value.
261 define void @icmp_sgt_v8i32(ptr %a) {
262 ; CHECK-LABEL: icmp_sgt_v8i32:
264 ; CHECK-NEXT: ptrue p0.s, vl4
265 ; CHECK-NEXT: ldp q0, q1, [x0]
266 ; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, #-8
267 ; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, #-8
268 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
269 ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
270 ; CHECK-NEXT: stp q0, q1, [x0]
272 %op1 = load <8 x i32>, ptr %a
273 %ins = insertelement <8 x i32> undef, i32 -8, i64 0
274 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
275 %cmp = icmp sgt <8 x i32> %op1, %op2
276 %res = sext <8 x i1> %cmp to <8 x i32>
277 store <8 x i32> %res, ptr %a
; Compares a <4 x i64> loaded from %a against a splat of 63 with icmp ult,
; sign-extends the <4 x i1> result to <4 x i64> and stores it back to %a.
; Expected output uses the immediate-form compare "cmplo ..., #63" for each loaded
; register, followed by a p/z-predicated "mov ..., #-1" that produces the stored value.
281 define void @icmp_ult_v4i64(ptr %a) {
282 ; CHECK-LABEL: icmp_ult_v4i64:
284 ; CHECK-NEXT: ptrue p0.d, vl2
285 ; CHECK-NEXT: ldp q0, q1, [x0]
286 ; CHECK-NEXT: cmplo p1.d, p0/z, z0.d, #63
287 ; CHECK-NEXT: cmplo p0.d, p0/z, z1.d, #63
288 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
289 ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
290 ; CHECK-NEXT: stp q0, q1, [x0]
292 %op1 = load <4 x i64>, ptr %a
293 %ins = insertelement <4 x i64> undef, i64 63, i64 0
294 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
295 %cmp = icmp ult <4 x i64> %op1, %op2
296 %res = sext <4 x i1> %cmp to <4 x i64>
297 store <4 x i64> %res, ptr %a
; Logical shift right of a <32 x i8> loaded from %a by a splat of 7 (7 inserted
; into lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "lsr z.b, z.b, #7" on both loaded registers.
305 define void @lshr_v32i8(ptr %a) {
306 ; CHECK-LABEL: lshr_v32i8:
308 ; CHECK-NEXT: ldp q0, q1, [x0]
309 ; CHECK-NEXT: lsr z0.b, z0.b, #7
310 ; CHECK-NEXT: lsr z1.b, z1.b, #7
311 ; CHECK-NEXT: stp q0, q1, [x0]
313 %op1 = load <32 x i8>, ptr %a
314 %ins = insertelement <32 x i8> undef, i8 7, i64 0
315 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
316 %res = lshr <32 x i8> %op1, %op2
317 store <32 x i8> %res, ptr %a
; Logical shift right of a <16 x i16> loaded from %a by a splat of 15 (15 inserted
; into lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "lsr z.h, z.h, #15" on both loaded registers.
321 define void @lshr_v16i16(ptr %a) {
322 ; CHECK-LABEL: lshr_v16i16:
324 ; CHECK-NEXT: ldp q0, q1, [x0]
325 ; CHECK-NEXT: lsr z0.h, z0.h, #15
326 ; CHECK-NEXT: lsr z1.h, z1.h, #15
327 ; CHECK-NEXT: stp q0, q1, [x0]
329 %op1 = load <16 x i16>, ptr %a
330 %ins = insertelement <16 x i16> undef, i16 15, i64 0
331 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
332 %res = lshr <16 x i16> %op1, %op2
333 store <16 x i16> %res, ptr %a
; Logical shift right of a <8 x i32> loaded from %a by a splat of 31 (31 inserted
; into lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "lsr z.s, z.s, #31" on both loaded registers.
337 define void @lshr_v8i32(ptr %a) {
338 ; CHECK-LABEL: lshr_v8i32:
340 ; CHECK-NEXT: ldp q0, q1, [x0]
341 ; CHECK-NEXT: lsr z0.s, z0.s, #31
342 ; CHECK-NEXT: lsr z1.s, z1.s, #31
343 ; CHECK-NEXT: stp q0, q1, [x0]
345 %op1 = load <8 x i32>, ptr %a
346 %ins = insertelement <8 x i32> undef, i32 31, i64 0
347 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
348 %res = lshr <8 x i32> %op1, %op2
349 store <8 x i32> %res, ptr %a
; Logical shift right of a <4 x i64> loaded from %a by a splat of 63 (63 inserted
; into lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "lsr z.d, z.d, #63" on both loaded registers.
353 define void @lshr_v4i64(ptr %a) {
354 ; CHECK-LABEL: lshr_v4i64:
356 ; CHECK-NEXT: ldp q0, q1, [x0]
357 ; CHECK-NEXT: lsr z0.d, z0.d, #63
358 ; CHECK-NEXT: lsr z1.d, z1.d, #63
359 ; CHECK-NEXT: stp q0, q1, [x0]
361 %op1 = load <4 x i64>, ptr %a
362 %ins = insertelement <4 x i64> undef, i64 63, i64 0
363 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
364 %res = lshr <4 x i64> %op1, %op2
365 store <4 x i64> %res, ptr %a
; mul of a <32 x i8> loaded from %a with a splat of 7 (7 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "mul z.b, z.b, #7" on both loaded registers.
373 define void @mul_v32i8(ptr %a) {
374 ; CHECK-LABEL: mul_v32i8:
376 ; CHECK-NEXT: ldp q0, q1, [x0]
377 ; CHECK-NEXT: mul z0.b, z0.b, #7
378 ; CHECK-NEXT: mul z1.b, z1.b, #7
379 ; CHECK-NEXT: stp q0, q1, [x0]
381 %op1 = load <32 x i8>, ptr %a
382 %ins = insertelement <32 x i8> undef, i8 7, i64 0
383 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
384 %res = mul <32 x i8> %op1, %op2
385 store <32 x i8> %res, ptr %a
; mul of a <16 x i16> loaded from %a with a splat of 15 (15 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "mul z.h, z.h, #15" on both loaded registers.
389 define void @mul_v16i16(ptr %a) {
390 ; CHECK-LABEL: mul_v16i16:
392 ; CHECK-NEXT: ldp q0, q1, [x0]
393 ; CHECK-NEXT: mul z0.h, z0.h, #15
394 ; CHECK-NEXT: mul z1.h, z1.h, #15
395 ; CHECK-NEXT: stp q0, q1, [x0]
397 %op1 = load <16 x i16>, ptr %a
398 %ins = insertelement <16 x i16> undef, i16 15, i64 0
399 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
400 %res = mul <16 x i16> %op1, %op2
401 store <16 x i16> %res, ptr %a
; mul of a <8 x i32> loaded from %a with a splat of 31 (31 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "mul z.s, z.s, #31" on both loaded registers.
405 define void @mul_v8i32(ptr %a) {
406 ; CHECK-LABEL: mul_v8i32:
408 ; CHECK-NEXT: ldp q0, q1, [x0]
409 ; CHECK-NEXT: mul z0.s, z0.s, #31
410 ; CHECK-NEXT: mul z1.s, z1.s, #31
411 ; CHECK-NEXT: stp q0, q1, [x0]
413 %op1 = load <8 x i32>, ptr %a
414 %ins = insertelement <8 x i32> undef, i32 31, i64 0
415 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
416 %res = mul <8 x i32> %op1, %op2
417 store <8 x i32> %res, ptr %a
; mul of a <4 x i64> loaded from %a with a splat of 63 (63 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "mul z.d, z.d, #63" on both loaded registers.
421 define void @mul_v4i64(ptr %a) {
422 ; CHECK-LABEL: mul_v4i64:
424 ; CHECK-NEXT: ldp q0, q1, [x0]
425 ; CHECK-NEXT: mul z0.d, z0.d, #63
426 ; CHECK-NEXT: mul z1.d, z1.d, #63
427 ; CHECK-NEXT: stp q0, q1, [x0]
429 %op1 = load <4 x i64>, ptr %a
430 %ins = insertelement <4 x i64> undef, i64 63, i64 0
431 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
432 %res = mul <4 x i64> %op1, %op2
433 store <4 x i64> %res, ptr %a
; Bitwise or of a <32 x i8> loaded from %a with a splat of 7 (7 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "orr z.b, z.b, #0x7" on both loaded registers.
441 define void @or_v32i8(ptr %a) {
442 ; CHECK-LABEL: or_v32i8:
444 ; CHECK-NEXT: ldp q0, q1, [x0]
445 ; CHECK-NEXT: orr z0.b, z0.b, #0x7
446 ; CHECK-NEXT: orr z1.b, z1.b, #0x7
447 ; CHECK-NEXT: stp q0, q1, [x0]
449 %op1 = load <32 x i8>, ptr %a
450 %ins = insertelement <32 x i8> undef, i8 7, i64 0
451 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
452 %res = or <32 x i8> %op1, %op2
453 store <32 x i8> %res, ptr %a
; Bitwise or of a <16 x i16> loaded from %a with a splat of 15 (15 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "orr z.h, z.h, #0xf" on both loaded registers.
457 define void @or_v16i16(ptr %a) {
458 ; CHECK-LABEL: or_v16i16:
460 ; CHECK-NEXT: ldp q0, q1, [x0]
461 ; CHECK-NEXT: orr z0.h, z0.h, #0xf
462 ; CHECK-NEXT: orr z1.h, z1.h, #0xf
463 ; CHECK-NEXT: stp q0, q1, [x0]
465 %op1 = load <16 x i16>, ptr %a
466 %ins = insertelement <16 x i16> undef, i16 15, i64 0
467 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
468 %res = or <16 x i16> %op1, %op2
469 store <16 x i16> %res, ptr %a
; Bitwise or of a <8 x i32> loaded from %a with a splat of 31 (31 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "orr z.s, z.s, #0x1f" on both loaded registers.
473 define void @or_v8i32(ptr %a) {
474 ; CHECK-LABEL: or_v8i32:
476 ; CHECK-NEXT: ldp q0, q1, [x0]
477 ; CHECK-NEXT: orr z0.s, z0.s, #0x1f
478 ; CHECK-NEXT: orr z1.s, z1.s, #0x1f
479 ; CHECK-NEXT: stp q0, q1, [x0]
481 %op1 = load <8 x i32>, ptr %a
482 %ins = insertelement <8 x i32> undef, i32 31, i64 0
483 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
484 %res = or <8 x i32> %op1, %op2
485 store <8 x i32> %res, ptr %a
; Bitwise or of a <4 x i64> loaded from %a with a splat of 63 (63 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "orr z.d, z.d, #0x3f" on both loaded registers.
489 define void @or_v4i64(ptr %a) {
490 ; CHECK-LABEL: or_v4i64:
492 ; CHECK-NEXT: ldp q0, q1, [x0]
493 ; CHECK-NEXT: orr z0.d, z0.d, #0x3f
494 ; CHECK-NEXT: orr z1.d, z1.d, #0x3f
495 ; CHECK-NEXT: stp q0, q1, [x0]
497 %op1 = load <4 x i64>, ptr %a
498 %ins = insertelement <4 x i64> undef, i64 63, i64 0
499 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
500 %res = or <4 x i64> %op1, %op2
501 store <4 x i64> %res, ptr %a
; Shift left of a <32 x i8> loaded from %a by a splat of 7 (7 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "lsl z.b, z.b, #7" on both loaded registers.
509 define void @shl_v32i8(ptr %a) {
510 ; CHECK-LABEL: shl_v32i8:
512 ; CHECK-NEXT: ldp q0, q1, [x0]
513 ; CHECK-NEXT: lsl z0.b, z0.b, #7
514 ; CHECK-NEXT: lsl z1.b, z1.b, #7
515 ; CHECK-NEXT: stp q0, q1, [x0]
517 %op1 = load <32 x i8>, ptr %a
518 %ins = insertelement <32 x i8> undef, i8 7, i64 0
519 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
520 %res = shl <32 x i8> %op1, %op2
521 store <32 x i8> %res, ptr %a
; Shift left of a <16 x i16> loaded from %a by a splat of 15 (15 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "lsl z.h, z.h, #15" on both loaded registers.
525 define void @shl_v16i16(ptr %a) {
526 ; CHECK-LABEL: shl_v16i16:
528 ; CHECK-NEXT: ldp q0, q1, [x0]
529 ; CHECK-NEXT: lsl z0.h, z0.h, #15
530 ; CHECK-NEXT: lsl z1.h, z1.h, #15
531 ; CHECK-NEXT: stp q0, q1, [x0]
533 %op1 = load <16 x i16>, ptr %a
534 %ins = insertelement <16 x i16> undef, i16 15, i64 0
535 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
536 %res = shl <16 x i16> %op1, %op2
537 store <16 x i16> %res, ptr %a
; Shift left of a <8 x i32> loaded from %a by a splat of 31 (31 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "lsl z.s, z.s, #31" on both loaded registers.
541 define void @shl_v8i32(ptr %a) {
542 ; CHECK-LABEL: shl_v8i32:
544 ; CHECK-NEXT: ldp q0, q1, [x0]
545 ; CHECK-NEXT: lsl z0.s, z0.s, #31
546 ; CHECK-NEXT: lsl z1.s, z1.s, #31
547 ; CHECK-NEXT: stp q0, q1, [x0]
549 %op1 = load <8 x i32>, ptr %a
550 %ins = insertelement <8 x i32> undef, i32 31, i64 0
551 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
552 %res = shl <8 x i32> %op1, %op2
553 store <8 x i32> %res, ptr %a
; Shift left of a <4 x i64> loaded from %a by a splat of 63 (63 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "lsl z.d, z.d, #63" on both loaded registers.
557 define void @shl_v4i64(ptr %a) {
558 ; CHECK-LABEL: shl_v4i64:
560 ; CHECK-NEXT: ldp q0, q1, [x0]
561 ; CHECK-NEXT: lsl z0.d, z0.d, #63
562 ; CHECK-NEXT: lsl z1.d, z1.d, #63
563 ; CHECK-NEXT: stp q0, q1, [x0]
565 %op1 = load <4 x i64>, ptr %a
566 %ins = insertelement <4 x i64> undef, i64 63, i64 0
567 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
568 %res = shl <4 x i64> %op1, %op2
569 store <4 x i64> %res, ptr %a
; llvm.smax of a <32 x i8> loaded from %a and a splat of 7 (7 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "smax z.b, z.b, #7" on both loaded registers.
577 define void @smax_v32i8(ptr %a) {
578 ; CHECK-LABEL: smax_v32i8:
580 ; CHECK-NEXT: ldp q0, q1, [x0]
581 ; CHECK-NEXT: smax z0.b, z0.b, #7
582 ; CHECK-NEXT: smax z1.b, z1.b, #7
583 ; CHECK-NEXT: stp q0, q1, [x0]
585 %op1 = load <32 x i8>, ptr %a
586 %ins = insertelement <32 x i8> undef, i8 7, i64 0
587 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
588 %res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
589 store <32 x i8> %res, ptr %a
; llvm.smax of a <16 x i16> loaded from %a and a splat of 15 (15 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "smax z.h, z.h, #15" on both loaded registers.
593 define void @smax_v16i16(ptr %a) {
594 ; CHECK-LABEL: smax_v16i16:
596 ; CHECK-NEXT: ldp q0, q1, [x0]
597 ; CHECK-NEXT: smax z0.h, z0.h, #15
598 ; CHECK-NEXT: smax z1.h, z1.h, #15
599 ; CHECK-NEXT: stp q0, q1, [x0]
601 %op1 = load <16 x i16>, ptr %a
602 %ins = insertelement <16 x i16> undef, i16 15, i64 0
603 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
604 %res = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
605 store <16 x i16> %res, ptr %a
; llvm.smax of a <8 x i32> loaded from %a and a splat of 31 (31 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "smax z.s, z.s, #31" on both loaded registers.
609 define void @smax_v8i32(ptr %a) {
610 ; CHECK-LABEL: smax_v8i32:
612 ; CHECK-NEXT: ldp q0, q1, [x0]
613 ; CHECK-NEXT: smax z0.s, z0.s, #31
614 ; CHECK-NEXT: smax z1.s, z1.s, #31
615 ; CHECK-NEXT: stp q0, q1, [x0]
617 %op1 = load <8 x i32>, ptr %a
618 %ins = insertelement <8 x i32> undef, i32 31, i64 0
619 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
620 %res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
621 store <8 x i32> %res, ptr %a
; llvm.smax of a <4 x i64> loaded from %a and a splat of 63 (63 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "smax z.d, z.d, #63" on both loaded registers.
625 define void @smax_v4i64(ptr %a) {
626 ; CHECK-LABEL: smax_v4i64:
628 ; CHECK-NEXT: ldp q0, q1, [x0]
629 ; CHECK-NEXT: smax z0.d, z0.d, #63
630 ; CHECK-NEXT: smax z1.d, z1.d, #63
631 ; CHECK-NEXT: stp q0, q1, [x0]
633 %op1 = load <4 x i64>, ptr %a
634 %ins = insertelement <4 x i64> undef, i64 63, i64 0
635 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
636 %res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
637 store <4 x i64> %res, ptr %a
; llvm.smin of a <32 x i8> loaded from %a and a splat of 7 (7 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "smin z.b, z.b, #7" on both loaded registers.
645 define void @smin_v32i8(ptr %a) {
646 ; CHECK-LABEL: smin_v32i8:
648 ; CHECK-NEXT: ldp q0, q1, [x0]
649 ; CHECK-NEXT: smin z0.b, z0.b, #7
650 ; CHECK-NEXT: smin z1.b, z1.b, #7
651 ; CHECK-NEXT: stp q0, q1, [x0]
653 %op1 = load <32 x i8>, ptr %a
654 %ins = insertelement <32 x i8> undef, i8 7, i64 0
655 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
656 %res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
657 store <32 x i8> %res, ptr %a
; llvm.smin of a <16 x i16> loaded from %a and a splat of 15 (15 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "smin z.h, z.h, #15" on both loaded registers.
661 define void @smin_v16i16(ptr %a) {
662 ; CHECK-LABEL: smin_v16i16:
664 ; CHECK-NEXT: ldp q0, q1, [x0]
665 ; CHECK-NEXT: smin z0.h, z0.h, #15
666 ; CHECK-NEXT: smin z1.h, z1.h, #15
667 ; CHECK-NEXT: stp q0, q1, [x0]
669 %op1 = load <16 x i16>, ptr %a
670 %ins = insertelement <16 x i16> undef, i16 15, i64 0
671 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
672 %res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
673 store <16 x i16> %res, ptr %a
; llvm.smin of a <8 x i32> loaded from %a and a splat of 31 (31 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "smin z.s, z.s, #31" on both loaded registers.
677 define void @smin_v8i32(ptr %a) {
678 ; CHECK-LABEL: smin_v8i32:
680 ; CHECK-NEXT: ldp q0, q1, [x0]
681 ; CHECK-NEXT: smin z0.s, z0.s, #31
682 ; CHECK-NEXT: smin z1.s, z1.s, #31
683 ; CHECK-NEXT: stp q0, q1, [x0]
685 %op1 = load <8 x i32>, ptr %a
686 %ins = insertelement <8 x i32> undef, i32 31, i64 0
687 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
688 %res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
689 store <8 x i32> %res, ptr %a
; llvm.smin of a <4 x i64> loaded from %a and a splat of 63 (63 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "smin z.d, z.d, #63" on both loaded registers.
693 define void @smin_v4i64(ptr %a) {
694 ; CHECK-LABEL: smin_v4i64:
696 ; CHECK-NEXT: ldp q0, q1, [x0]
697 ; CHECK-NEXT: smin z0.d, z0.d, #63
698 ; CHECK-NEXT: smin z1.d, z1.d, #63
699 ; CHECK-NEXT: stp q0, q1, [x0]
701 %op1 = load <4 x i64>, ptr %a
702 %ins = insertelement <4 x i64> undef, i64 63, i64 0
703 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
704 %res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
705 store <4 x i64> %res, ptr %a
; Subtracts a splat of 7 (7 inserted into lane 0, then broadcast by the all-zero
; shuffle mask) from a <32 x i8> loaded from %a; the result is stored back to %a.
; Expected output uses the immediate form "sub z.b, z.b, #7" on both loaded registers.
713 define void @sub_v32i8(ptr %a) {
714 ; CHECK-LABEL: sub_v32i8:
716 ; CHECK-NEXT: ldp q0, q1, [x0]
717 ; CHECK-NEXT: sub z0.b, z0.b, #7 // =0x7
718 ; CHECK-NEXT: sub z1.b, z1.b, #7 // =0x7
719 ; CHECK-NEXT: stp q0, q1, [x0]
721 %op1 = load <32 x i8>, ptr %a
722 %ins = insertelement <32 x i8> undef, i8 7, i64 0
723 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
724 %res = sub <32 x i8> %op1, %op2
725 store <32 x i8> %res, ptr %a
; Subtracts a splat of 15 (15 inserted into lane 0, then broadcast by the all-zero
; shuffle mask) from a <16 x i16> loaded from %a; the result is stored back to %a.
; Expected output uses the immediate form "sub z.h, z.h, #15" on both loaded registers.
729 define void @sub_v16i16(ptr %a) {
730 ; CHECK-LABEL: sub_v16i16:
732 ; CHECK-NEXT: ldp q0, q1, [x0]
733 ; CHECK-NEXT: sub z0.h, z0.h, #15 // =0xf
734 ; CHECK-NEXT: sub z1.h, z1.h, #15 // =0xf
735 ; CHECK-NEXT: stp q0, q1, [x0]
737 %op1 = load <16 x i16>, ptr %a
738 %ins = insertelement <16 x i16> undef, i16 15, i64 0
739 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
740 %res = sub <16 x i16> %op1, %op2
741 store <16 x i16> %res, ptr %a
; Subtracts a splat of 31 (31 inserted into lane 0, then broadcast by the all-zero
; shuffle mask) from a <8 x i32> loaded from %a; the result is stored back to %a.
; Expected output uses the immediate form "sub z.s, z.s, #31" on both loaded registers.
745 define void @sub_v8i32(ptr %a) {
746 ; CHECK-LABEL: sub_v8i32:
748 ; CHECK-NEXT: ldp q0, q1, [x0]
749 ; CHECK-NEXT: sub z0.s, z0.s, #31 // =0x1f
750 ; CHECK-NEXT: sub z1.s, z1.s, #31 // =0x1f
751 ; CHECK-NEXT: stp q0, q1, [x0]
753 %op1 = load <8 x i32>, ptr %a
754 %ins = insertelement <8 x i32> undef, i32 31, i64 0
755 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
756 %res = sub <8 x i32> %op1, %op2
757 store <8 x i32> %res, ptr %a
; Subtracts a splat of 63 (63 inserted into lane 0, then broadcast by the all-zero
; shuffle mask) from a <4 x i64> loaded from %a; the result is stored back to %a.
; Expected output uses the immediate form "sub z.d, z.d, #63" on both loaded registers.
761 define void @sub_v4i64(ptr %a) {
762 ; CHECK-LABEL: sub_v4i64:
764 ; CHECK-NEXT: ldp q0, q1, [x0]
765 ; CHECK-NEXT: sub z0.d, z0.d, #63 // =0x3f
766 ; CHECK-NEXT: sub z1.d, z1.d, #63 // =0x3f
767 ; CHECK-NEXT: stp q0, q1, [x0]
769 %op1 = load <4 x i64>, ptr %a
770 %ins = insertelement <4 x i64> undef, i64 63, i64 0
771 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
772 %res = sub <4 x i64> %op1, %op2
773 store <4 x i64> %res, ptr %a
; llvm.umax of a <32 x i8> loaded from %a and a splat of 7 (7 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "umax z.b, z.b, #7" on both loaded registers.
781 define void @umax_v32i8(ptr %a) {
782 ; CHECK-LABEL: umax_v32i8:
784 ; CHECK-NEXT: ldp q0, q1, [x0]
785 ; CHECK-NEXT: umax z0.b, z0.b, #7
786 ; CHECK-NEXT: umax z1.b, z1.b, #7
787 ; CHECK-NEXT: stp q0, q1, [x0]
789 %op1 = load <32 x i8>, ptr %a
790 %ins = insertelement <32 x i8> undef, i8 7, i64 0
791 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
792 %res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
793 store <32 x i8> %res, ptr %a
; llvm.umax of a <16 x i16> loaded from %a and a splat of 15 (15 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "umax z.h, z.h, #15" on both loaded registers.
797 define void @umax_v16i16(ptr %a) {
798 ; CHECK-LABEL: umax_v16i16:
800 ; CHECK-NEXT: ldp q0, q1, [x0]
801 ; CHECK-NEXT: umax z0.h, z0.h, #15
802 ; CHECK-NEXT: umax z1.h, z1.h, #15
803 ; CHECK-NEXT: stp q0, q1, [x0]
805 %op1 = load <16 x i16>, ptr %a
806 %ins = insertelement <16 x i16> undef, i16 15, i64 0
807 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
808 %res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
809 store <16 x i16> %res, ptr %a
; llvm.umax of a <8 x i32> loaded from %a and a splat of 31 (31 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "umax z.s, z.s, #31" on both loaded registers.
813 define void @umax_v8i32(ptr %a) {
814 ; CHECK-LABEL: umax_v8i32:
816 ; CHECK-NEXT: ldp q0, q1, [x0]
817 ; CHECK-NEXT: umax z0.s, z0.s, #31
818 ; CHECK-NEXT: umax z1.s, z1.s, #31
819 ; CHECK-NEXT: stp q0, q1, [x0]
821 %op1 = load <8 x i32>, ptr %a
822 %ins = insertelement <8 x i32> undef, i32 31, i64 0
823 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
824 %res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
825 store <8 x i32> %res, ptr %a
; llvm.umax of a <4 x i64> loaded from %a and a splat of 63 (63 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "umax z.d, z.d, #63" on both loaded registers.
829 define void @umax_v4i64(ptr %a) {
830 ; CHECK-LABEL: umax_v4i64:
832 ; CHECK-NEXT: ldp q0, q1, [x0]
833 ; CHECK-NEXT: umax z0.d, z0.d, #63
834 ; CHECK-NEXT: umax z1.d, z1.d, #63
835 ; CHECK-NEXT: stp q0, q1, [x0]
837 %op1 = load <4 x i64>, ptr %a
838 %ins = insertelement <4 x i64> undef, i64 63, i64 0
839 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
840 %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
841 store <4 x i64> %res, ptr %a
; llvm.umin of a <32 x i8> loaded from %a and a splat of 7 (7 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "umin z.b, z.b, #7" on both loaded registers.
849 define void @umin_v32i8(ptr %a) {
850 ; CHECK-LABEL: umin_v32i8:
852 ; CHECK-NEXT: ldp q0, q1, [x0]
853 ; CHECK-NEXT: umin z0.b, z0.b, #7
854 ; CHECK-NEXT: umin z1.b, z1.b, #7
855 ; CHECK-NEXT: stp q0, q1, [x0]
857 %op1 = load <32 x i8>, ptr %a
858 %ins = insertelement <32 x i8> undef, i8 7, i64 0
859 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
860 %res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
861 store <32 x i8> %res, ptr %a
; llvm.umin of a <16 x i16> loaded from %a and a splat of 15 (15 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "umin z.h, z.h, #15" on both loaded registers.
865 define void @umin_v16i16(ptr %a) {
866 ; CHECK-LABEL: umin_v16i16:
868 ; CHECK-NEXT: ldp q0, q1, [x0]
869 ; CHECK-NEXT: umin z0.h, z0.h, #15
870 ; CHECK-NEXT: umin z1.h, z1.h, #15
871 ; CHECK-NEXT: stp q0, q1, [x0]
873 %op1 = load <16 x i16>, ptr %a
874 %ins = insertelement <16 x i16> undef, i16 15, i64 0
875 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
876 %res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
877 store <16 x i16> %res, ptr %a
; llvm.umin of a <8 x i32> loaded from %a and a splat of 31 (31 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "umin z.s, z.s, #31" on both loaded registers.
881 define void @umin_v8i32(ptr %a) {
882 ; CHECK-LABEL: umin_v8i32:
884 ; CHECK-NEXT: ldp q0, q1, [x0]
885 ; CHECK-NEXT: umin z0.s, z0.s, #31
886 ; CHECK-NEXT: umin z1.s, z1.s, #31
887 ; CHECK-NEXT: stp q0, q1, [x0]
889 %op1 = load <8 x i32>, ptr %a
890 %ins = insertelement <8 x i32> undef, i32 31, i64 0
891 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
892 %res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
893 store <8 x i32> %res, ptr %a
; llvm.umin of a <4 x i64> loaded from %a and a splat of 63 (63 inserted into lane 0,
; then broadcast by the all-zero shuffle mask); the result is stored back to %a.
; Expected output uses the immediate form "umin z.d, z.d, #63" on both loaded registers.
897 define void @umin_v4i64(ptr %a) {
898 ; CHECK-LABEL: umin_v4i64:
900 ; CHECK-NEXT: ldp q0, q1, [x0]
901 ; CHECK-NEXT: umin z0.d, z0.d, #63
902 ; CHECK-NEXT: umin z1.d, z1.d, #63
903 ; CHECK-NEXT: stp q0, q1, [x0]
905 %op1 = load <4 x i64>, ptr %a
906 %ins = insertelement <4 x i64> undef, i64 63, i64 0
907 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
908 %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
909 store <4 x i64> %res, ptr %a
; Bitwise xor of a <32 x i8> loaded from %a with a splat of 7 (7 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "eor z.b, z.b, #0x7" on both loaded registers.
917 define void @xor_v32i8(ptr %a) {
918 ; CHECK-LABEL: xor_v32i8:
920 ; CHECK-NEXT: ldp q0, q1, [x0]
921 ; CHECK-NEXT: eor z0.b, z0.b, #0x7
922 ; CHECK-NEXT: eor z1.b, z1.b, #0x7
923 ; CHECK-NEXT: stp q0, q1, [x0]
925 %op1 = load <32 x i8>, ptr %a
926 %ins = insertelement <32 x i8> undef, i8 7, i64 0
927 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
928 %res = xor <32 x i8> %op1, %op2
929 store <32 x i8> %res, ptr %a
; Bitwise xor of a <16 x i16> loaded from %a with a splat of 15 (15 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "eor z.h, z.h, #0xf" on both loaded registers.
933 define void @xor_v16i16(ptr %a) {
934 ; CHECK-LABEL: xor_v16i16:
936 ; CHECK-NEXT: ldp q0, q1, [x0]
937 ; CHECK-NEXT: eor z0.h, z0.h, #0xf
938 ; CHECK-NEXT: eor z1.h, z1.h, #0xf
939 ; CHECK-NEXT: stp q0, q1, [x0]
941 %op1 = load <16 x i16>, ptr %a
942 %ins = insertelement <16 x i16> undef, i16 15, i64 0
943 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
944 %res = xor <16 x i16> %op1, %op2
945 store <16 x i16> %res, ptr %a
; Bitwise xor of a <8 x i32> loaded from %a with a splat of 31 (31 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "eor z.s, z.s, #0x1f" on both loaded registers.
949 define void @xor_v8i32(ptr %a) {
950 ; CHECK-LABEL: xor_v8i32:
952 ; CHECK-NEXT: ldp q0, q1, [x0]
953 ; CHECK-NEXT: eor z0.s, z0.s, #0x1f
954 ; CHECK-NEXT: eor z1.s, z1.s, #0x1f
955 ; CHECK-NEXT: stp q0, q1, [x0]
957 %op1 = load <8 x i32>, ptr %a
958 %ins = insertelement <8 x i32> undef, i32 31, i64 0
959 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
960 %res = xor <8 x i32> %op1, %op2
961 store <8 x i32> %res, ptr %a
; Bitwise xor of a <4 x i64> loaded from %a with a splat of 63 (63 inserted into
; lane 0, then broadcast by the all-zero shuffle mask); result stored back to %a.
; Expected output uses the immediate form "eor z.d, z.d, #0x3f" on both loaded registers.
965 define void @xor_v4i64(ptr %a) {
966 ; CHECK-LABEL: xor_v4i64:
968 ; CHECK-NEXT: ldp q0, q1, [x0]
969 ; CHECK-NEXT: eor z0.d, z0.d, #0x3f
970 ; CHECK-NEXT: eor z1.d, z1.d, #0x3f
971 ; CHECK-NEXT: stp q0, q1, [x0]
973 %op1 = load <4 x i64>, ptr %a
974 %ins = insertelement <4 x i64> undef, i64 63, i64 0
975 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
976 %res = xor <4 x i64> %op1, %op2
977 store <4 x i64> %res, ptr %a
981 declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
982 declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
983 declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
984 declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
986 declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
987 declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
988 declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
989 declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
991 declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
992 declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
993 declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
994 declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
996 declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
997 declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
998 declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
999 declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)