1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
6 ; Although SVE immediate packing should be fully tested using scalable vectors,
7 ; these tests protect against the possibility that scalable nodes, resulting
8 ; from lowering fixed length vector operations, trigger different isel patterns.
10 ; FIXME: Possibly stale - this note said these instructions should have the immediate form, but the CHECK lines below already use it; remove once confirmed.
; Adds a splat of 7 to the <32 x i8> loaded from %a and stores the sum back to %a.
; The CHECK lines expect the SVE immediate form (add z.b, z.b, #7) on both q registers.
16 define void @add_v32i8(ptr %a) {
17 ; CHECK-LABEL: add_v32i8:
19 ; CHECK-NEXT: ldp q0, q1, [x0]
20 ; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
21 ; CHECK-NEXT: add z1.b, z1.b, #7 // =0x7
22 ; CHECK-NEXT: stp q0, q1, [x0]
24 %op1 = load <32 x i8>, ptr %a
25 %ins = insertelement <32 x i8> undef, i8 7, i32 0
26 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
27 %res = add <32 x i8> %op1, %op2
28 store <32 x i8> %res, ptr %a
; Adds a splat of 15 to the <16 x i16> loaded from %a and stores the sum back to %a.
; The CHECK lines expect the SVE immediate form (add z.h, z.h, #15) on both q registers.
32 define void @add_v16i16(ptr %a) {
33 ; CHECK-LABEL: add_v16i16:
35 ; CHECK-NEXT: ldp q0, q1, [x0]
36 ; CHECK-NEXT: add z0.h, z0.h, #15 // =0xf
37 ; CHECK-NEXT: add z1.h, z1.h, #15 // =0xf
38 ; CHECK-NEXT: stp q0, q1, [x0]
40 %op1 = load <16 x i16>, ptr %a
41 %ins = insertelement <16 x i16> undef, i16 15, i64 0
42 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
43 %res = add <16 x i16> %op1, %op2
44 store <16 x i16> %res, ptr %a
; Adds a splat of 31 to the <8 x i32> loaded from %a and stores the sum back to %a.
; The CHECK lines expect the SVE immediate form (add z.s, z.s, #31) on both q registers.
48 define void @add_v8i32(ptr %a) {
49 ; CHECK-LABEL: add_v8i32:
51 ; CHECK-NEXT: ldp q0, q1, [x0]
52 ; CHECK-NEXT: add z0.s, z0.s, #31 // =0x1f
53 ; CHECK-NEXT: add z1.s, z1.s, #31 // =0x1f
54 ; CHECK-NEXT: stp q0, q1, [x0]
56 %op1 = load <8 x i32>, ptr %a
57 %ins = insertelement <8 x i32> undef, i32 31, i64 0
58 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
59 %res = add <8 x i32> %op1, %op2
60 store <8 x i32> %res, ptr %a
; Adds a splat of 63 to the <4 x i64> loaded from %a and stores the sum back to %a.
; The CHECK lines expect the SVE immediate form (add z.d, z.d, #63) on both q registers.
64 define void @add_v4i64(ptr %a) {
65 ; CHECK-LABEL: add_v4i64:
67 ; CHECK-NEXT: ldp q0, q1, [x0]
68 ; CHECK-NEXT: add z0.d, z0.d, #63 // =0x3f
69 ; CHECK-NEXT: add z1.d, z1.d, #63 // =0x3f
70 ; CHECK-NEXT: stp q0, q1, [x0]
72 %op1 = load <4 x i64>, ptr %a
73 %ins = insertelement <4 x i64> undef, i64 63, i64 0
74 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
75 %res = add <4 x i64> %op1, %op2
76 store <4 x i64> %res, ptr %a
; ANDs the <32 x i8> loaded from %a with a splat of 7 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (and z.b, z.b, #0x7) on both q registers.
84 define void @and_v32i8(ptr %a) {
85 ; CHECK-LABEL: and_v32i8:
87 ; CHECK-NEXT: ldp q0, q1, [x0]
88 ; CHECK-NEXT: and z0.b, z0.b, #0x7
89 ; CHECK-NEXT: and z1.b, z1.b, #0x7
90 ; CHECK-NEXT: stp q0, q1, [x0]
92 %op1 = load <32 x i8>, ptr %a
93 %ins = insertelement <32 x i8> undef, i8 7, i32 0
94 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
95 %res = and <32 x i8> %op1, %op2
96 store <32 x i8> %res, ptr %a
; ANDs the <16 x i16> loaded from %a with a splat of 15 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (and z.h, z.h, #0xf) on both q registers.
100 define void @and_v16i16(ptr %a) {
101 ; CHECK-LABEL: and_v16i16:
103 ; CHECK-NEXT: ldp q0, q1, [x0]
104 ; CHECK-NEXT: and z0.h, z0.h, #0xf
105 ; CHECK-NEXT: and z1.h, z1.h, #0xf
106 ; CHECK-NEXT: stp q0, q1, [x0]
108 %op1 = load <16 x i16>, ptr %a
109 %ins = insertelement <16 x i16> undef, i16 15, i64 0
110 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
111 %res = and <16 x i16> %op1, %op2
112 store <16 x i16> %res, ptr %a
; ANDs the <8 x i32> loaded from %a with a splat of 31 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (and z.s, z.s, #0x1f) on both q registers.
116 define void @and_v8i32(ptr %a) {
117 ; CHECK-LABEL: and_v8i32:
119 ; CHECK-NEXT: ldp q0, q1, [x0]
120 ; CHECK-NEXT: and z0.s, z0.s, #0x1f
121 ; CHECK-NEXT: and z1.s, z1.s, #0x1f
122 ; CHECK-NEXT: stp q0, q1, [x0]
124 %op1 = load <8 x i32>, ptr %a
125 %ins = insertelement <8 x i32> undef, i32 31, i64 0
126 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
127 %res = and <8 x i32> %op1, %op2
128 store <8 x i32> %res, ptr %a
; ANDs the <4 x i64> loaded from %a with a splat of 63 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (and z.d, z.d, #0x3f) on both q registers.
132 define void @and_v4i64(ptr %a) {
133 ; CHECK-LABEL: and_v4i64:
135 ; CHECK-NEXT: ldp q0, q1, [x0]
136 ; CHECK-NEXT: and z0.d, z0.d, #0x3f
137 ; CHECK-NEXT: and z1.d, z1.d, #0x3f
138 ; CHECK-NEXT: stp q0, q1, [x0]
140 %op1 = load <4 x i64>, ptr %a
141 %ins = insertelement <4 x i64> undef, i64 63, i64 0
142 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
143 %res = and <4 x i64> %op1, %op2
144 store <4 x i64> %res, ptr %a
; Arithmetic-shifts every lane of the <32 x i8> at %a right by a splat of 7 and stores it back.
; The CHECK lines expect the immediate shift form (asr z.b, z.b, #7) on both q registers.
152 define void @ashr_v32i8(ptr %a) {
153 ; CHECK-LABEL: ashr_v32i8:
155 ; CHECK-NEXT: ldp q0, q1, [x0]
156 ; CHECK-NEXT: asr z0.b, z0.b, #7
157 ; CHECK-NEXT: asr z1.b, z1.b, #7
158 ; CHECK-NEXT: stp q0, q1, [x0]
160 %op1 = load <32 x i8>, ptr %a
161 %ins = insertelement <32 x i8> undef, i8 7, i32 0
162 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
163 %res = ashr <32 x i8> %op1, %op2
164 store <32 x i8> %res, ptr %a
; Arithmetic-shifts every lane of the <16 x i16> at %a right by a splat of 15 and stores it back.
; The CHECK lines expect the immediate shift form (asr z.h, z.h, #15) on both q registers.
168 define void @ashr_v16i16(ptr %a) {
169 ; CHECK-LABEL: ashr_v16i16:
171 ; CHECK-NEXT: ldp q0, q1, [x0]
172 ; CHECK-NEXT: asr z0.h, z0.h, #15
173 ; CHECK-NEXT: asr z1.h, z1.h, #15
174 ; CHECK-NEXT: stp q0, q1, [x0]
176 %op1 = load <16 x i16>, ptr %a
177 %ins = insertelement <16 x i16> undef, i16 15, i64 0
178 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
179 %res = ashr <16 x i16> %op1, %op2
180 store <16 x i16> %res, ptr %a
; Arithmetic-shifts every lane of the <8 x i32> at %a right by a splat of 31 and stores it back.
; The CHECK lines expect the immediate shift form (asr z.s, z.s, #31) on both q registers.
184 define void @ashr_v8i32(ptr %a) {
185 ; CHECK-LABEL: ashr_v8i32:
187 ; CHECK-NEXT: ldp q0, q1, [x0]
188 ; CHECK-NEXT: asr z0.s, z0.s, #31
189 ; CHECK-NEXT: asr z1.s, z1.s, #31
190 ; CHECK-NEXT: stp q0, q1, [x0]
192 %op1 = load <8 x i32>, ptr %a
193 %ins = insertelement <8 x i32> undef, i32 31, i64 0
194 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
195 %res = ashr <8 x i32> %op1, %op2
196 store <8 x i32> %res, ptr %a
; Arithmetic-shifts every lane of the <4 x i64> at %a right by a splat of 63 and stores it back.
; The CHECK lines expect the immediate shift form (asr z.d, z.d, #63) on both q registers.
200 define void @ashr_v4i64(ptr %a) {
201 ; CHECK-LABEL: ashr_v4i64:
203 ; CHECK-NEXT: ldp q0, q1, [x0]
204 ; CHECK-NEXT: asr z0.d, z0.d, #63
205 ; CHECK-NEXT: asr z1.d, z1.d, #63
206 ; CHECK-NEXT: stp q0, q1, [x0]
208 %op1 = load <4 x i64>, ptr %a
209 %ins = insertelement <4 x i64> undef, i64 63, i64 0
210 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
211 %res = ashr <4 x i64> %op1, %op2
212 store <4 x i64> %res, ptr %a
; Compares every lane of the <32 x i8> at %a for equality with a splat of 7, sign-extends the
; i1 results to i8 and stores them back to %a. The CHECK lines expect cmpeq with immediate #7
; under a "ptrue p0.b, vl16" predicate, followed by a zeroing predicated mov of #-1.
220 define void @icmp_eq_v32i8(ptr %a) {
221 ; CHECK-LABEL: icmp_eq_v32i8:
223 ; CHECK-NEXT: ptrue p0.b, vl16
224 ; CHECK-NEXT: ldp q0, q1, [x0]
225 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #7
226 ; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, #7
227 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
228 ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
229 ; CHECK-NEXT: stp q0, q1, [x0]
231 %op1 = load <32 x i8>, ptr %a
232 %ins = insertelement <32 x i8> undef, i8 7, i64 0
233 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
234 %cmp = icmp eq <32 x i8> %op1, %op2
235 %res = sext <32 x i1> %cmp to <32 x i8>
236 store <32 x i8> %res, ptr %a
; Signed >= compare of every lane of the <16 x i16> at %a against a splat of 15; the i1 results
; are sign-extended to i16 and stored back to %a. The CHECK lines expect cmpge with immediate
; #15 under a "ptrue p0.h, vl8" predicate, followed by a zeroing predicated mov of #-1.
240 define void @icmp_sge_v16i16(ptr %a) {
241 ; CHECK-LABEL: icmp_sge_v16i16:
243 ; CHECK-NEXT: ptrue p0.h, vl8
244 ; CHECK-NEXT: ldp q0, q1, [x0]
245 ; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, #15
246 ; CHECK-NEXT: cmpge p0.h, p0/z, z1.h, #15
247 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
248 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
249 ; CHECK-NEXT: stp q0, q1, [x0]
251 %op1 = load <16 x i16>, ptr %a
252 %ins = insertelement <16 x i16> undef, i16 15, i64 0
253 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
254 %cmp = icmp sge <16 x i16> %op1, %op2
255 %res = sext <16 x i1> %cmp to <16 x i16>
256 store <16 x i16> %res, ptr %a
; Signed > compare of every lane of the <8 x i32> at %a against a splat of -8 (a negative
; immediate); the i1 results are sign-extended to i32 and stored back to %a. The CHECK lines
; expect cmpgt with immediate #-8 under a "ptrue p0.s, vl4" predicate, then a predicated mov of #-1.
260 define void @icmp_sgt_v8i32(ptr %a) {
261 ; CHECK-LABEL: icmp_sgt_v8i32:
263 ; CHECK-NEXT: ptrue p0.s, vl4
264 ; CHECK-NEXT: ldp q0, q1, [x0]
265 ; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, #-8
266 ; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, #-8
267 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
268 ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
269 ; CHECK-NEXT: stp q0, q1, [x0]
271 %op1 = load <8 x i32>, ptr %a
272 %ins = insertelement <8 x i32> undef, i32 -8, i64 0
273 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
274 %cmp = icmp sgt <8 x i32> %op1, %op2
275 %res = sext <8 x i1> %cmp to <8 x i32>
276 store <8 x i32> %res, ptr %a
; Unsigned < compare of every lane of the <4 x i64> at %a against a splat of 63; the i1 results
; are sign-extended to i64 and stored back to %a. The CHECK lines expect cmplo with immediate
; #63 under a "ptrue p0.d, vl2" predicate, followed by a zeroing predicated mov of #-1.
280 define void @icmp_ult_v4i64(ptr %a) {
281 ; CHECK-LABEL: icmp_ult_v4i64:
283 ; CHECK-NEXT: ptrue p0.d, vl2
284 ; CHECK-NEXT: ldp q0, q1, [x0]
285 ; CHECK-NEXT: cmplo p1.d, p0/z, z0.d, #63
286 ; CHECK-NEXT: cmplo p0.d, p0/z, z1.d, #63
287 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
288 ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
289 ; CHECK-NEXT: stp q0, q1, [x0]
291 %op1 = load <4 x i64>, ptr %a
292 %ins = insertelement <4 x i64> undef, i64 63, i64 0
293 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
294 %cmp = icmp ult <4 x i64> %op1, %op2
295 %res = sext <4 x i1> %cmp to <4 x i64>
296 store <4 x i64> %res, ptr %a
; Logical-shifts every lane of the <32 x i8> at %a right by a splat of 7 and stores it back.
; The CHECK lines expect the immediate shift form (lsr z.b, z.b, #7) on both q registers.
304 define void @lshr_v32i8(ptr %a) {
305 ; CHECK-LABEL: lshr_v32i8:
307 ; CHECK-NEXT: ldp q0, q1, [x0]
308 ; CHECK-NEXT: lsr z0.b, z0.b, #7
309 ; CHECK-NEXT: lsr z1.b, z1.b, #7
310 ; CHECK-NEXT: stp q0, q1, [x0]
312 %op1 = load <32 x i8>, ptr %a
313 %ins = insertelement <32 x i8> undef, i8 7, i64 0
314 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
315 %res = lshr <32 x i8> %op1, %op2
316 store <32 x i8> %res, ptr %a
; Logical-shifts every lane of the <16 x i16> at %a right by a splat of 15 and stores it back.
; The CHECK lines expect the immediate shift form (lsr z.h, z.h, #15) on both q registers.
320 define void @lshr_v16i16(ptr %a) {
321 ; CHECK-LABEL: lshr_v16i16:
323 ; CHECK-NEXT: ldp q0, q1, [x0]
324 ; CHECK-NEXT: lsr z0.h, z0.h, #15
325 ; CHECK-NEXT: lsr z1.h, z1.h, #15
326 ; CHECK-NEXT: stp q0, q1, [x0]
328 %op1 = load <16 x i16>, ptr %a
329 %ins = insertelement <16 x i16> undef, i16 15, i64 0
330 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
331 %res = lshr <16 x i16> %op1, %op2
332 store <16 x i16> %res, ptr %a
; Logical-shifts every lane of the <8 x i32> at %a right by a splat of 31 and stores it back.
; The CHECK lines expect the immediate shift form (lsr z.s, z.s, #31) on both q registers.
336 define void @lshr_v8i32(ptr %a) {
337 ; CHECK-LABEL: lshr_v8i32:
339 ; CHECK-NEXT: ldp q0, q1, [x0]
340 ; CHECK-NEXT: lsr z0.s, z0.s, #31
341 ; CHECK-NEXT: lsr z1.s, z1.s, #31
342 ; CHECK-NEXT: stp q0, q1, [x0]
344 %op1 = load <8 x i32>, ptr %a
345 %ins = insertelement <8 x i32> undef, i32 31, i64 0
346 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
347 %res = lshr <8 x i32> %op1, %op2
348 store <8 x i32> %res, ptr %a
; Logical-shifts every lane of the <4 x i64> at %a right by a splat of 63 and stores it back.
; The CHECK lines expect the immediate shift form (lsr z.d, z.d, #63) on both q registers.
352 define void @lshr_v4i64(ptr %a) {
353 ; CHECK-LABEL: lshr_v4i64:
355 ; CHECK-NEXT: ldp q0, q1, [x0]
356 ; CHECK-NEXT: lsr z0.d, z0.d, #63
357 ; CHECK-NEXT: lsr z1.d, z1.d, #63
358 ; CHECK-NEXT: stp q0, q1, [x0]
360 %op1 = load <4 x i64>, ptr %a
361 %ins = insertelement <4 x i64> undef, i64 63, i64 0
362 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
363 %res = lshr <4 x i64> %op1, %op2
364 store <4 x i64> %res, ptr %a
; Multiplies every lane of the <32 x i8> at %a by a splat of 7 and stores the product back.
; The CHECK lines expect the SVE immediate form (mul z.b, z.b, #7) on both q registers.
372 define void @mul_v32i8(ptr %a) {
373 ; CHECK-LABEL: mul_v32i8:
375 ; CHECK-NEXT: ldp q0, q1, [x0]
376 ; CHECK-NEXT: mul z0.b, z0.b, #7
377 ; CHECK-NEXT: mul z1.b, z1.b, #7
378 ; CHECK-NEXT: stp q0, q1, [x0]
380 %op1 = load <32 x i8>, ptr %a
381 %ins = insertelement <32 x i8> undef, i8 7, i64 0
382 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
383 %res = mul <32 x i8> %op1, %op2
384 store <32 x i8> %res, ptr %a
; Multiplies every lane of the <16 x i16> at %a by a splat of 15 and stores the product back.
; The CHECK lines expect the SVE immediate form (mul z.h, z.h, #15) on both q registers.
388 define void @mul_v16i16(ptr %a) {
389 ; CHECK-LABEL: mul_v16i16:
391 ; CHECK-NEXT: ldp q0, q1, [x0]
392 ; CHECK-NEXT: mul z0.h, z0.h, #15
393 ; CHECK-NEXT: mul z1.h, z1.h, #15
394 ; CHECK-NEXT: stp q0, q1, [x0]
396 %op1 = load <16 x i16>, ptr %a
397 %ins = insertelement <16 x i16> undef, i16 15, i64 0
398 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
399 %res = mul <16 x i16> %op1, %op2
400 store <16 x i16> %res, ptr %a
; Multiplies every lane of the <8 x i32> at %a by a splat of 31 and stores the product back.
; The CHECK lines expect the SVE immediate form (mul z.s, z.s, #31) on both q registers.
404 define void @mul_v8i32(ptr %a) {
405 ; CHECK-LABEL: mul_v8i32:
407 ; CHECK-NEXT: ldp q0, q1, [x0]
408 ; CHECK-NEXT: mul z0.s, z0.s, #31
409 ; CHECK-NEXT: mul z1.s, z1.s, #31
410 ; CHECK-NEXT: stp q0, q1, [x0]
412 %op1 = load <8 x i32>, ptr %a
413 %ins = insertelement <8 x i32> undef, i32 31, i64 0
414 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
415 %res = mul <8 x i32> %op1, %op2
416 store <8 x i32> %res, ptr %a
; Multiplies every lane of the <4 x i64> at %a by a splat of 63 and stores the product back.
; The CHECK lines expect the SVE immediate form (mul z.d, z.d, #63) on both q registers.
420 define void @mul_v4i64(ptr %a) {
421 ; CHECK-LABEL: mul_v4i64:
423 ; CHECK-NEXT: ldp q0, q1, [x0]
424 ; CHECK-NEXT: mul z0.d, z0.d, #63
425 ; CHECK-NEXT: mul z1.d, z1.d, #63
426 ; CHECK-NEXT: stp q0, q1, [x0]
428 %op1 = load <4 x i64>, ptr %a
429 %ins = insertelement <4 x i64> undef, i64 63, i64 0
430 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
431 %res = mul <4 x i64> %op1, %op2
432 store <4 x i64> %res, ptr %a
; ORs the <32 x i8> loaded from %a with a splat of 7 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (orr z.b, z.b, #0x7) on both q registers.
440 define void @or_v32i8(ptr %a) {
441 ; CHECK-LABEL: or_v32i8:
443 ; CHECK-NEXT: ldp q0, q1, [x0]
444 ; CHECK-NEXT: orr z0.b, z0.b, #0x7
445 ; CHECK-NEXT: orr z1.b, z1.b, #0x7
446 ; CHECK-NEXT: stp q0, q1, [x0]
448 %op1 = load <32 x i8>, ptr %a
449 %ins = insertelement <32 x i8> undef, i8 7, i64 0
450 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
451 %res = or <32 x i8> %op1, %op2
452 store <32 x i8> %res, ptr %a
; ORs the <16 x i16> loaded from %a with a splat of 15 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (orr z.h, z.h, #0xf) on both q registers.
456 define void @or_v16i16(ptr %a) {
457 ; CHECK-LABEL: or_v16i16:
459 ; CHECK-NEXT: ldp q0, q1, [x0]
460 ; CHECK-NEXT: orr z0.h, z0.h, #0xf
461 ; CHECK-NEXT: orr z1.h, z1.h, #0xf
462 ; CHECK-NEXT: stp q0, q1, [x0]
464 %op1 = load <16 x i16>, ptr %a
465 %ins = insertelement <16 x i16> undef, i16 15, i64 0
466 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
467 %res = or <16 x i16> %op1, %op2
468 store <16 x i16> %res, ptr %a
; ORs the <8 x i32> loaded from %a with a splat of 31 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (orr z.s, z.s, #0x1f) on both q registers.
472 define void @or_v8i32(ptr %a) {
473 ; CHECK-LABEL: or_v8i32:
475 ; CHECK-NEXT: ldp q0, q1, [x0]
476 ; CHECK-NEXT: orr z0.s, z0.s, #0x1f
477 ; CHECK-NEXT: orr z1.s, z1.s, #0x1f
478 ; CHECK-NEXT: stp q0, q1, [x0]
480 %op1 = load <8 x i32>, ptr %a
481 %ins = insertelement <8 x i32> undef, i32 31, i64 0
482 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
483 %res = or <8 x i32> %op1, %op2
484 store <8 x i32> %res, ptr %a
; ORs the <4 x i64> loaded from %a with a splat of 63 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (orr z.d, z.d, #0x3f) on both q registers.
488 define void @or_v4i64(ptr %a) {
489 ; CHECK-LABEL: or_v4i64:
491 ; CHECK-NEXT: ldp q0, q1, [x0]
492 ; CHECK-NEXT: orr z0.d, z0.d, #0x3f
493 ; CHECK-NEXT: orr z1.d, z1.d, #0x3f
494 ; CHECK-NEXT: stp q0, q1, [x0]
496 %op1 = load <4 x i64>, ptr %a
497 %ins = insertelement <4 x i64> undef, i64 63, i64 0
498 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
499 %res = or <4 x i64> %op1, %op2
500 store <4 x i64> %res, ptr %a
; Shifts every lane of the <32 x i8> at %a left by a splat of 7 and stores it back.
; The CHECK lines expect the immediate shift form (lsl z.b, z.b, #7) on both q registers.
508 define void @shl_v32i8(ptr %a) {
509 ; CHECK-LABEL: shl_v32i8:
511 ; CHECK-NEXT: ldp q0, q1, [x0]
512 ; CHECK-NEXT: lsl z0.b, z0.b, #7
513 ; CHECK-NEXT: lsl z1.b, z1.b, #7
514 ; CHECK-NEXT: stp q0, q1, [x0]
516 %op1 = load <32 x i8>, ptr %a
517 %ins = insertelement <32 x i8> undef, i8 7, i64 0
518 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
519 %res = shl <32 x i8> %op1, %op2
520 store <32 x i8> %res, ptr %a
; Shifts every lane of the <16 x i16> at %a left by a splat of 15 and stores it back.
; The CHECK lines expect the immediate shift form (lsl z.h, z.h, #15) on both q registers.
524 define void @shl_v16i16(ptr %a) {
525 ; CHECK-LABEL: shl_v16i16:
527 ; CHECK-NEXT: ldp q0, q1, [x0]
528 ; CHECK-NEXT: lsl z0.h, z0.h, #15
529 ; CHECK-NEXT: lsl z1.h, z1.h, #15
530 ; CHECK-NEXT: stp q0, q1, [x0]
532 %op1 = load <16 x i16>, ptr %a
533 %ins = insertelement <16 x i16> undef, i16 15, i64 0
534 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
535 %res = shl <16 x i16> %op1, %op2
536 store <16 x i16> %res, ptr %a
; Shifts every lane of the <8 x i32> at %a left by a splat of 31 and stores it back.
; The CHECK lines expect the immediate shift form (lsl z.s, z.s, #31) on both q registers.
540 define void @shl_v8i32(ptr %a) {
541 ; CHECK-LABEL: shl_v8i32:
543 ; CHECK-NEXT: ldp q0, q1, [x0]
544 ; CHECK-NEXT: lsl z0.s, z0.s, #31
545 ; CHECK-NEXT: lsl z1.s, z1.s, #31
546 ; CHECK-NEXT: stp q0, q1, [x0]
548 %op1 = load <8 x i32>, ptr %a
549 %ins = insertelement <8 x i32> undef, i32 31, i64 0
550 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
551 %res = shl <8 x i32> %op1, %op2
552 store <8 x i32> %res, ptr %a
; Shifts every lane of the <4 x i64> at %a left by a splat of 63 and stores it back.
; The CHECK lines expect the immediate shift form (lsl z.d, z.d, #63) on both q registers.
556 define void @shl_v4i64(ptr %a) {
557 ; CHECK-LABEL: shl_v4i64:
559 ; CHECK-NEXT: ldp q0, q1, [x0]
560 ; CHECK-NEXT: lsl z0.d, z0.d, #63
561 ; CHECK-NEXT: lsl z1.d, z1.d, #63
562 ; CHECK-NEXT: stp q0, q1, [x0]
564 %op1 = load <4 x i64>, ptr %a
565 %ins = insertelement <4 x i64> undef, i64 63, i64 0
566 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
567 %res = shl <4 x i64> %op1, %op2
568 store <4 x i64> %res, ptr %a
; Calls @llvm.smax.v32i8 on the <32 x i8> at %a and a splat of 7, storing the result back.
; The CHECK lines expect the SVE immediate form (smax z.b, z.b, #7) on both q registers.
576 define void @smax_v32i8(ptr %a) {
577 ; CHECK-LABEL: smax_v32i8:
579 ; CHECK-NEXT: ldp q0, q1, [x0]
580 ; CHECK-NEXT: smax z0.b, z0.b, #7
581 ; CHECK-NEXT: smax z1.b, z1.b, #7
582 ; CHECK-NEXT: stp q0, q1, [x0]
584 %op1 = load <32 x i8>, ptr %a
585 %ins = insertelement <32 x i8> undef, i8 7, i64 0
586 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
587 %res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
588 store <32 x i8> %res, ptr %a
; Calls @llvm.smax.v16i16 on the <16 x i16> at %a and a splat of 15, storing the result back.
; The CHECK lines expect the SVE immediate form (smax z.h, z.h, #15) on both q registers.
592 define void @smax_v16i16(ptr %a) {
593 ; CHECK-LABEL: smax_v16i16:
595 ; CHECK-NEXT: ldp q0, q1, [x0]
596 ; CHECK-NEXT: smax z0.h, z0.h, #15
597 ; CHECK-NEXT: smax z1.h, z1.h, #15
598 ; CHECK-NEXT: stp q0, q1, [x0]
600 %op1 = load <16 x i16>, ptr %a
601 %ins = insertelement <16 x i16> undef, i16 15, i64 0
602 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
603 %res = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
604 store <16 x i16> %res, ptr %a
; Calls @llvm.smax.v8i32 on the <8 x i32> at %a and a splat of 31, storing the result back.
; The CHECK lines expect the SVE immediate form (smax z.s, z.s, #31) on both q registers.
608 define void @smax_v8i32(ptr %a) {
609 ; CHECK-LABEL: smax_v8i32:
611 ; CHECK-NEXT: ldp q0, q1, [x0]
612 ; CHECK-NEXT: smax z0.s, z0.s, #31
613 ; CHECK-NEXT: smax z1.s, z1.s, #31
614 ; CHECK-NEXT: stp q0, q1, [x0]
616 %op1 = load <8 x i32>, ptr %a
617 %ins = insertelement <8 x i32> undef, i32 31, i64 0
618 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
619 %res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
620 store <8 x i32> %res, ptr %a
; Calls @llvm.smax.v4i64 on the <4 x i64> at %a and a splat of 63, storing the result back.
; The CHECK lines expect the SVE immediate form (smax z.d, z.d, #63) on both q registers.
624 define void @smax_v4i64(ptr %a) {
625 ; CHECK-LABEL: smax_v4i64:
627 ; CHECK-NEXT: ldp q0, q1, [x0]
628 ; CHECK-NEXT: smax z0.d, z0.d, #63
629 ; CHECK-NEXT: smax z1.d, z1.d, #63
630 ; CHECK-NEXT: stp q0, q1, [x0]
632 %op1 = load <4 x i64>, ptr %a
633 %ins = insertelement <4 x i64> undef, i64 63, i64 0
634 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
635 %res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
636 store <4 x i64> %res, ptr %a
; Calls @llvm.smin.v32i8 on the <32 x i8> at %a and a splat of 7, storing the result back.
; The CHECK lines expect the SVE immediate form (smin z.b, z.b, #7) on both q registers.
644 define void @smin_v32i8(ptr %a) {
645 ; CHECK-LABEL: smin_v32i8:
647 ; CHECK-NEXT: ldp q0, q1, [x0]
648 ; CHECK-NEXT: smin z0.b, z0.b, #7
649 ; CHECK-NEXT: smin z1.b, z1.b, #7
650 ; CHECK-NEXT: stp q0, q1, [x0]
652 %op1 = load <32 x i8>, ptr %a
653 %ins = insertelement <32 x i8> undef, i8 7, i64 0
654 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
655 %res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
656 store <32 x i8> %res, ptr %a
; Calls @llvm.smin.v16i16 on the <16 x i16> at %a and a splat of 15, storing the result back.
; The CHECK lines expect the SVE immediate form (smin z.h, z.h, #15) on both q registers.
660 define void @smin_v16i16(ptr %a) {
661 ; CHECK-LABEL: smin_v16i16:
663 ; CHECK-NEXT: ldp q0, q1, [x0]
664 ; CHECK-NEXT: smin z0.h, z0.h, #15
665 ; CHECK-NEXT: smin z1.h, z1.h, #15
666 ; CHECK-NEXT: stp q0, q1, [x0]
668 %op1 = load <16 x i16>, ptr %a
669 %ins = insertelement <16 x i16> undef, i16 15, i64 0
670 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
671 %res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
672 store <16 x i16> %res, ptr %a
; Calls @llvm.smin.v8i32 on the <8 x i32> at %a and a splat of 31, storing the result back.
; The CHECK lines expect the SVE immediate form (smin z.s, z.s, #31) on both q registers.
676 define void @smin_v8i32(ptr %a) {
677 ; CHECK-LABEL: smin_v8i32:
679 ; CHECK-NEXT: ldp q0, q1, [x0]
680 ; CHECK-NEXT: smin z0.s, z0.s, #31
681 ; CHECK-NEXT: smin z1.s, z1.s, #31
682 ; CHECK-NEXT: stp q0, q1, [x0]
684 %op1 = load <8 x i32>, ptr %a
685 %ins = insertelement <8 x i32> undef, i32 31, i64 0
686 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
687 %res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
688 store <8 x i32> %res, ptr %a
; Calls @llvm.smin.v4i64 on the <4 x i64> at %a and a splat of 63, storing the result back.
; The CHECK lines expect the SVE immediate form (smin z.d, z.d, #63) on both q registers.
692 define void @smin_v4i64(ptr %a) {
693 ; CHECK-LABEL: smin_v4i64:
695 ; CHECK-NEXT: ldp q0, q1, [x0]
696 ; CHECK-NEXT: smin z0.d, z0.d, #63
697 ; CHECK-NEXT: smin z1.d, z1.d, #63
698 ; CHECK-NEXT: stp q0, q1, [x0]
700 %op1 = load <4 x i64>, ptr %a
701 %ins = insertelement <4 x i64> undef, i64 63, i64 0
702 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
703 %res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
704 store <4 x i64> %res, ptr %a
; Subtracts a splat of 7 from the <32 x i8> loaded from %a and stores the difference back.
; The CHECK lines expect the SVE immediate form (sub z.b, z.b, #7) on both q registers.
712 define void @sub_v32i8(ptr %a) {
713 ; CHECK-LABEL: sub_v32i8:
715 ; CHECK-NEXT: ldp q0, q1, [x0]
716 ; CHECK-NEXT: sub z0.b, z0.b, #7 // =0x7
717 ; CHECK-NEXT: sub z1.b, z1.b, #7 // =0x7
718 ; CHECK-NEXT: stp q0, q1, [x0]
720 %op1 = load <32 x i8>, ptr %a
721 %ins = insertelement <32 x i8> undef, i8 7, i64 0
722 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
723 %res = sub <32 x i8> %op1, %op2
724 store <32 x i8> %res, ptr %a
; Subtracts a splat of 15 from the <16 x i16> loaded from %a and stores the difference back.
; The CHECK lines expect the SVE immediate form (sub z.h, z.h, #15) on both q registers.
728 define void @sub_v16i16(ptr %a) {
729 ; CHECK-LABEL: sub_v16i16:
731 ; CHECK-NEXT: ldp q0, q1, [x0]
732 ; CHECK-NEXT: sub z0.h, z0.h, #15 // =0xf
733 ; CHECK-NEXT: sub z1.h, z1.h, #15 // =0xf
734 ; CHECK-NEXT: stp q0, q1, [x0]
736 %op1 = load <16 x i16>, ptr %a
737 %ins = insertelement <16 x i16> undef, i16 15, i64 0
738 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
739 %res = sub <16 x i16> %op1, %op2
740 store <16 x i16> %res, ptr %a
; Subtracts a splat of 31 from the <8 x i32> loaded from %a and stores the difference back.
; The CHECK lines expect the SVE immediate form (sub z.s, z.s, #31) on both q registers.
744 define void @sub_v8i32(ptr %a) {
745 ; CHECK-LABEL: sub_v8i32:
747 ; CHECK-NEXT: ldp q0, q1, [x0]
748 ; CHECK-NEXT: sub z0.s, z0.s, #31 // =0x1f
749 ; CHECK-NEXT: sub z1.s, z1.s, #31 // =0x1f
750 ; CHECK-NEXT: stp q0, q1, [x0]
752 %op1 = load <8 x i32>, ptr %a
753 %ins = insertelement <8 x i32> undef, i32 31, i64 0
754 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
755 %res = sub <8 x i32> %op1, %op2
756 store <8 x i32> %res, ptr %a
; Subtracts a splat of 63 from the <4 x i64> loaded from %a and stores the difference back.
; The CHECK lines expect the SVE immediate form (sub z.d, z.d, #63) on both q registers.
760 define void @sub_v4i64(ptr %a) {
761 ; CHECK-LABEL: sub_v4i64:
763 ; CHECK-NEXT: ldp q0, q1, [x0]
764 ; CHECK-NEXT: sub z0.d, z0.d, #63 // =0x3f
765 ; CHECK-NEXT: sub z1.d, z1.d, #63 // =0x3f
766 ; CHECK-NEXT: stp q0, q1, [x0]
768 %op1 = load <4 x i64>, ptr %a
769 %ins = insertelement <4 x i64> undef, i64 63, i64 0
770 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
771 %res = sub <4 x i64> %op1, %op2
772 store <4 x i64> %res, ptr %a
; Calls @llvm.umax.v32i8 on the <32 x i8> at %a and a splat of 7, storing the result back.
; The CHECK lines expect the SVE immediate form (umax z.b, z.b, #7) on both q registers.
780 define void @umax_v32i8(ptr %a) {
781 ; CHECK-LABEL: umax_v32i8:
783 ; CHECK-NEXT: ldp q0, q1, [x0]
784 ; CHECK-NEXT: umax z0.b, z0.b, #7
785 ; CHECK-NEXT: umax z1.b, z1.b, #7
786 ; CHECK-NEXT: stp q0, q1, [x0]
788 %op1 = load <32 x i8>, ptr %a
789 %ins = insertelement <32 x i8> undef, i8 7, i64 0
790 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
791 %res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
792 store <32 x i8> %res, ptr %a
; Calls @llvm.umax.v16i16 on the <16 x i16> at %a and a splat of 15, storing the result back.
; The CHECK lines expect the SVE immediate form (umax z.h, z.h, #15) on both q registers.
796 define void @umax_v16i16(ptr %a) {
797 ; CHECK-LABEL: umax_v16i16:
799 ; CHECK-NEXT: ldp q0, q1, [x0]
800 ; CHECK-NEXT: umax z0.h, z0.h, #15
801 ; CHECK-NEXT: umax z1.h, z1.h, #15
802 ; CHECK-NEXT: stp q0, q1, [x0]
804 %op1 = load <16 x i16>, ptr %a
805 %ins = insertelement <16 x i16> undef, i16 15, i64 0
806 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
807 %res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
808 store <16 x i16> %res, ptr %a
; Calls @llvm.umax.v8i32 on the <8 x i32> at %a and a splat of 31, storing the result back.
; The CHECK lines expect the SVE immediate form (umax z.s, z.s, #31) on both q registers.
812 define void @umax_v8i32(ptr %a) {
813 ; CHECK-LABEL: umax_v8i32:
815 ; CHECK-NEXT: ldp q0, q1, [x0]
816 ; CHECK-NEXT: umax z0.s, z0.s, #31
817 ; CHECK-NEXT: umax z1.s, z1.s, #31
818 ; CHECK-NEXT: stp q0, q1, [x0]
820 %op1 = load <8 x i32>, ptr %a
821 %ins = insertelement <8 x i32> undef, i32 31, i64 0
822 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
823 %res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
824 store <8 x i32> %res, ptr %a
; Calls @llvm.umax.v4i64 on the <4 x i64> at %a and a splat of 63, storing the result back.
; The CHECK lines expect the SVE immediate form (umax z.d, z.d, #63) on both q registers.
828 define void @umax_v4i64(ptr %a) {
829 ; CHECK-LABEL: umax_v4i64:
831 ; CHECK-NEXT: ldp q0, q1, [x0]
832 ; CHECK-NEXT: umax z0.d, z0.d, #63
833 ; CHECK-NEXT: umax z1.d, z1.d, #63
834 ; CHECK-NEXT: stp q0, q1, [x0]
836 %op1 = load <4 x i64>, ptr %a
837 %ins = insertelement <4 x i64> undef, i64 63, i64 0
838 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
839 %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
840 store <4 x i64> %res, ptr %a
; Calls @llvm.umin.v32i8 on the <32 x i8> at %a and a splat of 7, storing the result back.
; The CHECK lines expect the SVE immediate form (umin z.b, z.b, #7) on both q registers.
848 define void @umin_v32i8(ptr %a) {
849 ; CHECK-LABEL: umin_v32i8:
851 ; CHECK-NEXT: ldp q0, q1, [x0]
852 ; CHECK-NEXT: umin z0.b, z0.b, #7
853 ; CHECK-NEXT: umin z1.b, z1.b, #7
854 ; CHECK-NEXT: stp q0, q1, [x0]
856 %op1 = load <32 x i8>, ptr %a
857 %ins = insertelement <32 x i8> undef, i8 7, i64 0
858 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
859 %res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
860 store <32 x i8> %res, ptr %a
; Calls @llvm.umin.v16i16 on the <16 x i16> at %a and a splat of 15, storing the result back.
; The CHECK lines expect the SVE immediate form (umin z.h, z.h, #15) on both q registers.
864 define void @umin_v16i16(ptr %a) {
865 ; CHECK-LABEL: umin_v16i16:
867 ; CHECK-NEXT: ldp q0, q1, [x0]
868 ; CHECK-NEXT: umin z0.h, z0.h, #15
869 ; CHECK-NEXT: umin z1.h, z1.h, #15
870 ; CHECK-NEXT: stp q0, q1, [x0]
872 %op1 = load <16 x i16>, ptr %a
873 %ins = insertelement <16 x i16> undef, i16 15, i64 0
874 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
875 %res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
876 store <16 x i16> %res, ptr %a
; Calls @llvm.umin.v8i32 on the <8 x i32> at %a and a splat of 31, storing the result back.
; The CHECK lines expect the SVE immediate form (umin z.s, z.s, #31) on both q registers.
880 define void @umin_v8i32(ptr %a) {
881 ; CHECK-LABEL: umin_v8i32:
883 ; CHECK-NEXT: ldp q0, q1, [x0]
884 ; CHECK-NEXT: umin z0.s, z0.s, #31
885 ; CHECK-NEXT: umin z1.s, z1.s, #31
886 ; CHECK-NEXT: stp q0, q1, [x0]
888 %op1 = load <8 x i32>, ptr %a
889 %ins = insertelement <8 x i32> undef, i32 31, i64 0
890 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
891 %res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
892 store <8 x i32> %res, ptr %a
; Calls @llvm.umin.v4i64 on the <4 x i64> at %a and a splat of 63, storing the result back.
; The CHECK lines expect the SVE immediate form (umin z.d, z.d, #63) on both q registers.
896 define void @umin_v4i64(ptr %a) {
897 ; CHECK-LABEL: umin_v4i64:
899 ; CHECK-NEXT: ldp q0, q1, [x0]
900 ; CHECK-NEXT: umin z0.d, z0.d, #63
901 ; CHECK-NEXT: umin z1.d, z1.d, #63
902 ; CHECK-NEXT: stp q0, q1, [x0]
904 %op1 = load <4 x i64>, ptr %a
905 %ins = insertelement <4 x i64> undef, i64 63, i64 0
906 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
907 %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
908 store <4 x i64> %res, ptr %a
; XORs the <32 x i8> loaded from %a with a splat of 7 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (eor z.b, z.b, #0x7) on both q registers.
916 define void @xor_v32i8(ptr %a) {
917 ; CHECK-LABEL: xor_v32i8:
919 ; CHECK-NEXT: ldp q0, q1, [x0]
920 ; CHECK-NEXT: eor z0.b, z0.b, #0x7
921 ; CHECK-NEXT: eor z1.b, z1.b, #0x7
922 ; CHECK-NEXT: stp q0, q1, [x0]
924 %op1 = load <32 x i8>, ptr %a
925 %ins = insertelement <32 x i8> undef, i8 7, i64 0
926 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
927 %res = xor <32 x i8> %op1, %op2
928 store <32 x i8> %res, ptr %a
; XORs the <16 x i16> loaded from %a with a splat of 15 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (eor z.h, z.h, #0xf) on both q registers.
932 define void @xor_v16i16(ptr %a) {
933 ; CHECK-LABEL: xor_v16i16:
935 ; CHECK-NEXT: ldp q0, q1, [x0]
936 ; CHECK-NEXT: eor z0.h, z0.h, #0xf
937 ; CHECK-NEXT: eor z1.h, z1.h, #0xf
938 ; CHECK-NEXT: stp q0, q1, [x0]
940 %op1 = load <16 x i16>, ptr %a
941 %ins = insertelement <16 x i16> undef, i16 15, i64 0
942 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
943 %res = xor <16 x i16> %op1, %op2
944 store <16 x i16> %res, ptr %a
; XORs the <8 x i32> loaded from %a with a splat of 31 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (eor z.s, z.s, #0x1f) on both q registers.
948 define void @xor_v8i32(ptr %a) {
949 ; CHECK-LABEL: xor_v8i32:
951 ; CHECK-NEXT: ldp q0, q1, [x0]
952 ; CHECK-NEXT: eor z0.s, z0.s, #0x1f
953 ; CHECK-NEXT: eor z1.s, z1.s, #0x1f
954 ; CHECK-NEXT: stp q0, q1, [x0]
956 %op1 = load <8 x i32>, ptr %a
957 %ins = insertelement <8 x i32> undef, i32 31, i64 0
958 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
959 %res = xor <8 x i32> %op1, %op2
960 store <8 x i32> %res, ptr %a
; XORs the <4 x i64> loaded from %a with a splat of 63 and stores the result back to %a.
; The CHECK lines expect the SVE immediate form (eor z.d, z.d, #0x3f) on both q registers.
964 define void @xor_v4i64(ptr %a) {
965 ; CHECK-LABEL: xor_v4i64:
967 ; CHECK-NEXT: ldp q0, q1, [x0]
968 ; CHECK-NEXT: eor z0.d, z0.d, #0x3f
969 ; CHECK-NEXT: eor z1.d, z1.d, #0x3f
970 ; CHECK-NEXT: stp q0, q1, [x0]
972 %op1 = load <4 x i64>, ptr %a
973 %ins = insertelement <4 x i64> undef, i64 63, i64 0
974 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
975 %res = xor <4 x i64> %op1, %op2
976 store <4 x i64> %res, ptr %a
980 declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
981 declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
982 declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
983 declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
985 declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
986 declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
987 declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
988 declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
990 declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
991 declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
992 declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
993 declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
995 declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
996 declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
997 declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
998 declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)