1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 target triple = "aarch64-unknown-linux-gnu"
8 ; Don't use SVE for 64-bit vectors.
; Lane-wise select between %op1 and %op2 driven by an <8 x i1> mask argument.
; The expected code stays on NEON registers (.8b): shl #7 followed by cmlt #0
; turns bit 0 of each mask lane into a full-lane mask, and bif then takes
; %op2's bits wherever that mask is clear.
9 define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) vscale_range(2,0) #0 {
10 ; CHECK-LABEL: select_v8i8:
12 ; CHECK-NEXT: shl v2.8b, v2.8b, #7
13 ; CHECK-NEXT: cmlt v2.8b, v2.8b, #0
14 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
16 %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
20 ; Don't use SVE for 128-bit vectors.
; Lane-wise select between %op1 and %op2 driven by a <16 x i1> mask argument.
; The expected code is the NEON shl #7 / cmlt #0 / bif sequence on full
; 128-bit (.16b) registers; no SVE instructions are expected.
21 define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) vscale_range(2,0) #0 {
22 ; CHECK-LABEL: select_v16i8:
24 ; CHECK-NEXT: shl v2.16b, v2.16b, #7
25 ; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
26 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
28 %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
; Loads <32 x i8> from %a and %b, builds the mask with icmp eq, selects %op1
; where the lanes are equal and %op2 elsewhere, and stores the result to %a.
; vscale_range(2,0) sets a minimum vscale of 2 with no upper bound; the
; expected code is one SVE sequence under a "ptrue p0.b, vl32" predicate, with
; cmpeq producing the predicate p1 that drives sel.
32 define void @select_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
33 ; CHECK-LABEL: select_v32i8:
35 ; CHECK-NEXT: ptrue p0.b, vl32
36 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
37 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
38 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
39 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
40 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
42 %op1 = load <32 x i8>, ptr %a
43 %op2 = load <32 x i8>, ptr %b
44 %mask = icmp eq <32 x i8> %op1, %op2
45 %sel = select <32 x i1> %mask, <32 x i8> %op1, <32 x i8> %op2
46 store <32 x i8> %sel, ptr %a
; Same load / icmp eq / select / store pattern on <64 x i8>, but without a
; vscale_range attribute, so the expected code depends on the
; -aarch64-sve-vector-bits-min value given on the RUN lines:
;   * 256-bit minimum: the vector is processed as two vl32 halves; the upper
;     half is addressed as [x0, x8] / [x1, x8] with w8 = 32.
;   * 512-bit minimum (prefix shared with the 2048-bit run): a single vl64
;     sequence.
50 define void @select_v64i8(ptr %a, ptr %b) #0 {
51 ; VBITS_GE_256-LABEL: select_v64i8:
52 ; VBITS_GE_256: // %bb.0:
53 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
54 ; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
55 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
56 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x1, x8]
57 ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0]
58 ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
59 ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
60 ; VBITS_GE_256-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
61 ; VBITS_GE_256-NEXT: sel z0.b, p1, z0.b, z1.b
62 ; VBITS_GE_256-NEXT: sel z1.b, p2, z2.b, z3.b
63 ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
64 ; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
65 ; VBITS_GE_256-NEXT: ret
67 ; VBITS_GE_512-LABEL: select_v64i8:
68 ; VBITS_GE_512: // %bb.0:
69 ; VBITS_GE_512-NEXT: ptrue p0.b, vl64
70 ; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
71 ; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
72 ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
73 ; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b
74 ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
75 ; VBITS_GE_512-NEXT: ret
76 %op1 = load <64 x i8>, ptr %a
77 %op2 = load <64 x i8>, ptr %b
78 %mask = icmp eq <64 x i8> %op1, %op2
79 %sel = select <64 x i1> %mask, <64 x i8> %op1, <64 x i8> %op2
80 store <64 x i8> %sel, ptr %a
; Load / icmp eq / select / store on <128 x i8>. vscale_range(8,0) sets a
; minimum vscale of 8, and the expected code is a single SVE sequence under a
; "ptrue p0.b, vl128" predicate for every RUN line.
84 define void @select_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
85 ; CHECK-LABEL: select_v128i8:
87 ; CHECK-NEXT: ptrue p0.b, vl128
88 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
89 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
90 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
91 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
92 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
94 %op1 = load <128 x i8>, ptr %a
95 %op2 = load <128 x i8>, ptr %b
96 %mask = icmp eq <128 x i8> %op1, %op2
97 %sel = select <128 x i1> %mask, <128 x i8> %op1, <128 x i8> %op2
98 store <128 x i8> %sel, ptr %a
; Load / icmp eq / select / store on <256 x i8>. vscale_range(16,0) sets a
; minimum vscale of 16, and the expected code is a single SVE sequence under a
; "ptrue p0.b, vl256" predicate for every RUN line.
102 define void @select_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
103 ; CHECK-LABEL: select_v256i8:
105 ; CHECK-NEXT: ptrue p0.b, vl256
106 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
107 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
108 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
109 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
110 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
112 %op1 = load <256 x i8>, ptr %a
113 %op2 = load <256 x i8>, ptr %b
114 %mask = icmp eq <256 x i8> %op1, %op2
115 %sel = select <256 x i1> %mask, <256 x i8> %op1, <256 x i8> %op2
116 store <256 x i8> %sel, ptr %a
120 ; Don't use SVE for 64-bit vectors.
; Lane-wise select between %op1 and %op2 driven by a <4 x i1> mask argument.
; The expected code is NEON only: shl #15 / cmlt #0 on .4h lanes builds the
; full-lane mask, then bif operates on the 64-bit register viewed as .8b.
121 define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) vscale_range(2,0) #0 {
122 ; CHECK-LABEL: select_v4i16:
124 ; CHECK-NEXT: shl v2.4h, v2.4h, #15
125 ; CHECK-NEXT: cmlt v2.4h, v2.4h, #0
126 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
128 %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
132 ; Don't use SVE for 128-bit vectors.
; Lane-wise select between %op1 and %op2 driven by an <8 x i1> mask argument.
; The expected NEON code first widens the mask lanes from .8b to .8h with
; ushll #0, then uses shl #15 / cmlt #0 to build the full-lane mask for bif.
133 define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) vscale_range(2,0) #0 {
134 ; CHECK-LABEL: select_v8i16:
136 ; CHECK-NEXT: ushll v2.8h, v2.8b, #0
137 ; CHECK-NEXT: shl v2.8h, v2.8h, #15
138 ; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
139 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
141 %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
; Loads <16 x i16> from %a and %b, builds the mask with icmp eq, selects %op1
; where the lanes are equal and %op2 elsewhere, and stores the result to %a.
; With vscale_range(2,0) the expected code is one SVE sequence under a
; "ptrue p0.h, vl16" predicate.
145 define void @select_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
146 ; CHECK-LABEL: select_v16i16:
148 ; CHECK-NEXT: ptrue p0.h, vl16
149 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
150 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
151 ; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
152 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
153 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
155 %op1 = load <16 x i16>, ptr %a
156 %op2 = load <16 x i16>, ptr %b
157 %mask = icmp eq <16 x i16> %op1, %op2
158 %sel = select <16 x i1> %mask, <16 x i16> %op1, <16 x i16> %op2
159 store <16 x i16> %sel, ptr %a
; Load / icmp eq / select / store on <32 x i16> without a vscale_range
; attribute, so the expected code depends on the -aarch64-sve-vector-bits-min
; value given on the RUN lines:
;   * 256-bit minimum: two vl16 halves; the upper half is addressed as
;     [base, x8, lsl #1] with x8 = 16.
;   * 512-bit minimum (prefix shared with the 2048-bit run): a single vl32
;     sequence.
163 define void @select_v32i16(ptr %a, ptr %b) #0 {
164 ; VBITS_GE_256-LABEL: select_v32i16:
165 ; VBITS_GE_256: // %bb.0:
166 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
167 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
168 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
169 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
170 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0]
171 ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
172 ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
173 ; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z2.h, z3.h
174 ; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z1.h
175 ; VBITS_GE_256-NEXT: sel z1.h, p2, z2.h, z3.h
176 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
177 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
178 ; VBITS_GE_256-NEXT: ret
180 ; VBITS_GE_512-LABEL: select_v32i16:
181 ; VBITS_GE_512: // %bb.0:
182 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
183 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
184 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
185 ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
186 ; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
187 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
188 ; VBITS_GE_512-NEXT: ret
189 %op1 = load <32 x i16>, ptr %a
190 %op2 = load <32 x i16>, ptr %b
191 %mask = icmp eq <32 x i16> %op1, %op2
192 %sel = select <32 x i1> %mask, <32 x i16> %op1, <32 x i16> %op2
193 store <32 x i16> %sel, ptr %a
; Load / icmp eq / select / store on <64 x i16>. vscale_range(8,0) sets a
; minimum vscale of 8, and the expected code is a single SVE sequence under a
; "ptrue p0.h, vl64" predicate for every RUN line.
197 define void @select_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
198 ; CHECK-LABEL: select_v64i16:
200 ; CHECK-NEXT: ptrue p0.h, vl64
201 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
202 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
203 ; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
204 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
205 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
207 %op1 = load <64 x i16>, ptr %a
208 %op2 = load <64 x i16>, ptr %b
209 %mask = icmp eq <64 x i16> %op1, %op2
210 %sel = select <64 x i1> %mask, <64 x i16> %op1, <64 x i16> %op2
211 store <64 x i16> %sel, ptr %a
; Load / icmp eq / select / store on <128 x i16>. vscale_range(16,0) sets a
; minimum vscale of 16, and the expected code is a single SVE sequence under a
; "ptrue p0.h, vl128" predicate for every RUN line.
215 define void @select_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
216 ; CHECK-LABEL: select_v128i16:
218 ; CHECK-NEXT: ptrue p0.h, vl128
219 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
220 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
221 ; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
222 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
223 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
225 %op1 = load <128 x i16>, ptr %a
226 %op2 = load <128 x i16>, ptr %b
227 %mask = icmp eq <128 x i16> %op1, %op2
228 %sel = select <128 x i1> %mask, <128 x i16> %op1, <128 x i16> %op2
229 store <128 x i16> %sel, ptr %a
233 ; Don't use SVE for 64-bit vectors.
; Lane-wise select between %op1 and %op2 driven by a <2 x i1> mask argument.
; The expected code is NEON only: shl #31 / cmlt #0 on .2s lanes builds the
; full-lane mask, then bif operates on the 64-bit register viewed as .8b.
234 define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) vscale_range(2,0) #0 {
235 ; CHECK-LABEL: select_v2i32:
237 ; CHECK-NEXT: shl v2.2s, v2.2s, #31
238 ; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
239 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
241 %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
245 ; Don't use SVE for 128-bit vectors.
; Lane-wise select between %op1 and %op2 driven by a <4 x i1> mask argument.
; The expected NEON code first widens the mask lanes from .4h to .4s with
; ushll #0, then uses shl #31 / cmlt #0 to build the full-lane mask for bif.
246 define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) vscale_range(2,0) #0 {
247 ; CHECK-LABEL: select_v4i32:
249 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
250 ; CHECK-NEXT: shl v2.4s, v2.4s, #31
251 ; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
252 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
254 %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
; Loads <8 x i32> from %a and %b, builds the mask with icmp eq, selects %op1
; where the lanes are equal and %op2 elsewhere, and stores the result to %a.
; With vscale_range(2,0) the expected code is one SVE sequence under a
; "ptrue p0.s, vl8" predicate.
258 define void @select_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
259 ; CHECK-LABEL: select_v8i32:
261 ; CHECK-NEXT: ptrue p0.s, vl8
262 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
263 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
264 ; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
265 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
266 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
268 %op1 = load <8 x i32>, ptr %a
269 %op2 = load <8 x i32>, ptr %b
270 %mask = icmp eq <8 x i32> %op1, %op2
271 %sel = select <8 x i1> %mask, <8 x i32> %op1, <8 x i32> %op2
272 store <8 x i32> %sel, ptr %a
; Load / icmp eq / select / store on <16 x i32> without a vscale_range
; attribute, so the expected code depends on the -aarch64-sve-vector-bits-min
; value given on the RUN lines:
;   * 256-bit minimum: two vl8 halves; the upper half is addressed as
;     [base, x8, lsl #2] with x8 = 8.
;   * 512-bit minimum (prefix shared with the 2048-bit run): a single vl16
;     sequence.
276 define void @select_v16i32(ptr %a, ptr %b) #0 {
277 ; VBITS_GE_256-LABEL: select_v16i32:
278 ; VBITS_GE_256: // %bb.0:
279 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
280 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
281 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
282 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
283 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
284 ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
285 ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
286 ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z2.s, z3.s
287 ; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z1.s
288 ; VBITS_GE_256-NEXT: sel z1.s, p2, z2.s, z3.s
289 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
290 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
291 ; VBITS_GE_256-NEXT: ret
293 ; VBITS_GE_512-LABEL: select_v16i32:
294 ; VBITS_GE_512: // %bb.0:
295 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
296 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
297 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
298 ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
299 ; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
300 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
301 ; VBITS_GE_512-NEXT: ret
302 %op1 = load <16 x i32>, ptr %a
303 %op2 = load <16 x i32>, ptr %b
304 %mask = icmp eq <16 x i32> %op1, %op2
305 %sel = select <16 x i1> %mask, <16 x i32> %op1, <16 x i32> %op2
306 store <16 x i32> %sel, ptr %a
; Load / icmp eq / select / store on <32 x i32>. vscale_range(8,0) sets a
; minimum vscale of 8, and the expected code is a single SVE sequence under a
; "ptrue p0.s, vl32" predicate for every RUN line.
310 define void @select_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
311 ; CHECK-LABEL: select_v32i32:
313 ; CHECK-NEXT: ptrue p0.s, vl32
314 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
315 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
316 ; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
317 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
318 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
320 %op1 = load <32 x i32>, ptr %a
321 %op2 = load <32 x i32>, ptr %b
322 %mask = icmp eq <32 x i32> %op1, %op2
323 %sel = select <32 x i1> %mask, <32 x i32> %op1, <32 x i32> %op2
324 store <32 x i32> %sel, ptr %a
; Load / icmp eq / select / store on <64 x i32>. vscale_range(16,0) sets a
; minimum vscale of 16, and the expected code is a single SVE sequence under a
; "ptrue p0.s, vl64" predicate for every RUN line.
328 define void @select_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
329 ; CHECK-LABEL: select_v64i32:
331 ; CHECK-NEXT: ptrue p0.s, vl64
332 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
333 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
334 ; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
335 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
336 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
338 %op1 = load <64 x i32>, ptr %a
339 %op2 = load <64 x i32>, ptr %b
340 %mask = icmp eq <64 x i32> %op1, %op2
341 %sel = select <64 x i1> %mask, <64 x i32> %op1, <64 x i32> %op2
342 store <64 x i32> %sel, ptr %a
346 ; Don't use SVE for 64-bit vectors.
; Select between %op1 and %op2 driven by a single-lane <1 x i1> mask argument.
; In the expected code the mask is tested as a scalar (tst w0, #0x1), csetm
; materialises it in x8, fmov moves it into d2, and bif performs the select
; on NEON registers; no SVE instructions are expected.
347 define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) vscale_range(2,0) #0 {
348 ; CHECK-LABEL: select_v1i64:
350 ; CHECK-NEXT: tst w0, #0x1
351 ; CHECK-NEXT: csetm x8, ne
352 ; CHECK-NEXT: fmov d2, x8
353 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
355 %sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2
359 ; Don't use SVE for 128-bit vectors.
; Lane-wise select between %op1 and %op2 driven by a <2 x i1> mask argument.
; The expected NEON code first widens the mask lanes from .2s to .2d with
; ushll #0, then uses shl #63 / cmlt #0 to build the full-lane mask for bif.
360 define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) vscale_range(2,0) #0 {
361 ; CHECK-LABEL: select_v2i64:
363 ; CHECK-NEXT: ushll v2.2d, v2.2s, #0
364 ; CHECK-NEXT: shl v2.2d, v2.2d, #63
365 ; CHECK-NEXT: cmlt v2.2d, v2.2d, #0
366 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
368 %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2
; Loads <4 x i64> from %a and %b, builds the mask with icmp eq, selects %op1
; where the lanes are equal and %op2 elsewhere, and stores the result to %a.
; With vscale_range(2,0) the expected code is one SVE sequence under a
; "ptrue p0.d, vl4" predicate.
372 define void @select_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
373 ; CHECK-LABEL: select_v4i64:
375 ; CHECK-NEXT: ptrue p0.d, vl4
376 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
377 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
378 ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
379 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
380 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
382 %op1 = load <4 x i64>, ptr %a
383 %op2 = load <4 x i64>, ptr %b
384 %mask = icmp eq <4 x i64> %op1, %op2
385 %sel = select <4 x i1> %mask, <4 x i64> %op1, <4 x i64> %op2
386 store <4 x i64> %sel, ptr %a
; Load / icmp eq / select / store on <8 x i64> without a vscale_range
; attribute, so the expected code depends on the -aarch64-sve-vector-bits-min
; value given on the RUN lines:
;   * 256-bit minimum: two vl4 halves; the upper half is addressed as
;     [base, x8, lsl #3] with x8 = 4.
;   * 512-bit minimum (prefix shared with the 2048-bit run): a single vl8
;     sequence.
390 define void @select_v8i64(ptr %a, ptr %b) #0 {
391 ; VBITS_GE_256-LABEL: select_v8i64:
392 ; VBITS_GE_256: // %bb.0:
393 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
394 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
395 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
396 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
397 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0]
398 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
399 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
400 ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z2.d, z3.d
401 ; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z1.d
402 ; VBITS_GE_256-NEXT: sel z1.d, p2, z2.d, z3.d
403 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
404 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
405 ; VBITS_GE_256-NEXT: ret
407 ; VBITS_GE_512-LABEL: select_v8i64:
408 ; VBITS_GE_512: // %bb.0:
409 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
410 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
411 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
412 ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
413 ; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
414 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
415 ; VBITS_GE_512-NEXT: ret
416 %op1 = load <8 x i64>, ptr %a
417 %op2 = load <8 x i64>, ptr %b
418 %mask = icmp eq <8 x i64> %op1, %op2
419 %sel = select <8 x i1> %mask, <8 x i64> %op1, <8 x i64> %op2
420 store <8 x i64> %sel, ptr %a
; Load / icmp eq / select / store on <16 x i64>. vscale_range(8,0) sets a
; minimum vscale of 8, and the expected code is a single SVE sequence under a
; "ptrue p0.d, vl16" predicate for every RUN line.
424 define void @select_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
425 ; CHECK-LABEL: select_v16i64:
427 ; CHECK-NEXT: ptrue p0.d, vl16
428 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
429 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
430 ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
431 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
432 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
434 %op1 = load <16 x i64>, ptr %a
435 %op2 = load <16 x i64>, ptr %b
436 %mask = icmp eq <16 x i64> %op1, %op2
437 %sel = select <16 x i1> %mask, <16 x i64> %op1, <16 x i64> %op2
438 store <16 x i64> %sel, ptr %a
; Load / icmp eq / select / store on <32 x i64>. vscale_range(16,0) sets a
; minimum vscale of 16, and the expected code is a single SVE sequence under a
; "ptrue p0.d, vl32" predicate for every RUN line.
442 define void @select_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
443 ; CHECK-LABEL: select_v32i64:
445 ; CHECK-NEXT: ptrue p0.d, vl32
446 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
447 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
448 ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
449 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
450 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
452 %op1 = load <32 x i64>, ptr %a
453 %op2 = load <32 x i64>, ptr %b
454 %mask = icmp eq <32 x i64> %op1, %op2
455 %sel = select <32 x i1> %mask, <32 x i64> %op1, <32 x i64> %op2
456 store <32 x i64> %sel, ptr %a
; Attribute group #0, referenced by every function visible in this file:
; enables the SVE target feature and marks the functions uwtable.
460 attributes #0 = { "target-features"="+sve" uwtable }