1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 target triple = "aarch64-unknown-linux-gnu"
8 ; Don't use SVE for 64-bit vectors.
; Scalar-i1 select of two 64-bit NEON vectors: the mask bit is tested,
; splatted via csetm/dup, and applied with bif (bitwise insert if false).
9 define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
10 ; CHECK-LABEL: select_v8i8:
12 ; CHECK-NEXT: tst w0, #0x1
13 ; CHECK-NEXT: csetm w8, ne
14 ; CHECK-NEXT: dup v2.8b, w8
15 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
17 %sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
21 ; Don't use SVE for 128-bit vectors.
; Same csetm/dup/bif pattern as the 64-bit case, but on full 128-bit
; NEON registers (.16b arrangement).
22 define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
23 ; CHECK-LABEL: select_v16i8:
25 ; CHECK-NEXT: tst w0, #0x1
26 ; CHECK-NEXT: csetm w8, ne
27 ; CHECK-NEXT: dup v2.16b, w8
28 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
30 %sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
; 256-bit vector: with vscale_range(2,0) the whole vector fits in one SVE
; register (vl32), so the select becomes a predicated sel whose governing
; predicate comes from cmpne on the splatted mask.
34 define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
35 ; CHECK-LABEL: select_v32i8:
37 ; CHECK-NEXT: mov z0.b, w2
38 ; CHECK-NEXT: ptrue p0.b
39 ; CHECK-NEXT: ptrue p1.b, vl32
40 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
41 ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
42 ; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
43 ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
44 ; CHECK-NEXT: st1b { z0.b }, p1, [x0]
46 %op1 = load volatile <32 x i8>, ptr %a
47 %op2 = load volatile <32 x i8>, ptr %b
48 %sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
49 store <32 x i8> %sel, ptr %a
; 512-bit vector with no vscale_range: at 256-bit SVE (VBITS_GE_256) the
; operation is split into two 32-byte halves, the upper half addressed via
; x8 = #32; at 512-bit (VBITS_GE_512) it is a single vl64 sel.
53 define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
54 ; VBITS_GE_256-LABEL: select_v64i8:
55 ; VBITS_GE_256: // %bb.0:
56 ; VBITS_GE_256-NEXT: mov z0.b, w2
57 ; VBITS_GE_256-NEXT: ptrue p0.b
58 ; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
59 ; VBITS_GE_256-NEXT: ptrue p1.b, vl32
60 ; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z0.b, #0
61 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0, x8]
62 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p1/z, [x0]
63 ; VBITS_GE_256-NEXT: ld1b { z2.b }, p1/z, [x1, x8]
64 ; VBITS_GE_256-NEXT: ld1b { z3.b }, p1/z, [x1]
65 ; VBITS_GE_256-NEXT: sel z0.b, p0, z0.b, z2.b
66 ; VBITS_GE_256-NEXT: sel z1.b, p0, z1.b, z3.b
67 ; VBITS_GE_256-NEXT: st1b { z0.b }, p1, [x0, x8]
68 ; VBITS_GE_256-NEXT: st1b { z1.b }, p1, [x0]
69 ; VBITS_GE_256-NEXT: ret
71 ; VBITS_GE_512-LABEL: select_v64i8:
72 ; VBITS_GE_512: // %bb.0:
73 ; VBITS_GE_512-NEXT: mov z0.b, w2
74 ; VBITS_GE_512-NEXT: ptrue p0.b
75 ; VBITS_GE_512-NEXT: ptrue p1.b, vl64
76 ; VBITS_GE_512-NEXT: cmpne p0.b, p0/z, z0.b, #0
77 ; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x0]
78 ; VBITS_GE_512-NEXT: ld1b { z1.b }, p1/z, [x1]
79 ; VBITS_GE_512-NEXT: sel z0.b, p0, z0.b, z1.b
80 ; VBITS_GE_512-NEXT: st1b { z0.b }, p1, [x0]
81 ; VBITS_GE_512-NEXT: ret
82 %op1 = load volatile <64 x i8>, ptr %a
83 %op2 = load volatile <64 x i8>, ptr %b
84 %sel = select i1 %mask, <64 x i8> %op1, <64 x i8> %op2
85 store <64 x i8> %sel, ptr %a
; 1024-bit vector: vscale_range(8,0) guarantees a single SVE register can
; hold it (vl128), so a single predicated sel suffices.
89 define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
90 ; CHECK-LABEL: select_v128i8:
92 ; CHECK-NEXT: mov z0.b, w2
93 ; CHECK-NEXT: ptrue p0.b
94 ; CHECK-NEXT: ptrue p1.b, vl128
95 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
96 ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
97 ; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
98 ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
99 ; CHECK-NEXT: st1b { z0.b }, p1, [x0]
101 %op1 = load volatile <128 x i8>, ptr %a
102 %op2 = load volatile <128 x i8>, ptr %b
103 %sel = select i1 %mask, <128 x i8> %op1, <128 x i8> %op2
104 store <128 x i8> %sel, ptr %a
; 2048-bit vector: vscale_range(16,0) guarantees a single SVE register can
; hold it (vl256); same single-sel lowering as the vl128 case.
108 define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
109 ; CHECK-LABEL: select_v256i8:
111 ; CHECK-NEXT: mov z0.b, w2
112 ; CHECK-NEXT: ptrue p0.b
113 ; CHECK-NEXT: ptrue p1.b, vl256
114 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
115 ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
116 ; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
117 ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
118 ; CHECK-NEXT: st1b { z0.b }, p1, [x0]
120 %op1 = load volatile <256 x i8>, ptr %a
121 %op2 = load volatile <256 x i8>, ptr %b
122 %sel = select i1 %mask, <256 x i8> %op1, <256 x i8> %op2
123 store <256 x i8> %sel, ptr %a
127 ; Don't use SVE for 64-bit vectors.
; i16 variant of the 64-bit NEON lowering: mask splatted with dup .4h,
; combined with bif on the .8b views.
128 define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
129 ; CHECK-LABEL: select_v4i16:
131 ; CHECK-NEXT: tst w0, #0x1
132 ; CHECK-NEXT: csetm w8, ne
133 ; CHECK-NEXT: dup v2.4h, w8
134 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
136 %sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
140 ; Don't use SVE for 128-bit vectors.
; i16 variant of the 128-bit NEON lowering: mask splatted with dup .8h,
; combined with bif on the .16b views.
141 define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
142 ; CHECK-LABEL: select_v8i16:
144 ; CHECK-NEXT: tst w0, #0x1
145 ; CHECK-NEXT: csetm w8, ne
146 ; CHECK-NEXT: dup v2.8h, w8
147 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
149 %sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
; 256-bit i16 vector in one SVE register (vl16). Unlike the i8 case, the
; splatted mask is first masked to bit 0 (and #0x1) before the cmpne.
153 define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
154 ; CHECK-LABEL: select_v16i16:
156 ; CHECK-NEXT: mov z0.h, w2
157 ; CHECK-NEXT: ptrue p0.h
158 ; CHECK-NEXT: ptrue p1.h, vl16
159 ; CHECK-NEXT: and z0.h, z0.h, #0x1
160 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
161 ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
162 ; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
163 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
164 ; CHECK-NEXT: st1h { z0.h }, p1, [x0]
166 %op1 = load volatile <16 x i16>, ptr %a
167 %op2 = load volatile <16 x i16>, ptr %b
168 %sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
169 store <16 x i16> %sel, ptr %a
; 512-bit i16 vector, no vscale_range: at 256-bit SVE it is split into two
; vl16 halves (upper half at element offset x8 = #16, scaled lsl #1); at
; 512-bit it is a single vl32 sel.
173 define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
174 ; VBITS_GE_256-LABEL: select_v32i16:
175 ; VBITS_GE_256: // %bb.0:
176 ; VBITS_GE_256-NEXT: mov z0.h, w2
177 ; VBITS_GE_256-NEXT: ptrue p0.h
178 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
179 ; VBITS_GE_256-NEXT: ptrue p1.h, vl16
180 ; VBITS_GE_256-NEXT: and z0.h, z0.h, #0x1
181 ; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z0.h, #0
182 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
183 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p1/z, [x0]
184 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p1/z, [x1, x8, lsl #1]
185 ; VBITS_GE_256-NEXT: ld1h { z3.h }, p1/z, [x1]
186 ; VBITS_GE_256-NEXT: sel z0.h, p0, z0.h, z2.h
187 ; VBITS_GE_256-NEXT: sel z1.h, p0, z1.h, z3.h
188 ; VBITS_GE_256-NEXT: st1h { z0.h }, p1, [x0, x8, lsl #1]
189 ; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x0]
190 ; VBITS_GE_256-NEXT: ret
192 ; VBITS_GE_512-LABEL: select_v32i16:
193 ; VBITS_GE_512: // %bb.0:
194 ; VBITS_GE_512-NEXT: mov z0.h, w2
195 ; VBITS_GE_512-NEXT: ptrue p0.h
196 ; VBITS_GE_512-NEXT: ptrue p1.h, vl32
197 ; VBITS_GE_512-NEXT: and z0.h, z0.h, #0x1
198 ; VBITS_GE_512-NEXT: cmpne p0.h, p0/z, z0.h, #0
199 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x0]
200 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p1/z, [x1]
201 ; VBITS_GE_512-NEXT: sel z0.h, p0, z0.h, z1.h
202 ; VBITS_GE_512-NEXT: st1h { z0.h }, p1, [x0]
203 ; VBITS_GE_512-NEXT: ret
204 %op1 = load volatile <32 x i16>, ptr %a
205 %op2 = load volatile <32 x i16>, ptr %b
206 %sel = select i1 %mask, <32 x i16> %op1, <32 x i16> %op2
207 store <32 x i16> %sel, ptr %a
; 1024-bit i16 vector: vscale_range(8,0) keeps it in one SVE register (vl64).
211 define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
212 ; CHECK-LABEL: select_v64i16:
214 ; CHECK-NEXT: mov z0.h, w2
215 ; CHECK-NEXT: ptrue p0.h
216 ; CHECK-NEXT: ptrue p1.h, vl64
217 ; CHECK-NEXT: and z0.h, z0.h, #0x1
218 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
219 ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
220 ; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
221 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
222 ; CHECK-NEXT: st1h { z0.h }, p1, [x0]
224 %op1 = load volatile <64 x i16>, ptr %a
225 %op2 = load volatile <64 x i16>, ptr %b
226 %sel = select i1 %mask, <64 x i16> %op1, <64 x i16> %op2
227 store <64 x i16> %sel, ptr %a
; 2048-bit i16 vector: vscale_range(16,0) keeps it in one SVE register (vl128).
231 define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
232 ; CHECK-LABEL: select_v128i16:
234 ; CHECK-NEXT: mov z0.h, w2
235 ; CHECK-NEXT: ptrue p0.h
236 ; CHECK-NEXT: ptrue p1.h, vl128
237 ; CHECK-NEXT: and z0.h, z0.h, #0x1
238 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
239 ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
240 ; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
241 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
242 ; CHECK-NEXT: st1h { z0.h }, p1, [x0]
244 %op1 = load volatile <128 x i16>, ptr %a
245 %op2 = load volatile <128 x i16>, ptr %b
246 %sel = select i1 %mask, <128 x i16> %op1, <128 x i16> %op2
247 store <128 x i16> %sel, ptr %a
251 ; Don't use SVE for 64-bit vectors.
; i32 variant of the 64-bit NEON lowering: mask splatted with dup .2s.
252 define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
253 ; CHECK-LABEL: select_v2i32:
255 ; CHECK-NEXT: tst w0, #0x1
256 ; CHECK-NEXT: csetm w8, ne
257 ; CHECK-NEXT: dup v2.2s, w8
258 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
260 %sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
264 ; Don't use SVE for 128-bit vectors.
; i32 variant of the 128-bit NEON lowering: mask splatted with dup .4s.
265 define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
266 ; CHECK-LABEL: select_v4i32:
268 ; CHECK-NEXT: tst w0, #0x1
269 ; CHECK-NEXT: csetm w8, ne
270 ; CHECK-NEXT: dup v2.4s, w8
271 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
273 %sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
; 256-bit i32 vector in one SVE register (vl8). Here the mask's bit 0 is
; extracted on the scalar side (and w8, w2, #0x1) before the splat, rather
; than with a vector and as in the i16 case.
277 define void @select_v8i32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
278 ; CHECK-LABEL: select_v8i32:
280 ; CHECK-NEXT: and w8, w2, #0x1
281 ; CHECK-NEXT: ptrue p0.s
282 ; CHECK-NEXT: mov z0.s, w8
283 ; CHECK-NEXT: ptrue p1.s, vl8
284 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
285 ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
286 ; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
287 ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
288 ; CHECK-NEXT: st1w { z0.s }, p1, [x0]
290 %op1 = load volatile <8 x i32>, ptr %a
291 %op2 = load volatile <8 x i32>, ptr %b
292 %sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
293 store <8 x i32> %sel, ptr %a
; 512-bit i32 vector, no vscale_range: split into two vl8 halves at 256-bit
; SVE (upper half at element offset x8 = #8, scaled lsl #2); single vl16 sel
; at 512-bit.
297 define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 {
298 ; VBITS_GE_256-LABEL: select_v16i32:
299 ; VBITS_GE_256: // %bb.0:
300 ; VBITS_GE_256-NEXT: and w8, w2, #0x1
301 ; VBITS_GE_256-NEXT: ptrue p0.s
302 ; VBITS_GE_256-NEXT: mov z0.s, w8
303 ; VBITS_GE_256-NEXT: ptrue p1.s, vl8
304 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
305 ; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z0.s, #0
306 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
307 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p1/z, [x0]
308 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p1/z, [x1, x8, lsl #2]
309 ; VBITS_GE_256-NEXT: ld1w { z3.s }, p1/z, [x1]
310 ; VBITS_GE_256-NEXT: sel z0.s, p0, z0.s, z2.s
311 ; VBITS_GE_256-NEXT: sel z1.s, p0, z1.s, z3.s
312 ; VBITS_GE_256-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2]
313 ; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x0]
314 ; VBITS_GE_256-NEXT: ret
316 ; VBITS_GE_512-LABEL: select_v16i32:
317 ; VBITS_GE_512: // %bb.0:
318 ; VBITS_GE_512-NEXT: and w8, w2, #0x1
319 ; VBITS_GE_512-NEXT: ptrue p0.s
320 ; VBITS_GE_512-NEXT: mov z0.s, w8
321 ; VBITS_GE_512-NEXT: ptrue p1.s, vl16
322 ; VBITS_GE_512-NEXT: cmpne p0.s, p0/z, z0.s, #0
323 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x0]
324 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p1/z, [x1]
325 ; VBITS_GE_512-NEXT: sel z0.s, p0, z0.s, z1.s
326 ; VBITS_GE_512-NEXT: st1w { z0.s }, p1, [x0]
327 ; VBITS_GE_512-NEXT: ret
328 %op1 = load volatile <16 x i32>, ptr %a
329 %op2 = load volatile <16 x i32>, ptr %b
330 %sel = select i1 %mask, <16 x i32> %op1, <16 x i32> %op2
331 store <16 x i32> %sel, ptr %a
; 1024-bit i32 vector: vscale_range(8,0) keeps it in one SVE register (vl32).
335 define void @select_v32i32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
336 ; CHECK-LABEL: select_v32i32:
338 ; CHECK-NEXT: and w8, w2, #0x1
339 ; CHECK-NEXT: ptrue p0.s
340 ; CHECK-NEXT: mov z0.s, w8
341 ; CHECK-NEXT: ptrue p1.s, vl32
342 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
343 ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
344 ; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
345 ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
346 ; CHECK-NEXT: st1w { z0.s }, p1, [x0]
348 %op1 = load volatile <32 x i32>, ptr %a
349 %op2 = load volatile <32 x i32>, ptr %b
350 %sel = select i1 %mask, <32 x i32> %op1, <32 x i32> %op2
351 store <32 x i32> %sel, ptr %a
; 2048-bit i32 vector: vscale_range(16,0) keeps it in one SVE register (vl64).
355 define void @select_v64i32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
356 ; CHECK-LABEL: select_v64i32:
358 ; CHECK-NEXT: and w8, w2, #0x1
359 ; CHECK-NEXT: ptrue p0.s
360 ; CHECK-NEXT: mov z0.s, w8
361 ; CHECK-NEXT: ptrue p1.s, vl64
362 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
363 ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
364 ; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
365 ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
366 ; CHECK-NEXT: st1w { z0.s }, p1, [x0]
368 %op1 = load volatile <64 x i32>, ptr %a
369 %op2 = load volatile <64 x i32>, ptr %b
370 %sel = select i1 %mask, <64 x i32> %op1, <64 x i32> %op2
371 store <64 x i32> %sel, ptr %a
375 ; Don't use SVE for 64-bit vectors.
; Single-element i64 vector: the all-ones/all-zeros mask is built with
; csetm x8 and moved into d2 with fmov (no dup needed for one lane).
376 define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
377 ; CHECK-LABEL: select_v1i64:
379 ; CHECK-NEXT: tst w0, #0x1
380 ; CHECK-NEXT: csetm x8, ne
381 ; CHECK-NEXT: fmov d2, x8
382 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
384 %sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
388 ; Don't use SVE for 128-bit vectors.
; i64 variant of the 128-bit NEON lowering: 64-bit csetm splatted with dup .2d.
389 define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
390 ; CHECK-LABEL: select_v2i64:
392 ; CHECK-NEXT: tst w0, #0x1
393 ; CHECK-NEXT: csetm x8, ne
394 ; CHECK-NEXT: dup v2.2d, x8
395 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
397 %sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
; 256-bit i64 vector in one SVE register (vl4). The i1 arrives in w2; the
; kill comment marks its implicit widening to x2 before the 64-bit and.
401 define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
402 ; CHECK-LABEL: select_v4i64:
404 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
405 ; CHECK-NEXT: and x8, x2, #0x1
406 ; CHECK-NEXT: ptrue p0.d
407 ; CHECK-NEXT: mov z0.d, x8
408 ; CHECK-NEXT: ptrue p1.d, vl4
409 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
410 ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
411 ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
412 ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
413 ; CHECK-NEXT: st1d { z0.d }, p1, [x0]
415 %op1 = load volatile <4 x i64>, ptr %a
416 %op2 = load volatile <4 x i64>, ptr %b
417 %sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
418 store <4 x i64> %sel, ptr %a
; 512-bit i64 vector, no vscale_range: split into two vl4 halves at 256-bit
; SVE (upper half at element offset x8 = #4, scaled lsl #3); single vl8 sel
; at 512-bit.
422 define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
423 ; VBITS_GE_256-LABEL: select_v8i64:
424 ; VBITS_GE_256: // %bb.0:
425 ; VBITS_GE_256-NEXT: // kill: def $w2 killed $w2 def $x2
426 ; VBITS_GE_256-NEXT: and x8, x2, #0x1
427 ; VBITS_GE_256-NEXT: ptrue p0.d
428 ; VBITS_GE_256-NEXT: mov z0.d, x8
429 ; VBITS_GE_256-NEXT: ptrue p1.d, vl4
430 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
431 ; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z0.d, #0
432 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
433 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x0]
434 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p1/z, [x1, x8, lsl #3]
435 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1]
436 ; VBITS_GE_256-NEXT: sel z0.d, p0, z0.d, z2.d
437 ; VBITS_GE_256-NEXT: sel z1.d, p0, z1.d, z3.d
438 ; VBITS_GE_256-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3]
439 ; VBITS_GE_256-NEXT: st1d { z1.d }, p1, [x0]
440 ; VBITS_GE_256-NEXT: ret
442 ; VBITS_GE_512-LABEL: select_v8i64:
443 ; VBITS_GE_512: // %bb.0:
444 ; VBITS_GE_512-NEXT: // kill: def $w2 killed $w2 def $x2
445 ; VBITS_GE_512-NEXT: and x8, x2, #0x1
446 ; VBITS_GE_512-NEXT: ptrue p0.d
447 ; VBITS_GE_512-NEXT: mov z0.d, x8
448 ; VBITS_GE_512-NEXT: ptrue p1.d, vl8
449 ; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z0.d, #0
450 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x0]
451 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1]
452 ; VBITS_GE_512-NEXT: sel z0.d, p0, z0.d, z1.d
453 ; VBITS_GE_512-NEXT: st1d { z0.d }, p1, [x0]
454 ; VBITS_GE_512-NEXT: ret
455 %op1 = load volatile <8 x i64>, ptr %a
456 %op2 = load volatile <8 x i64>, ptr %b
457 %sel = select i1 %mask, <8 x i64> %op1, <8 x i64> %op2
458 store <8 x i64> %sel, ptr %a
; 1024-bit i64 vector: vscale_range(8,0) keeps it in one SVE register (vl16).
462 define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
463 ; CHECK-LABEL: select_v16i64:
465 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
466 ; CHECK-NEXT: and x8, x2, #0x1
467 ; CHECK-NEXT: ptrue p0.d
468 ; CHECK-NEXT: mov z0.d, x8
469 ; CHECK-NEXT: ptrue p1.d, vl16
470 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
471 ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
472 ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
473 ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
474 ; CHECK-NEXT: st1d { z0.d }, p1, [x0]
476 %op1 = load volatile <16 x i64>, ptr %a
477 %op2 = load volatile <16 x i64>, ptr %b
478 %sel = select i1 %mask, <16 x i64> %op1, <16 x i64> %op2
479 store <16 x i64> %sel, ptr %a
; 2048-bit i64 vector: vscale_range(16,0) keeps it in one SVE register (vl32).
483 define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
484 ; CHECK-LABEL: select_v32i64:
486 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
487 ; CHECK-NEXT: and x8, x2, #0x1
488 ; CHECK-NEXT: ptrue p0.d
489 ; CHECK-NEXT: mov z0.d, x8
490 ; CHECK-NEXT: ptrue p1.d, vl32
491 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
492 ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
493 ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
494 ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
495 ; CHECK-NEXT: st1d { z0.d }, p1, [x0]
497 %op1 = load volatile <32 x i64>, ptr %a
498 %op2 = load volatile <32 x i64>, ptr %b
499 %sel = select i1 %mask, <32 x i64> %op1, <32 x i64> %op2
500 store <32 x i64> %sel, ptr %a
504 attributes #0 = { "target-features"="+sve" }