; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.8b, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
  ret <8 x i8> %sel
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.16b, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
  ret <16 x i8> %sel
}

define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z2.b }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.b, p1/z, z0.b, #0
; CHECK-NEXT:    sel z0.b, p1, z1.b, z2.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i8>, ptr %a
  %op2 = load volatile <32 x i8>, ptr %b
  %sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
  store <32 x i8> %sel, ptr %a
  ret void
}

define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov z0.b, w2
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ptrue p1.b
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    cmpne p1.b, p1/z, z0.b, #0
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    sel z1.b, p1, z1.b, z3.b
; VBITS_GE_256-NEXT:    mov z0.b, p1/m, z2.b
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    mov z0.b, w2
; VBITS_GE_512-NEXT:    ptrue p1.b
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z2.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpne p1.b, p1/z, z0.b, #0
; VBITS_GE_512-NEXT:    sel z0.b, p1, z1.b, z2.b
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <64 x i8>, ptr %a
  %op2 = load volatile <64 x i8>, ptr %b
  %sel = select i1 %mask, <64 x i8> %op1, <64 x i8> %op2
  store <64 x i8> %sel, ptr %a
  ret void
}

define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z2.b }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.b, p1/z, z0.b, #0
; CHECK-NEXT:    sel z0.b, p1, z1.b, z2.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <128 x i8>, ptr %a
  %op2 = load volatile <128 x i8>, ptr %b
  %sel = select i1 %mask, <128 x i8> %op1, <128 x i8> %op2
  store <128 x i8> %sel, ptr %a
  ret void
}

define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z2.b }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.b, p1/z, z0.b, #0
; CHECK-NEXT:    sel z0.b, p1, z1.b, z2.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <256 x i8>, ptr %a
  %op2 = load volatile <256 x i8>, ptr %b
  %sel = select i1 %mask, <256 x i8> %op1, <256 x i8> %op2
  store <256 x i8> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.4h, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
  ret <4 x i16> %sel
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.8h, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
  ret <8 x i16> %sel
}

define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.h, p1/z, z0.h, #0
; CHECK-NEXT:    sel z0.h, p1, z1.h, z2.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <16 x i16>, ptr %a
  %op2 = load volatile <16 x i16>, ptr %b
  %sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
  store <16 x i16> %sel, ptr %a
  ret void
}

define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov z0.h, w2
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ptrue p1.h
; VBITS_GE_256-NEXT:    and z0.h, z0.h, #0x1
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    cmpne p1.h, p1/z, z0.h, #0
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    sel z1.h, p1, z1.h, z3.h
; VBITS_GE_256-NEXT:    mov z0.h, p1/m, z2.h
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    mov z0.h, w2
; VBITS_GE_512-NEXT:    ptrue p1.h
; VBITS_GE_512-NEXT:    and z0.h, z0.h, #0x1
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z2.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpne p1.h, p1/z, z0.h, #0
; VBITS_GE_512-NEXT:    sel z0.h, p1, z1.h, z2.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <32 x i16>, ptr %a
  %op2 = load volatile <32 x i16>, ptr %b
  %sel = select i1 %mask, <32 x i16> %op1, <32 x i16> %op2
  store <32 x i16> %sel, ptr %a
  ret void
}

define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.h, p1/z, z0.h, #0
; CHECK-NEXT:    sel z0.h, p1, z1.h, z2.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <64 x i16>, ptr %a
  %op2 = load volatile <64 x i16>, ptr %b
  %sel = select i1 %mask, <64 x i16> %op1, <64 x i16> %op2
  store <64 x i16> %sel, ptr %a
  ret void
}

define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.h, p1/z, z0.h, #0
; CHECK-NEXT:    sel z0.h, p1, z1.h, z2.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <128 x i16>, ptr %a
  %op2 = load volatile <128 x i16>, ptr %b
  %sel = select i1 %mask, <128 x i16> %op1, <128 x i16> %op2
  store <128 x i16> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.2s, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
  ret <2 x i32> %sel
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.4s, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
  ret <4 x i32> %sel
}

define void @select_v8i32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.s, p1/z, z0.s, #0
; CHECK-NEXT:    sel z0.s, p1, z1.s, z2.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <8 x i32>, ptr %a
  %op2 = load volatile <8 x i32>, ptr %b
  %sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
  store <8 x i32> %sel, ptr %a
  ret void
}

define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    and w8, w2, #0x1
; VBITS_GE_256-NEXT:    ptrue p1.s
; VBITS_GE_256-NEXT:    mov z0.s, w8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    cmpne p1.s, p1/z, z0.s, #0
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    sel z1.s, p1, z1.s, z3.s
; VBITS_GE_256-NEXT:    mov z0.s, p1/m, z2.s
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    and w8, w2, #0x1
; VBITS_GE_512-NEXT:    ptrue p1.s
; VBITS_GE_512-NEXT:    mov z0.s, w8
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z2.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpne p1.s, p1/z, z0.s, #0
; VBITS_GE_512-NEXT:    sel z0.s, p1, z1.s, z2.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <16 x i32>, ptr %a
  %op2 = load volatile <16 x i32>, ptr %b
  %sel = select i1 %mask, <16 x i32> %op1, <16 x i32> %op2
  store <16 x i32> %sel, ptr %a
  ret void
}

define void @select_v32i32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.s, p1/z, z0.s, #0
; CHECK-NEXT:    sel z0.s, p1, z1.s, z2.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i32>, ptr %a
  %op2 = load volatile <32 x i32>, ptr %b
  %sel = select i1 %mask, <32 x i32> %op1, <32 x i32> %op2
  store <32 x i32> %sel, ptr %a
  ret void
}

define void @select_v64i32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.s, p1/z, z0.s, #0
; CHECK-NEXT:    sel z0.s, p1, z1.s, z2.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <64 x i32>, ptr %a
  %op2 = load volatile <64 x i32>, ptr %b
  %sel = select i1 %mask, <64 x i32> %op1, <64 x i32> %op2
  store <64 x i32> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
  ret <1 x i64> %sel
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    dup v2.2d, x8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
  ret <2 x i64> %sel
}

define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.d, p1/z, z0.d, #0
; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <4 x i64>, ptr %a
  %op2 = load volatile <4 x i64>, ptr %b
  %sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
  store <4 x i64> %sel, ptr %a
  ret void
}

define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    // kill: def $w2 killed $w2 def $x2
; VBITS_GE_256-NEXT:    and x8, x2, #0x1
; VBITS_GE_256-NEXT:    ptrue p1.d
; VBITS_GE_256-NEXT:    mov z0.d, x8
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    cmpne p1.d, p1/z, z0.d, #0
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    sel z1.d, p1, z1.d, z3.d
; VBITS_GE_256-NEXT:    mov z0.d, p1/m, z2.d
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    // kill: def $w2 killed $w2 def $x2
; VBITS_GE_512-NEXT:    and x8, x2, #0x1
; VBITS_GE_512-NEXT:    ptrue p1.d
; VBITS_GE_512-NEXT:    mov z0.d, x8
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z2.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpne p1.d, p1/z, z0.d, #0
; VBITS_GE_512-NEXT:    sel z0.d, p1, z1.d, z2.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <8 x i64>, ptr %a
  %op2 = load volatile <8 x i64>, ptr %b
  %sel = select i1 %mask, <8 x i64> %op1, <8 x i64> %op2
  store <8 x i64> %sel, ptr %a
  ret void
}

define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.d, p1/z, z0.d, #0
; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <16 x i64>, ptr %a
  %op2 = load volatile <16 x i64>, ptr %b
  %sel = select i1 %mask, <16 x i64> %op1, <16 x i64> %op2
  store <16 x i64> %sel, ptr %a
  ret void
}

define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.d, p1/z, z0.d, #0
; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i64>, ptr %a
  %op2 = load volatile <32 x i64>, ptr %b
  %sel = select i1 %mask, <32 x i64> %op1, <32 x i64> %op2
  store <32 x i64> %sel, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }