1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
6 target triple = "aarch64-unknown-linux-gnu"
; select_v2f16: lane-wise select between %op1 and %op2 under a <2 x i1> mask
; that is passed as an argument.
; For the two RUN lines that share the CHECK prefix (+sve / +sme) the expected
; code rebuilds the mask in halfword lanes through the stack, converts it to a
; predicate with cmpne and performs a single sel.
; For the RUN line without -mattr (NONEON-NOSVE prefix) the expected code is
; scalarised through the stack with one fcsel per lane.
8 define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask) {
9 ; CHECK-LABEL: select_v2f16:
11 ; CHECK-NEXT: sub sp, sp, #16
12 ; CHECK-NEXT: .cfi_def_cfa_offset 16
13 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
14 ; CHECK-NEXT: mov z3.s, z2.s[1]
15 ; CHECK-NEXT: fmov w8, s2
16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
17 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
18 ; CHECK-NEXT: ptrue p0.h
19 ; CHECK-NEXT: strh w8, [sp, #8]
20 ; CHECK-NEXT: fmov w8, s3
21 ; CHECK-NEXT: strh w8, [sp, #10]
22 ; CHECK-NEXT: ldr d2, [sp, #8]
23 ; CHECK-NEXT: lsl z2.h, z2.h, #15
24 ; CHECK-NEXT: asr z2.h, z2.h, #15
25 ; CHECK-NEXT: and z2.h, z2.h, #0x1
26 ; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
27 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
28 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
29 ; CHECK-NEXT: add sp, sp, #16
32 ; NONEON-NOSVE-LABEL: select_v2f16:
33 ; NONEON-NOSVE: // %bb.0:
34 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
35 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
36 ; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
37 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
38 ; NONEON-NOSVE-NEXT: str d0, [sp]
39 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
40 ; NONEON-NOSVE-NEXT: ldr w10, [sp, #12]
41 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
42 ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1
43 ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1
44 ; NONEON-NOSVE-NEXT: str w10, [sp, #28]
45 ; NONEON-NOSVE-NEXT: tst w9, #0xffff
46 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
47 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
48 ; NONEON-NOSVE-NEXT: tst w8, #0xffff
49 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
50 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
51 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
52 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
53 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
54 ; NONEON-NOSVE-NEXT: add sp, sp, #32
55 ; NONEON-NOSVE-NEXT: ret
56 %sel = select <2 x i1> %mask, <2 x half> %op1, <2 x half> %op2
; select_v4f16: lane-wise select between %op1 and %op2 under a <4 x i1> mask
; that is passed as an argument.
; Under the CHECK prefix the expected code treats the mask register as
; halfword lanes, converts it to a predicate (lsl/asr/and/cmpne) and performs
; a single sel. Under the NONEON-NOSVE prefix the expected code is scalarised
; through the stack with one fcsel per lane (four in total).
60 define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) {
61 ; CHECK-LABEL: select_v4f16:
63 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
64 ; CHECK-NEXT: ptrue p0.h
65 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
66 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
67 ; CHECK-NEXT: lsl z2.h, z2.h, #15
68 ; CHECK-NEXT: asr z2.h, z2.h, #15
69 ; CHECK-NEXT: and z2.h, z2.h, #0x1
70 ; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
71 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
72 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
75 ; NONEON-NOSVE-LABEL: select_v4f16:
76 ; NONEON-NOSVE: // %bb.0:
77 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
78 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
79 ; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
80 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22]
81 ; NONEON-NOSVE-NEXT: str d0, [sp]
82 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
83 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
84 ; NONEON-NOSVE-NEXT: ldrh w11, [sp, #20]
85 ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #18]
86 ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1
87 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
88 ; NONEON-NOSVE-NEXT: tst w9, #0xffff
89 ; NONEON-NOSVE-NEXT: sbfx w9, w11, #0, #1
90 ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1
91 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
92 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
93 ; NONEON-NOSVE-NEXT: tst w9, #0xffff
94 ; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1
95 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
96 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
97 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
98 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
99 ; NONEON-NOSVE-NEXT: tst w9, #0xffff
100 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
101 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
102 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
103 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
104 ; NONEON-NOSVE-NEXT: tst w8, #0xffff
105 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
106 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
107 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
108 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
109 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
110 ; NONEON-NOSVE-NEXT: add sp, sp, #32
111 ; NONEON-NOSVE-NEXT: ret
112 %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2
; select_v8f16: lane-wise select between %op1 and %op2 under an <8 x i1> mask
; that is passed as an argument.
; Under the CHECK prefix the expected code widens the mask from byte to
; halfword lanes with uunpklo, converts it to a predicate
; (lsl/asr/and/cmpne) and performs a single sel. Under the NONEON-NOSVE
; prefix the expected code is scalarised through the stack with one fcsel per
; lane (eight in total).
116 define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) {
117 ; CHECK-LABEL: select_v8f16:
119 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
120 ; CHECK-NEXT: ptrue p0.h
121 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
122 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
123 ; CHECK-NEXT: uunpklo z2.h, z2.b
124 ; CHECK-NEXT: lsl z2.h, z2.h, #15
125 ; CHECK-NEXT: asr z2.h, z2.h, #15
126 ; CHECK-NEXT: and z2.h, z2.h, #0x1
127 ; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
128 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
129 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
132 ; NONEON-NOSVE-LABEL: select_v8f16:
133 ; NONEON-NOSVE: // %bb.0:
134 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
135 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
136 ; NONEON-NOSVE-NEXT: str d2, [sp, #40]
137 ; NONEON-NOSVE-NEXT: ldrb w13, [sp, #47]
138 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp]
139 ; NONEON-NOSVE-NEXT: ldrb w15, [sp, #46]
140 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
141 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
142 ; NONEON-NOSVE-NEXT: ldrb w14, [sp, #45]
143 ; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1
144 ; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
145 ; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
146 ; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
147 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41]
148 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40]
149 ; NONEON-NOSVE-NEXT: tst w13, #0xffff
150 ; NONEON-NOSVE-NEXT: sbfx w13, w15, #0, #1
151 ; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1
152 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
153 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
154 ; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1
155 ; NONEON-NOSVE-NEXT: tst w13, #0xffff
156 ; NONEON-NOSVE-NEXT: sbfx w13, w14, #0, #1
157 ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1
158 ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1
159 ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1
160 ; NONEON-NOSVE-NEXT: str h0, [sp, #62]
161 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
162 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
163 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
164 ; NONEON-NOSVE-NEXT: tst w13, #0xffff
165 ; NONEON-NOSVE-NEXT: str h0, [sp, #60]
166 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
167 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
168 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
169 ; NONEON-NOSVE-NEXT: tst w12, #0xffff
170 ; NONEON-NOSVE-NEXT: str h0, [sp, #58]
171 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
172 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
173 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
174 ; NONEON-NOSVE-NEXT: tst w11, #0xffff
175 ; NONEON-NOSVE-NEXT: str h0, [sp, #56]
176 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
177 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
178 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
179 ; NONEON-NOSVE-NEXT: tst w10, #0xffff
180 ; NONEON-NOSVE-NEXT: str h0, [sp, #54]
181 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
182 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
183 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
184 ; NONEON-NOSVE-NEXT: tst w9, #0xffff
185 ; NONEON-NOSVE-NEXT: str h0, [sp, #52]
186 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
187 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
188 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
189 ; NONEON-NOSVE-NEXT: tst w8, #0xffff
190 ; NONEON-NOSVE-NEXT: str h0, [sp, #50]
191 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
192 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
193 ; NONEON-NOSVE-NEXT: str h0, [sp, #48]
194 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
195 ; NONEON-NOSVE-NEXT: add sp, sp, #64
196 ; NONEON-NOSVE-NEXT: ret
197 %sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2
; select_v16f16: loads <16 x half> vectors from %a and %b, builds the mask
; with fcmp oeq, selects lane-wise and stores the result back to %a.
; Under the CHECK prefix the expected code handles the vector as two 128-bit
; halves (ldp/stp of q registers) under a vl8 halfword predicate, using one
; fcmeq + sel pair per half. Under the NONEON-NOSVE prefix every lane is
; processed individually via the stack: both inputs are converted to single
; precision with fcvt, compared with fcmp, and the half value is chosen with
; fcsel.
201 define void @select_v16f16(ptr %a, ptr %b) {
202 ; CHECK-LABEL: select_v16f16:
204 ; CHECK-NEXT: ldp q0, q2, [x0]
205 ; CHECK-NEXT: ptrue p0.h, vl8
206 ; CHECK-NEXT: ldp q1, q3, [x1]
207 ; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
208 ; CHECK-NEXT: fcmeq p0.h, p0/z, z2.h, z3.h
209 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
210 ; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h
211 ; CHECK-NEXT: stp q0, q1, [x0]
214 ; NONEON-NOSVE-LABEL: select_v16f16:
215 ; NONEON-NOSVE: // %bb.0:
216 ; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
217 ; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
218 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
219 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
220 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
221 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
222 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32]
223 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #20]
224 ; NONEON-NOSVE-NEXT: ldr h5, [sp, #4]
225 ; NONEON-NOSVE-NEXT: ldr h16, [sp, #22]
226 ; NONEON-NOSVE-NEXT: fcvt s2, h0
227 ; NONEON-NOSVE-NEXT: fcvt s3, h1
228 ; NONEON-NOSVE-NEXT: ldr h17, [sp, #6]
229 ; NONEON-NOSVE-NEXT: fcvt s6, h4
230 ; NONEON-NOSVE-NEXT: fcvt s7, h5
231 ; NONEON-NOSVE-NEXT: ldr h19, [sp, #8]
232 ; NONEON-NOSVE-NEXT: fcvt s18, h17
233 ; NONEON-NOSVE-NEXT: ldr h21, [sp, #10]
234 ; NONEON-NOSVE-NEXT: ldr h22, [sp, #12]
235 ; NONEON-NOSVE-NEXT: fcvt s20, h19
236 ; NONEON-NOSVE-NEXT: ldr h24, [sp, #32]
237 ; NONEON-NOSVE-NEXT: ldr h25, [sp, #34]
238 ; NONEON-NOSVE-NEXT: fcmp s3, s2
239 ; NONEON-NOSVE-NEXT: fcvt s2, h16
240 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #24]
241 ; NONEON-NOSVE-NEXT: ldr h26, [sp, #36]
242 ; NONEON-NOSVE-NEXT: ldr h27, [sp, #38]
243 ; NONEON-NOSVE-NEXT: ldr h28, [sp, #42]
244 ; NONEON-NOSVE-NEXT: ldr h29, [sp, #44]
245 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, eq
246 ; NONEON-NOSVE-NEXT: fcmp s7, s6
247 ; NONEON-NOSVE-NEXT: fcvt s7, h3
248 ; NONEON-NOSVE-NEXT: ldr h6, [sp, #26]
249 ; NONEON-NOSVE-NEXT: fcsel s1, s5, s4, eq
250 ; NONEON-NOSVE-NEXT: fcmp s18, s2
251 ; NONEON-NOSVE-NEXT: fcvt s4, h6
252 ; NONEON-NOSVE-NEXT: fcvt s18, h21
253 ; NONEON-NOSVE-NEXT: ldr h5, [sp, #28]
254 ; NONEON-NOSVE-NEXT: str h0, [sp, #66]
255 ; NONEON-NOSVE-NEXT: fcsel s2, s17, s16, eq
256 ; NONEON-NOSVE-NEXT: fcmp s20, s7
257 ; NONEON-NOSVE-NEXT: fcvt s16, h5
258 ; NONEON-NOSVE-NEXT: fcvt s17, h22
259 ; NONEON-NOSVE-NEXT: ldr h7, [sp, #30]
260 ; NONEON-NOSVE-NEXT: ldr h20, [sp, #14]
261 ; NONEON-NOSVE-NEXT: str h1, [sp, #68]
262 ; NONEON-NOSVE-NEXT: fcsel s3, s19, s3, eq
263 ; NONEON-NOSVE-NEXT: fcmp s18, s4
264 ; NONEON-NOSVE-NEXT: fcvt s19, h7
265 ; NONEON-NOSVE-NEXT: fcvt s23, h20
266 ; NONEON-NOSVE-NEXT: ldr h18, [sp, #48]
267 ; NONEON-NOSVE-NEXT: str h2, [sp, #70]
268 ; NONEON-NOSVE-NEXT: fcsel s4, s21, s6, eq
269 ; NONEON-NOSVE-NEXT: fcmp s17, s16
270 ; NONEON-NOSVE-NEXT: fcvt s17, h18
271 ; NONEON-NOSVE-NEXT: fcvt s21, h24
272 ; NONEON-NOSVE-NEXT: ldr h16, [sp, #50]
273 ; NONEON-NOSVE-NEXT: str h3, [sp, #72]
274 ; NONEON-NOSVE-NEXT: fcsel s5, s22, s5, eq
275 ; NONEON-NOSVE-NEXT: fcmp s23, s19
276 ; NONEON-NOSVE-NEXT: fcvt s22, h16
277 ; NONEON-NOSVE-NEXT: fcvt s23, h25
278 ; NONEON-NOSVE-NEXT: ldr h19, [sp, #52]
279 ; NONEON-NOSVE-NEXT: str h4, [sp, #74]
280 ; NONEON-NOSVE-NEXT: fcsel s6, s20, s7, eq
281 ; NONEON-NOSVE-NEXT: fcmp s21, s17
282 ; NONEON-NOSVE-NEXT: fcvt s20, h19
283 ; NONEON-NOSVE-NEXT: fcvt s21, h26
284 ; NONEON-NOSVE-NEXT: ldr h17, [sp, #54]
285 ; NONEON-NOSVE-NEXT: str h5, [sp, #76]
286 ; NONEON-NOSVE-NEXT: fcsel s7, s24, s18, eq
287 ; NONEON-NOSVE-NEXT: fcmp s23, s22
288 ; NONEON-NOSVE-NEXT: fcvt s22, h17
289 ; NONEON-NOSVE-NEXT: fcvt s23, h27
290 ; NONEON-NOSVE-NEXT: ldr h18, [sp, #56]
291 ; NONEON-NOSVE-NEXT: ldr h24, [sp, #40]
292 ; NONEON-NOSVE-NEXT: str h6, [sp, #78]
293 ; NONEON-NOSVE-NEXT: fcsel s16, s25, s16, eq
294 ; NONEON-NOSVE-NEXT: fcmp s21, s20
295 ; NONEON-NOSVE-NEXT: fcvt s21, h18
296 ; NONEON-NOSVE-NEXT: fcvt s25, h24
297 ; NONEON-NOSVE-NEXT: ldr h20, [sp, #58]
298 ; NONEON-NOSVE-NEXT: str h7, [sp, #80]
299 ; NONEON-NOSVE-NEXT: fcsel s19, s26, s19, eq
300 ; NONEON-NOSVE-NEXT: fcmp s23, s22
301 ; NONEON-NOSVE-NEXT: fcvt s23, h20
302 ; NONEON-NOSVE-NEXT: fcvt s26, h28
303 ; NONEON-NOSVE-NEXT: ldr h22, [sp, #60]
304 ; NONEON-NOSVE-NEXT: str h16, [sp, #82]
305 ; NONEON-NOSVE-NEXT: fcsel s17, s27, s17, eq
306 ; NONEON-NOSVE-NEXT: fcmp s25, s21
307 ; NONEON-NOSVE-NEXT: fcvt s25, h22
308 ; NONEON-NOSVE-NEXT: fcvt s27, h29
309 ; NONEON-NOSVE-NEXT: ldr h21, [sp, #62]
310 ; NONEON-NOSVE-NEXT: str h19, [sp, #84]
311 ; NONEON-NOSVE-NEXT: fcsel s18, s24, s18, eq
312 ; NONEON-NOSVE-NEXT: ldr h24, [sp, #46]
313 ; NONEON-NOSVE-NEXT: fcmp s26, s23
314 ; NONEON-NOSVE-NEXT: fcvt s23, h21
315 ; NONEON-NOSVE-NEXT: str h17, [sp, #86]
316 ; NONEON-NOSVE-NEXT: fcvt s26, h24
317 ; NONEON-NOSVE-NEXT: fcsel s20, s28, s20, eq
318 ; NONEON-NOSVE-NEXT: fcmp s27, s25
319 ; NONEON-NOSVE-NEXT: ldr h25, [sp, #16]
320 ; NONEON-NOSVE-NEXT: ldr h27, [sp]
321 ; NONEON-NOSVE-NEXT: str h18, [sp, #88]
322 ; NONEON-NOSVE-NEXT: fcvt s17, h25
323 ; NONEON-NOSVE-NEXT: fcvt s18, h27
324 ; NONEON-NOSVE-NEXT: fcsel s7, s29, s22, eq
325 ; NONEON-NOSVE-NEXT: fcmp s26, s23
326 ; NONEON-NOSVE-NEXT: str h20, [sp, #90]
327 ; NONEON-NOSVE-NEXT: fcsel s16, s24, s21, eq
328 ; NONEON-NOSVE-NEXT: str h7, [sp, #92]
329 ; NONEON-NOSVE-NEXT: fcmp s18, s17
330 ; NONEON-NOSVE-NEXT: str h16, [sp, #94]
331 ; NONEON-NOSVE-NEXT: fcsel s2, s27, s25, eq
332 ; NONEON-NOSVE-NEXT: str h2, [sp, #64]
333 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
334 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
335 ; NONEON-NOSVE-NEXT: add sp, sp, #96
336 ; NONEON-NOSVE-NEXT: ret
337 %op1 = load <16 x half>, ptr %a
338 %op2 = load <16 x half>, ptr %b
339 %mask = fcmp oeq <16 x half> %op1, %op2
340 %sel = select <16 x i1> %mask, <16 x half> %op1, <16 x half> %op2
341 store <16 x half> %sel, ptr %a
; select_v2f32: lane-wise select between %op1 and %op2 under a <2 x i1> mask
; that is passed as an argument.
; Under the CHECK prefix the expected code treats the mask register as word
; lanes, converts it to a predicate (lsl/asr/and/cmpne) and performs a single
; sel. Under the NONEON-NOSVE prefix the expected code is scalarised through
; the stack with one fcsel per lane.
345 define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) {
346 ; CHECK-LABEL: select_v2f32:
348 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
349 ; CHECK-NEXT: ptrue p0.s
350 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
351 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
352 ; CHECK-NEXT: lsl z2.s, z2.s, #31
353 ; CHECK-NEXT: asr z2.s, z2.s, #31
354 ; CHECK-NEXT: and z2.s, z2.s, #0x1
355 ; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
356 ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
357 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
360 ; NONEON-NOSVE-LABEL: select_v2f32:
361 ; NONEON-NOSVE: // %bb.0:
362 ; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #-32]!
363 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
364 ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp]
365 ; NONEON-NOSVE-NEXT: str d1, [sp, #16]
366 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
367 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
368 ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1
369 ; NONEON-NOSVE-NEXT: cmp w8, #0
370 ; NONEON-NOSVE-NEXT: sbfx w8, w9, #0, #1
371 ; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne
372 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
373 ; NONEON-NOSVE-NEXT: cmp w8, #0
374 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
375 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
376 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
377 ; NONEON-NOSVE-NEXT: add sp, sp, #32
378 ; NONEON-NOSVE-NEXT: ret
379 %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2
; select_v4f32: lane-wise select between %op1 and %op2 under a <4 x i1> mask
; that is passed as an argument.
; Under the CHECK prefix the expected code widens the mask from halfword to
; word lanes with uunpklo, converts it to a predicate (lsl/asr/and/cmpne) and
; performs a single sel. Under the NONEON-NOSVE prefix the expected code is
; scalarised through the stack with one fcsel per lane (four in total).
383 define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) {
384 ; CHECK-LABEL: select_v4f32:
386 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
387 ; CHECK-NEXT: ptrue p0.s
388 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
389 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
390 ; CHECK-NEXT: uunpklo z2.s, z2.h
391 ; CHECK-NEXT: lsl z2.s, z2.s, #31
392 ; CHECK-NEXT: asr z2.s, z2.s, #31
393 ; CHECK-NEXT: and z2.s, z2.s, #0x1
394 ; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
395 ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
396 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
399 ; NONEON-NOSVE-LABEL: select_v4f32:
400 ; NONEON-NOSVE: // %bb.0:
401 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
402 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
403 ; NONEON-NOSVE-NEXT: str d2, [sp, #8]
404 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
405 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16]
406 ; NONEON-NOSVE-NEXT: ldrh w11, [sp, #12]
407 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #24]
408 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
409 ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1
410 ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10]
411 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
412 ; NONEON-NOSVE-NEXT: cmp w9, #0
413 ; NONEON-NOSVE-NEXT: sbfx w9, w11, #0, #1
414 ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1
415 ; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne
416 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
417 ; NONEON-NOSVE-NEXT: cmp w9, #0
418 ; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1
419 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
420 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #16]
421 ; NONEON-NOSVE-NEXT: cmp w9, #0
422 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #56]
423 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
424 ; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne
425 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
426 ; NONEON-NOSVE-NEXT: cmp w8, #0
427 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
428 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #48]
429 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
430 ; NONEON-NOSVE-NEXT: add sp, sp, #64
431 ; NONEON-NOSVE-NEXT: ret
432 %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2
; select_v8f32: loads <8 x float> vectors from %a and %b, builds the mask
; with fcmp oeq, selects lane-wise and stores the result back to %a.
; Under the CHECK prefix the expected code handles the vector as two 128-bit
; halves (ldp/stp of q registers) under a vl4 word predicate, using one
; fcmeq + sel pair per half. Under the NONEON-NOSVE prefix every lane is
; processed individually via the stack with an fcmp + fcsel pair.
436 define void @select_v8f32(ptr %a, ptr %b) {
437 ; CHECK-LABEL: select_v8f32:
439 ; CHECK-NEXT: ldp q0, q2, [x0]
440 ; CHECK-NEXT: ptrue p0.s, vl4
441 ; CHECK-NEXT: ldp q1, q3, [x1]
442 ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
443 ; CHECK-NEXT: fcmeq p0.s, p0/z, z2.s, z3.s
444 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
445 ; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s
446 ; CHECK-NEXT: stp q0, q1, [x0]
449 ; NONEON-NOSVE-LABEL: select_v8f32:
450 ; NONEON-NOSVE: // %bb.0:
451 ; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
452 ; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
453 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
454 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
455 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32]
456 ; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #20]
457 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #4]
458 ; NONEON-NOSVE-NEXT: ldr s4, [sp, #12]
459 ; NONEON-NOSVE-NEXT: ldr s17, [sp]
460 ; NONEON-NOSVE-NEXT: ldp s6, s7, [sp, #36]
461 ; NONEON-NOSVE-NEXT: fcmp s1, s0
462 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, eq
463 ; NONEON-NOSVE-NEXT: fcmp s3, s2
464 ; NONEON-NOSVE-NEXT: ldp s1, s5, [sp, #28]
465 ; NONEON-NOSVE-NEXT: fcsel s2, s3, s2, eq
466 ; NONEON-NOSVE-NEXT: ldp s16, s3, [sp, #44]
467 ; NONEON-NOSVE-NEXT: fcmp s4, s1
468 ; NONEON-NOSVE-NEXT: fcsel s1, s4, s1, eq
469 ; NONEON-NOSVE-NEXT: fcmp s5, s3
470 ; NONEON-NOSVE-NEXT: ldr s4, [sp, #52]
471 ; NONEON-NOSVE-NEXT: fcsel s3, s5, s3, eq
472 ; NONEON-NOSVE-NEXT: fcmp s6, s4
473 ; NONEON-NOSVE-NEXT: ldr s5, [sp, #56]
474 ; NONEON-NOSVE-NEXT: stp s2, s1, [sp, #72]
475 ; NONEON-NOSVE-NEXT: fcsel s4, s6, s4, eq
476 ; NONEON-NOSVE-NEXT: fcmp s7, s5
477 ; NONEON-NOSVE-NEXT: ldr s6, [sp, #60]
478 ; NONEON-NOSVE-NEXT: fcsel s5, s7, s5, eq
479 ; NONEON-NOSVE-NEXT: fcmp s16, s6
480 ; NONEON-NOSVE-NEXT: ldr s7, [sp, #16]
481 ; NONEON-NOSVE-NEXT: stp s3, s4, [sp, #80]
482 ; NONEON-NOSVE-NEXT: fcsel s6, s16, s6, eq
483 ; NONEON-NOSVE-NEXT: fcmp s17, s7
484 ; NONEON-NOSVE-NEXT: fcsel s3, s17, s7, eq
485 ; NONEON-NOSVE-NEXT: stp s5, s6, [sp, #88]
486 ; NONEON-NOSVE-NEXT: stp s3, s0, [sp, #64]
487 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
488 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
489 ; NONEON-NOSVE-NEXT: add sp, sp, #96
490 ; NONEON-NOSVE-NEXT: ret
491 %op1 = load <8 x float>, ptr %a
492 %op2 = load <8 x float>, ptr %b
493 %mask = fcmp oeq <8 x float> %op1, %op2
494 %sel = select <8 x i1> %mask, <8 x float> %op1, <8 x float> %op2
495 store <8 x float> %sel, ptr %a
; select_v1f64: select between single-element double vectors %op1 and %op2
; under a <1 x i1> mask that is passed as an argument.
; Both check prefixes expect the select itself to be a scalar tst of bit 0 of
; w0 followed by fcsel; the NONEON-NOSVE expectation additionally stores the
; result to a stack slot and reloads it before returning.
499 define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1> %mask) {
500 ; CHECK-LABEL: select_v1f64:
502 ; CHECK-NEXT: tst w0, #0x1
503 ; CHECK-NEXT: fcsel d0, d0, d1, ne
506 ; NONEON-NOSVE-LABEL: select_v1f64:
507 ; NONEON-NOSVE: // %bb.0:
508 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
509 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
510 ; NONEON-NOSVE-NEXT: tst w0, #0x1
511 ; NONEON-NOSVE-NEXT: fcsel d0, d0, d1, ne
512 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
513 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
514 ; NONEON-NOSVE-NEXT: add sp, sp, #16
515 ; NONEON-NOSVE-NEXT: ret
516 %sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2
517 ret <1 x double> %sel
; select_v2f64: lane-wise select between %op1 and %op2 under a <2 x i1> mask
; that is passed as an argument.
; Under the CHECK prefix the expected code widens the mask from word to
; doubleword lanes with uunpklo, converts it to a predicate
; (lsl/asr/and/cmpne) and performs a single sel. Under the NONEON-NOSVE
; prefix the expected code is scalarised through the stack with one fcsel per
; lane.
520 define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) {
521 ; CHECK-LABEL: select_v2f64:
523 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
524 ; CHECK-NEXT: ptrue p0.d
525 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
526 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
527 ; CHECK-NEXT: uunpklo z2.d, z2.s
528 ; CHECK-NEXT: lsl z2.d, z2.d, #63
529 ; CHECK-NEXT: asr z2.d, z2.d, #63
530 ; CHECK-NEXT: and z2.d, z2.d, #0x1
531 ; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
532 ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
533 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
536 ; NONEON-NOSVE-LABEL: select_v2f64:
537 ; NONEON-NOSVE: // %bb.0:
538 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
539 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
540 ; NONEON-NOSVE-NEXT: str d2, [sp, #8]
541 ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8]
542 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16]
543 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #16]
544 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
545 ; NONEON-NOSVE-NEXT: sbfx x8, x8, #0, #1
546 ; NONEON-NOSVE-NEXT: cmp x8, #0
547 ; NONEON-NOSVE-NEXT: sbfx x8, x9, #0, #1
548 ; NONEON-NOSVE-NEXT: fcsel d3, d2, d0, ne
549 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
550 ; NONEON-NOSVE-NEXT: cmp x8, #0
551 ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
552 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #48]
553 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
554 ; NONEON-NOSVE-NEXT: add sp, sp, #64
555 ; NONEON-NOSVE-NEXT: ret
556 %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2
557 ret <2 x double> %sel
560 define void @select_v4f64(ptr %a, ptr %b) {
561 ; CHECK-LABEL: select_v4f64:
563 ; CHECK-NEXT: ldp q0, q2, [x0]
564 ; CHECK-NEXT: ptrue p0.d, vl2
565 ; CHECK-NEXT: ldp q1, q3, [x1]
566 ; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
567 ; CHECK-NEXT: fcmeq p0.d, p0/z, z2.d, z3.d
568 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
569 ; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d
570 ; CHECK-NEXT: stp q0, q1, [x0]
573 ; NONEON-NOSVE-LABEL: select_v4f64:
574 ; NONEON-NOSVE: // %bb.0:
575 ; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
576 ; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
577 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
578 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
579 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32]
580 ; NONEON-NOSVE-NEXT: ldp d5, d1, [sp]
581 ; NONEON-NOSVE-NEXT: ldp d0, d3, [sp, #24]
582 ; NONEON-NOSVE-NEXT: ldp d4, d2, [sp, #40]
583 ; NONEON-NOSVE-NEXT: fcmp d1, d0
584 ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, eq
585 ; NONEON-NOSVE-NEXT: fcmp d3, d2
586 ; NONEON-NOSVE-NEXT: ldr d1, [sp, #56]
587 ; NONEON-NOSVE-NEXT: fcsel d2, d3, d2, eq
588 ; NONEON-NOSVE-NEXT: fcmp d4, d1
589 ; NONEON-NOSVE-NEXT: ldr d3, [sp, #16]
590 ; NONEON-NOSVE-NEXT: fcsel d1, d4, d1, eq
591 ; NONEON-NOSVE-NEXT: fcmp d5, d3
592 ; NONEON-NOSVE-NEXT: fcsel d3, d5, d3, eq
593 ; NONEON-NOSVE-NEXT: stp d2, d1, [sp, #80]
594 ; NONEON-NOSVE-NEXT: stp d3, d0, [sp, #64]
595 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
596 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
597 ; NONEON-NOSVE-NEXT: add sp, sp, #96
598 ; NONEON-NOSVE-NEXT: ret
599 %op1 = load <4 x double>, ptr %a
600 %op2 = load <4 x double>, ptr %b
601 %mask = fcmp oeq <4 x double> %op1, %op2
602 %sel = select <4 x i1> %mask, <4 x double> %op1, <4 x double> %op2
603 store <4 x double> %sel, ptr %a