; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s

define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    mov z1.b, b1
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    fmov w9, s1
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    mov z2.b, w9
; CHECK-NEXT:    mov z1.b, w8
; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    cmpeq p1.b, p1/z, z0.b, z2.b
; CHECK-NEXT:    sel p1.b, p1, p1.b, p2.b
; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT:    ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    umov w9, v1.h[0]
; CHECK-NEXT:    umov w10, v1.h[2]
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    mov z2.b, w8
; CHECK-NEXT:    mov z3.b, w9
; CHECK-NEXT:    umov w8, v1.h[3]
; CHECK-NEXT:    mov z1.b, w10
; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z2.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z2.b, w8
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    cmpeq p1.b, p1/z, z0.b, z2.b
; CHECK-NEXT:    mov p2.b, p3/m, p3.b
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    mov p1.b, p2/m, p2.b
; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    mov z1.d, d1
; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    mov z1.q, q1
; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    dup v1.16b, v1.b[0]
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    dup v3.16b, v1.b[4]
; CHECK-NEXT:    dup v1.16b, v1.b[0]
; CHECK-NEXT:    cmeq v3.16b, v0.16b, v3.16b
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    dup v3.16b, v1.b[2]
; CHECK-NEXT:    dup v4.16b, v1.b[0]
; CHECK-NEXT:    dup v5.16b, v1.b[4]
; CHECK-NEXT:    dup v1.16b, v1.b[6]
; CHECK-NEXT:    cmeq v3.16b, v0.16b, v3.16b
; CHECK-NEXT:    cmeq v4.16b, v0.16b, v4.16b
; CHECK-NEXT:    cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    orr v1.16b, v4.16b, v3.16b
; CHECK-NEXT:    orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.16b, v2.16b, #7
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z1.d, d1
; CHECK-NEXT:    cmlt v2.16b, v2.16b, #0
; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.16b, v2.16b, #7
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    cmlt v2.16b, v2.16b, #0
; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i8_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.8b, v2.8b, #7
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    mov z1.d, d1
; CHECK-NEXT:    cmlt v2.8b, v2.8b, #0
; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask)
  ret <8 x i1> %r
}

define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv8i16_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    mov z1.q, q1
; CHECK-NEXT:    match p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    ret
  %r = tail call <vscale x 8 x i1> @llvm.experimental.vector.match(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask)
  ret <vscale x 8 x i1> %r
}

define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.8h, v2.8b, #0
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    shl v2.8h, v2.8h, #15
; CHECK-NEXT:    cmlt v2.8h, v2.8h, #0
; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
; CHECK-NEXT:    match p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    xtn v0.8b, v0.8h
; CHECK-NEXT:    ret
  %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask)
  ret <8 x i1> %r
}

; Cases where op2 has more elements than op1.

define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i8_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.8b, v2.8b, #7
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    cmlt v2.8b, v2.8b, #0
; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
  ret <8 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    mov z3.b, z1.b[1]
; CHECK-NEXT:    mov z4.b, b1
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    mov z5.b, z1.b[2]
; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z4.b
; CHECK-NEXT:    mov z3.b, z1.b[3]
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z5.b
; CHECK-NEXT:    mov z4.b, z1.b[4]
; CHECK-NEXT:    mov p2.b, p3/m, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z1.b[5]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT:    mov z4.b, z1.b[6]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z1.b[7]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT:    mov z4.b, z1.b[8]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z1.b[9]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT:    mov z4.b, z1.b[10]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z1.b[11]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT:    mov z4.b, z1.b[12]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z1.b[13]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT:    mov z4.b, z1.b[14]
; CHECK-NEXT:    mov z1.b, z1.b[15]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, b2
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, z2.b[1]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z2.b[2]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, z2.b[3]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z2.b[4]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, z2.b[5]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z2.b[6]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, z2.b[7]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z2.b[8]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, z2.b[9]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z2.b[10]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, z2.b[11]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z2.b[12]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, z2.b[13]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    mov z3.b, z2.b[14]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, z2.b[15]
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT:    cmpeq p1.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    mov p1.b, p2/m, p2.b
; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <16 x i1> @match_v16i8_v32i8(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v4.16b, v1.b[1]
; CHECK-NEXT:    dup v5.16b, v1.b[0]
; CHECK-NEXT:    dup v6.16b, v1.b[2]
; CHECK-NEXT:    dup v7.16b, v1.b[3]
; CHECK-NEXT:    dup v16.16b, v1.b[4]
; CHECK-NEXT:    dup v17.16b, v1.b[5]
; CHECK-NEXT:    dup v18.16b, v1.b[6]
; CHECK-NEXT:    dup v19.16b, v1.b[7]
; CHECK-NEXT:    dup v20.16b, v1.b[8]
; CHECK-NEXT:    cmeq v4.16b, v0.16b, v4.16b
; CHECK-NEXT:    cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT:    cmeq v6.16b, v0.16b, v6.16b
; CHECK-NEXT:    cmeq v7.16b, v0.16b, v7.16b
; CHECK-NEXT:    cmeq v16.16b, v0.16b, v16.16b
; CHECK-NEXT:    cmeq v17.16b, v0.16b, v17.16b
; CHECK-NEXT:    dup v21.16b, v2.b[7]
; CHECK-NEXT:    dup v22.16b, v1.b[10]
; CHECK-NEXT:    orr v4.16b, v5.16b, v4.16b
; CHECK-NEXT:    orr v5.16b, v6.16b, v7.16b
; CHECK-NEXT:    orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT:    cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT:    cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT:    cmeq v17.16b, v0.16b, v20.16b
; CHECK-NEXT:    dup v18.16b, v1.b[9]
; CHECK-NEXT:    dup v19.16b, v1.b[11]
; CHECK-NEXT:    dup v20.16b, v1.b[12]
; CHECK-NEXT:    cmeq v22.16b, v0.16b, v22.16b
; CHECK-NEXT:    orr v4.16b, v4.16b, v5.16b
; CHECK-NEXT:    orr v5.16b, v6.16b, v7.16b
; CHECK-NEXT:    orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT:    cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT:    dup v18.16b, v1.b[13]
; CHECK-NEXT:    cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT:    cmeq v17.16b, v0.16b, v20.16b
; CHECK-NEXT:    dup v19.16b, v2.b[0]
; CHECK-NEXT:    dup v20.16b, v2.b[1]
; CHECK-NEXT:    orr v4.16b, v4.16b, v5.16b
; CHECK-NEXT:    dup v5.16b, v2.b[6]
; CHECK-NEXT:    orr v6.16b, v6.16b, v7.16b
; CHECK-NEXT:    orr v7.16b, v16.16b, v17.16b
; CHECK-NEXT:    cmeq v16.16b, v0.16b, v18.16b
; CHECK-NEXT:    cmeq v17.16b, v0.16b, v19.16b
; CHECK-NEXT:    cmeq v18.16b, v0.16b, v20.16b
; CHECK-NEXT:    dup v19.16b, v2.b[2]
; CHECK-NEXT:    cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT:    cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT:    dup v21.16b, v2.b[8]
; CHECK-NEXT:    orr v6.16b, v6.16b, v22.16b
; CHECK-NEXT:    orr v7.16b, v7.16b, v16.16b
; CHECK-NEXT:    dup v16.16b, v1.b[14]
; CHECK-NEXT:    dup v1.16b, v1.b[15]
; CHECK-NEXT:    orr v17.16b, v17.16b, v18.16b
; CHECK-NEXT:    cmeq v18.16b, v0.16b, v19.16b
; CHECK-NEXT:    dup v19.16b, v2.b[3]
; CHECK-NEXT:    orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT:    cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT:    dup v21.16b, v2.b[9]
; CHECK-NEXT:    cmeq v16.16b, v0.16b, v16.16b
; CHECK-NEXT:    cmeq v1.16b, v0.16b, v1.16b
; CHECK-NEXT:    orr v4.16b, v4.16b, v6.16b
; CHECK-NEXT:    orr v17.16b, v17.16b, v18.16b
; CHECK-NEXT:    cmeq v18.16b, v0.16b, v19.16b
; CHECK-NEXT:    dup v19.16b, v2.b[4]
; CHECK-NEXT:    orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT:    cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT:    dup v21.16b, v2.b[10]
; CHECK-NEXT:    orr v7.16b, v7.16b, v16.16b
; CHECK-NEXT:    orr v16.16b, v17.16b, v18.16b
; CHECK-NEXT:    cmeq v17.16b, v0.16b, v19.16b
; CHECK-NEXT:    dup v18.16b, v2.b[5]
; CHECK-NEXT:    orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT:    cmeq v19.16b, v0.16b, v21.16b
; CHECK-NEXT:    dup v20.16b, v2.b[11]
; CHECK-NEXT:    orr v1.16b, v7.16b, v1.16b
; CHECK-NEXT:    orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT:    cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT:    dup v17.16b, v2.b[12]
; CHECK-NEXT:    orr v5.16b, v5.16b, v19.16b
; CHECK-NEXT:    cmeq v16.16b, v0.16b, v20.16b
; CHECK-NEXT:    dup v18.16b, v2.b[13]
; CHECK-NEXT:    dup v19.16b, v2.b[14]
; CHECK-NEXT:    orr v1.16b, v4.16b, v1.16b
; CHECK-NEXT:    dup v2.16b, v2.b[15]
; CHECK-NEXT:    orr v4.16b, v6.16b, v7.16b
; CHECK-NEXT:    cmeq v6.16b, v0.16b, v17.16b
; CHECK-NEXT:    orr v5.16b, v5.16b, v16.16b
; CHECK-NEXT:    cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT:    cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    orr v1.16b, v1.16b, v4.16b
; CHECK-NEXT:    orr v4.16b, v5.16b, v6.16b
; CHECK-NEXT:    orr v5.16b, v7.16b, v16.16b
; CHECK-NEXT:    orr v1.16b, v1.16b, v4.16b
; CHECK-NEXT:    orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
; CHECK-NEXT:    ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

; Data types not supported by MATCH.
; Note: The cases for SVE could be made tighter.

define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv4xi32_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    mov z2.s, z1.s[1]
; CHECK-NEXT:    mov z3.s, s1
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    mov z4.s, z1.s[2]
; CHECK-NEXT:    mov z1.s, z1.s[3]
; CHECK-NEXT:    cmpeq p2.s, p1/z, z0.s, z2.s
; CHECK-NEXT:    cmpeq p3.s, p1/z, z0.s, z3.s
; CHECK-NEXT:    cmpeq p4.s, p1/z, z0.s, z4.s
; CHECK-NEXT:    cmpeq p1.s, p1/z, z0.s, z1.s
; CHECK-NEXT:    mov p2.b, p3/m, p3.b
; CHECK-NEXT:    sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    mov p1.b, p2/m, p2.b
; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %r = tail call <vscale x 4 x i1> @llvm.experimental.vector.match(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask)
  ret <vscale x 4 x i1> %r
}

define <vscale x 2 x i1> @match_nxv2xi64_v2i64(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv2xi64_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    mov z2.d, z1.d[1]
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z1.d, d1
; CHECK-NEXT:    cmpeq p2.d, p1/z, z0.d, z2.d
; CHECK-NEXT:    cmpeq p1.d, p1/z, z0.d, z1.d
; CHECK-NEXT:    sel p1.b, p1, p1.b, p2.b
; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT:    ret
  %r = tail call <vscale x 2 x i1> @llvm.experimental.vector.match(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask)
  ret <vscale x 2 x i1> %r
}

define <4 x i1> @match_v4xi32_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #0 {
; CHECK-LABEL: match_v4xi32_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v3.4s, v1.s[1]
; CHECK-NEXT:    dup v4.4s, v1.s[0]
; CHECK-NEXT:    dup v5.4s, v1.s[2]
; CHECK-NEXT:    dup v1.4s, v1.s[3]
; CHECK-NEXT:    cmeq v3.4s, v0.4s, v3.4s
; CHECK-NEXT:    cmeq v4.4s, v0.4s, v4.4s
; CHECK-NEXT:    cmeq v5.4s, v0.4s, v5.4s
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    orr v1.16b, v4.16b, v3.16b
; CHECK-NEXT:    orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    ret
  %r = tail call <4 x i1> @llvm.experimental.vector.match(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask)
  ret <4 x i1> %r
}

define <2 x i1> @match_v2xi64_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #0 {
; CHECK-LABEL: match_v2xi64_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v3.2d, v1.d[1]
; CHECK-NEXT:    dup v1.2d, v1.d[0]
; CHECK-NEXT:    cmeq v3.2d, v0.2d, v3.2d
; CHECK-NEXT:    cmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    ret
  %r = tail call <2 x i1> @llvm.experimental.vector.match(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask)
  ret <2 x i1> %r
}

attributes #0 = { "target-features"="+sve2" }