; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; EOR3 (vector, bitwise, unpredicated)
;
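; EOR3 is a three-way exclusive OR: result = op1 ^ op2 ^ op3. The operation
; is purely bitwise, so a single .d encoding covers every element type; all
; CHECK lines below therefore expect the z0.d form regardless of the IR
; element width.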
define <vscale x 16 x i8> @eor3_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: eor3_i8
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @eor3_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: eor3_i16
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.eor3.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @eor3_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: eor3_i32
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.eor3.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @eor3_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: eor3_i64
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.eor3.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BCAX (vector, bitwise, unpredicated)
;
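; BCAX is "bit clear and exclusive OR": result = op1 ^ (op2 & ~op3). Like
; EOR3 it is element-size agnostic, hence the single .d form in the checks.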
define <vscale x 16 x i8> @bcax_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: bcax_i8
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bcax_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: bcax_i16
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bcax.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bcax_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: bcax_i32
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bcax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bcax_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: bcax_i64
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bcax.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL (vector, bitwise, unpredicated)
;
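; BSL selects bits from op1 where the mask op3 is set and from op2 where it
; is clear: result = (op1 & op3) | (op2 & ~op3).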
define <vscale x 16 x i8> @bsl_i8(<vscale x 16 x i8> %a,
                                  <vscale x 16 x i8> %b,
                                  <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl_i8
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl_i16(<vscale x 8 x i16> %a,
                                   <vscale x 8 x i16> %b,
                                   <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl_i16
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl_i32(<vscale x 4 x i32> %a,
                                   <vscale x 4 x i32> %b,
                                   <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl_i32
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl_i64(<vscale x 2 x i64> %a,
                                   <vscale x 2 x i64> %b,
                                   <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl_i64
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL1N (vector, bitwise, unpredicated)
;
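; BSL1N is a bitwise select with the first input inverted:
; result = (~op1 & op3) | (op2 & ~op3).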
define <vscale x 16 x i8> @bsl1n_i8(<vscale x 16 x i8> %a,
                                    <vscale x 16 x i8> %b,
                                    <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl1n_i8
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl1n.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl1n_i16(<vscale x 8 x i16> %a,
                                     <vscale x 8 x i16> %b,
                                     <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl1n_i16
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl1n.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl1n_i32(<vscale x 4 x i32> %a,
                                     <vscale x 4 x i32> %b,
                                     <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl1n_i32
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl1n.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl1n_i64(<vscale x 2 x i64> %a,
                                     <vscale x 2 x i64> %b,
                                     <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl1n_i64
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl1n.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL2N (vector, bitwise, unpredicated)
;
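; BSL2N is a bitwise select with the second input inverted:
; result = (op1 & op3) | (~op2 & ~op3).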
define <vscale x 16 x i8> @bsl2n_i8(<vscale x 16 x i8> %a,
                                    <vscale x 16 x i8> %b,
                                    <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl2n_i8
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl2n.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl2n_i16(<vscale x 8 x i16> %a,
                                     <vscale x 8 x i16> %b,
                                     <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl2n_i16
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl2n.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl2n_i32(<vscale x 4 x i32> %a,
                                     <vscale x 4 x i32> %b,
                                     <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl2n_i32
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl2n.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl2n_i64(<vscale x 2 x i64> %a,
                                     <vscale x 2 x i64> %b,
                                     <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl2n_i64
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl2n.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; NBSL (vector, bitwise, unpredicated)
;
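; NBSL is a bitwise select with the result inverted:
; result = ~((op1 & op3) | (op2 & ~op3)).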
define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: nbsl_i8
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.nbsl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: nbsl_i16
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.nbsl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: nbsl_i32
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.nbsl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: nbsl_i64
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.nbsl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; XAR (vector, bitwise, unpredicated)
;
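; XAR is an exclusive OR followed by a rotate right by immediate, applied
; per element: result = ror(op1 ^ op2, #imm). Unlike the ternary ops above,
; the element size matters here, so each width checks its own .b/.h/.s/.d
; form and the rotate amount is passed as the final i32 argument.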
define <vscale x 16 x i8> @xar_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: xar_b:
; CHECK: xar z0.b, z0.b, z1.b, #1
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.xar.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b,
                                                               i32 1)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @xar_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: xar_h:
; CHECK: xar z0.h, z0.h, z1.h, #2
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.xar.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b,
                                                               i32 2)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @xar_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: xar_s:
; CHECK: xar z0.s, z0.s, z1.s, #3
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.xar.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b,
                                                               i32 3)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @xar_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: xar_d:
; CHECK: xar z0.d, z0.d, z1.d, #4
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.xar.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b,
                                                               i32 4)
  ret <vscale x 2 x i64> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.eor3.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.eor3.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.eor3.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bcax.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bcax.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bcax.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl1n.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl1n.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl1n.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl1n.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl2n.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl2n.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl2n.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl2n.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.nbsl.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.nbsl.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.nbsl.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.nbsl.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.xar.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.xar.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.xar.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.xar.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)