; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
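
;
; ASR (arithmetic shift right, predicated)
;
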
define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: asr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: asr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: asr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @asr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}
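
;
; ASRD (arithmetic shift right for divide, by immediate)
;
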
define <vscale x 16 x i8> @asrd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: asrd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                i32 1)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asrd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: asrd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #2
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                i32 2)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asrd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: asrd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                i32 31)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @asrd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asrd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #64
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                i32 64)
  ret <vscale x 2 x i64> %out
}
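
;
; INSR (insert scalar into shifted vector)
;
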
define <vscale x 16 x i8> @insr_i8(<vscale x 16 x i8> %a, i8 %b) {
; CHECK-LABEL: insr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    insr z0.b, w0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> %a, i8 %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @insr_i16(<vscale x 8 x i16> %a, i16 %b) {
; CHECK-LABEL: insr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    insr z0.h, w0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> %a, i16 %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @insr_i32(<vscale x 4 x i32> %a, i32 %b) {
; CHECK-LABEL: insr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    insr z0.s, w0
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> %a, i32 %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @insr_i64(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-LABEL: insr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    insr z0.d, x0
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> %a, i64 %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
; CHECK-LABEL: insr_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
; CHECK-NEXT:    insr z0.h, h1
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0 {
; CHECK-LABEL: insr_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
; CHECK-NEXT:    insr z0.h, h1
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
; CHECK-LABEL: insr_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
; CHECK-NEXT:    insr z0.s, s1
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
; CHECK-LABEL: insr_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    insr z0.d, d1
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> %a, double %b)
  ret <vscale x 2 x double> %out
}
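
;
; LSL (logical shift left, predicated)
;
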
define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: lsl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: lsl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: lsl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @lsl_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}
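
;
; LSR (logical shift right, predicated)
;
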
define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: lsr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: lsr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: lsr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @lsr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat>, bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }