1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test driver: the module is compiled by llc with the +sve attribute and the
; -force-streaming-compatible-sve option, and the resulting assembly is piped
; to FileCheck, which matches it against the assertions in this file.
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
; All functions in this module are compiled for AArch64 Linux.
3 target triple = "aarch64-unknown-linux-gnu"
; Bit-reverses every element of a <4 x i8> argument via llvm.bitreverse.v4i8.
; Expected output: a predicated rbit on .h lanes (ptrue vl4) followed by a
; logical shift right by 8.
; NOTE(review): presumably the i8 elements are held in 16-bit lanes, so the
; reversed bits land in the top byte and are shifted back down - confirm.
11 define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) {
12 ; CHECK-LABEL: bitreverse_v4i8:
14 ; CHECK-NEXT: ptrue p0.h, vl4
15 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
16 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
17 ; CHECK-NEXT: lsr z0.h, z0.h, #8
18 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
20 %res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %op)
; Bit-reverses every element of a <8 x i8> argument via llvm.bitreverse.v8i8.
; Expected output: a single predicated rbit on .b lanes under a vl8 predicate,
; with the value passed in and out through d0 (viewed as z0).
24 define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) {
25 ; CHECK-LABEL: bitreverse_v8i8:
27 ; CHECK-NEXT: ptrue p0.b, vl8
28 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
29 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
30 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
32 %res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op)
; Bit-reverses every element of a <16 x i8> argument via llvm.bitreverse.v16i8.
; Expected output: a single predicated rbit on .b lanes under a vl16 predicate,
; with the value passed in and out through q0 (viewed as z0).
36 define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) {
37 ; CHECK-LABEL: bitreverse_v16i8:
39 ; CHECK-NEXT: ptrue p0.b, vl16
40 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
41 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
42 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
44 %res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op)
; Loads a <32 x i8> from %a, bit-reverses every element via
; llvm.bitreverse.v32i8, and stores the result back to %a.
; Expected output: the vector is handled as two q registers (ldp/stp), each
; getting its own rbit on .b lanes under one shared vl16 predicate.
48 define void @bitreverse_v32i8(ptr %a) {
49 ; CHECK-LABEL: bitreverse_v32i8:
51 ; CHECK-NEXT: ptrue p0.b, vl16
52 ; CHECK-NEXT: ldp q0, q1, [x0]
53 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
54 ; CHECK-NEXT: rbit z1.b, p0/m, z1.b
55 ; CHECK-NEXT: stp q0, q1, [x0]
57 %op = load <32 x i8>, ptr %a
58 %res = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %op)
59 store <32 x i8> %res, ptr %a
; Bit-reverses every element of a <2 x i16> argument via llvm.bitreverse.v2i16.
; Expected output: a predicated rbit on .s lanes (ptrue vl2) followed by a
; logical shift right by 16.
; NOTE(review): presumably the i16 elements are held in 32-bit lanes, so the
; reversed bits land in the top half and are shifted back down - confirm.
63 define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) {
64 ; CHECK-LABEL: bitreverse_v2i16:
66 ; CHECK-NEXT: ptrue p0.s, vl2
67 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
68 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
69 ; CHECK-NEXT: lsr z0.s, z0.s, #16
70 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
72 %res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %op)
; Bit-reverses every element of a <4 x i16> argument via llvm.bitreverse.v4i16.
; Expected output: a single predicated rbit on .h lanes under a vl4 predicate,
; with the value passed in and out through d0 (viewed as z0).
76 define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) {
77 ; CHECK-LABEL: bitreverse_v4i16:
79 ; CHECK-NEXT: ptrue p0.h, vl4
80 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
81 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
82 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
84 %res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op)
; Bit-reverses every element of a <8 x i16> argument via llvm.bitreverse.v8i16.
; Expected output: a single predicated rbit on .h lanes under a vl8 predicate,
; with the value passed in and out through q0 (viewed as z0).
88 define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) {
89 ; CHECK-LABEL: bitreverse_v8i16:
91 ; CHECK-NEXT: ptrue p0.h, vl8
92 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
93 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
94 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
96 %res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op)
; Loads a <16 x i16> from %a, bit-reverses every element via
; llvm.bitreverse.v16i16, and stores the result back to %a.
; Expected output: the vector is handled as two q registers (ldp/stp), each
; getting its own rbit on .h lanes under one shared vl8 predicate.
100 define void @bitreverse_v16i16(ptr %a) {
101 ; CHECK-LABEL: bitreverse_v16i16:
103 ; CHECK-NEXT: ptrue p0.h, vl8
104 ; CHECK-NEXT: ldp q0, q1, [x0]
105 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
106 ; CHECK-NEXT: rbit z1.h, p0/m, z1.h
107 ; CHECK-NEXT: stp q0, q1, [x0]
109 %op = load <16 x i16>, ptr %a
110 %res = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %op)
111 store <16 x i16> %res, ptr %a
; Bit-reverses every element of a <2 x i32> argument via llvm.bitreverse.v2i32.
; Expected output: a single predicated rbit on .s lanes under a vl2 predicate,
; with the value passed in and out through d0 (viewed as z0).
115 define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) {
116 ; CHECK-LABEL: bitreverse_v2i32:
118 ; CHECK-NEXT: ptrue p0.s, vl2
119 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
120 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
121 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
123 %res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op)
; Bit-reverses every element of a <4 x i32> argument via llvm.bitreverse.v4i32.
; Expected output: a single predicated rbit on .s lanes under a vl4 predicate,
; with the value passed in and out through q0 (viewed as z0).
127 define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) {
128 ; CHECK-LABEL: bitreverse_v4i32:
130 ; CHECK-NEXT: ptrue p0.s, vl4
131 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
132 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
133 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
135 %res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op)
; Loads a <8 x i32> from %a, bit-reverses every element via
; llvm.bitreverse.v8i32, and stores the result back to %a.
; Expected output: the vector is handled as two q registers (ldp/stp), each
; getting its own rbit on .s lanes under one shared vl4 predicate.
139 define void @bitreverse_v8i32(ptr %a) {
140 ; CHECK-LABEL: bitreverse_v8i32:
142 ; CHECK-NEXT: ptrue p0.s, vl4
143 ; CHECK-NEXT: ldp q0, q1, [x0]
144 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
145 ; CHECK-NEXT: rbit z1.s, p0/m, z1.s
146 ; CHECK-NEXT: stp q0, q1, [x0]
148 %op = load <8 x i32>, ptr %a
149 %res = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %op)
150 store <8 x i32> %res, ptr %a
; Bit-reverses the single element of a <1 x i64> argument via
; llvm.bitreverse.v1i64.
; Expected output: a single predicated rbit on .d lanes under a vl1 predicate,
; with the value passed in and out through d0 (viewed as z0).
154 define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) {
155 ; CHECK-LABEL: bitreverse_v1i64:
157 ; CHECK-NEXT: ptrue p0.d, vl1
158 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
159 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
160 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
162 %res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op)
; Bit-reverses every element of a <2 x i64> argument via llvm.bitreverse.v2i64.
; Expected output: a single predicated rbit on .d lanes under a vl2 predicate,
; with the value passed in and out through q0 (viewed as z0).
166 define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) {
167 ; CHECK-LABEL: bitreverse_v2i64:
169 ; CHECK-NEXT: ptrue p0.d, vl2
170 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
171 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
172 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
174 %res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op)
; Loads a <4 x i64> from %a, bit-reverses every element via
; llvm.bitreverse.v4i64, and stores the result back to %a.
; Expected output: the vector is handled as two q registers (ldp/stp), each
; getting its own rbit on .d lanes under one shared vl2 predicate.
178 define void @bitreverse_v4i64(ptr %a) {
179 ; CHECK-LABEL: bitreverse_v4i64:
181 ; CHECK-NEXT: ptrue p0.d, vl2
182 ; CHECK-NEXT: ldp q0, q1, [x0]
183 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
184 ; CHECK-NEXT: rbit z1.d, p0/m, z1.d
185 ; CHECK-NEXT: stp q0, q1, [x0]
187 %op = load <4 x i64>, ptr %a
188 %res = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %op)
189 store <4 x i64> %res, ptr %a
; Byte-swaps every element of a <2 x i16> argument via llvm.bswap.v2i16.
; Expected output: a predicated revb on .s lanes (ptrue vl2) followed by a
; logical shift right by 16.
; NOTE(review): presumably the i16 elements are held in 32-bit lanes, so the
; swapped bytes land in the top half and are shifted back down - confirm.
197 define <2 x i16> @bswap_v2i16(<2 x i16> %op) {
198 ; CHECK-LABEL: bswap_v2i16:
200 ; CHECK-NEXT: ptrue p0.s, vl2
201 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
202 ; CHECK-NEXT: revb z0.s, p0/m, z0.s
203 ; CHECK-NEXT: lsr z0.s, z0.s, #16
204 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
206 %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %op)
; Byte-swaps every element of a <4 x i16> argument via llvm.bswap.v4i16.
; Expected output: a single predicated revb on .h lanes under a vl4 predicate,
; with the value passed in and out through d0 (viewed as z0).
210 define <4 x i16> @bswap_v4i16(<4 x i16> %op) {
211 ; CHECK-LABEL: bswap_v4i16:
213 ; CHECK-NEXT: ptrue p0.h, vl4
214 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
215 ; CHECK-NEXT: revb z0.h, p0/m, z0.h
216 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
218 %res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op)
; Byte-swaps every element of a <8 x i16> argument via llvm.bswap.v8i16.
; Expected output: a single predicated revb on .h lanes under a vl8 predicate,
; with the value passed in and out through q0 (viewed as z0).
222 define <8 x i16> @bswap_v8i16(<8 x i16> %op) {
223 ; CHECK-LABEL: bswap_v8i16:
225 ; CHECK-NEXT: ptrue p0.h, vl8
226 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
227 ; CHECK-NEXT: revb z0.h, p0/m, z0.h
228 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
230 %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op)
; Loads a <16 x i16> from %a, byte-swaps every element via llvm.bswap.v16i16,
; and stores the result back to %a.
; Expected output: the vector is handled as two q registers (ldp/stp), each
; getting its own revb on .h lanes under one shared vl8 predicate.
234 define void @bswap_v16i16(ptr %a) {
235 ; CHECK-LABEL: bswap_v16i16:
237 ; CHECK-NEXT: ptrue p0.h, vl8
238 ; CHECK-NEXT: ldp q0, q1, [x0]
239 ; CHECK-NEXT: revb z0.h, p0/m, z0.h
240 ; CHECK-NEXT: revb z1.h, p0/m, z1.h
241 ; CHECK-NEXT: stp q0, q1, [x0]
243 %op = load <16 x i16>, ptr %a
244 %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op)
245 store <16 x i16> %res, ptr %a
; Byte-swaps every element of a <2 x i32> argument via llvm.bswap.v2i32.
; Expected output: a single predicated revb on .s lanes under a vl2 predicate,
; with the value passed in and out through d0 (viewed as z0).
249 define <2 x i32> @bswap_v2i32(<2 x i32> %op) {
250 ; CHECK-LABEL: bswap_v2i32:
252 ; CHECK-NEXT: ptrue p0.s, vl2
253 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
254 ; CHECK-NEXT: revb z0.s, p0/m, z0.s
255 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
257 %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op)
; Byte-swaps every element of a <4 x i32> argument via llvm.bswap.v4i32.
; Expected output: a single predicated revb on .s lanes under a vl4 predicate,
; with the value passed in and out through q0 (viewed as z0).
261 define <4 x i32> @bswap_v4i32(<4 x i32> %op) {
262 ; CHECK-LABEL: bswap_v4i32:
264 ; CHECK-NEXT: ptrue p0.s, vl4
265 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
266 ; CHECK-NEXT: revb z0.s, p0/m, z0.s
267 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
269 %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op)
; Loads a <8 x i32> from %a, byte-swaps every element via llvm.bswap.v8i32,
; and stores the result back to %a.
; Expected output: the vector is handled as two q registers (ldp/stp), each
; getting its own revb on .s lanes under one shared vl4 predicate.
273 define void @bswap_v8i32(ptr %a) {
274 ; CHECK-LABEL: bswap_v8i32:
276 ; CHECK-NEXT: ptrue p0.s, vl4
277 ; CHECK-NEXT: ldp q0, q1, [x0]
278 ; CHECK-NEXT: revb z0.s, p0/m, z0.s
279 ; CHECK-NEXT: revb z1.s, p0/m, z1.s
280 ; CHECK-NEXT: stp q0, q1, [x0]
282 %op = load <8 x i32>, ptr %a
283 %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op)
284 store <8 x i32> %res, ptr %a
; Byte-swaps the single element of a <1 x i64> argument via llvm.bswap.v1i64.
; Expected output: a single predicated revb on .d lanes under a vl1 predicate,
; with the value passed in and out through d0 (viewed as z0).
288 define <1 x i64> @bswap_v1i64(<1 x i64> %op) {
289 ; CHECK-LABEL: bswap_v1i64:
291 ; CHECK-NEXT: ptrue p0.d, vl1
292 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
293 ; CHECK-NEXT: revb z0.d, p0/m, z0.d
294 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
296 %res = call <1 x i64> @llvm.bswap.v1i64(<1 x i64> %op)
; Byte-swaps every element of a <2 x i64> argument via llvm.bswap.v2i64.
; Expected output: a single predicated revb on .d lanes under a vl2 predicate,
; with the value passed in and out through q0 (viewed as z0).
300 define <2 x i64> @bswap_v2i64(<2 x i64> %op) {
301 ; CHECK-LABEL: bswap_v2i64:
303 ; CHECK-NEXT: ptrue p0.d, vl2
304 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
305 ; CHECK-NEXT: revb z0.d, p0/m, z0.d
306 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
308 %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %op)
; Loads a <4 x i64> from %a, byte-swaps every element via llvm.bswap.v4i64,
; and stores the result back to %a.
; Expected output: the vector is handled as two q registers (ldp/stp), each
; getting its own revb on .d lanes under one shared vl2 predicate.
312 define void @bswap_v4i64(ptr %a) {
313 ; CHECK-LABEL: bswap_v4i64:
315 ; CHECK-NEXT: ptrue p0.d, vl2
316 ; CHECK-NEXT: ldp q0, q1, [x0]
317 ; CHECK-NEXT: revb z0.d, p0/m, z0.d
318 ; CHECK-NEXT: revb z1.d, p0/m, z1.d
319 ; CHECK-NEXT: stp q0, q1, [x0]
321 %op = load <4 x i64>, ptr %a
322 %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op)
323 store <4 x i64> %res, ptr %a
; Declarations of the llvm.bitreverse.* intrinsics called by the
; bitreverse_* test functions, one per vector type exercised
; (i8, i16, i32 and i64 elements).
327 declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
328 declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>)
329 declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
330 declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
331 declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>)
332 declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>)
333 declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
334 declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
335 declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)
336 declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
337 declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
338 declare <1 x i64> @llvm.bitreverse.v1i64(<1 x i64>)
339 declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
340 declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
; Declarations of the llvm.bswap.* intrinsics called by the bswap_* test
; functions; the narrowest element type declared here is i16.
342 declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
343 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
344 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
345 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
346 declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
347 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
348 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
349 declare <1 x i64> @llvm.bswap.v1i64(<1 x i64>)
350 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
351 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)