1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2-RV32
3 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2-RV64
4 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1-RV32
5 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1-RV64
6 ; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB
7 ; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB
; bswap_v8i16 loads a <8 x i16> vector from %x, applies llvm.bswap.v8i16 to it
; and stores the result back to %x.
; Without Zvkb (shared CHECK prefix) the expected code swaps the two bytes of
; every e16 element with a shift right by 8, a shift left by 8 and an OR.
; With +experimental-zvkb (ZVKB prefix) a single vrev8.v is expected instead.
; NOTE(review) - %b is loaded from %y but has no use in the lines shown here.
9 define void @bswap_v8i16(ptr %x, ptr %y) {
10 ; CHECK-LABEL: bswap_v8i16:
12 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13 ; CHECK-NEXT: vle16.v v8, (a0)
14 ; CHECK-NEXT: vsrl.vi v9, v8, 8
15 ; CHECK-NEXT: vsll.vi v8, v8, 8
16 ; CHECK-NEXT: vor.vv v8, v8, v9
17 ; CHECK-NEXT: vse16.v v8, (a0)
20 ; ZVKB-LABEL: bswap_v8i16:
22 ; ZVKB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
23 ; ZVKB-NEXT: vle16.v v8, (a0)
24 ; ZVKB-NEXT: vrev8.v v8, v8
25 ; ZVKB-NEXT: vse16.v v8, (a0)
27 %a = load <8 x i16>, ptr %x
28 %b = load <8 x i16>, ptr %y
29 %c = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
30 store <8 x i16> %c, ptr %x
33 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
; bswap_v4i32 loads a <4 x i32> vector from %x, applies llvm.bswap.v4i32 to it
; and stores the result back to %x.
; Without Zvkb (shared CHECK prefix) the expected code is a shift/mask/OR
; sequence on e32 elements. The scalar in a1 is (16 << 12) - 256 = 0xFF00 and
; is used to isolate one of the two middle bytes on each side of the swap.
; With +experimental-zvkb (ZVKB prefix) a single vrev8.v is expected instead.
; NOTE(review) - %b is loaded from %y but has no use in the lines shown here.
35 define void @bswap_v4i32(ptr %x, ptr %y) {
36 ; CHECK-LABEL: bswap_v4i32:
38 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
39 ; CHECK-NEXT: vle32.v v8, (a0)
40 ; CHECK-NEXT: vsrl.vi v9, v8, 8
41 ; CHECK-NEXT: lui a1, 16
42 ; CHECK-NEXT: addi a1, a1, -256
43 ; CHECK-NEXT: vand.vx v9, v9, a1
44 ; CHECK-NEXT: vsrl.vi v10, v8, 24
45 ; CHECK-NEXT: vor.vv v9, v9, v10
46 ; CHECK-NEXT: vand.vx v10, v8, a1
47 ; CHECK-NEXT: vsll.vi v10, v10, 8
48 ; CHECK-NEXT: vsll.vi v8, v8, 24
49 ; CHECK-NEXT: vor.vv v8, v8, v10
50 ; CHECK-NEXT: vor.vv v8, v8, v9
51 ; CHECK-NEXT: vse32.v v8, (a0)
54 ; ZVKB-LABEL: bswap_v4i32:
56 ; ZVKB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
57 ; ZVKB-NEXT: vle32.v v8, (a0)
58 ; ZVKB-NEXT: vrev8.v v8, v8
59 ; ZVKB-NEXT: vse32.v v8, (a0)
61 %a = load <4 x i32>, ptr %x
62 %b = load <4 x i32>, ptr %y
63 %c = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
64 store <4 x i32> %c, ptr %x
67 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
; bswap_v2i64 loads a <2 x i64> vector from %x, applies llvm.bswap.v2i64 to it
; and stores the result back to %x.
; Without Zvkb the expected code is a shift/mask/OR sequence on e64 elements,
; checked separately for the two XLENs.
;   RV32 prefix - the 64-bit mask 0xFF000000 is written to the stack as two
;   words (lui 1044480 gives the low word, zero is the high word) and read
;   into a vector register with a zero-stride vlse64.v, then applied with
;   vand.vv.
;   RV64 prefix - the same mask is built in a scalar register (li 255 then
;   slli 24) and applied with vand.vx, so no stack slot is used.
; With +experimental-zvkb (ZVKB prefix) a single vrev8.v is expected instead.
; NOTE(review) - %b is loaded from %y but has no use in the lines shown here.
69 define void @bswap_v2i64(ptr %x, ptr %y) {
70 ; RV32-LABEL: bswap_v2i64:
72 ; RV32-NEXT: addi sp, sp, -16
73 ; RV32-NEXT: .cfi_def_cfa_offset 16
74 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
75 ; RV32-NEXT: vle64.v v8, (a0)
76 ; RV32-NEXT: sw zero, 12(sp)
77 ; RV32-NEXT: lui a1, 1044480
78 ; RV32-NEXT: sw a1, 8(sp)
79 ; RV32-NEXT: li a1, 56
80 ; RV32-NEXT: vsrl.vx v9, v8, a1
81 ; RV32-NEXT: li a2, 40
82 ; RV32-NEXT: vsrl.vx v10, v8, a2
83 ; RV32-NEXT: lui a3, 16
84 ; RV32-NEXT: addi a3, a3, -256
85 ; RV32-NEXT: vand.vx v10, v10, a3
86 ; RV32-NEXT: vor.vv v9, v10, v9
87 ; RV32-NEXT: vsrl.vi v10, v8, 24
88 ; RV32-NEXT: addi a4, sp, 8
89 ; RV32-NEXT: vlse64.v v11, (a4), zero
90 ; RV32-NEXT: lui a4, 4080
91 ; RV32-NEXT: vand.vx v10, v10, a4
92 ; RV32-NEXT: vsrl.vi v12, v8, 8
93 ; RV32-NEXT: vand.vv v12, v12, v11
94 ; RV32-NEXT: vor.vv v10, v12, v10
95 ; RV32-NEXT: vor.vv v9, v10, v9
96 ; RV32-NEXT: vsll.vx v10, v8, a1
97 ; RV32-NEXT: vand.vx v12, v8, a3
98 ; RV32-NEXT: vsll.vx v12, v12, a2
99 ; RV32-NEXT: vor.vv v10, v10, v12
100 ; RV32-NEXT: vand.vx v12, v8, a4
101 ; RV32-NEXT: vsll.vi v12, v12, 24
102 ; RV32-NEXT: vand.vv v8, v8, v11
103 ; RV32-NEXT: vsll.vi v8, v8, 8
104 ; RV32-NEXT: vor.vv v8, v12, v8
105 ; RV32-NEXT: vor.vv v8, v10, v8
106 ; RV32-NEXT: vor.vv v8, v8, v9
107 ; RV32-NEXT: vse64.v v8, (a0)
108 ; RV32-NEXT: addi sp, sp, 16
111 ; RV64-LABEL: bswap_v2i64:
113 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
114 ; RV64-NEXT: vle64.v v8, (a0)
115 ; RV64-NEXT: li a1, 56
116 ; RV64-NEXT: vsrl.vx v9, v8, a1
117 ; RV64-NEXT: li a2, 40
118 ; RV64-NEXT: vsrl.vx v10, v8, a2
119 ; RV64-NEXT: lui a3, 16
120 ; RV64-NEXT: addiw a3, a3, -256
121 ; RV64-NEXT: vand.vx v10, v10, a3
122 ; RV64-NEXT: vor.vv v9, v10, v9
123 ; RV64-NEXT: vsrl.vi v10, v8, 24
124 ; RV64-NEXT: lui a4, 4080
125 ; RV64-NEXT: vand.vx v10, v10, a4
126 ; RV64-NEXT: vsrl.vi v11, v8, 8
127 ; RV64-NEXT: li a5, 255
128 ; RV64-NEXT: slli a5, a5, 24
129 ; RV64-NEXT: vand.vx v11, v11, a5
130 ; RV64-NEXT: vor.vv v10, v11, v10
131 ; RV64-NEXT: vor.vv v9, v10, v9
132 ; RV64-NEXT: vand.vx v10, v8, a5
133 ; RV64-NEXT: vsll.vi v10, v10, 8
134 ; RV64-NEXT: vand.vx v11, v8, a4
135 ; RV64-NEXT: vsll.vi v11, v11, 24
136 ; RV64-NEXT: vor.vv v10, v11, v10
137 ; RV64-NEXT: vsll.vx v11, v8, a1
138 ; RV64-NEXT: vand.vx v8, v8, a3
139 ; RV64-NEXT: vsll.vx v8, v8, a2
140 ; RV64-NEXT: vor.vv v8, v11, v8
141 ; RV64-NEXT: vor.vv v8, v8, v10
142 ; RV64-NEXT: vor.vv v8, v8, v9
143 ; RV64-NEXT: vse64.v v8, (a0)
146 ; ZVKB-LABEL: bswap_v2i64:
148 ; ZVKB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
149 ; ZVKB-NEXT: vle64.v v8, (a0)
150 ; ZVKB-NEXT: vrev8.v v8, v8
151 ; ZVKB-NEXT: vse64.v v8, (a0)
153 %a = load <2 x i64>, ptr %x
154 %b = load <2 x i64>, ptr %y
155 %c = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
156 store <2 x i64> %c, ptr %x
159 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; bswap_v16i16 loads a <16 x i16> vector from %x, applies llvm.bswap.v16i16 to
; it and stores the result back to %x.
; Expected code depends on the -riscv-v-fixed-length-vector-lmul-max setting
; selected by the RUN lines.
;   LMULMAX2 prefixes - one m2 sequence (shift right 8, shift left 8, OR) over
;   all 16 elements.
;   LMULMAX1 prefixes - the vector is handled as two m1 halves of 8 elements,
;   one at (a0) and one at a0 + 16, each with its own shift/shift/OR sequence.
; With +experimental-zvkb (ZVKB prefix) a single m2 vrev8.v is expected instead.
; NOTE(review) - %b is loaded from %y but has no use in the lines shown here.
161 define void @bswap_v16i16(ptr %x, ptr %y) {
162 ; LMULMAX2-RV32-LABEL: bswap_v16i16:
163 ; LMULMAX2-RV32: # %bb.0:
164 ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
165 ; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
166 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8
167 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8
168 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
169 ; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
170 ; LMULMAX2-RV32-NEXT: ret
172 ; LMULMAX2-RV64-LABEL: bswap_v16i16:
173 ; LMULMAX2-RV64: # %bb.0:
174 ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
175 ; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
176 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8
177 ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8
178 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
179 ; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
180 ; LMULMAX2-RV64-NEXT: ret
182 ; LMULMAX1-RV32-LABEL: bswap_v16i16:
183 ; LMULMAX1-RV32: # %bb.0:
184 ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
185 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16
186 ; LMULMAX1-RV32-NEXT: vle16.v v8, (a1)
187 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a0)
188 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8
189 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8
190 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
191 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8
192 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8
193 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
194 ; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
195 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a1)
196 ; LMULMAX1-RV32-NEXT: ret
198 ; LMULMAX1-RV64-LABEL: bswap_v16i16:
199 ; LMULMAX1-RV64: # %bb.0:
200 ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
201 ; LMULMAX1-RV64-NEXT: addi a1, a0, 16
202 ; LMULMAX1-RV64-NEXT: vle16.v v8, (a1)
203 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a0)
204 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8
205 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8
206 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
207 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8
208 ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8
209 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
210 ; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
211 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
212 ; LMULMAX1-RV64-NEXT: ret
214 ; ZVKB-LABEL: bswap_v16i16:
216 ; ZVKB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
217 ; ZVKB-NEXT: vle16.v v8, (a0)
218 ; ZVKB-NEXT: vrev8.v v8, v8
219 ; ZVKB-NEXT: vse16.v v8, (a0)
221 %a = load <16 x i16>, ptr %x
222 %b = load <16 x i16>, ptr %y
223 %c = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
224 store <16 x i16> %c, ptr %x
227 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
; bswap_v8i32 loads a <8 x i32> vector from %x, applies llvm.bswap.v8i32 to it
; and stores the result back to %x.
; Expected code depends on the -riscv-v-fixed-length-vector-lmul-max setting
; selected by the RUN lines.
;   LMULMAX2 prefixes - one m2 shift/mask/OR sequence over all 8 elements,
;   using the scalar mask (16 << 12) - 256 = 0xFF00.
;   LMULMAX1 prefixes - the vector is handled as two m1 halves of 4 elements,
;   one at (a0) and one at a0 + 16. The 0xFF00 mask is materialised once in a2
;   and reused for both halves.
; With +experimental-zvkb (ZVKB prefix) a single m2 vrev8.v is expected instead.
; NOTE(review) - %b is loaded from %y but has no use in the lines shown here.
229 define void @bswap_v8i32(ptr %x, ptr %y) {
230 ; LMULMAX2-RV32-LABEL: bswap_v8i32:
231 ; LMULMAX2-RV32: # %bb.0:
232 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
233 ; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
234 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8
235 ; LMULMAX2-RV32-NEXT: lui a1, 16
236 ; LMULMAX2-RV32-NEXT: addi a1, a1, -256
237 ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
238 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24
239 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12
240 ; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1
241 ; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8
242 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24
243 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12
244 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
245 ; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
246 ; LMULMAX2-RV32-NEXT: ret
248 ; LMULMAX2-RV64-LABEL: bswap_v8i32:
249 ; LMULMAX2-RV64: # %bb.0:
250 ; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
251 ; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
252 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8
253 ; LMULMAX2-RV64-NEXT: lui a1, 16
254 ; LMULMAX2-RV64-NEXT: addi a1, a1, -256
255 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
256 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24
257 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12
258 ; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1
259 ; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8
260 ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24
261 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12
262 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
263 ; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
264 ; LMULMAX2-RV64-NEXT: ret
266 ; LMULMAX1-RV32-LABEL: bswap_v8i32:
267 ; LMULMAX1-RV32: # %bb.0:
268 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
269 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16
270 ; LMULMAX1-RV32-NEXT: vle32.v v8, (a1)
271 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a0)
272 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8
273 ; LMULMAX1-RV32-NEXT: lui a2, 16
274 ; LMULMAX1-RV32-NEXT: addi a2, a2, -256
275 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
276 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24
277 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11
278 ; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2
279 ; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8
280 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24
281 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11
282 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
283 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8
284 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
285 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24
286 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11
287 ; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2
288 ; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8
289 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24
290 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11
291 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
292 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0)
293 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
294 ; LMULMAX1-RV32-NEXT: ret
296 ; LMULMAX1-RV64-LABEL: bswap_v8i32:
297 ; LMULMAX1-RV64: # %bb.0:
298 ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
299 ; LMULMAX1-RV64-NEXT: addi a1, a0, 16
300 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a1)
301 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a0)
302 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8
303 ; LMULMAX1-RV64-NEXT: lui a2, 16
304 ; LMULMAX1-RV64-NEXT: addi a2, a2, -256
305 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
306 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24
307 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11
308 ; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2
309 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8
310 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24
311 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11
312 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
313 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8
314 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
315 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24
316 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11
317 ; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2
318 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8
319 ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24
320 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11
321 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
322 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
323 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
324 ; LMULMAX1-RV64-NEXT: ret
326 ; ZVKB-LABEL: bswap_v8i32:
328 ; ZVKB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
329 ; ZVKB-NEXT: vle32.v v8, (a0)
330 ; ZVKB-NEXT: vrev8.v v8, v8
331 ; ZVKB-NEXT: vse32.v v8, (a0)
333 %a = load <8 x i32>, ptr %x
334 %b = load <8 x i32>, ptr %y
335 %c = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
336 store <8 x i32> %c, ptr %x
339 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
; bswap_v4i64 loads a <4 x i64> vector from %x, applies llvm.bswap.v4i64 to it
; and stores the result back to %x.
; Expected code depends on both XLEN and the
; -riscv-v-fixed-length-vector-lmul-max setting selected by the RUN lines.
;   LMULMAX2 prefixes - one m2 shift/mask/OR sequence over all 4 elements.
;   LMULMAX1 prefixes - the vector is handled as two m1 halves of 2 elements,
;   one at (a0) and one at a0 + 16. Shift amounts and masks are materialised
;   once and reused for both halves.
;   RV32 variants - the 64-bit mask 0xFF000000 is written to the stack as two
;   words and read back with a zero-stride vlse64.v, then applied with vand.vv.
;   RV64 variants - the same mask is built with li 255 then slli 24 and applied
;   with vand.vx, so no stack slot is used.
; With +experimental-zvkb (ZVKB prefix) a single m2 vrev8.v is expected instead.
; NOTE(review) - %b is loaded from %y but has no use in the lines shown here.
341 define void @bswap_v4i64(ptr %x, ptr %y) {
342 ; LMULMAX2-RV32-LABEL: bswap_v4i64:
343 ; LMULMAX2-RV32: # %bb.0:
344 ; LMULMAX2-RV32-NEXT: addi sp, sp, -16
345 ; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16
346 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
347 ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
348 ; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
349 ; LMULMAX2-RV32-NEXT: lui a1, 1044480
350 ; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
351 ; LMULMAX2-RV32-NEXT: li a1, 56
352 ; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1
353 ; LMULMAX2-RV32-NEXT: li a2, 40
354 ; LMULMAX2-RV32-NEXT: vsrl.vx v12, v8, a2
355 ; LMULMAX2-RV32-NEXT: lui a3, 16
356 ; LMULMAX2-RV32-NEXT: addi a3, a3, -256
357 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3
358 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10
359 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24
360 ; LMULMAX2-RV32-NEXT: addi a4, sp, 8
361 ; LMULMAX2-RV32-NEXT: vlse64.v v14, (a4), zero
362 ; LMULMAX2-RV32-NEXT: lui a4, 4080
363 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4
364 ; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8
365 ; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14
366 ; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12
367 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10
368 ; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1
369 ; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3
370 ; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2
371 ; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16
372 ; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4
373 ; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24
374 ; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14
375 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8
376 ; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8
377 ; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8
378 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
379 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
380 ; LMULMAX2-RV32-NEXT: addi sp, sp, 16
381 ; LMULMAX2-RV32-NEXT: ret
383 ; LMULMAX2-RV64-LABEL: bswap_v4i64:
384 ; LMULMAX2-RV64: # %bb.0:
385 ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
386 ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0)
387 ; LMULMAX2-RV64-NEXT: li a1, 56
388 ; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1
389 ; LMULMAX2-RV64-NEXT: li a2, 40
390 ; LMULMAX2-RV64-NEXT: vsrl.vx v12, v8, a2
391 ; LMULMAX2-RV64-NEXT: lui a3, 16
392 ; LMULMAX2-RV64-NEXT: addiw a3, a3, -256
393 ; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a3
394 ; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10
395 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24
396 ; LMULMAX2-RV64-NEXT: lui a4, 4080
397 ; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a4
398 ; LMULMAX2-RV64-NEXT: vsrl.vi v14, v8, 8
399 ; LMULMAX2-RV64-NEXT: li a5, 255
400 ; LMULMAX2-RV64-NEXT: slli a5, a5, 24
401 ; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a5
402 ; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12
403 ; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10
404 ; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a5
405 ; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8
406 ; LMULMAX2-RV64-NEXT: vand.vx v14, v8, a4
407 ; LMULMAX2-RV64-NEXT: vsll.vi v14, v14, 24
408 ; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12
409 ; LMULMAX2-RV64-NEXT: vsll.vx v14, v8, a1
410 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a3
411 ; LMULMAX2-RV64-NEXT: vsll.vx v8, v8, a2
412 ; LMULMAX2-RV64-NEXT: vor.vv v8, v14, v8
413 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12
414 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
415 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
416 ; LMULMAX2-RV64-NEXT: ret
418 ; LMULMAX1-RV32-LABEL: bswap_v4i64:
419 ; LMULMAX1-RV32: # %bb.0:
420 ; LMULMAX1-RV32-NEXT: addi sp, sp, -16
421 ; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16
422 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
423 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
424 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16
425 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
426 ; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
427 ; LMULMAX1-RV32-NEXT: lui a2, 1044480
428 ; LMULMAX1-RV32-NEXT: sw a2, 8(sp)
429 ; LMULMAX1-RV32-NEXT: li a2, 56
430 ; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2
431 ; LMULMAX1-RV32-NEXT: li a3, 40
432 ; LMULMAX1-RV32-NEXT: vsrl.vx v11, v9, a3
433 ; LMULMAX1-RV32-NEXT: lui a4, 16
434 ; LMULMAX1-RV32-NEXT: addi a4, a4, -256
435 ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4
436 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
437 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24
438 ; LMULMAX1-RV32-NEXT: addi a5, sp, 8
439 ; LMULMAX1-RV32-NEXT: vlse64.v v12, (a5), zero
440 ; LMULMAX1-RV32-NEXT: lui a5, 4080
441 ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5
442 ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 8
443 ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12
444 ; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11
445 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
446 ; LMULMAX1-RV32-NEXT: vand.vv v11, v9, v12
447 ; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8
448 ; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a5
449 ; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24
450 ; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11
451 ; LMULMAX1-RV32-NEXT: vsll.vx v13, v9, a2
452 ; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
453 ; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a3
454 ; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9
455 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11
456 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
457 ; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2
458 ; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a3
459 ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4
460 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
461 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24
462 ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5
463 ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 8
464 ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12
465 ; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11
466 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
467 ; LMULMAX1-RV32-NEXT: vsll.vx v11, v8, a2
468 ; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a4
469 ; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3
470 ; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v13
471 ; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a5
472 ; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24
473 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12
474 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8
475 ; LMULMAX1-RV32-NEXT: vor.vv v8, v13, v8
476 ; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8
477 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
478 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
479 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
480 ; LMULMAX1-RV32-NEXT: addi sp, sp, 16
481 ; LMULMAX1-RV32-NEXT: ret
483 ; LMULMAX1-RV64-LABEL: bswap_v4i64:
484 ; LMULMAX1-RV64: # %bb.0:
485 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
486 ; LMULMAX1-RV64-NEXT: addi a1, a0, 16
487 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a1)
488 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0)
489 ; LMULMAX1-RV64-NEXT: li a2, 56
490 ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2
491 ; LMULMAX1-RV64-NEXT: li a3, 40
492 ; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, a3
493 ; LMULMAX1-RV64-NEXT: lui a4, 16
494 ; LMULMAX1-RV64-NEXT: addiw a4, a4, -256
495 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4
496 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10
497 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24
498 ; LMULMAX1-RV64-NEXT: lui a5, 4080
499 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5
500 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8
501 ; LMULMAX1-RV64-NEXT: li a6, 255
502 ; LMULMAX1-RV64-NEXT: slli a6, a6, 24
503 ; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6
504 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11
505 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10
506 ; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a6
507 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8
508 ; LMULMAX1-RV64-NEXT: vand.vx v12, v8, a5
509 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24
510 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11
511 ; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2
512 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
513 ; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3
514 ; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8
515 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11
516 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
517 ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2
518 ; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, a3
519 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4
520 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10
521 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24
522 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5
523 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8
524 ; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6
525 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11
526 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10
527 ; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a6
528 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8
529 ; LMULMAX1-RV64-NEXT: vand.vx v12, v9, a5
530 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24
531 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11
532 ; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2
533 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
534 ; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3
535 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9
536 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11
537 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
538 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
539 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
540 ; LMULMAX1-RV64-NEXT: ret
542 ; ZVKB-LABEL: bswap_v4i64:
544 ; ZVKB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
545 ; ZVKB-NEXT: vle64.v v8, (a0)
546 ; ZVKB-NEXT: vrev8.v v8, v8
547 ; ZVKB-NEXT: vse64.v v8, (a0)
549 %a = load <4 x i64>, ptr %x
550 %b = load <4 x i64>, ptr %y
551 %c = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
552 store <4 x i64> %c, ptr %x
555 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)