1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvl256b \
3 ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
4 ; RUN: | FileCheck %s --check-prefixes=CHECK,V
5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+zve32f,+zfh,+zvfh,+zvl256b \
6 ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
7 ; RUN: | FileCheck %s --check-prefixes=CHECK,ZVE32F
; Deinterleave, even lanes, i8: loads <16 x i8>, keeps elements 0,2,...,14 and
; stores the resulting <8 x i8>. Both run configurations share the same checks
; and expect a single narrowing shift (vnsrl.wi) with shift amount 0.
9 define void @vnsrl_0_i8(ptr %in, ptr %out) {
10 ; CHECK-LABEL: vnsrl_0_i8:
11 ; CHECK: # %bb.0: # %entry
12 ; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
13 ; CHECK-NEXT: vle8.v v8, (a0)
14 ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
15 ; CHECK-NEXT: vnsrl.wi v8, v8, 0
16 ; CHECK-NEXT: vse8.v v8, (a1)
19 %0 = load <16 x i8>, ptr %in, align 1
20 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
21 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Deinterleave, odd lanes, i8: loads <16 x i8>, keeps elements 1,3,...,15 and
; stores the resulting <8 x i8>. Both run configurations share the same checks
; and expect vnsrl.wi with shift amount 8 (the i8 element width).
25 define void @vnsrl_8_i8(ptr %in, ptr %out) {
26 ; CHECK-LABEL: vnsrl_8_i8:
27 ; CHECK: # %bb.0: # %entry
28 ; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
29 ; CHECK-NEXT: vle8.v v8, (a0)
30 ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
31 ; CHECK-NEXT: vnsrl.wi v8, v8, 8
32 ; CHECK-NEXT: vse8.v v8, (a1)
35 %0 = load <16 x i8>, ptr %in, align 1
36 %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
37 store <8 x i8> %shuffle.i5, ptr %out, align 1
; Deinterleave, even lanes, i16: loads <8 x i16>, keeps elements 0,2,4,6 and
; stores the resulting <4 x i16>. Both the +v run and the +zve32f run expect
; vnsrl.wi with shift amount 0; the expected sequences differ only in the LMUL
; of the second vsetivli (mf4 versus mf2).
41 define void @vnsrl_0_i16(ptr %in, ptr %out) {
42 ; V-LABEL: vnsrl_0_i16:
43 ; V: # %bb.0: # %entry
44 ; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
45 ; V-NEXT: vle16.v v8, (a0)
46 ; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
47 ; V-NEXT: vnsrl.wi v8, v8, 0
48 ; V-NEXT: vse16.v v8, (a1)
51 ; ZVE32F-LABEL: vnsrl_0_i16:
52 ; ZVE32F: # %bb.0: # %entry
53 ; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
54 ; ZVE32F-NEXT: vle16.v v8, (a0)
55 ; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
56 ; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
57 ; ZVE32F-NEXT: vse16.v v8, (a1)
60 %0 = load <8 x i16>, ptr %in, align 2
61 %shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
62 store <4 x i16> %shuffle.i5, ptr %out, align 2
; Deinterleave, odd lanes, i16: loads <8 x i16>, keeps elements 1,3,5,7 and
; stores the resulting <4 x i16>. Both the +v run and the +zve32f run expect
; vnsrl.wi with shift amount 16 (the i16 element width); the expected
; sequences differ only in the LMUL of the second vsetivli (mf4 versus mf2).
66 define void @vnsrl_16_i16(ptr %in, ptr %out) {
67 ; V-LABEL: vnsrl_16_i16:
68 ; V: # %bb.0: # %entry
69 ; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
70 ; V-NEXT: vle16.v v8, (a0)
71 ; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
72 ; V-NEXT: vnsrl.wi v8, v8, 16
73 ; V-NEXT: vse16.v v8, (a1)
76 ; ZVE32F-LABEL: vnsrl_16_i16:
77 ; ZVE32F: # %bb.0: # %entry
78 ; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
79 ; ZVE32F-NEXT: vle16.v v8, (a0)
80 ; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
81 ; ZVE32F-NEXT: vnsrl.wi v8, v8, 16
82 ; ZVE32F-NEXT: vse16.v v8, (a1)
85 %0 = load <8 x i16>, ptr %in, align 2
86 %shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
87 store <4 x i16> %shuffle.i5, ptr %out, align 2
; Deinterleave, even lanes, half: loads <8 x half>, keeps elements 0,2,4,6 and
; stores the resulting <4 x half>. The expected code is the same integer
; narrowing shift as the i16 variant (vnsrl.wi with shift amount 0) in both
; the +v run and the +zve32f run.
91 define void @vnsrl_0_half(ptr %in, ptr %out) {
92 ; V-LABEL: vnsrl_0_half:
93 ; V: # %bb.0: # %entry
94 ; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
95 ; V-NEXT: vle16.v v8, (a0)
96 ; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
97 ; V-NEXT: vnsrl.wi v8, v8, 0
98 ; V-NEXT: vse16.v v8, (a1)
101 ; ZVE32F-LABEL: vnsrl_0_half:
102 ; ZVE32F: # %bb.0: # %entry
103 ; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
104 ; ZVE32F-NEXT: vle16.v v8, (a0)
105 ; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
106 ; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
107 ; ZVE32F-NEXT: vse16.v v8, (a1)
110 %0 = load <8 x half>, ptr %in, align 2
111 %shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
112 store <4 x half> %shuffle.i5, ptr %out, align 2
; Deinterleave, odd lanes, half: loads <8 x half>, keeps elements 1,3,5,7 and
; stores the resulting <4 x half>. The expected code is the same integer
; narrowing shift as the i16 variant (vnsrl.wi with shift amount 16) in both
; the +v run and the +zve32f run.
116 define void @vnsrl_16_half(ptr %in, ptr %out) {
117 ; V-LABEL: vnsrl_16_half:
118 ; V: # %bb.0: # %entry
119 ; V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
120 ; V-NEXT: vle16.v v8, (a0)
121 ; V-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
122 ; V-NEXT: vnsrl.wi v8, v8, 16
123 ; V-NEXT: vse16.v v8, (a1)
126 ; ZVE32F-LABEL: vnsrl_16_half:
127 ; ZVE32F: # %bb.0: # %entry
128 ; ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
129 ; ZVE32F-NEXT: vle16.v v8, (a0)
130 ; ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
131 ; ZVE32F-NEXT: vnsrl.wi v8, v8, 16
132 ; ZVE32F-NEXT: vse16.v v8, (a1)
135 %0 = load <8 x half>, ptr %in, align 2
136 %shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
137 store <4 x half> %shuffle.i5, ptr %out, align 2
; Deinterleave, even lanes, i32: loads <4 x i32>, keeps elements 0 and 2 and
; stores the resulting <2 x i32>. The +v run expects vnsrl.wi with shift
; amount 0. The +zve32f run expects no narrowing shift; its checks show a
; vslidedown.vi / vslideup.vi pair instead.
141 define void @vnsrl_0_i32(ptr %in, ptr %out) {
142 ; V-LABEL: vnsrl_0_i32:
143 ; V: # %bb.0: # %entry
144 ; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
145 ; V-NEXT: vle32.v v8, (a0)
146 ; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
147 ; V-NEXT: vnsrl.wi v8, v8, 0
148 ; V-NEXT: vse32.v v8, (a1)
151 ; ZVE32F-LABEL: vnsrl_0_i32:
152 ; ZVE32F: # %bb.0: # %entry
153 ; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
154 ; ZVE32F-NEXT: vle32.v v8, (a0)
155 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
156 ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
157 ; ZVE32F-NEXT: vslideup.vi v8, v9, 1
158 ; ZVE32F-NEXT: vse32.v v8, (a1)
161 %0 = load <4 x i32>, ptr %in, align 4
162 %shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
163 store <2 x i32> %shuffle.i5, ptr %out, align 4
; Deinterleave, odd lanes, i32: loads <4 x i32>, keeps elements 1 and 3 and
; stores the resulting <2 x i32>. The +v run expects the register-operand form
; vnsrl.wx, with the shift amount taken from a0. The +zve32f run expects no
; narrowing shift; its checks show vmv.v.i into v0 followed by vslidedown.vi
; and a masked vrgather.vi.
167 define void @vnsrl_32_i32(ptr %in, ptr %out) {
168 ; V-LABEL: vnsrl_32_i32:
169 ; V: # %bb.0: # %entry
170 ; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
171 ; V-NEXT: vle32.v v8, (a0)
173 ; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
174 ; V-NEXT: vnsrl.wx v8, v8, a0
175 ; V-NEXT: vse32.v v8, (a1)
178 ; ZVE32F-LABEL: vnsrl_32_i32:
179 ; ZVE32F: # %bb.0: # %entry
180 ; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
181 ; ZVE32F-NEXT: vle32.v v8, (a0)
182 ; ZVE32F-NEXT: vmv.v.i v0, 1
183 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
184 ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
185 ; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t
186 ; ZVE32F-NEXT: vse32.v v9, (a1)
189 %0 = load <4 x i32>, ptr %in, align 4
190 %shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
191 store <2 x i32> %shuffle.i5, ptr %out, align 4
; Deinterleave, even lanes, float: loads <4 x float>, keeps elements 0 and 2
; and stores the resulting <2 x float>. The expected sequences match the i32
; variant: vnsrl.wi with shift amount 0 in the +v run, and a vslidedown.vi /
; vslideup.vi pair in the +zve32f run.
195 define void @vnsrl_0_float(ptr %in, ptr %out) {
196 ; V-LABEL: vnsrl_0_float:
197 ; V: # %bb.0: # %entry
198 ; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
199 ; V-NEXT: vle32.v v8, (a0)
200 ; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
201 ; V-NEXT: vnsrl.wi v8, v8, 0
202 ; V-NEXT: vse32.v v8, (a1)
205 ; ZVE32F-LABEL: vnsrl_0_float:
206 ; ZVE32F: # %bb.0: # %entry
207 ; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
208 ; ZVE32F-NEXT: vle32.v v8, (a0)
209 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
210 ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
211 ; ZVE32F-NEXT: vslideup.vi v8, v9, 1
212 ; ZVE32F-NEXT: vse32.v v8, (a1)
215 %0 = load <4 x float>, ptr %in, align 4
216 %shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
217 store <2 x float> %shuffle.i5, ptr %out, align 4
; Deinterleave, odd lanes, float: loads <4 x float>, keeps elements 1 and 3
; and stores the resulting <2 x float>. The expected sequences match the i32
; variant: vnsrl.wx with the shift amount in a0 for the +v run, and
; vmv.v.i / vslidedown.vi / masked vrgather.vi for the +zve32f run.
221 define void @vnsrl_32_float(ptr %in, ptr %out) {
222 ; V-LABEL: vnsrl_32_float:
223 ; V: # %bb.0: # %entry
224 ; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
225 ; V-NEXT: vle32.v v8, (a0)
227 ; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
228 ; V-NEXT: vnsrl.wx v8, v8, a0
229 ; V-NEXT: vse32.v v8, (a1)
232 ; ZVE32F-LABEL: vnsrl_32_float:
233 ; ZVE32F: # %bb.0: # %entry
234 ; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
235 ; ZVE32F-NEXT: vle32.v v8, (a0)
236 ; ZVE32F-NEXT: vmv.v.i v0, 1
237 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
238 ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
239 ; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t
240 ; ZVE32F-NEXT: vse32.v v9, (a1)
243 %0 = load <4 x float>, ptr %in, align 4
244 %shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 1, i32 3>
245 store <2 x float> %shuffle.i5, ptr %out, align 4
; Deinterleave, even lanes, i64: loads <4 x i64>, keeps elements 0 and 2 and
; stores the resulting <2 x i64>. Neither run expects a narrowing shift here.
; The +v run expects a vslidedown.vi / vslideup.vi pair; the +zve32f run
; expects plain scalar ld/sd pairs reading byte offsets 0 and 16.
249 define void @vnsrl_0_i64(ptr %in, ptr %out) {
250 ; V-LABEL: vnsrl_0_i64:
251 ; V: # %bb.0: # %entry
252 ; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
253 ; V-NEXT: vle64.v v8, (a0)
254 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
255 ; V-NEXT: vslidedown.vi v9, v8, 2
256 ; V-NEXT: vslideup.vi v8, v9, 1
257 ; V-NEXT: vse64.v v8, (a1)
260 ; ZVE32F-LABEL: vnsrl_0_i64:
261 ; ZVE32F: # %bb.0: # %entry
262 ; ZVE32F-NEXT: ld a2, 16(a0)
263 ; ZVE32F-NEXT: ld a0, 0(a0)
264 ; ZVE32F-NEXT: sd a2, 8(a1)
265 ; ZVE32F-NEXT: sd a0, 0(a1)
268 %0 = load <4 x i64>, ptr %in, align 8
269 %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
270 store <2 x i64> %shuffle.i5, ptr %out, align 8
; Deinterleave, odd lanes, i64: loads <4 x i64>, keeps elements 1 and 3 and
; stores the resulting <2 x i64>. Neither run expects a narrowing shift here.
; The +v run expects vmv.v.i into v0, vslidedown.vi and a masked vrgather.vi;
; the +zve32f run expects scalar ld/sd pairs reading byte offsets 8 and 24.
274 define void @vnsrl_64_i64(ptr %in, ptr %out) {
275 ; V-LABEL: vnsrl_64_i64:
276 ; V: # %bb.0: # %entry
277 ; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
278 ; V-NEXT: vle64.v v8, (a0)
279 ; V-NEXT: vmv.v.i v0, 1
280 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
281 ; V-NEXT: vslidedown.vi v9, v8, 2
282 ; V-NEXT: vrgather.vi v9, v8, 1, v0.t
283 ; V-NEXT: vse64.v v9, (a1)
286 ; ZVE32F-LABEL: vnsrl_64_i64:
287 ; ZVE32F: # %bb.0: # %entry
288 ; ZVE32F-NEXT: ld a2, 24(a0)
289 ; ZVE32F-NEXT: ld a0, 8(a0)
290 ; ZVE32F-NEXT: sd a2, 8(a1)
291 ; ZVE32F-NEXT: sd a0, 0(a1)
294 %0 = load <4 x i64>, ptr %in, align 8
295 %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
296 store <2 x i64> %shuffle.i5, ptr %out, align 8
; Deinterleave, even lanes, double: loads <4 x double>, keeps elements 0 and 2
; and stores the resulting <2 x double>. The expected sequences match the i64
; variant: a vslidedown.vi / vslideup.vi pair in the +v run, and scalar ld/sd
; pairs reading byte offsets 0 and 16 in the +zve32f run.
300 define void @vnsrl_0_double(ptr %in, ptr %out) {
301 ; V-LABEL: vnsrl_0_double:
302 ; V: # %bb.0: # %entry
303 ; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
304 ; V-NEXT: vle64.v v8, (a0)
305 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
306 ; V-NEXT: vslidedown.vi v9, v8, 2
307 ; V-NEXT: vslideup.vi v8, v9, 1
308 ; V-NEXT: vse64.v v8, (a1)
311 ; ZVE32F-LABEL: vnsrl_0_double:
312 ; ZVE32F: # %bb.0: # %entry
313 ; ZVE32F-NEXT: ld a2, 16(a0)
314 ; ZVE32F-NEXT: ld a0, 0(a0)
315 ; ZVE32F-NEXT: sd a2, 8(a1)
316 ; ZVE32F-NEXT: sd a0, 0(a1)
319 %0 = load <4 x double>, ptr %in, align 8
320 %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 0, i32 2>
321 store <2 x double> %shuffle.i5, ptr %out, align 8
; Deinterleave, odd lanes, double: loads <4 x double>, keeps elements 1 and 3
; and stores the resulting <2 x double>. The expected sequences match the i64
; variant: vmv.v.i / vslidedown.vi / masked vrgather.vi in the +v run, and
; scalar ld/sd pairs reading byte offsets 8 and 24 in the +zve32f run.
325 define void @vnsrl_64_double(ptr %in, ptr %out) {
326 ; V-LABEL: vnsrl_64_double:
327 ; V: # %bb.0: # %entry
328 ; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
329 ; V-NEXT: vle64.v v8, (a0)
330 ; V-NEXT: vmv.v.i v0, 1
331 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
332 ; V-NEXT: vslidedown.vi v9, v8, 2
333 ; V-NEXT: vrgather.vi v9, v8, 1, v0.t
334 ; V-NEXT: vse64.v v9, (a1)
337 ; ZVE32F-LABEL: vnsrl_64_double:
338 ; ZVE32F: # %bb.0: # %entry
339 ; ZVE32F-NEXT: ld a2, 24(a0)
340 ; ZVE32F-NEXT: ld a0, 8(a0)
341 ; ZVE32F-NEXT: sd a2, 8(a1)
342 ; ZVE32F-NEXT: sd a0, 0(a1)
345 %0 = load <4 x double>, ptr %in, align 8
346 %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>
347 store <2 x double> %shuffle.i5, ptr %out, align 8