; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b \
; RUN:   -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefixes=CHECK,V
; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+zve32f,+zvfh,+zvl256b \
; RUN:   -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefixes=CHECK,ZVE32F

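; Check that deinterleaving shuffles (even/odd element extraction via
; shufflevector) lower to the vnsrl.wi/vnsrl.wx narrowing shifts when a wider
; element type is available.  -lower-interleaved-accesses=false keeps the
; shuffles intact so SelectionDAG has to match them directly.
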
define void @vnsrl_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

define void @vnsrl_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 8
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

define void @vnsrl_0_i16(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i16:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; V-NEXT:    vle16.v v8, (a0)
; V-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 0
; V-NEXT:    vse16.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_i16:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVE32F-NEXT:    vle16.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; ZVE32F-NEXT:    vse16.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <8 x i16>, ptr %in, align 2
  %shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  store <4 x i16> %shuffle.i5, ptr %out, align 2
  ret void
}

define void @vnsrl_16_i16(ptr %in, ptr %out) {
; V-LABEL: vnsrl_16_i16:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; V-NEXT:    vle16.v v8, (a0)
; V-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 16
; V-NEXT:    vse16.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_16_i16:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVE32F-NEXT:    vle16.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVE32F-NEXT:    vnsrl.wi v8, v8, 16
; ZVE32F-NEXT:    vse16.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <8 x i16>, ptr %in, align 2
  %shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  store <4 x i16> %shuffle.i5, ptr %out, align 2
  ret void
}

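; f16 deinterleaves should take the same integer vnsrl path as i16; +zvfh in
; the RUN lines makes the half vectors legal here.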
define void @vnsrl_0_half(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_half:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; V-NEXT:    vle16.v v8, (a0)
; V-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 0
; V-NEXT:    vse16.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_half:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVE32F-NEXT:    vle16.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; ZVE32F-NEXT:    vse16.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <8 x half>, ptr %in, align 2
  %shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  store <4 x half> %shuffle.i5, ptr %out, align 2
  ret void
}

define void @vnsrl_16_half(ptr %in, ptr %out) {
; V-LABEL: vnsrl_16_half:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; V-NEXT:    vle16.v v8, (a0)
; V-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 16
; V-NEXT:    vse16.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_16_half:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVE32F-NEXT:    vle16.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVE32F-NEXT:    vnsrl.wi v8, v8, 16
; ZVE32F-NEXT:    vse16.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <8 x half>, ptr %in, align 2
  %shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  store <4 x half> %shuffle.i5, ptr %out, align 2
  ret void
}

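; For e32 elements the V configuration can still view the source at e64 and
; narrow with vnsrl.  zve32f (ELEN=32) has no wider element type, so it falls
; back to slide/vrgather sequences.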
define void @vnsrl_0_i32(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i32:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; V-NEXT:    vle32.v v8, (a0)
; V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 0
; V-NEXT:    vse32.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_i32:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT:    vle32.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; ZVE32F-NEXT:    vse32.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <4 x i32>, ptr %in, align 4
  %shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
  store <2 x i32> %shuffle.i5, ptr %out, align 4
  ret void
}

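; A shift amount of 32 does not fit vnsrl.wi's 5-bit immediate, so it is
; materialized in a scalar register and vnsrl.wx is used instead.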
define void @vnsrl_32_i32(ptr %in, ptr %out) {
; V-LABEL: vnsrl_32_i32:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; V-NEXT:    vle32.v v8, (a0)
; V-NEXT:    li a0, 32
; V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; V-NEXT:    vnsrl.wx v8, v8, a0
; V-NEXT:    vse32.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_32_i32:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT:    vle32.v v8, (a0)
; ZVE32F-NEXT:    vmv.v.i v0, 1
; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; ZVE32F-NEXT:    vrgather.vi v9, v8, 1, v0.t
; ZVE32F-NEXT:    vse32.v v9, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <4 x i32>, ptr %in, align 4
  %shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
  store <2 x i32> %shuffle.i5, ptr %out, align 4
  ret void
}

define void @vnsrl_0_float(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_float:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; V-NEXT:    vle32.v v8, (a0)
; V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 0
; V-NEXT:    vse32.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_float:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT:    vle32.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; ZVE32F-NEXT:    vse32.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <4 x float>, ptr %in, align 4
  %shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  store <2 x float> %shuffle.i5, ptr %out, align 4
  ret void
}

define void @vnsrl_32_float(ptr %in, ptr %out) {
; V-LABEL: vnsrl_32_float:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; V-NEXT:    vle32.v v8, (a0)
; V-NEXT:    li a0, 32
; V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; V-NEXT:    vnsrl.wx v8, v8, a0
; V-NEXT:    vse32.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_32_float:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT:    vle32.v v8, (a0)
; ZVE32F-NEXT:    vmv.v.i v0, 1
; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; ZVE32F-NEXT:    vrgather.vi v9, v8, 1, v0.t
; ZVE32F-NEXT:    vse32.v v9, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <4 x float>, ptr %in, align 4
  %shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  store <2 x float> %shuffle.i5, ptr %out, align 4
  ret void
}

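; e64 is already the widest element type under +v, so there is nothing wider
; to narrow from; the extract is done with slides or vrgather.  zve32f has no
; 64-bit elements at all and goes through scalar loads and stores.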
define void @vnsrl_0_i64(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i64:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT:    vle64.v v8, (a0)
; V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; V-NEXT:    vslidedown.vi v9, v8, 2
; V-NEXT:    vslideup.vi v8, v9, 1
; V-NEXT:    vse64.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_i64:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    ld a2, 0(a0)
; ZVE32F-NEXT:    ld a0, 16(a0)
; ZVE32F-NEXT:    sd a2, 0(a1)
; ZVE32F-NEXT:    sd a0, 8(a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <4 x i64>, ptr %in, align 8
  %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
  store <2 x i64> %shuffle.i5, ptr %out, align 8
  ret void
}

define void @vnsrl_64_i64(ptr %in, ptr %out) {
; V-LABEL: vnsrl_64_i64:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT:    vle64.v v8, (a0)
; V-NEXT:    vmv.v.i v0, 1
; V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; V-NEXT:    vslidedown.vi v9, v8, 2
; V-NEXT:    vrgather.vi v9, v8, 1, v0.t
; V-NEXT:    vse64.v v9, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_64_i64:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    ld a2, 8(a0)
; ZVE32F-NEXT:    ld a0, 24(a0)
; ZVE32F-NEXT:    sd a2, 0(a1)
; ZVE32F-NEXT:    sd a0, 8(a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <4 x i64>, ptr %in, align 8
  %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
  store <2 x i64> %shuffle.i5, ptr %out, align 8
  ret void
}

define void @vnsrl_0_double(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_double:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT:    vle64.v v8, (a0)
; V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; V-NEXT:    vslidedown.vi v9, v8, 2
; V-NEXT:    vslideup.vi v8, v9, 1
; V-NEXT:    vse64.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_double:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    ld a2, 0(a0)
; ZVE32F-NEXT:    ld a0, 16(a0)
; ZVE32F-NEXT:    sd a2, 0(a1)
; ZVE32F-NEXT:    sd a0, 8(a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <4 x double>, ptr %in, align 8
  %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  store <2 x double> %shuffle.i5, ptr %out, align 8
  ret void
}

define void @vnsrl_64_double(ptr %in, ptr %out) {
; V-LABEL: vnsrl_64_double:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT:    vle64.v v8, (a0)
; V-NEXT:    vmv.v.i v0, 1
; V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; V-NEXT:    vslidedown.vi v9, v8, 2
; V-NEXT:    vrgather.vi v9, v8, 1, v0.t
; V-NEXT:    vse64.v v9, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_64_double:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    ld a2, 8(a0)
; ZVE32F-NEXT:    ld a0, 24(a0)
; ZVE32F-NEXT:    sd a2, 0(a1)
; ZVE32F-NEXT:    sd a0, 8(a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <4 x double>, ptr %in, align 8
  %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  store <2 x double> %shuffle.i5, ptr %out, align 8
  ret void
}

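; Undef lanes in an otherwise even/odd mask should still match the vnsrl
; pattern (see the undef3 and negative cases below for masks that must not).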
define void @vnsrl_0_i8_undef(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8_undef:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

define void @vnsrl_0_i8_undef2(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8_undef2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12, i32 14>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; TODO: Allow an undef initial element
define void @vnsrl_0_i8_undef3(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8_undef3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, -32
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    lui a0, 24640
; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    addi a0, a0, 6
; CHECK-NEXT:    vadd.vv v9, v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vadd.vi v9, v9, -8
; CHECK-NEXT:    vrgather.vv v11, v8, v10
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
; CHECK-NEXT:    vrgather.vv v11, v8, v9, v0.t
; CHECK-NEXT:    vse8.v v11, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 6, i32 10, i32 12, i32 14>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Not a vnsrl (checking for a prior pattern matching bug)
define void @vnsrl_0_i8_undef_negative(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8_undef_negative:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI17_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    li a0, 48
; CHECK-NEXT:    vadd.vv v9, v9, v9
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vadd.vi v9, v9, -8
; CHECK-NEXT:    vrgather.vv v11, v8, v10
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
; CHECK-NEXT:    vrgather.vv v11, v8, v9, v0.t
; CHECK-NEXT:    vse8.v v11, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 1>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

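; Narrower single-source cases: an <8 x i8> deinterleaved to <4 x i8>.  The
; zve32f configuration has no mf8, so its narrowed operation stays at mf4.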
define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i8_single_src:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; V-NEXT:    vle8.v v8, (a0)
; V-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 0
; V-NEXT:    vse8.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_i8_single_src:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT:    vle8.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; ZVE32F-NEXT:    vse8.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <8 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  store <4 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

define void @vnsrl_8_i8_single_src(ptr %in, ptr %out) {
; V-LABEL: vnsrl_8_i8_single_src:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; V-NEXT:    vle8.v v8, (a0)
; V-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 8
; V-NEXT:    vse8.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_8_i8_single_src:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT:    vle8.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32F-NEXT:    vnsrl.wi v8, v8, 8
; ZVE32F-NEXT:    vse8.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <8 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  store <4 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

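; The deinterleaved values are used (stored) at the original wide type, with
; the upper lanes undef.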
define void @vnsrl_0_i8_single_wideuse(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i8_single_wideuse:
; V:       # %bb.0: # %entry
; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; V-NEXT:    vle8.v v8, (a0)
; V-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
; V-NEXT:    vnsrl.wi v8, v8, 0
; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; V-NEXT:    vse8.v v8, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_i8_single_wideuse:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT:    vle8.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT:    vse8.v v8, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <8 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Can't match the m8 result type as the source would have to be m16 which
; isn't a legal type.
define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i32_single_src_m8:
; V:       # %bb.0: # %entry
; V-NEXT:    li a2, 64
; V-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; V-NEXT:    vle32.v v8, (a0)
; V-NEXT:    lui a0, 341
; V-NEXT:    addiw a0, a0, 1365
; V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; V-NEXT:    vmv.s.x v16, a0
; V-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; V-NEXT:    vcompress.vm v24, v8, v16
; V-NEXT:    vse32.v v24, (a1)
; V-NEXT:    ret
;
; ZVE32F-LABEL: vnsrl_0_i32_single_src_m8:
; ZVE32F:       # %bb.0: # %entry
; ZVE32F-NEXT:    li a2, 64
; ZVE32F-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; ZVE32F-NEXT:    vle32.v v8, (a0)
; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; ZVE32F-NEXT:    vmv.v.i v16, 0
; ZVE32F-NEXT:    lui a0, 341
; ZVE32F-NEXT:    addi a0, a0, 1365
; ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; ZVE32F-NEXT:    vmv.s.x v16, a0
; ZVE32F-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; ZVE32F-NEXT:    vcompress.vm v24, v8, v16
; ZVE32F-NEXT:    vse32.v v24, (a1)
; ZVE32F-NEXT:    ret
entry:
  %0 = load <64 x i32>, ptr %in, align 4
  %shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <64 x i32> %shuffle.i5, ptr %out, align 4
  ret void
}