; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \
; RUN:   < %s | FileCheck %s
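
; Check lowering of fixed-length llvm.experimental.vp.splice: a positive
; immediate offset becomes vslidedown.vi + vslideup.vx, a negative offset
; becomes vslidedown.vx + vslideup.vi, and masked variants carry v0.t with a
; mask-undisturbed (mu) slideup.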
declare <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64>, <2 x i64>, i32, <2 x i1>, i32, i32)
declare <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32>, <4 x i32>, i32, <4 x i1>, i32, i32)
declare <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16>, <8 x i16>, i32, <8 x i1>, i32, i32)
declare <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8>, <16 x i8>, i32, <16 x i1>, i32, i32)

declare <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double>, <2 x double>, i32, <2 x i1>, i32, i32)
declare <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float>, <4 x float>, i32, <4 x i1>, i32, i32)

define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <2 x i64> %v
}

define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2i64_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <2 x i64> %v
}

define <2 x i64> @test_vp_splice_v2i64_masked(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
  ret <2 x i64> %v
}

define <4 x i32> @test_vp_splice_v4i32(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <4 x i32> %v
}

define <4 x i32> @test_vp_splice_v4i32_negative_offset(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4i32_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <4 x i32> %v
}

define <4 x i32> @test_vp_splice_v4i32_masked(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
  ret <4 x i32> %v
}

define <8 x i16> @test_vp_splice_v8i16(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <8 x i16> %v
}

define <8 x i16> @test_vp_splice_v8i16_negative_offset(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v8i16_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <8 x i16> %v
}

define <8 x i16> @test_vp_splice_v8i16_masked(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v8i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb)
  ret <8 x i16> %v
}

define <16 x i8> @test_vp_splice_v16i8(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <16 x i8> %v
}

define <16 x i8> @test_vp_splice_v16i8_negative_offset(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v16i8_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <16 x i8> %v
}

define <16 x i8> @test_vp_splice_v16i8_masked(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v16i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> %mask, i32 %evla, i32 %evlb)
  ret <16 x i8> %v
}

define <2 x double> @test_vp_splice_v2f64(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <2 x double> %v
}

define <2 x double> @test_vp_splice_v2f64_negative_offset(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2f64_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <2 x double> %v
}

define <2 x double> @test_vp_splice_v2f64_masked(<2 x double> %va, <2 x double> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2f64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
  ret <2 x double> %v
}

define <4 x float> @test_vp_splice_v4f32(<4 x float> %va, <4 x float> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <4 x float> %v
}

define <4 x float> @test_vp_splice_v4f32_negative_offset(<4 x float> %va, <4 x float> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4f32_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <4 x float> %v
}

define <4 x float> @test_vp_splice_v4f32_masked(<4 x float> %va, <4 x float> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4f32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
  ret <4 x float> %v
}