; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \
; RUN: < %s | FileCheck %s

declare <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64>, <2 x i64>, i32, <2 x i1>, i32, i32)
declare <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32>, <4 x i32>, i32, <4 x i1>, i32, i32)
declare <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16>, <8 x i16>, i32, <8 x i1>, i32, i32)
declare <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8>, <16 x i8>, i32, <16 x i1>, i32, i32)

declare <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double>, <2 x double>, i32, <2 x i1>, i32, i32)
declare <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float>, <4 x float>, i32, <4 x i1>, i32, i32)
define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %head = insertelement <2 x i1> undef, i1 1, i32 0
  %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer

  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %allones, i32 %evla, i32 %evlb)
  ret <2 x i64> %v
}
define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2i64_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %head = insertelement <2 x i1> undef, i1 1, i32 0
  %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer

  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -5, <2 x i1> %allones, i32 %evla, i32 %evlb)
  ret <2 x i64> %v
}
define <2 x i64> @test_vp_splice_v2i64_masked(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
  ret <2 x i64> %v
}
define <4 x i32> @test_vp_splice_v4i32(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %head = insertelement <4 x i1> undef, i1 1, i32 0
  %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer

  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %allones, i32 %evla, i32 %evlb)
  ret <4 x i32> %v
}
define <4 x i32> @test_vp_splice_v4i32_negative_offset(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4i32_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %head = insertelement <4 x i1> undef, i1 1, i32 0
  %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer

  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -5, <4 x i1> %allones, i32 %evla, i32 %evlb)
  ret <4 x i32> %v
}
define <4 x i32> @test_vp_splice_v4i32_masked(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
  ret <4 x i32> %v
}
define <8 x i16> @test_vp_splice_v8i16(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %head = insertelement <8 x i1> undef, i1 1, i32 0
  %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer

  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %allones, i32 %evla, i32 %evlb)
  ret <8 x i16> %v
}
define <8 x i16> @test_vp_splice_v8i16_negative_offset(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v8i16_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %head = insertelement <8 x i1> undef, i1 1, i32 0
  %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer

  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> %allones, i32 %evla, i32 %evlb)
  ret <8 x i16> %v
}
define <8 x i16> @test_vp_splice_v8i16_masked(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v8i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb)
  ret <8 x i16> %v
}
define <16 x i8> @test_vp_splice_v16i8(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %head = insertelement <16 x i1> undef, i1 1, i32 0
  %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer

  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> %allones, i32 %evla, i32 %evlb)
  ret <16 x i8> %v
}
define <16 x i8> @test_vp_splice_v16i8_negative_offset(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v16i8_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %head = insertelement <16 x i1> undef, i1 1, i32 0
  %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer

  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> %allones, i32 %evla, i32 %evlb)
  ret <16 x i8> %v
}
define <16 x i8> @test_vp_splice_v16i8_masked(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v16i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> %mask, i32 %evla, i32 %evlb)
  ret <16 x i8> %v
}
define <2 x double> @test_vp_splice_v2f64(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %head = insertelement <2 x i1> undef, i1 1, i32 0
  %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer

  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> %allones, i32 %evla, i32 %evlb)
  ret <2 x double> %v
}
define <2 x double> @test_vp_splice_v2f64_negative_offset(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2f64_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %head = insertelement <2 x i1> undef, i1 1, i32 0
  %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer

  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 -5, <2 x i1> %allones, i32 %evla, i32 %evlb)
  ret <2 x double> %v
}
define <2 x double> @test_vp_splice_v2f64_masked(<2 x double> %va, <2 x double> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v2f64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
  ret <2 x double> %v
}
define <4 x float> @test_vp_splice_v4f32(<4 x float> %va, <4 x float> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %head = insertelement <4 x i1> undef, i1 1, i32 0
  %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer

  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 5, <4 x i1> %allones, i32 %evla, i32 %evlb)
  ret <4 x float> %v
}
define <4 x float> @test_vp_splice_v4f32_negative_offset(<4 x float> %va, <4 x float> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4f32_negative_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    ret
  %head = insertelement <4 x i1> undef, i1 1, i32 0
  %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer

  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 -5, <4 x i1> %allones, i32 %evla, i32 %evlb)
  ret <4 x float> %v
}
define <4 x float> @test_vp_splice_v4f32_masked(<4 x float> %va, <4 x float> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
; CHECK-LABEL: test_vp_splice_v4f32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
  ret <4 x float> %v
}