1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
4 declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
5 declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
6 declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
7 declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
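; The tests below check that a vp.merge (or riscv.vmerge) whose true operand is
; a VL-predicated or RVV intrinsic operation is, where legal, folded into that
; operation as a masked, tail-undisturbed (tu, mu) instruction that writes
; directly into the merge's passthru register.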
; Test binary operator with vp.merge and vp.add.
10 declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
11 define <vscale x 2 x i32> @vpmerge_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
12 ; CHECK-LABEL: vpmerge_vpadd:
14 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
15 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
17 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
18 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
19 ret <vscale x 2 x i32> %b
; Test that the glued node of the merge is not deleted.
23 declare <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, metadata, <vscale x 2 x i1>, i32)
24 define <vscale x 2 x i32> @vpmerge_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
25 ; CHECK-LABEL: vpmerge_vpadd2:
27 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
28 ; CHECK-NEXT: vmseq.vv v0, v9, v10
29 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu
30 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
32 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
33 %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl)
34 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
35 ret <vscale x 2 x i32> %b
; Test vp.merge with an all-ones mask.
39 define <vscale x 2 x i32> @vpmerge_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
40 ; CHECK-LABEL: vpmerge_vpadd3:
42 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
43 ; CHECK-NEXT: vadd.vv v8, v9, v10
45 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
46 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
47 ret <vscale x 2 x i32> %b
50 ; Test float binary operator with vp.merge and vp.fadd.
51 declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
52 define <vscale x 2 x float> @vpmerge_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
53 ; CHECK-LABEL: vpmerge_vpfadd:
55 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
56 ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
58 %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
59 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
60 ret <vscale x 2 x float> %b
63 ; Test for binary operator with specific EEW by riscv.vrgatherei16.
64 declare <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)
65 define <vscale x 2 x i32> @vpmerge_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
66 ; CHECK-LABEL: vpmerge_vrgatherei16:
68 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
69 ; CHECK-NEXT: vrgatherei16.vv v8, v9, v10
71 %1 = zext i32 %vl to i64
72 %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1)
73 %3 = tail call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl)
74 ret <vscale x 2 x i32> %2
77 ; Test conversion by fptosi.
78 declare <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
79 define <vscale x 2 x i16> @vpmerge_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
80 ; CHECK-LABEL: vpmerge_vpfptosi:
82 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
83 ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
85 %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
86 %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
87 ret <vscale x 2 x i16> %b
90 ; Test conversion by sitofp.
91 declare <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
92 define <vscale x 2 x float> @vpmerge_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
93 ; CHECK-LABEL: vpmerge_vpsitofp:
95 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
96 ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t
98 %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
99 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
100 ret <vscale x 2 x float> %b
103 ; Test integer extension by vp.zext.
104 declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
105 define <vscale x 2 x i32> @vpmerge_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
106 ; CHECK-LABEL: vpmerge_vpzext:
108 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
109 ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t
111 %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
112 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
113 ret <vscale x 2 x i32> %b
116 ; Test integer truncation by vp.trunc.
117 declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
118 define <vscale x 2 x i32> @vpmerge_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
119 ; CHECK-LABEL: vpmerge_vptrunc:
121 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
122 ; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
124 %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
125 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
126 ret <vscale x 2 x i32> %b
; Test float extension by vp.fpext.
130 declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
131 define <vscale x 2 x double> @vpmerge_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
132 ; CHECK-LABEL: vpmerge_vpfpext:
134 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
135 ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
137 %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
138 %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
139 ret <vscale x 2 x double> %b
; Test float truncation by vp.fptrunc.
143 declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
144 define <vscale x 2 x float> @vpmerge_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
145 ; CHECK-LABEL: vpmerge_vpfptrunc:
147 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
148 ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
150 %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
151 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
152 ret <vscale x 2 x float> %b
155 ; Test load operation by vp.load.
156 declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)
157 define <vscale x 2 x i32> @vpmerge_vpload(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
158 ; CHECK-LABEL: vpmerge_vpload:
160 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
161 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
163 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
164 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
165 ret <vscale x 2 x i32> %b
; Test a result that has both a chain and a glued node.
169 define <vscale x 2 x i32> @vpmerge_vpload2(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
170 ; CHECK-LABEL: vpmerge_vpload2:
172 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
173 ; CHECK-NEXT: vmseq.vv v0, v9, v10
174 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu
175 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
177 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
178 %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl)
179 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
180 ret <vscale x 2 x i32> %b
; Test that the chain output of the true operand of merge.vvm is preserved.
184 define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
185 ; CHECK-LABEL: vpmerge_vpload_store:
187 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
188 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
189 ; CHECK-NEXT: vs1r.v v8, (a0)
191 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
192 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
193 store <vscale x 2 x i32> %b, ptr %p
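; Test fault-only-first load by riscv.vleff.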
197 declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, ptr, i64)
198 define <vscale x 2 x i32> @vpmerge_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
199 ; CHECK-LABEL: vpmerge_vleff:
201 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
202 ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
204 %1 = zext i32 %vl to i64
205 %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
206 %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
207 %c = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl)
208 ret <vscale x 2 x i32> %c
211 ; Test strided load by riscv.vlse
212 declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32>, ptr, i64, i64)
213 define <vscale x 2 x i32> @vpmerge_vlse(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
214 ; CHECK-LABEL: vpmerge_vlse:
216 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu
217 ; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
219 %1 = zext i32 %vl to i64
220 %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %s, i64 %1)
221 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
222 ret <vscale x 2 x i32> %b
225 ; Test indexed load by riscv.vluxei
226 declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32>, ptr, <vscale x 2 x i64>, i64)
227 define <vscale x 2 x i32> @vpmerge_vluxei(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
228 ; CHECK-LABEL: vpmerge_vluxei:
230 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu
231 ; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t
233 %1 = zext i32 %vl to i64
234 %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, ptr %p, <vscale x 2 x i64> %idx, i64 %1)
235 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
236 ret <vscale x 2 x i32> %b
239 ; Test vector index by riscv.vid
240 declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32>, i64)
241 define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) {
242 ; CHECK-LABEL: vpmerge_vid:
244 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
245 ; CHECK-NEXT: vid.v v8, v0.t
247 %1 = zext i32 %vl to i64
248 %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1)
249 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
250 ret <vscale x 2 x i32> %b
; Test that VIOTA_M and VMERGE_VVM are not combined without an all-ones mask.
254 declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64)
255 define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
256 ; CHECK-LABEL: vpmerge_viota:
258 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
259 ; CHECK-NEXT: viota.m v10, v9
260 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
261 ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
263 %1 = zext i32 %vl to i64
264 %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
265 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
266 ret <vscale x 2 x i32> %b
; Test that VIOTA_M and VMERGE_VVM are combined with an all-ones mask.
270 define <vscale x 2 x i32> @vpmerge_viota2(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
271 ; CHECK-LABEL: vpmerge_viota2:
273 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
274 ; CHECK-NEXT: viota.m v8, v0
276 %1 = zext i32 %vl to i64
277 %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
278 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
279 ret <vscale x 2 x i32> %b
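; Test floating-point classify by riscv.vfclass.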
283 declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64)
define <vscale x 2 x i32> @vpmerge_vfclass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vfclass:
287 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
288 ; CHECK-NEXT: vfclass.v v8, v9, v0.t
290 %1 = zext i32 %vl to i64
291 %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1)
292 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
293 ret <vscale x 2 x i32> %b
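; Test floating-point square root by riscv.vfsqrt.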
297 declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64)
298 define <vscale x 2 x float> @vpmerge_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
299 ; CHECK-LABEL: vpmerge_vfsqrt:
301 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
302 ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t
304 %1 = zext i32 %vl to i64
305 %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1)
306 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
307 ret <vscale x 2 x float> %b
310 ; Test reciprocal operation by riscv.vfrec7
311 declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64)
312 define <vscale x 2 x float> @vpmerge_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
313 ; CHECK-LABEL: vpmerge_vfrec7:
315 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
316 ; CHECK-NEXT: vfrec7.v v8, v9, v0.t
318 %1 = zext i32 %vl to i64
319 %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1)
320 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
321 ret <vscale x 2 x float> %b
324 ; Test vector operations with VLMAX vector length.
326 ; Test binary operator with vp.merge and add.
327 define <vscale x 2 x i32> @vpmerge_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
328 ; CHECK-LABEL: vpmerge_add:
330 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
331 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
333 %a = add <vscale x 2 x i32> %x, %y
334 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
335 ret <vscale x 2 x i32> %b
338 ; Test binary operator with vp.merge and fadd.
339 define <vscale x 2 x float> @vpmerge_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
340 ; CHECK-LABEL: vpmerge_fadd:
342 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
343 ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
345 %a = fadd <vscale x 2 x float> %x, %y
346 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
347 ret <vscale x 2 x float> %b
350 ; This shouldn't be folded because we need to preserve exceptions with
351 ; "fpexcept.strict" exception behaviour, and masking may hide them.
352 define <vscale x 2 x float> @vpmerge_constrained_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) strictfp {
353 ; CHECK-LABEL: vpmerge_constrained_fadd:
355 ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
356 ; CHECK-NEXT: vfadd.vv v9, v9, v10
357 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
358 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
360 %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
361 %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl) strictfp
362 ret <vscale x 2 x float> %b
364 declare <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata)
365 declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i64)
367 ; This shouldn't be folded because we need to preserve exceptions with
368 ; "fpexcept.strict" exception behaviour, and masking may hide them.
369 define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m) strictfp {
370 ; CHECK-LABEL: vpmerge_constrained_fadd_vlmax:
372 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
373 ; CHECK-NEXT: vfadd.vv v9, v9, v10
374 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
375 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
377 %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
378 %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1) strictfp
379 ret <vscale x 2 x float> %b
382 ; Test conversion by fptosi.
383 define <vscale x 2 x i16> @vpmerge_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
384 ; CHECK-LABEL: vpmerge_fptosi:
386 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
387 ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
389 %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16>
390 %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
391 ret <vscale x 2 x i16> %b
394 ; Test conversion by sitofp.
395 define <vscale x 2 x float> @vpmerge_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
396 ; CHECK-LABEL: vpmerge_sitofp:
398 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
399 ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t
401 %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float>
402 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
403 ret <vscale x 2 x float> %b
406 ; Test float extension by fpext.
407 define <vscale x 2 x double> @vpmerge_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
408 ; CHECK-LABEL: vpmerge_fpext:
410 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
411 ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
413 %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double>
414 %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
415 ret <vscale x 2 x double> %b
418 ; Test float truncation by fptrunc.
419 define <vscale x 2 x float> @vpmerge_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
420 ; CHECK-LABEL: vpmerge_fptrunc:
422 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
423 ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
425 %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
426 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
427 ret <vscale x 2 x float> %b
430 ; Test integer extension by zext.
431 define <vscale x 2 x i32> @vpmerge_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
432 ; CHECK-LABEL: vpmerge_zext:
434 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
435 ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t
437 %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32>
438 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
439 ret <vscale x 2 x i32> %b
442 ; Test integer truncation by trunc.
443 define <vscale x 2 x i32> @vpmerge_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
444 ; CHECK-LABEL: vpmerge_trunc:
446 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
447 ; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
449 %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
450 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
451 ret <vscale x 2 x i32> %b
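; The same folds are exercised below with vp.select. Because vp.select leaves
; the tail undefined, the folded instructions are tail agnostic (ta, mu) rather
; than tail undisturbed.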
454 declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
455 declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
456 declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
457 declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
; Test binary operator with vp.select and vp.add.
460 define <vscale x 2 x i32> @vpselect_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
461 ; CHECK-LABEL: vpselect_vpadd:
463 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
464 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
466 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
467 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
468 ret <vscale x 2 x i32> %b
; Test that the glued node of the select is not deleted.
472 define <vscale x 2 x i32> @vpselect_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
473 ; CHECK-LABEL: vpselect_vpadd2:
475 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
476 ; CHECK-NEXT: vmseq.vv v0, v9, v10
477 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
479 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
480 %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl)
481 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
482 ret <vscale x 2 x i32> %b
; Test vp.select with an all-ones mask.
486 define <vscale x 2 x i32> @vpselect_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
487 ; CHECK-LABEL: vpselect_vpadd3:
489 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
490 ; CHECK-NEXT: vadd.vv v8, v9, v10
492 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
493 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
494 ret <vscale x 2 x i32> %b
497 ; Test float binary operator with vp.select and vp.fadd.
498 define <vscale x 2 x float> @vpselect_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
499 ; CHECK-LABEL: vpselect_vpfadd:
501 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
502 ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
504 %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
505 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
506 ret <vscale x 2 x float> %b
509 ; Test for binary operator with specific EEW by riscv.vrgatherei16.
510 define <vscale x 2 x i32> @vpselect_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
511 ; CHECK-LABEL: vpselect_vrgatherei16:
513 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
514 ; CHECK-NEXT: vrgatherei16.vv v8, v9, v10
516 %1 = zext i32 %vl to i64
517 %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1)
518 %3 = tail call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl)
519 ret <vscale x 2 x i32> %2
522 ; Test conversion by fptosi.
523 define <vscale x 2 x i16> @vpselect_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
524 ; CHECK-LABEL: vpselect_vpfptosi:
526 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
527 ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
529 %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
530 %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
531 ret <vscale x 2 x i16> %b
534 ; Test conversion by sitofp.
535 define <vscale x 2 x float> @vpselect_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
536 ; CHECK-LABEL: vpselect_vpsitofp:
538 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
539 ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t
541 %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
542 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
543 ret <vscale x 2 x float> %b
546 ; Test integer extension by vp.zext.
547 define <vscale x 2 x i32> @vpselect_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
548 ; CHECK-LABEL: vpselect_vpzext:
550 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
551 ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t
553 %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
554 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
555 ret <vscale x 2 x i32> %b
558 ; Test integer truncation by vp.trunc.
559 define <vscale x 2 x i32> @vpselect_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
560 ; CHECK-LABEL: vpselect_vptrunc:
562 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
563 ; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
565 %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
566 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
567 ret <vscale x 2 x i32> %b
; Test float extension by vp.fpext.
571 define <vscale x 2 x double> @vpselect_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
572 ; CHECK-LABEL: vpselect_vpfpext:
574 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
575 ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
577 %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
578 %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
579 ret <vscale x 2 x double> %b
; Test float truncation by vp.fptrunc.
583 define <vscale x 2 x float> @vpselect_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
584 ; CHECK-LABEL: vpselect_vpfptrunc:
586 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
587 ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
589 %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
590 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
591 ret <vscale x 2 x float> %b
594 ; Test load operation by vp.load.
595 define <vscale x 2 x i32> @vpselect_vpload(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
596 ; CHECK-LABEL: vpselect_vpload:
598 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
599 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
601 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
602 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
603 ret <vscale x 2 x i32> %b
; Test a result that has both a chain and a glued node.
607 define <vscale x 2 x i32> @vpselect_vpload2(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
608 ; CHECK-LABEL: vpselect_vpload2:
610 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
611 ; CHECK-NEXT: vmseq.vv v0, v9, v10
612 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
614 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
615 %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl)
616 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
617 ret <vscale x 2 x i32> %b
; Test that the chain output of the true operand of select.vvm is preserved.
621 define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
622 ; CHECK-LABEL: vpselect_vpload_store:
624 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
625 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
626 ; CHECK-NEXT: vs1r.v v8, (a0)
628 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
629 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
630 store <vscale x 2 x i32> %b, ptr %p
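; Test fault-only-first load by riscv.vleff.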
634 define <vscale x 2 x i32> @vpselect_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
635 ; CHECK-LABEL: vpselect_vleff:
637 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
638 ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
640 %1 = zext i32 %vl to i64
641 %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
642 %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
643 %c = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl)
644 ret <vscale x 2 x i32> %c
647 ; Test strided load by riscv.vlse
648 define <vscale x 2 x i32> @vpselect_vlse(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
649 ; CHECK-LABEL: vpselect_vlse:
651 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
652 ; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
654 %1 = zext i32 %vl to i64
655 %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %s, i64 %1)
656 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
657 ret <vscale x 2 x i32> %b
660 ; Test indexed load by riscv.vluxei
661 define <vscale x 2 x i32> @vpselect_vluxei(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
662 ; CHECK-LABEL: vpselect_vluxei:
664 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
665 ; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t
667 %1 = zext i32 %vl to i64
668 %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, ptr %p, <vscale x 2 x i64> %idx, i64 %1)
669 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
670 ret <vscale x 2 x i32> %b
673 ; Test vector index by riscv.vid
674 define <vscale x 2 x i32> @vpselect_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) {
675 ; CHECK-LABEL: vpselect_vid:
677 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
678 ; CHECK-NEXT: vid.v v8, v0.t
680 %1 = zext i32 %vl to i64
681 %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1)
682 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
683 ret <vscale x 2 x i32> %b
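; Test not combining VIOTA_M and VMERGE_VVM without an all-ones mask.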
687 define <vscale x 2 x i32> @vpselect_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
688 ; CHECK-LABEL: vpselect_viota:
690 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
691 ; CHECK-NEXT: viota.m v10, v9
692 ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
694 %1 = zext i32 %vl to i64
695 %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
696 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
697 ret <vscale x 2 x i32> %b
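; Test floating-point classify by riscv.vfclass.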
define <vscale x 2 x i32> @vpselect_vfclass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vfclass:
704 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
705 ; CHECK-NEXT: vfclass.v v8, v9, v0.t
707 %1 = zext i32 %vl to i64
708 %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1)
709 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
710 ret <vscale x 2 x i32> %b
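; Test floating-point square root by riscv.vfsqrt.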
714 define <vscale x 2 x float> @vpselect_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
715 ; CHECK-LABEL: vpselect_vfsqrt:
717 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
718 ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t
720 %1 = zext i32 %vl to i64
721 %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1)
722 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
723 ret <vscale x 2 x float> %b
726 ; Test reciprocal operation by riscv.vfrec7
727 define <vscale x 2 x float> @vpselect_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
728 ; CHECK-LABEL: vpselect_vfrec7:
730 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
731 ; CHECK-NEXT: vfrec7.v v8, v9, v0.t
733 %1 = zext i32 %vl to i64
734 %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1)
735 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
736 ret <vscale x 2 x float> %b
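; Test vector slides by riscv.vslideup, riscv.vslidedown, riscv.vslide1up and
; riscv.vslide1down.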
740 declare <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64, i64, i64)
741 define <vscale x 2 x i32> @vpselect_vslideup(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i64 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
742 ; CHECK-LABEL: vpselect_vslideup:
744 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
745 ; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
747 %1 = zext i32 %vl to i64
748 %a = call <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i64 %x, i64 %1, i64 0)
749 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
750 ret <vscale x 2 x i32> %b
753 declare <vscale x 2 x i32> @llvm.riscv.vslidedown.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64, i64, i64)
754 define <vscale x 2 x i32> @vpselect_vslidedown(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i64 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
755 ; CHECK-LABEL: vpselect_vslidedown:
757 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
758 ; CHECK-NEXT: vslidedown.vx v8, v9, a0, v0.t
760 %1 = zext i32 %vl to i64
761 %a = call <vscale x 2 x i32> @llvm.riscv.vslidedown.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i64 %x, i64 %1, i64 0)
762 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
763 ret <vscale x 2 x i32> %b
766 declare <vscale x 2 x i32> @llvm.riscv.vslide1up.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64)
767 define <vscale x 2 x i32> @vpselect_vslide1up(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
768 ; CHECK-LABEL: vpselect_vslide1up:
770 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
771 ; CHECK-NEXT: vslide1up.vx v8, v9, a0, v0.t
773 %1 = zext i32 %vl to i64
774 %a = call <vscale x 2 x i32> @llvm.riscv.vslide1up.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1)
775 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
776 ret <vscale x 2 x i32> %b
779 declare <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64)
780 define <vscale x 2 x i32> @vpselect_vslide1down(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
781 ; CHECK-LABEL: vpselect_vslide1down:
783 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
784 ; CHECK-NEXT: vslide1down.vx v8, v9, a0, v0.t
786 %1 = zext i32 %vl to i64
787 %a = call <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1)
788 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
789 ret <vscale x 2 x i32> %b
792 ; Test vector operations with VLMAX vector length.
794 ; Test binary operator with vp.select and add.
795 define <vscale x 2 x i32> @vpselect_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
796 ; CHECK-LABEL: vpselect_add:
798 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
799 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
801 %a = add <vscale x 2 x i32> %x, %y
802 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
803 ret <vscale x 2 x i32> %b
806 ; Test binary operator with vp.select and fadd.
807 define <vscale x 2 x float> @vpselect_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
808 ; CHECK-LABEL: vpselect_fadd:
810 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
811 ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
813 %a = fadd <vscale x 2 x float> %x, %y
814 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
815 ret <vscale x 2 x float> %b
818 ; Test conversion by fptosi.
819 define <vscale x 2 x i16> @vpselect_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
820 ; CHECK-LABEL: vpselect_fptosi:
822 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
823 ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
825 %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16>
826 %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
827 ret <vscale x 2 x i16> %b
830 ; Test conversion by sitofp.
831 define <vscale x 2 x float> @vpselect_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
832 ; CHECK-LABEL: vpselect_sitofp:
834 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
835 ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t
837 %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float>
838 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
839 ret <vscale x 2 x float> %b
842 ; Test float extension by fpext.
843 define <vscale x 2 x double> @vpselect_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
844 ; CHECK-LABEL: vpselect_fpext:
846 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
847 ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
849 %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double>
850 %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
851 ret <vscale x 2 x double> %b
854 ; Test float truncation by fptrunc.
855 define <vscale x 2 x float> @vpselect_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
856 ; CHECK-LABEL: vpselect_fptrunc:
858 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
859 ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
861 %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
862 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
863 ret <vscale x 2 x float> %b
866 ; Test integer extension by zext.
867 define <vscale x 2 x i32> @vpselect_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
868 ; CHECK-LABEL: vpselect_zext:
870 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
871 ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t
873 %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32>
874 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
875 ret <vscale x 2 x i32> %b
878 ; Test integer truncation by trunc.
879 define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
880 ; CHECK-LABEL: vpselect_trunc:
882 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
883 ; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
885 %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
886 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
887 ret <vscale x 2 x i32> %b
; Folding this would create a loop in the DAG because the chain from the VLE is
891 ; used by the vssubu.
892 define void @test_dag_loop() {
893 ; CHECK-LABEL: test_dag_loop:
894 ; CHECK: # %bb.0: # %entry
895 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
896 ; CHECK-NEXT: vmclr.m v0
897 ; CHECK-NEXT: vmv.v.i v8, 0
898 ; CHECK-NEXT: vmv.v.i v12, 0
899 ; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu
900 ; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t
901 ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
902 ; CHECK-NEXT: vmseq.vv v0, v12, v8
903 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
904 ; CHECK-NEXT: vmv.v.i v8, 0
905 ; CHECK-NEXT: vsetivli zero, 1, e16, m8, tu, mu
906 ; CHECK-NEXT: vle16.v v8, (zero), v0.t
907 ; CHECK-NEXT: vsetivli zero, 0, e16, m8, ta, ma
908 ; CHECK-NEXT: vse16.v v8, (zero)
911 %0 = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, ptr null, i64 1)
912 %1 = tail call <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i8 0, <vscale x 32 x i1> zeroinitializer, i64 0, i64 0)
913 %2 = tail call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> %1, <vscale x 32 x i8> zeroinitializer, i64 0)
914 %3 = tail call <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> %0, <vscale x 32 x i1> %2, i64 1)
915 call void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16> %3, ptr null, i64 0)
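; Test a vmerge whose true operand is riscv.vaaddu, a fixed-point operation
; that reads vxrm.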
919 define <vscale x 1 x i16> @test_vaaddu(<vscale x 1 x i16> %var_11, i16 zeroext %var_9, <vscale x 1 x i1> %var_5, <vscale x 1 x i16> %var_0) {
920 ; CHECK-LABEL: test_vaaddu:
921 ; CHECK: # %bb.0: # %entry
922 ; CHECK-NEXT: csrwi vxrm, 0
923 ; CHECK-NEXT: vsetivli zero, 3, e16, mf4, ta, mu
924 ; CHECK-NEXT: vaaddu.vx v9, v8, a0, v0.t
925 ; CHECK-NEXT: vmv1r.v v8, v9
928 %0 = tail call <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> %var_11, i16 %var_9, i64 0, i64 3)
929 %1 = tail call <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> %var_0, <vscale x 1 x i16> %0, <vscale x 1 x i1> %var_5, i64 3)
930 ret <vscale x 1 x i16> %1
; Test that reductions don't have a vmerge folded into them, since the mask
; affects the result.
declare <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
  <vscale x 2 x i32>,
  <vscale x 2 x i32>,
  <vscale x 2 x i32>,
  i64)
942 define <vscale x 2 x i32> @vredsum(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl) {
943 ; CHECK-LABEL: vredsum:
945 ; CHECK-NEXT: vmv1r.v v11, v8
946 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
947 ; CHECK-NEXT: vredsum.vs v11, v9, v10
948 ; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
950 %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
951 <vscale x 2 x i32> %passthru,
952 <vscale x 2 x i32> %x,
  <vscale x 2 x i32> %y,
  i64 %vl)
955 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 %vl)
956 ret <vscale x 2 x i32> %b
959 declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
960 <vscale x 2 x float>,
961 <vscale x 2 x float>,
  <vscale x 2 x float>,
  i64, i64)
965 define <vscale x 2 x float> @vfredusum(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) {
966 ; CHECK-LABEL: vfredusum:
968 ; CHECK-NEXT: fsrmi a1, 0
969 ; CHECK-NEXT: vmv1r.v v11, v8
970 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
971 ; CHECK-NEXT: vfredusum.vs v11, v9, v10
972 ; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
973 ; CHECK-NEXT: fsrm a1
975 %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
976 <vscale x 2 x float> %passthru,
977 <vscale x 2 x float> %x,
  <vscale x 2 x float> %y,
  i64 0, i64 %vl)
980 %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl)
981 ret <vscale x 2 x float> %b
984 ; However we can fold it in if the mask is all ones.
985 define <vscale x 2 x i32> @vredsum_allones_mask(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl) {
986 ; CHECK-LABEL: vredsum_allones_mask:
988 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
989 ; CHECK-NEXT: vredsum.vs v8, v9, v10
991 %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
992 <vscale x 2 x i32> %passthru,
993 <vscale x 2 x i32> %x,
  <vscale x 2 x i32> %y,
  i64 %vl)
996 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl)
997 ret <vscale x 2 x i32> %b
1000 define <vscale x 2 x float> @vfredusum_allones_mask(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, i64 %vl) {
1001 ; CHECK-LABEL: vfredusum_allones_mask:
1003 ; CHECK-NEXT: fsrmi a1, 0
1004 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
1005 ; CHECK-NEXT: vfredusum.vs v8, v9, v10
1006 ; CHECK-NEXT: fsrm a1
1008 %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
1009 <vscale x 2 x float> %passthru,
1010 <vscale x 2 x float> %x,
  <vscale x 2 x float> %y,
  i64 0, i64 %vl)
1013 %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl)
1014 ret <vscale x 2 x float> %b
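; The all-ones-mask fold is not performed when the reduction and the vmerge
; have different VLs.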
1017 define <vscale x 2 x i32> @unfoldable_vredsum_allones_mask_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
1018 ; CHECK-LABEL: unfoldable_vredsum_allones_mask_diff_vl:
1020 ; CHECK-NEXT: vmv1r.v v11, v8
1021 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma
1022 ; CHECK-NEXT: vredsum.vs v11, v9, v10
1023 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, tu, ma
1024 ; CHECK-NEXT: vmv.v.v v8, v11
1026 %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
1027 <vscale x 2 x i32> %passthru,
1028 <vscale x 2 x i32> %x,
  <vscale x 2 x i32> %y,
  i64 -1)
1031 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 1)
1032 ret <vscale x 2 x i32> %b
1035 declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64)
1036 declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg)
1037 declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64)
1038 declare <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i64)
1039 declare void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64)
1040 declare <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, i16, i64 immarg, i64)
1041 declare <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i64)
1043 ; Tests for folding vmerge into its ops when their VLs differ
1045 declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)
1046 declare <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i64)
1048 ; Can fold with VL=2
1049 define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
1050 ; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
1052 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
1053 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
1055 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 4)
1056 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
1057 ret <vscale x 2 x i32> %b
1060 ; Can fold with VL=2
1061 define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
1062 ; CHECK-LABEL: vmerge_larger_vl_same_passthru:
1064 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
1065 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
1067 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
1068 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
1069 ret <vscale x 2 x i32> %b
; Could fold with VL=2, but the fold is not performed because the vadd's
; passthru differs from the vmerge's passthru (note the separate vmerge.vvm).
1073 define <vscale x 2 x i32> @vmerge_smaller_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
1074 ; CHECK-LABEL: vmerge_smaller_vl_different_passthru:
1076 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
1077 ; CHECK-NEXT: vadd.vv v8, v10, v11
1078 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
1079 ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
1080 ; CHECK-NEXT: vmv1r.v v8, v9
1082 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
1083 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
1084 ret <vscale x 2 x i32> %b
1087 ; Can't fold this because we need to take elements from both %pt1 and %pt2
1088 define <vscale x 2 x i32> @vmerge_larger_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
1089 ; CHECK-LABEL: vmerge_larger_vl_different_passthru:
1091 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
1092 ; CHECK-NEXT: vadd.vv v8, v10, v11
1093 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
1094 ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
1095 ; CHECK-NEXT: vmv1r.v v8, v9
1097 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
1098 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
1099 ret <vscale x 2 x i32> %b
1102 ; Can fold with VL=2
1103 define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
1104 ; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
1106 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
1107 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
1109 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
1110 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
1111 ret <vscale x 2 x i32> %b
1114 ; Can fold with VL=2
1115 define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
1116 ; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
1118 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
1119 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
1121 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
1122 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
1123 ret <vscale x 2 x i32> %b
1126 ; The vadd's new policy should be tail undisturbed since the false op of the
; vmerge moves from the body to the tail, and we need to preserve it.
1128 define <vscale x 2 x i32> @vmerge_larger_vl_false_becomes_tail(<vscale x 2 x i32> %false, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
1129 ; CHECK-LABEL: vmerge_larger_vl_false_becomes_tail:
1131 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
1132 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
1134 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
1135 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %false, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
1136 ret <vscale x 2 x i32> %b
1139 ; Test widening pseudos with their TIED variant (passthru same as first op).
1140 define <vscale x 2 x i64> @vpmerge_vwsub.w_tied(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) {
1141 ; CHECK-LABEL: vpmerge_vwsub.w_tied:
1143 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
1144 ; CHECK-NEXT: vwsub.wv v8, v8, v12, v0.t
1146 %vl.zext = zext i32 %vl to i64
1147 %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %passthru, <vscale x 2 x i32> %y, i64 %vl.zext)
1148 %b = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %passthru, i32 %vl)
1149 ret <vscale x 2 x i64> %b
1152 define <vscale x 2 x double> @vpmerge_vfwsub.w_tied(<vscale x 2 x double> %passthru, <vscale x 2 x double> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) {
1153 ; CHECK-LABEL: vpmerge_vfwsub.w_tied:
1155 ; CHECK-NEXT: fsrmi a1, 1
1156 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
1157 ; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t
1158 ; CHECK-NEXT: fsrm a1
1160 %vl.zext = zext i32 %vl to i64
1161 %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(<vscale x 2 x double> %passthru, <vscale x 2 x double> %passthru, <vscale x 2 x float> %y, i64 1, i64 %vl.zext)
1162 %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
1163 ret <vscale x 2 x double> %b
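; Test a vmerge with an implicit (poison) passthru whose true operand (vmacc)
; is tied to the vmerge's false operand.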
1166 define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
1167 ; CHECK-LABEL: true_tied_dest_vmerge_implicit_passthru:
1169 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
1170 ; CHECK-NEXT: vmacc.vv v8, v9, v10, v0.t
1172 %a = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %avl, i64 0)
1173 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
1174 <vscale x 2 x i32> poison,
1175 <vscale x 2 x i32> %passthru,
1176 <vscale x 2 x i32> %a,
  <vscale x 2 x i1> %m,
  i64 %avl)
1180 ret <vscale x 2 x i32> %b
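; Test a vmerge with an all-ones mask and an implicit (poison) passthru whose
; true operand is already a masked vadd.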
1183 define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
1184 ; CHECK-LABEL: true_mask_vmerge_implicit_passthru:
1186 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
1187 ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
1189 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl, i64 0)
1190 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
1191 <vscale x 2 x i32> poison,
1192 <vscale x 2 x i32> %passthru,
1193 <vscale x 2 x i32> %a,
  <vscale x 2 x i1> shufflevector(<vscale x 2 x i1> insertelement(<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer),
  i64 %avl)
1197 ret <vscale x 2 x i32> %b
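; Test that the fold is not performed when the true operand's SEW does not
; match the vmerge's SEW.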
1201 define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
1202 ; CHECK-LABEL: unfoldable_mismatched_sew:
1204 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1205 ; CHECK-NEXT: vadd.vv v9, v9, v10
1206 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
1207 ; CHECK-NEXT: vmv.v.v v8, v9
1209 %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
1210 %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
1211 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
1212 <vscale x 2 x i32> %passthru,
1213 <vscale x 2 x i32> %passthru,
1214 <vscale x 2 x i32> %a.bitcast,
  <vscale x 2 x i1> splat (i1 true),
  i64 %avl)
1218 ret <vscale x 2 x i32> %b