1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
3 ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
4 ; RUN: --check-prefixes=CHECK,ZVFH
5 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
6 ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
7 ; RUN: --check-prefixes=CHECK,ZVFH
8 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
9 ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
10 ; RUN: --check-prefixes=CHECK,ZVFHMIN
11 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
12 ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
13 ; RUN: --check-prefixes=CHECK,ZVFHMIN
15 declare <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
17 define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
18 ; CHECK-LABEL: vfsub_vv_nxv1bf16:
20 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
21 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t
22 ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
23 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
24 ; CHECK-NEXT: vfsub.vv v9, v9, v10, v0.t
25 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
26 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
28 %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
29 ret <vscale x 1 x bfloat> %v
32 define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) {
33 ; CHECK-LABEL: vfsub_vv_nxv1bf16_unmasked:
35 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
36 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
37 ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
38 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
39 ; CHECK-NEXT: vfsub.vv v9, v9, v10
40 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
41 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
43 %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
44 ret <vscale x 1 x bfloat> %v
47 define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
48 ; CHECK-LABEL: vfsub_vf_nxv1bf16:
50 ; CHECK-NEXT: fmv.x.h a1, fa0
51 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
52 ; CHECK-NEXT: vmv.v.x v9, a1
53 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
54 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t
55 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
56 ; CHECK-NEXT: vfsub.vv v9, v10, v8, v0.t
57 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
58 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
60 %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
61 %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
62 %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
63 ret <vscale x 1 x bfloat> %v
66 define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
67 ; CHECK-LABEL: vfsub_vf_nxv1bf16_unmasked:
69 ; CHECK-NEXT: fmv.x.h a1, fa0
70 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
71 ; CHECK-NEXT: vmv.v.x v9, a1
72 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
73 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
74 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
75 ; CHECK-NEXT: vfsub.vv v9, v10, v8
76 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
77 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
79 %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
80 %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
81 %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
82 ret <vscale x 1 x bfloat> %v
85 declare <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
87 define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
88 ; CHECK-LABEL: vfsub_vv_nxv2bf16:
90 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
91 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t
92 ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
93 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
94 ; CHECK-NEXT: vfsub.vv v9, v9, v10, v0.t
95 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
96 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
98 %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
99 ret <vscale x 2 x bfloat> %v
102 define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) {
103 ; CHECK-LABEL: vfsub_vv_nxv2bf16_unmasked:
105 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
106 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
107 ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
108 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
109 ; CHECK-NEXT: vfsub.vv v9, v9, v10
110 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
111 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
113 %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
114 ret <vscale x 2 x bfloat> %v
117 define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
118 ; CHECK-LABEL: vfsub_vf_nxv2bf16:
120 ; CHECK-NEXT: fmv.x.h a1, fa0
121 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
122 ; CHECK-NEXT: vmv.v.x v9, a1
123 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
124 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t
125 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
126 ; CHECK-NEXT: vfsub.vv v9, v10, v8, v0.t
127 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
128 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
130 %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
131 %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
132 %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
133 ret <vscale x 2 x bfloat> %v
136 define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
137 ; CHECK-LABEL: vfsub_vf_nxv2bf16_unmasked:
139 ; CHECK-NEXT: fmv.x.h a1, fa0
140 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
141 ; CHECK-NEXT: vmv.v.x v9, a1
142 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
143 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
144 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
145 ; CHECK-NEXT: vfsub.vv v9, v10, v8
146 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
147 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
149 %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
150 %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
151 %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
152 ret <vscale x 2 x bfloat> %v
155 declare <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
157 define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
158 ; CHECK-LABEL: vfsub_vv_nxv4bf16:
160 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
161 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t
162 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
163 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
164 ; CHECK-NEXT: vfsub.vv v10, v12, v10, v0.t
165 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
166 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
168 %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
169 ret <vscale x 4 x bfloat> %v
172 define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) {
173 ; CHECK-LABEL: vfsub_vv_nxv4bf16_unmasked:
175 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
176 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
177 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
178 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
179 ; CHECK-NEXT: vfsub.vv v10, v12, v10
180 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
181 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
183 %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
184 ret <vscale x 4 x bfloat> %v
187 define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
188 ; CHECK-LABEL: vfsub_vf_nxv4bf16:
190 ; CHECK-NEXT: fmv.x.h a1, fa0
191 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
192 ; CHECK-NEXT: vmv.v.x v9, a1
193 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
194 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t
195 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
196 ; CHECK-NEXT: vfsub.vv v10, v10, v12, v0.t
197 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
198 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
200 %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
201 %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
202 %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
203 ret <vscale x 4 x bfloat> %v
206 define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
207 ; CHECK-LABEL: vfsub_vf_nxv4bf16_unmasked:
209 ; CHECK-NEXT: fmv.x.h a1, fa0
210 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
211 ; CHECK-NEXT: vmv.v.x v9, a1
212 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
213 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
214 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
215 ; CHECK-NEXT: vfsub.vv v10, v10, v12
216 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
217 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
219 %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
220 %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
221 %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
222 ret <vscale x 4 x bfloat> %v
225 declare <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
227 define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
228 ; CHECK-LABEL: vfsub_vv_nxv8bf16:
230 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
231 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10, v0.t
232 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
233 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
234 ; CHECK-NEXT: vfsub.vv v12, v16, v12, v0.t
235 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
236 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
238 %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
239 ret <vscale x 8 x bfloat> %v
242 define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) {
243 ; CHECK-LABEL: vfsub_vv_nxv8bf16_unmasked:
245 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
246 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
247 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
248 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
249 ; CHECK-NEXT: vfsub.vv v12, v16, v12
250 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
251 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
253 %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
254 ret <vscale x 8 x bfloat> %v
257 define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
258 ; CHECK-LABEL: vfsub_vf_nxv8bf16:
260 ; CHECK-NEXT: fmv.x.h a1, fa0
261 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
262 ; CHECK-NEXT: vmv.v.x v10, a1
263 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
264 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t
265 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
266 ; CHECK-NEXT: vfsub.vv v12, v12, v16, v0.t
267 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
268 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
270 %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
271 %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
272 %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
273 ret <vscale x 8 x bfloat> %v
276 define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
277 ; CHECK-LABEL: vfsub_vf_nxv8bf16_unmasked:
279 ; CHECK-NEXT: fmv.x.h a1, fa0
280 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
281 ; CHECK-NEXT: vmv.v.x v10, a1
282 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
283 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
284 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
285 ; CHECK-NEXT: vfsub.vv v12, v12, v16
286 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
287 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
289 %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
290 %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
291 %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
292 ret <vscale x 8 x bfloat> %v
295 declare <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
297 define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
298 ; CHECK-LABEL: vfsub_vv_nxv16bf16:
300 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
301 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
302 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t
303 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
304 ; CHECK-NEXT: vfsub.vv v16, v24, v16, v0.t
305 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
306 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
308 %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
309 ret <vscale x 16 x bfloat> %v
312 define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) {
313 ; CHECK-LABEL: vfsub_vv_nxv16bf16_unmasked:
315 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
316 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
317 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
318 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
319 ; CHECK-NEXT: vfsub.vv v16, v24, v16
320 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
321 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
323 %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
324 ret <vscale x 16 x bfloat> %v
327 define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
328 ; CHECK-LABEL: vfsub_vf_nxv16bf16:
330 ; CHECK-NEXT: fmv.x.h a1, fa0
331 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
332 ; CHECK-NEXT: vmv.v.x v12, a1
333 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
334 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
335 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
336 ; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
337 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
338 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
340 %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
341 %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
342 %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
343 ret <vscale x 16 x bfloat> %v
346 define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
347 ; CHECK-LABEL: vfsub_vf_nxv16bf16_unmasked:
349 ; CHECK-NEXT: fmv.x.h a1, fa0
350 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
351 ; CHECK-NEXT: vmv.v.x v12, a1
352 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
353 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
354 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
355 ; CHECK-NEXT: vfsub.vv v16, v16, v24
356 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
357 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
359 %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
360 %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
361 %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
362 ret <vscale x 16 x bfloat> %v
365 declare <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
367 define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
368 ; CHECK-LABEL: vfsub_vv_nxv32bf16:
370 ; CHECK-NEXT: addi sp, sp, -16
371 ; CHECK-NEXT: .cfi_def_cfa_offset 16
372 ; CHECK-NEXT: csrr a1, vlenb
373 ; CHECK-NEXT: slli a1, a1, 3
374 ; CHECK-NEXT: mv a2, a1
375 ; CHECK-NEXT: slli a1, a1, 1
376 ; CHECK-NEXT: add a1, a1, a2
377 ; CHECK-NEXT: sub sp, sp, a1
378 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
379 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
380 ; CHECK-NEXT: vmv1r.v v7, v0
381 ; CHECK-NEXT: csrr a1, vlenb
382 ; CHECK-NEXT: slli a1, a1, 4
383 ; CHECK-NEXT: add a1, sp, a1
384 ; CHECK-NEXT: addi a1, a1, 16
385 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
386 ; CHECK-NEXT: csrr a2, vlenb
387 ; CHECK-NEXT: slli a1, a2, 1
388 ; CHECK-NEXT: srli a2, a2, 2
389 ; CHECK-NEXT: sub a3, a0, a1
390 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
391 ; CHECK-NEXT: sltu a2, a0, a3
392 ; CHECK-NEXT: addi a2, a2, -1
393 ; CHECK-NEXT: and a2, a2, a3
394 ; CHECK-NEXT: vmv4r.v v8, v16
395 ; CHECK-NEXT: csrr a3, vlenb
396 ; CHECK-NEXT: slli a3, a3, 3
397 ; CHECK-NEXT: add a3, sp, a3
398 ; CHECK-NEXT: addi a3, a3, 16
399 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
400 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
401 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
402 ; CHECK-NEXT: addi a2, sp, 16
403 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
404 ; CHECK-NEXT: csrr a2, vlenb
405 ; CHECK-NEXT: slli a2, a2, 4
406 ; CHECK-NEXT: add a2, sp, a2
407 ; CHECK-NEXT: addi a2, a2, 16
408 ; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
409 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
410 ; CHECK-NEXT: addi a2, sp, 16
411 ; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
412 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
413 ; CHECK-NEXT: vfsub.vv v16, v8, v16, v0.t
414 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
415 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16, v0.t
416 ; CHECK-NEXT: bltu a0, a1, .LBB20_2
417 ; CHECK-NEXT: # %bb.1:
418 ; CHECK-NEXT: mv a0, a1
419 ; CHECK-NEXT: .LBB20_2:
420 ; CHECK-NEXT: vmv1r.v v0, v7
421 ; CHECK-NEXT: csrr a1, vlenb
422 ; CHECK-NEXT: slli a1, a1, 3
423 ; CHECK-NEXT: add a1, sp, a1
424 ; CHECK-NEXT: addi a1, a1, 16
425 ; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
426 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
427 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t
428 ; CHECK-NEXT: addi a0, sp, 16
429 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
430 ; CHECK-NEXT: csrr a0, vlenb
431 ; CHECK-NEXT: slli a0, a0, 4
432 ; CHECK-NEXT: add a0, sp, a0
433 ; CHECK-NEXT: addi a0, a0, 16
434 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
435 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t
436 ; CHECK-NEXT: addi a0, sp, 16
437 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
438 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
439 ; CHECK-NEXT: vfsub.vv v16, v24, v16, v0.t
440 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
441 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
442 ; CHECK-NEXT: csrr a0, vlenb
443 ; CHECK-NEXT: slli a0, a0, 3
444 ; CHECK-NEXT: mv a1, a0
445 ; CHECK-NEXT: slli a0, a0, 1
446 ; CHECK-NEXT: add a0, a0, a1
447 ; CHECK-NEXT: add sp, sp, a0
448 ; CHECK-NEXT: .cfi_def_cfa sp, 16
449 ; CHECK-NEXT: addi sp, sp, 16
450 ; CHECK-NEXT: .cfi_def_cfa_offset 0
452 %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl)
453 ret <vscale x 32 x bfloat> %v
456 define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) {
457 ; CHECK-LABEL: vfsub_vv_nxv32bf16_unmasked:
459 ; CHECK-NEXT: addi sp, sp, -16
460 ; CHECK-NEXT: .cfi_def_cfa_offset 16
461 ; CHECK-NEXT: csrr a1, vlenb
462 ; CHECK-NEXT: slli a1, a1, 3
463 ; CHECK-NEXT: sub sp, sp, a1
464 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
465 ; CHECK-NEXT: csrr a2, vlenb
466 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
467 ; CHECK-NEXT: vmset.m v24
468 ; CHECK-NEXT: slli a1, a2, 1
469 ; CHECK-NEXT: srli a2, a2, 2
470 ; CHECK-NEXT: sub a3, a0, a1
471 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
472 ; CHECK-NEXT: vslidedown.vx v0, v24, a2
473 ; CHECK-NEXT: sltu a2, a0, a3
474 ; CHECK-NEXT: addi a2, a2, -1
475 ; CHECK-NEXT: and a2, a2, a3
476 ; CHECK-NEXT: addi a3, sp, 16
477 ; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
478 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
479 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t
480 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
481 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
482 ; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
483 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
484 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16, v0.t
485 ; CHECK-NEXT: bltu a0, a1, .LBB21_2
486 ; CHECK-NEXT: # %bb.1:
487 ; CHECK-NEXT: mv a0, a1
488 ; CHECK-NEXT: .LBB21_2:
489 ; CHECK-NEXT: addi a1, sp, 16
490 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
491 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
492 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
493 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
494 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
495 ; CHECK-NEXT: vfsub.vv v16, v24, v16
496 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
497 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
498 ; CHECK-NEXT: csrr a0, vlenb
499 ; CHECK-NEXT: slli a0, a0, 3
500 ; CHECK-NEXT: add sp, sp, a0
501 ; CHECK-NEXT: .cfi_def_cfa sp, 16
502 ; CHECK-NEXT: addi sp, sp, 16
503 ; CHECK-NEXT: .cfi_def_cfa_offset 0
505 %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
506 ret <vscale x 32 x bfloat> %v
509 define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
510 ; CHECK-LABEL: vfsub_vf_nxv32bf16:
512 ; CHECK-NEXT: addi sp, sp, -16
513 ; CHECK-NEXT: .cfi_def_cfa_offset 16
514 ; CHECK-NEXT: csrr a1, vlenb
515 ; CHECK-NEXT: slli a1, a1, 3
516 ; CHECK-NEXT: mv a2, a1
517 ; CHECK-NEXT: slli a1, a1, 1
518 ; CHECK-NEXT: add a1, a1, a2
519 ; CHECK-NEXT: sub sp, sp, a1
520 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
521 ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
522 ; CHECK-NEXT: vmv1r.v v7, v0
523 ; CHECK-NEXT: vmv8r.v v16, v8
524 ; CHECK-NEXT: fmv.x.h a1, fa0
525 ; CHECK-NEXT: csrr a2, vlenb
526 ; CHECK-NEXT: vmv.v.x v8, a1
527 ; CHECK-NEXT: slli a1, a2, 1
528 ; CHECK-NEXT: srli a2, a2, 2
529 ; CHECK-NEXT: sub a3, a0, a1
530 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
531 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
532 ; CHECK-NEXT: sltu a2, a0, a3
533 ; CHECK-NEXT: addi a2, a2, -1
534 ; CHECK-NEXT: and a2, a2, a3
535 ; CHECK-NEXT: csrr a3, vlenb
536 ; CHECK-NEXT: slli a3, a3, 3
537 ; CHECK-NEXT: add a3, sp, a3
538 ; CHECK-NEXT: addi a3, a3, 16
539 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
540 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
541 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
542 ; CHECK-NEXT: vmv4r.v v8, v16
543 ; CHECK-NEXT: csrr a2, vlenb
544 ; CHECK-NEXT: slli a2, a2, 4
545 ; CHECK-NEXT: add a2, sp, a2
546 ; CHECK-NEXT: addi a2, a2, 16
547 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
548 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
549 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
550 ; CHECK-NEXT: vfsub.vv v24, v8, v24, v0.t
551 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
552 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t
553 ; CHECK-NEXT: bltu a0, a1, .LBB22_2
554 ; CHECK-NEXT: # %bb.1:
555 ; CHECK-NEXT: mv a0, a1
556 ; CHECK-NEXT: .LBB22_2:
557 ; CHECK-NEXT: vmv1r.v v0, v7
558 ; CHECK-NEXT: csrr a1, vlenb
559 ; CHECK-NEXT: slli a1, a1, 4
560 ; CHECK-NEXT: add a1, sp, a1
561 ; CHECK-NEXT: addi a1, a1, 16
562 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
563 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
564 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24, v0.t
565 ; CHECK-NEXT: addi a0, sp, 16
566 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
567 ; CHECK-NEXT: csrr a0, vlenb
568 ; CHECK-NEXT: slli a0, a0, 3
569 ; CHECK-NEXT: add a0, sp, a0
570 ; CHECK-NEXT: addi a0, a0, 16
571 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
572 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24, v0.t
573 ; CHECK-NEXT: vmv8r.v v24, v16
574 ; CHECK-NEXT: addi a0, sp, 16
575 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
576 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
577 ; CHECK-NEXT: vfsub.vv v24, v16, v24, v0.t
578 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
579 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t
580 ; CHECK-NEXT: csrr a0, vlenb
581 ; CHECK-NEXT: slli a0, a0, 3
582 ; CHECK-NEXT: mv a1, a0
583 ; CHECK-NEXT: slli a0, a0, 1
584 ; CHECK-NEXT: add a0, a0, a1
585 ; CHECK-NEXT: add sp, sp, a0
586 ; CHECK-NEXT: .cfi_def_cfa sp, 16
587 ; CHECK-NEXT: addi sp, sp, 16
588 ; CHECK-NEXT: .cfi_def_cfa_offset 0
590 %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
591 %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
592 %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
593 ret <vscale x 32 x bfloat> %v
596 define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
597 ; CHECK-LABEL: vfsub_vf_nxv32bf16_unmasked:
599 ; CHECK-NEXT: addi sp, sp, -16
600 ; CHECK-NEXT: .cfi_def_cfa_offset 16
601 ; CHECK-NEXT: csrr a1, vlenb
602 ; CHECK-NEXT: slli a1, a1, 3
603 ; CHECK-NEXT: sub sp, sp, a1
604 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
605 ; CHECK-NEXT: fmv.x.h a1, fa0
606 ; CHECK-NEXT: csrr a2, vlenb
607 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
608 ; CHECK-NEXT: vmset.m v24
609 ; CHECK-NEXT: vmv.v.x v16, a1
610 ; CHECK-NEXT: slli a1, a2, 1
611 ; CHECK-NEXT: srli a2, a2, 2
612 ; CHECK-NEXT: sub a3, a0, a1
613 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
614 ; CHECK-NEXT: vslidedown.vx v0, v24, a2
615 ; CHECK-NEXT: sltu a2, a0, a3
616 ; CHECK-NEXT: addi a2, a2, -1
617 ; CHECK-NEXT: and a2, a2, a3
618 ; CHECK-NEXT: addi a3, sp, 16
619 ; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
620 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
621 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t
622 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
623 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
624 ; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
625 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
626 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16, v0.t
627 ; CHECK-NEXT: bltu a0, a1, .LBB23_2
628 ; CHECK-NEXT: # %bb.1:
629 ; CHECK-NEXT: mv a0, a1
630 ; CHECK-NEXT: .LBB23_2:
631 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
632 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
633 ; CHECK-NEXT: addi a0, sp, 16
634 ; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
635 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
636 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
637 ; CHECK-NEXT: vfsub.vv v16, v16, v24
638 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
639 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
640 ; CHECK-NEXT: csrr a0, vlenb
641 ; CHECK-NEXT: slli a0, a0, 3
642 ; CHECK-NEXT: add sp, sp, a0
643 ; CHECK-NEXT: .cfi_def_cfa sp, 16
644 ; CHECK-NEXT: addi sp, sp, 16
645 ; CHECK-NEXT: .cfi_def_cfa_offset 0
647 %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
648 %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
649 %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
650 ret <vscale x 32 x bfloat> %v
652 declare <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
654 define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
655 ; ZVFH-LABEL: vfsub_vv_nxv1f16:
657 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
658 ; ZVFH-NEXT: vfsub.vv v8, v8, v9, v0.t
661 ; ZVFHMIN-LABEL: vfsub_vv_nxv1f16:
663 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
664 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
665 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
666 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
667 ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t
668 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
669 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t
671 %v = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
672 ret <vscale x 1 x half> %v
675 define <vscale x 1 x half> @vfsub_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <vscale x 1 x half> %b, i32 zeroext %evl) {
676 ; ZVFH-LABEL: vfsub_vv_nxv1f16_unmasked:
678 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
679 ; ZVFH-NEXT: vfsub.vv v8, v8, v9
682 ; ZVFHMIN-LABEL: vfsub_vv_nxv1f16_unmasked:
684 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
685 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
686 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
687 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
688 ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10
689 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
690 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
692 %v = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
693 ret <vscale x 1 x half> %v
696 define <vscale x 1 x half> @vfsub_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
697 ; ZVFH-LABEL: vfsub_vf_nxv1f16:
699 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
700 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
703 ; ZVFHMIN-LABEL: vfsub_vf_nxv1f16:
705 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
706 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
707 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
708 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
709 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
710 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
711 ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8, v0.t
712 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
713 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t
715 %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
716 %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
717 %v = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
718 ret <vscale x 1 x half> %v
721 define <vscale x 1 x half> @vfsub_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, half %b, i32 zeroext %evl) {
722 ; ZVFH-LABEL: vfsub_vf_nxv1f16_unmasked:
724 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
725 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
728 ; ZVFHMIN-LABEL: vfsub_vf_nxv1f16_unmasked:
730 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
731 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
732 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
733 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
734 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
735 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
736 ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8
737 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
738 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
740 %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
741 %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
742 %v = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
743 ret <vscale x 1 x half> %v
746 declare <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32)
748 define <vscale x 2 x half> @vfsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
749 ; ZVFH-LABEL: vfsub_vv_nxv2f16:
751 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
752 ; ZVFH-NEXT: vfsub.vv v8, v8, v9, v0.t
755 ; ZVFHMIN-LABEL: vfsub_vv_nxv2f16:
757 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
758 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
759 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
760 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
761 ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t
762 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
763 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t
765 %v = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
766 ret <vscale x 2 x half> %v
769 define <vscale x 2 x half> @vfsub_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <vscale x 2 x half> %b, i32 zeroext %evl) {
770 ; ZVFH-LABEL: vfsub_vv_nxv2f16_unmasked:
772 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
773 ; ZVFH-NEXT: vfsub.vv v8, v8, v9
776 ; ZVFHMIN-LABEL: vfsub_vv_nxv2f16_unmasked:
778 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
779 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
780 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
781 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
782 ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10
783 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
784 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
786 %v = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
787 ret <vscale x 2 x half> %v
790 define <vscale x 2 x half> @vfsub_vf_nxv2f16(<vscale x 2 x half> %va, half %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
791 ; ZVFH-LABEL: vfsub_vf_nxv2f16:
793 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
794 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
797 ; ZVFHMIN-LABEL: vfsub_vf_nxv2f16:
799 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
800 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
801 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
802 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
803 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
804 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
805 ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8, v0.t
806 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
807 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t
809 %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
810 %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
811 %v = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
812 ret <vscale x 2 x half> %v
815 define <vscale x 2 x half> @vfsub_vf_nxv2f16_unmasked(<vscale x 2 x half> %va, half %b, i32 zeroext %evl) {
816 ; ZVFH-LABEL: vfsub_vf_nxv2f16_unmasked:
818 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
819 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
822 ; ZVFHMIN-LABEL: vfsub_vf_nxv2f16_unmasked:
824 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
825 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
826 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
827 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
828 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
829 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
830 ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8
831 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
832 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
834 %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
835 %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
836 %v = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
837 ret <vscale x 2 x half> %v
840 declare <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x i1>, i32)
842 define <vscale x 4 x half> @vfsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
843 ; ZVFH-LABEL: vfsub_vv_nxv4f16:
845 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
846 ; ZVFH-NEXT: vfsub.vv v8, v8, v9, v0.t
849 ; ZVFHMIN-LABEL: vfsub_vv_nxv4f16:
851 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
852 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
853 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
854 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
855 ; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10, v0.t
856 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
857 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
859 %v = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
860 ret <vscale x 4 x half> %v
863 define <vscale x 4 x half> @vfsub_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <vscale x 4 x half> %b, i32 zeroext %evl) {
864 ; ZVFH-LABEL: vfsub_vv_nxv4f16_unmasked:
866 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
867 ; ZVFH-NEXT: vfsub.vv v8, v8, v9
870 ; ZVFHMIN-LABEL: vfsub_vv_nxv4f16_unmasked:
872 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
873 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
874 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
875 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
876 ; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10
877 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
878 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
880 %v = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
881 ret <vscale x 4 x half> %v
884 define <vscale x 4 x half> @vfsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
885 ; ZVFH-LABEL: vfsub_vf_nxv4f16:
887 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
888 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
891 ; ZVFHMIN-LABEL: vfsub_vf_nxv4f16:
893 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
894 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
895 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
896 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
897 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
898 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
899 ; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12, v0.t
900 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
901 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
903 %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
904 %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
905 %v = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
906 ret <vscale x 4 x half> %v
909 define <vscale x 4 x half> @vfsub_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, half %b, i32 zeroext %evl) {
910 ; ZVFH-LABEL: vfsub_vf_nxv4f16_unmasked:
912 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
913 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
916 ; ZVFHMIN-LABEL: vfsub_vf_nxv4f16_unmasked:
918 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
919 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
920 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
921 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
922 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
923 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
924 ; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12
925 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
926 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
928 %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
929 %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
930 %v = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
931 ret <vscale x 4 x half> %v
934 declare <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, i32)
936 define <vscale x 8 x half> @vfsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
937 ; ZVFH-LABEL: vfsub_vv_nxv8f16:
939 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
940 ; ZVFH-NEXT: vfsub.vv v8, v8, v10, v0.t
943 ; ZVFHMIN-LABEL: vfsub_vv_nxv8f16:
945 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
946 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
947 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
948 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
949 ; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12, v0.t
950 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
951 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
953 %v = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
954 ret <vscale x 8 x half> %v
957 define <vscale x 8 x half> @vfsub_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <vscale x 8 x half> %b, i32 zeroext %evl) {
958 ; ZVFH-LABEL: vfsub_vv_nxv8f16_unmasked:
960 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
961 ; ZVFH-NEXT: vfsub.vv v8, v8, v10
964 ; ZVFHMIN-LABEL: vfsub_vv_nxv8f16_unmasked:
966 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
967 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
968 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
969 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
970 ; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12
971 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
972 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
974 %v = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
975 ret <vscale x 8 x half> %v
978 define <vscale x 8 x half> @vfsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
979 ; ZVFH-LABEL: vfsub_vf_nxv8f16:
981 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
982 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
985 ; ZVFHMIN-LABEL: vfsub_vf_nxv8f16:
987 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
988 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
989 ; ZVFHMIN-NEXT: vmv.v.x v10, a1
990 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
991 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
992 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
993 ; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16, v0.t
994 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
995 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
997 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
998 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
999 %v = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
1000 ret <vscale x 8 x half> %v
1003 define <vscale x 8 x half> @vfsub_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, half %b, i32 zeroext %evl) {
1004 ; ZVFH-LABEL: vfsub_vf_nxv8f16_unmasked:
1006 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
1007 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
1010 ; ZVFHMIN-LABEL: vfsub_vf_nxv8f16_unmasked:
1012 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
1013 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
1014 ; ZVFHMIN-NEXT: vmv.v.x v10, a1
1015 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
1016 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
1017 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1018 ; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16
1019 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1020 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
1022 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
1023 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
1024 %v = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1025 ret <vscale x 8 x half> %v
1028 declare <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i32)
1030 define <vscale x 16 x half> @vfsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1031 ; ZVFH-LABEL: vfsub_vv_nxv16f16:
1033 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1034 ; ZVFH-NEXT: vfsub.vv v8, v8, v12, v0.t
1037 ; ZVFHMIN-LABEL: vfsub_vv_nxv16f16:
1039 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1040 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
1041 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
1042 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1043 ; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16, v0.t
1044 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1045 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
1047 %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
1048 ret <vscale x 16 x half> %v
1051 define <vscale x 16 x half> @vfsub_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, <vscale x 16 x half> %b, i32 zeroext %evl) {
1052 ; ZVFH-LABEL: vfsub_vv_nxv16f16_unmasked:
1054 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1055 ; ZVFH-NEXT: vfsub.vv v8, v8, v12
1058 ; ZVFHMIN-LABEL: vfsub_vv_nxv16f16_unmasked:
1060 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1061 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
1062 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
1063 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1064 ; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16
1065 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1066 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
1068 %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1069 ret <vscale x 16 x half> %v
1072 define <vscale x 16 x half> @vfsub_vf_nxv16f16(<vscale x 16 x half> %va, half %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1073 ; ZVFH-LABEL: vfsub_vf_nxv16f16:
1075 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1076 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
1079 ; ZVFHMIN-LABEL: vfsub_vf_nxv16f16:
1081 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
1082 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1083 ; ZVFHMIN-NEXT: vmv.v.x v12, a1
1084 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
1085 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
1086 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1087 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
1088 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1089 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
1091 %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
1092 %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
1093 %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
1094 ret <vscale x 16 x half> %v
1097 define <vscale x 16 x half> @vfsub_vf_nxv16f16_unmasked(<vscale x 16 x half> %va, half %b, i32 zeroext %evl) {
1098 ; ZVFH-LABEL: vfsub_vf_nxv16f16_unmasked:
1100 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1101 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
1104 ; ZVFHMIN-LABEL: vfsub_vf_nxv16f16_unmasked:
1106 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
1107 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1108 ; ZVFHMIN-NEXT: vmv.v.x v12, a1
1109 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
1110 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
1111 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1112 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24
1113 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1114 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
1116 %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
1117 %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
1118 %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1119 ret <vscale x 16 x half> %v
1122 declare <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x i1>, i32)
1124 define <vscale x 32 x half> @vfsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1125 ; ZVFH-LABEL: vfsub_vv_nxv32f16:
1127 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
1128 ; ZVFH-NEXT: vfsub.vv v8, v8, v16, v0.t
1131 ; ZVFHMIN-LABEL: vfsub_vv_nxv32f16:
1133 ; ZVFHMIN-NEXT: addi sp, sp, -16
1134 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
1135 ; ZVFHMIN-NEXT: csrr a1, vlenb
1136 ; ZVFHMIN-NEXT: slli a1, a1, 3
1137 ; ZVFHMIN-NEXT: mv a2, a1
1138 ; ZVFHMIN-NEXT: slli a1, a1, 1
1139 ; ZVFHMIN-NEXT: add a1, a1, a2
1140 ; ZVFHMIN-NEXT: sub sp, sp, a1
1141 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
1142 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
1143 ; ZVFHMIN-NEXT: vmv1r.v v7, v0
1144 ; ZVFHMIN-NEXT: csrr a1, vlenb
1145 ; ZVFHMIN-NEXT: slli a1, a1, 4
1146 ; ZVFHMIN-NEXT: add a1, sp, a1
1147 ; ZVFHMIN-NEXT: addi a1, a1, 16
1148 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
1149 ; ZVFHMIN-NEXT: csrr a2, vlenb
1150 ; ZVFHMIN-NEXT: slli a1, a2, 1
1151 ; ZVFHMIN-NEXT: srli a2, a2, 2
1152 ; ZVFHMIN-NEXT: sub a3, a0, a1
1153 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
1154 ; ZVFHMIN-NEXT: sltu a2, a0, a3
1155 ; ZVFHMIN-NEXT: addi a2, a2, -1
1156 ; ZVFHMIN-NEXT: and a2, a2, a3
1157 ; ZVFHMIN-NEXT: vmv4r.v v8, v16
1158 ; ZVFHMIN-NEXT: csrr a3, vlenb
1159 ; ZVFHMIN-NEXT: slli a3, a3, 3
1160 ; ZVFHMIN-NEXT: add a3, sp, a3
1161 ; ZVFHMIN-NEXT: addi a3, a3, 16
1162 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
1163 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
1164 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
1165 ; ZVFHMIN-NEXT: addi a2, sp, 16
1166 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
1167 ; ZVFHMIN-NEXT: csrr a2, vlenb
1168 ; ZVFHMIN-NEXT: slli a2, a2, 4
1169 ; ZVFHMIN-NEXT: add a2, sp, a2
1170 ; ZVFHMIN-NEXT: addi a2, a2, 16
1171 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
1172 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
1173 ; ZVFHMIN-NEXT: addi a2, sp, 16
1174 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
1175 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1176 ; ZVFHMIN-NEXT: vfsub.vv v16, v8, v16, v0.t
1177 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1178 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
1179 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2
1180 ; ZVFHMIN-NEXT: # %bb.1:
1181 ; ZVFHMIN-NEXT: mv a0, a1
1182 ; ZVFHMIN-NEXT: .LBB44_2:
1183 ; ZVFHMIN-NEXT: vmv1r.v v0, v7
1184 ; ZVFHMIN-NEXT: csrr a1, vlenb
1185 ; ZVFHMIN-NEXT: slli a1, a1, 3
1186 ; ZVFHMIN-NEXT: add a1, sp, a1
1187 ; ZVFHMIN-NEXT: addi a1, a1, 16
1188 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
1189 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1190 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t
1191 ; ZVFHMIN-NEXT: addi a0, sp, 16
1192 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
1193 ; ZVFHMIN-NEXT: csrr a0, vlenb
1194 ; ZVFHMIN-NEXT: slli a0, a0, 4
1195 ; ZVFHMIN-NEXT: add a0, sp, a0
1196 ; ZVFHMIN-NEXT: addi a0, a0, 16
1197 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1198 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t
1199 ; ZVFHMIN-NEXT: addi a0, sp, 16
1200 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1201 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1202 ; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16, v0.t
1203 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1204 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
1205 ; ZVFHMIN-NEXT: csrr a0, vlenb
1206 ; ZVFHMIN-NEXT: slli a0, a0, 3
1207 ; ZVFHMIN-NEXT: mv a1, a0
1208 ; ZVFHMIN-NEXT: slli a0, a0, 1
1209 ; ZVFHMIN-NEXT: add a0, a0, a1
1210 ; ZVFHMIN-NEXT: add sp, sp, a0
1211 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
1212 ; ZVFHMIN-NEXT: addi sp, sp, 16
1213 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
1215 %v = call <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
1216 ret <vscale x 32 x half> %v
1219 define <vscale x 32 x half> @vfsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %b, i32 zeroext %evl) {
1220 ; ZVFH-LABEL: vfsub_vv_nxv32f16_unmasked:
1222 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
1223 ; ZVFH-NEXT: vfsub.vv v8, v8, v16
1226 ; ZVFHMIN-LABEL: vfsub_vv_nxv32f16_unmasked:
1228 ; ZVFHMIN-NEXT: addi sp, sp, -16
1229 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
1230 ; ZVFHMIN-NEXT: csrr a1, vlenb
1231 ; ZVFHMIN-NEXT: slli a1, a1, 3
1232 ; ZVFHMIN-NEXT: sub sp, sp, a1
1233 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1234 ; ZVFHMIN-NEXT: csrr a2, vlenb
1235 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
1236 ; ZVFHMIN-NEXT: vmset.m v24
1237 ; ZVFHMIN-NEXT: slli a1, a2, 1
1238 ; ZVFHMIN-NEXT: srli a2, a2, 2
1239 ; ZVFHMIN-NEXT: sub a3, a0, a1
1240 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
1241 ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2
1242 ; ZVFHMIN-NEXT: sltu a2, a0, a3
1243 ; ZVFHMIN-NEXT: addi a2, a2, -1
1244 ; ZVFHMIN-NEXT: and a2, a2, a3
1245 ; ZVFHMIN-NEXT: addi a3, sp, 16
1246 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1247 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
1248 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
1249 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
1250 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1251 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
1252 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1253 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
1254 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2
1255 ; ZVFHMIN-NEXT: # %bb.1:
1256 ; ZVFHMIN-NEXT: mv a0, a1
1257 ; ZVFHMIN-NEXT: .LBB45_2:
1258 ; ZVFHMIN-NEXT: addi a1, sp, 16
1259 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
1260 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1261 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
1262 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
1263 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1264 ; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16
1265 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1266 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
1267 ; ZVFHMIN-NEXT: csrr a0, vlenb
1268 ; ZVFHMIN-NEXT: slli a0, a0, 3
1269 ; ZVFHMIN-NEXT: add sp, sp, a0
1270 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
1271 ; ZVFHMIN-NEXT: addi sp, sp, 16
1272 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
1274 %v = call <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
1275 ret <vscale x 32 x half> %v
1278 define <vscale x 32 x half> @vfsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1279 ; ZVFH-LABEL: vfsub_vf_nxv32f16:
1281 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
1282 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
1285 ; ZVFHMIN-LABEL: vfsub_vf_nxv32f16:
1287 ; ZVFHMIN-NEXT: addi sp, sp, -16
1288 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
1289 ; ZVFHMIN-NEXT: csrr a1, vlenb
1290 ; ZVFHMIN-NEXT: slli a1, a1, 3
1291 ; ZVFHMIN-NEXT: mv a2, a1
1292 ; ZVFHMIN-NEXT: slli a1, a1, 1
1293 ; ZVFHMIN-NEXT: add a1, a1, a2
1294 ; ZVFHMIN-NEXT: sub sp, sp, a1
1295 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
1296 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
1297 ; ZVFHMIN-NEXT: vmv1r.v v7, v0
1298 ; ZVFHMIN-NEXT: vmv8r.v v16, v8
1299 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
1300 ; ZVFHMIN-NEXT: csrr a2, vlenb
1301 ; ZVFHMIN-NEXT: vmv.v.x v8, a1
1302 ; ZVFHMIN-NEXT: slli a1, a2, 1
1303 ; ZVFHMIN-NEXT: srli a2, a2, 2
1304 ; ZVFHMIN-NEXT: sub a3, a0, a1
1305 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
1306 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
1307 ; ZVFHMIN-NEXT: sltu a2, a0, a3
1308 ; ZVFHMIN-NEXT: addi a2, a2, -1
1309 ; ZVFHMIN-NEXT: and a2, a2, a3
1310 ; ZVFHMIN-NEXT: csrr a3, vlenb
1311 ; ZVFHMIN-NEXT: slli a3, a3, 3
1312 ; ZVFHMIN-NEXT: add a3, sp, a3
1313 ; ZVFHMIN-NEXT: addi a3, a3, 16
1314 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
1315 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
1316 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
1317 ; ZVFHMIN-NEXT: vmv4r.v v8, v16
1318 ; ZVFHMIN-NEXT: csrr a2, vlenb
1319 ; ZVFHMIN-NEXT: slli a2, a2, 4
1320 ; ZVFHMIN-NEXT: add a2, sp, a2
1321 ; ZVFHMIN-NEXT: addi a2, a2, 16
1322 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
1323 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
1324 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1325 ; ZVFHMIN-NEXT: vfsub.vv v24, v8, v24, v0.t
1326 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1327 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
1328 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB46_2
1329 ; ZVFHMIN-NEXT: # %bb.1:
1330 ; ZVFHMIN-NEXT: mv a0, a1
1331 ; ZVFHMIN-NEXT: .LBB46_2:
1332 ; ZVFHMIN-NEXT: vmv1r.v v0, v7
1333 ; ZVFHMIN-NEXT: csrr a1, vlenb
1334 ; ZVFHMIN-NEXT: slli a1, a1, 4
1335 ; ZVFHMIN-NEXT: add a1, sp, a1
1336 ; ZVFHMIN-NEXT: addi a1, a1, 16
1337 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
1338 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1339 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
1340 ; ZVFHMIN-NEXT: addi a0, sp, 16
1341 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
1342 ; ZVFHMIN-NEXT: csrr a0, vlenb
1343 ; ZVFHMIN-NEXT: slli a0, a0, 3
1344 ; ZVFHMIN-NEXT: add a0, sp, a0
1345 ; ZVFHMIN-NEXT: addi a0, a0, 16
1346 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1347 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
1348 ; ZVFHMIN-NEXT: vmv8r.v v24, v16
1349 ; ZVFHMIN-NEXT: addi a0, sp, 16
1350 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1351 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1352 ; ZVFHMIN-NEXT: vfsub.vv v24, v16, v24, v0.t
1353 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1354 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t
1355 ; ZVFHMIN-NEXT: csrr a0, vlenb
1356 ; ZVFHMIN-NEXT: slli a0, a0, 3
1357 ; ZVFHMIN-NEXT: mv a1, a0
1358 ; ZVFHMIN-NEXT: slli a0, a0, 1
1359 ; ZVFHMIN-NEXT: add a0, a0, a1
1360 ; ZVFHMIN-NEXT: add sp, sp, a0
1361 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
1362 ; ZVFHMIN-NEXT: addi sp, sp, 16
1363 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
1365 %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0
1366 %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
1367 %v = call <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
1368 ret <vscale x 32 x half> %v
1371 define <vscale x 32 x half> @vfsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, half %b, i32 zeroext %evl) {
1372 ; ZVFH-LABEL: vfsub_vf_nxv32f16_unmasked:
1374 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
1375 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
1378 ; ZVFHMIN-LABEL: vfsub_vf_nxv32f16_unmasked:
1380 ; ZVFHMIN-NEXT: addi sp, sp, -16
1381 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
1382 ; ZVFHMIN-NEXT: csrr a1, vlenb
1383 ; ZVFHMIN-NEXT: slli a1, a1, 3
1384 ; ZVFHMIN-NEXT: sub sp, sp, a1
1385 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1386 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
1387 ; ZVFHMIN-NEXT: csrr a2, vlenb
1388 ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma
1389 ; ZVFHMIN-NEXT: vmset.m v24
1390 ; ZVFHMIN-NEXT: vmv.v.x v16, a1
1391 ; ZVFHMIN-NEXT: slli a1, a2, 1
1392 ; ZVFHMIN-NEXT: srli a2, a2, 2
1393 ; ZVFHMIN-NEXT: sub a3, a0, a1
1394 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
1395 ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2
1396 ; ZVFHMIN-NEXT: sltu a2, a0, a3
1397 ; ZVFHMIN-NEXT: addi a2, a2, -1
1398 ; ZVFHMIN-NEXT: and a2, a2, a3
1399 ; ZVFHMIN-NEXT: addi a3, sp, 16
1400 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1401 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
1402 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
1403 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
1404 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1405 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
1406 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1407 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
1408 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB47_2
1409 ; ZVFHMIN-NEXT: # %bb.1:
1410 ; ZVFHMIN-NEXT: mv a0, a1
1411 ; ZVFHMIN-NEXT: .LBB47_2:
1412 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1413 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
1414 ; ZVFHMIN-NEXT: addi a0, sp, 16
1415 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
1416 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
1417 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1418 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24
1419 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1420 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
1421 ; ZVFHMIN-NEXT: csrr a0, vlenb
1422 ; ZVFHMIN-NEXT: slli a0, a0, 3
1423 ; ZVFHMIN-NEXT: add sp, sp, a0
1424 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
1425 ; ZVFHMIN-NEXT: addi sp, sp, 16
1426 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
1428 %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0
1429 %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
1430 %v = call <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
1431 ret <vscale x 32 x half> %v
; vp.fsub on <vscale x 1 x float>. f32 is natively supported by all RUN
; configurations, so each variant (masked/unmasked, vector-vector and
; vector-scalar) lowers to a single vsetvli (e32, mf2, AVL from %evl in a0)
; plus one vfsub.vv / vfsub.vf, with v0.t applied only in the masked cases.
1434 declare <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
1436 define <vscale x 1 x float> @vfsub_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1437 ; CHECK-LABEL: vfsub_vv_nxv1f32:
1439 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1440 ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t
1442 %v = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
1443 ret <vscale x 1 x float> %v
1446 define <vscale x 1 x float> @vfsub_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, <vscale x 1 x float> %b, i32 zeroext %evl) {
1447 ; CHECK-LABEL: vfsub_vv_nxv1f32_unmasked:
1449 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1450 ; CHECK-NEXT: vfsub.vv v8, v8, v9
1452 %v = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1453 ret <vscale x 1 x float> %v
1456 define <vscale x 1 x float> @vfsub_vf_nxv1f32(<vscale x 1 x float> %va, float %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1457 ; CHECK-LABEL: vfsub_vf_nxv1f32:
1459 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1460 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1462 %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
1463 %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
1464 %v = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl)
1465 ret <vscale x 1 x float> %v
1468 define <vscale x 1 x float> @vfsub_vf_nxv1f32_unmasked(<vscale x 1 x float> %va, float %b, i32 zeroext %evl) {
1469 ; CHECK-LABEL: vfsub_vf_nxv1f32_unmasked:
1471 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1472 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1474 %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
1475 %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
1476 %v = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1477 ret <vscale x 1 x float> %v
; vp.fsub on <vscale x 2 x float>. Same four variants as the nxv1f32 tests;
; the only expected difference is the LMUL in the vsetvli (e32, m1).
1480 declare <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
1482 define <vscale x 2 x float> @vfsub_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1483 ; CHECK-LABEL: vfsub_vv_nxv2f32:
1485 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1486 ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t
1488 %v = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
1489 ret <vscale x 2 x float> %v
1492 define <vscale x 2 x float> @vfsub_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, <vscale x 2 x float> %b, i32 zeroext %evl) {
1493 ; CHECK-LABEL: vfsub_vv_nxv2f32_unmasked:
1495 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1496 ; CHECK-NEXT: vfsub.vv v8, v8, v9
1498 %v = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1499 ret <vscale x 2 x float> %v
1502 define <vscale x 2 x float> @vfsub_vf_nxv2f32(<vscale x 2 x float> %va, float %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1503 ; CHECK-LABEL: vfsub_vf_nxv2f32:
1505 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1506 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1508 %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
1509 %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1510 %v = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl)
1511 ret <vscale x 2 x float> %v
1514 define <vscale x 2 x float> @vfsub_vf_nxv2f32_unmasked(<vscale x 2 x float> %va, float %b, i32 zeroext %evl) {
1515 ; CHECK-LABEL: vfsub_vf_nxv2f32_unmasked:
1517 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1518 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1520 %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
1521 %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1522 %v = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1523 ret <vscale x 2 x float> %v
; vp.fsub on <vscale x 4 x float> (e32, m2). The second vector operand now
; occupies a two-register group, so vv forms read it from v10 instead of v9.
1526 declare <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
1528 define <vscale x 4 x float> @vfsub_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1529 ; CHECK-LABEL: vfsub_vv_nxv4f32:
1531 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1532 ; CHECK-NEXT: vfsub.vv v8, v8, v10, v0.t
1534 %v = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
1535 ret <vscale x 4 x float> %v
1538 define <vscale x 4 x float> @vfsub_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, <vscale x 4 x float> %b, i32 zeroext %evl) {
1539 ; CHECK-LABEL: vfsub_vv_nxv4f32_unmasked:
1541 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1542 ; CHECK-NEXT: vfsub.vv v8, v8, v10
1544 %v = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1545 ret <vscale x 4 x float> %v
1548 define <vscale x 4 x float> @vfsub_vf_nxv4f32(<vscale x 4 x float> %va, float %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1549 ; CHECK-LABEL: vfsub_vf_nxv4f32:
1551 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1552 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1554 %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
1555 %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
1556 %v = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
1557 ret <vscale x 4 x float> %v
1560 define <vscale x 4 x float> @vfsub_vf_nxv4f32_unmasked(<vscale x 4 x float> %va, float %b, i32 zeroext %evl) {
1561 ; CHECK-LABEL: vfsub_vf_nxv4f32_unmasked:
1563 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1564 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1566 %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
1567 %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
1568 %v = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1569 ret <vscale x 4 x float> %v
; vp.fsub on <vscale x 8 x float> (e32, m4); vv forms take the second operand
; from the four-register group starting at v12.
1572 declare <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
1574 define <vscale x 8 x float> @vfsub_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1575 ; CHECK-LABEL: vfsub_vv_nxv8f32:
1577 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1578 ; CHECK-NEXT: vfsub.vv v8, v8, v12, v0.t
1580 %v = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
1581 ret <vscale x 8 x float> %v
1584 define <vscale x 8 x float> @vfsub_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, <vscale x 8 x float> %b, i32 zeroext %evl) {
1585 ; CHECK-LABEL: vfsub_vv_nxv8f32_unmasked:
1587 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1588 ; CHECK-NEXT: vfsub.vv v8, v8, v12
1590 %v = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1591 ret <vscale x 8 x float> %v
1594 define <vscale x 8 x float> @vfsub_vf_nxv8f32(<vscale x 8 x float> %va, float %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1595 ; CHECK-LABEL: vfsub_vf_nxv8f32:
1597 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1598 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1600 %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
1601 %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
1602 %v = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
1603 ret <vscale x 8 x float> %v
1606 define <vscale x 8 x float> @vfsub_vf_nxv8f32_unmasked(<vscale x 8 x float> %va, float %b, i32 zeroext %evl) {
1607 ; CHECK-LABEL: vfsub_vf_nxv8f32_unmasked:
1609 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1610 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1612 %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
1613 %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
1614 %v = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1615 ret <vscale x 8 x float> %v
; vp.fsub on <vscale x 16 x float> (e32, m8) — the largest f32 type that still
; fits in a single eight-register group, so no splitting/spilling is expected;
; vv forms read the second operand from v16.
1618 declare <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
1620 define <vscale x 16 x float> @vfsub_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1621 ; CHECK-LABEL: vfsub_vv_nxv16f32:
1623 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1624 ; CHECK-NEXT: vfsub.vv v8, v8, v16, v0.t
1626 %v = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
1627 ret <vscale x 16 x float> %v
1630 define <vscale x 16 x float> @vfsub_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, <vscale x 16 x float> %b, i32 zeroext %evl) {
1631 ; CHECK-LABEL: vfsub_vv_nxv16f32_unmasked:
1633 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1634 ; CHECK-NEXT: vfsub.vv v8, v8, v16
1636 %v = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1637 ret <vscale x 16 x float> %v
1640 define <vscale x 16 x float> @vfsub_vf_nxv16f32(<vscale x 16 x float> %va, float %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1641 ; CHECK-LABEL: vfsub_vf_nxv16f32:
1643 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1644 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1646 %elt.head = insertelement <vscale x 16 x float> poison, float %b, i32 0
1647 %vb = shufflevector <vscale x 16 x float> %elt.head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
1648 %v = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> %m, i32 %evl)
1649 ret <vscale x 16 x float> %v
1652 define <vscale x 16 x float> @vfsub_vf_nxv16f32_unmasked(<vscale x 16 x float> %va, float %b, i32 zeroext %evl) {
1653 ; CHECK-LABEL: vfsub_vf_nxv16f32_unmasked:
1655 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1656 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1658 %elt.head = insertelement <vscale x 16 x float> poison, float %b, i32 0
1659 %vb = shufflevector <vscale x 16 x float> %elt.head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
1660 %v = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1661 ret <vscale x 16 x float> %v
; vp.fsub on <vscale x 1 x double>. Switches the element width to e64 (m1);
; both RUN configurations enable +d, so these lower to single instructions.
1664 declare <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)
1666 define <vscale x 1 x double> @vfsub_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1667 ; CHECK-LABEL: vfsub_vv_nxv1f64:
1669 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1670 ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t
1672 %v = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 %evl)
1673 ret <vscale x 1 x double> %v
1676 define <vscale x 1 x double> @vfsub_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, <vscale x 1 x double> %b, i32 zeroext %evl) {
1677 ; CHECK-LABEL: vfsub_vv_nxv1f64_unmasked:
1679 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1680 ; CHECK-NEXT: vfsub.vv v8, v8, v9
1682 %v = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1683 ret <vscale x 1 x double> %v
1686 define <vscale x 1 x double> @vfsub_vf_nxv1f64(<vscale x 1 x double> %va, double %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1687 ; CHECK-LABEL: vfsub_vf_nxv1f64:
1689 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1690 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1692 %elt.head = insertelement <vscale x 1 x double> poison, double %b, i32 0
1693 %vb = shufflevector <vscale x 1 x double> %elt.head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
1694 %v = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl)
1695 ret <vscale x 1 x double> %v
1698 define <vscale x 1 x double> @vfsub_vf_nxv1f64_unmasked(<vscale x 1 x double> %va, double %b, i32 zeroext %evl) {
1699 ; CHECK-LABEL: vfsub_vf_nxv1f64_unmasked:
1701 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1702 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1704 %elt.head = insertelement <vscale x 1 x double> poison, double %b, i32 0
1705 %vb = shufflevector <vscale x 1 x double> %elt.head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
1706 %v = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1707 ret <vscale x 1 x double> %v
; vp.fsub on <vscale x 2 x double> (e64, m2); vv forms take the second operand
; from v10.
1710 declare <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
1712 define <vscale x 2 x double> @vfsub_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1713 ; CHECK-LABEL: vfsub_vv_nxv2f64:
1715 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1716 ; CHECK-NEXT: vfsub.vv v8, v8, v10, v0.t
1718 %v = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 %evl)
1719 ret <vscale x 2 x double> %v
1722 define <vscale x 2 x double> @vfsub_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, <vscale x 2 x double> %b, i32 zeroext %evl) {
1723 ; CHECK-LABEL: vfsub_vv_nxv2f64_unmasked:
1725 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1726 ; CHECK-NEXT: vfsub.vv v8, v8, v10
1728 %v = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1729 ret <vscale x 2 x double> %v
1732 define <vscale x 2 x double> @vfsub_vf_nxv2f64(<vscale x 2 x double> %va, double %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1733 ; CHECK-LABEL: vfsub_vf_nxv2f64:
1735 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1736 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1738 %elt.head = insertelement <vscale x 2 x double> poison, double %b, i32 0
1739 %vb = shufflevector <vscale x 2 x double> %elt.head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
1740 %v = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl)
1741 ret <vscale x 2 x double> %v
1744 define <vscale x 2 x double> @vfsub_vf_nxv2f64_unmasked(<vscale x 2 x double> %va, double %b, i32 zeroext %evl) {
1745 ; CHECK-LABEL: vfsub_vf_nxv2f64_unmasked:
1747 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1748 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1750 %elt.head = insertelement <vscale x 2 x double> poison, double %b, i32 0
1751 %vb = shufflevector <vscale x 2 x double> %elt.head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
1752 %v = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1753 ret <vscale x 2 x double> %v
; vp.fsub on <vscale x 4 x double> (e64, m4); vv forms take the second operand
; from v12.
1756 declare <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
1758 define <vscale x 4 x double> @vfsub_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1759 ; CHECK-LABEL: vfsub_vv_nxv4f64:
1761 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1762 ; CHECK-NEXT: vfsub.vv v8, v8, v12, v0.t
1764 %v = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 %evl)
1765 ret <vscale x 4 x double> %v
1768 define <vscale x 4 x double> @vfsub_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, <vscale x 4 x double> %b, i32 zeroext %evl) {
1769 ; CHECK-LABEL: vfsub_vv_nxv4f64_unmasked:
1771 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1772 ; CHECK-NEXT: vfsub.vv v8, v8, v12
1774 %v = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1775 ret <vscale x 4 x double> %v
1778 define <vscale x 4 x double> @vfsub_vf_nxv4f64(<vscale x 4 x double> %va, double %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1779 ; CHECK-LABEL: vfsub_vf_nxv4f64:
1781 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1782 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1784 %elt.head = insertelement <vscale x 4 x double> poison, double %b, i32 0
1785 %vb = shufflevector <vscale x 4 x double> %elt.head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
1786 %v = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl)
1787 ret <vscale x 4 x double> %v
1790 define <vscale x 4 x double> @vfsub_vf_nxv4f64_unmasked(<vscale x 4 x double> %va, double %b, i32 zeroext %evl) {
1791 ; CHECK-LABEL: vfsub_vf_nxv4f64_unmasked:
1793 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1794 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1796 %elt.head = insertelement <vscale x 4 x double> poison, double %b, i32 0
1797 %vb = shufflevector <vscale x 4 x double> %elt.head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
1798 %v = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1799 ret <vscale x 4 x double> %v
; vp.fsub on the non-power-of-two type <vscale x 7 x double>. The expected
; lowering is identical to the nxv8f64 masked vv test below (e64, m8, operand
; in v16) — i.e. the odd element count is handled within a full m8 group
; rather than by splitting.
1802 declare <vscale x 7 x double> @llvm.vp.fsub.nxv7f64(<vscale x 7 x double>, <vscale x 7 x double>, <vscale x 7 x i1>, i32)
1804 define <vscale x 7 x double> @vfsub_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x double> %b, <vscale x 7 x i1> %m, i32 zeroext %evl) {
1805 ; CHECK-LABEL: vfsub_vv_nxv7f64:
1807 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1808 ; CHECK-NEXT: vfsub.vv v8, v8, v16, v0.t
1810 %v = call <vscale x 7 x double> @llvm.vp.fsub.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x double> %b, <vscale x 7 x i1> %m, i32 %evl)
1811 ret <vscale x 7 x double> %v
; vp.fsub on <vscale x 8 x double> (e64, m8) — the largest f64 type covered;
; vv forms read the second operand from v16.
1814 declare <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
1816 define <vscale x 8 x double> @vfsub_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1817 ; CHECK-LABEL: vfsub_vv_nxv8f64:
1819 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1820 ; CHECK-NEXT: vfsub.vv v8, v8, v16, v0.t
1822 %v = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 %evl)
1823 ret <vscale x 8 x double> %v
1826 define <vscale x 8 x double> @vfsub_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, <vscale x 8 x double> %b, i32 zeroext %evl) {
1827 ; CHECK-LABEL: vfsub_vv_nxv8f64_unmasked:
1829 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1830 ; CHECK-NEXT: vfsub.vv v8, v8, v16
1832 %v = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1833 ret <vscale x 8 x double> %v
1836 define <vscale x 8 x double> @vfsub_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1837 ; CHECK-LABEL: vfsub_vf_nxv8f64:
1839 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1840 ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
1842 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
1843 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
1844 %v = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
1845 ret <vscale x 8 x double> %v
1848 define <vscale x 8 x double> @vfsub_vf_nxv8f64_unmasked(<vscale x 8 x double> %va, double %b, i32 zeroext %evl) {
1849 ; CHECK-LABEL: vfsub_vf_nxv8f64_unmasked:
1851 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1852 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
1854 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
1855 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
1856 %v = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1857 ret <vscale x 8 x double> %v