; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
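;
; This file tests the @llvm.vp.maximum.* intrinsics (the VP form of
; llvm.maximum, taking a mask and an explicit vector length) on scalable
; f16, f32 and f64 vectors. The ZVFH prefixes check native f16 codegen,
; while the ZVFHMIN prefixes check the fallback that widens f16 to f32
; with vfwcvt.f.f.v/vfncvt.f.f.w around the f32 compare/merge/vfmax sequence.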
declare <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv1f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vfmax_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv1f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv1f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x half> %v
}
declare <vscale x 2 x half> @llvm.vp.maximum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv2f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 2 x half> @llvm.vp.maximum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfmax_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv2f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv2f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 2 x half> @llvm.vp.maximum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %v
}
declare <vscale x 4 x half> @llvm.vp.maximum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv4f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 4 x half> @llvm.vp.maximum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv4f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv.v.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv4f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 4 x half> @llvm.vp.maximum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x half> %v
}
declare <vscale x 8 x half> @llvm.vp.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t
; ZVFH-NEXT: vmv1r.v v0, v13
; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t
; ZVFH-NEXT: vmv1r.v v0, v13
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vfmax.vv v8, v8, v14, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v12, v0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vfmax.vv v12, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 8 x half> @llvm.vp.maximum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv8f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v12, v10, v10
; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v14
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv8f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16
; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0
; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 8 x half> @llvm.vp.maximum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x half> %v
}
declare <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vmfeq.vv v17, v8, v8, v0.t
; ZVFH-NEXT: vmv1r.v v0, v17
; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0
; ZVFH-NEXT: vmv1r.v v0, v16
; ZVFH-NEXT: vmfeq.vv v17, v12, v12, v0.t
; ZVFH-NEXT: vmv1r.v v0, v17
; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v16
; ZVFH-NEXT: vfmax.vv v8, v8, v20, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT: vmv1r.v v7, v0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x half> %v
}

define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv16f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v16, v12, v12
; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0
; ZVFH-NEXT: vmv1r.v v0, v16
; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v20
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv16f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24
; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x half> %v
}
declare <vscale x 32 x half> @llvm.vp.maximum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x i1>, i32)

define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv32f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: addi sp, sp, -16
; ZVFH-NEXT: .cfi_def_cfa_offset 16
; ZVFH-NEXT: csrr a1, vlenb
; ZVFH-NEXT: slli a1, a1, 3
; ZVFH-NEXT: sub sp, sp, a1
; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFH-NEXT: vmv1r.v v7, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v25, v8, v8, v0.t
; ZVFH-NEXT: vmv1r.v v0, v25
; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0
; ZVFH-NEXT: addi a0, sp, 16
; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT: vmv1r.v v0, v7
; ZVFH-NEXT: vmfeq.vv v25, v16, v16, v0.t
; ZVFH-NEXT: vmv1r.v v0, v25
; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v7
; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT: vfmax.vv v8, v8, v16, v0.t
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 3
; ZVFH-NEXT: add sp, sp, a0
; ZVFH-NEXT: addi sp, sp, 16
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: li a2, 34
; ZVFHMIN-NEXT: mul a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
; ZVFHMIN-NEXT: vmv1r.v v24, v0
; ZVFHMIN-NEXT: vmv8r.v v0, v8
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a1, a2, 1
; ZVFHMIN-NEXT: sub a3, a0, a1
; ZVFHMIN-NEXT: sltu a4, a0, a3
; ZVFHMIN-NEXT: addi a4, a4, -1
; ZVFHMIN-NEXT: and a3, a4, a3
; ZVFHMIN-NEXT: srli a2, a2, 2
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v8, v24, a2
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t
; ZVFHMIN-NEXT: vmv8r.v v0, v16
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: li a4, 24
; ZVFHMIN-NEXT: mul a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
; ZVFHMIN-NEXT: .LBB10_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: li a2, 24
; ZVFHMIN-NEXT: mul a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v24, v24, v16, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 34
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 32 x half> @llvm.vp.maximum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x half> %v
}
define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_nxv32f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v7, v16, v16
; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0
; ZVFH-NEXT: vmv1r.v v0, v7
; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v24
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_nxv32f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a1, a2, 1
; ZVFHMIN-NEXT: sub a3, a0, a1
; ZVFHMIN-NEXT: sltu a4, a0, a3
; ZVFHMIN-NEXT: addi a4, a4, -1
; ZVFHMIN-NEXT: and a3, a4, a3
; ZVFHMIN-NEXT: srli a2, a2, 2
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma
; ZVFHMIN-NEXT: vmset.m v24
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t
; ZVFHMIN-NEXT: vmv4r.v v8, v16
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: li a4, 24
; ZVFHMIN-NEXT: mul a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16
; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
; ZVFHMIN-NEXT: .LBB11_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: li a2, 24
; ZVFHMIN-NEXT: mul a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16
; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v3
; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0
; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v16
; ZVFHMIN-NEXT: vmv8r.v v8, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 32 x half> @llvm.vp.maximum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x half> %v
}
declare <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @vfmax_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmax_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv1f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
  %v = call <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x float> %v
}

declare <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfmax_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmax_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
  %v = call <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x float> %v
}
declare <vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vfmax_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmax_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: ret
  %v = call <vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfmax_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmax_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v20
; CHECK-NEXT: ret
  %v = call <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x float> %v
}
declare <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfmax_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmax_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv1f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
  %v = call <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmax_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmax_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: ret
  %v = call <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x double> %v
}
declare <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmax_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t
; CHECK-NEXT: ret
  %v = call <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmax_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v20
; CHECK-NEXT: ret
  %v = call <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmax_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %v = call <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmax_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: ret
  %v = call <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x double> %v
}
declare <vscale x 16 x double> @llvm.vp.maximum.nxv16f64(<vscale x 16 x double>, <vscale x 16 x double>, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x double> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 36
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x24, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 36 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a3, a1, 3
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vl8re64.v v24, (a3)
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: li a4, 27
; CHECK-NEXT: mul a3, a3, a4
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: srli a3, a1, 3
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: li a5, 18
; CHECK-NEXT: mul a4, a4, a5
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v7, v0, a3
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: sub a3, a2, a1
; CHECK-NEXT: sltu a4, a2, a3
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: slli a4, a4, 1
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: li a4, 27
; CHECK-NEXT: mul a3, a3, a4
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v24, v16, v24, v0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: li a4, 10
; CHECK-NEXT: mul a3, a3, a4
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: li a4, 27
; CHECK-NEXT: mul a3, a3, a4
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v16, v24, v24, v0.t
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs1r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v0, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a3, 19
; CHECK-NEXT: mul a0, a0, a3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a3, 10
; CHECK-NEXT: mul a0, a0, a3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a3, 10
; CHECK-NEXT: mul a0, a0, a3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a2, a1, .LBB28_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB28_2:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 18
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 19
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 27
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 27
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 27
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 10
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 36
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %v = call <vscale x 16 x double> @llvm.vp.maximum.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x double> %vb, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}
define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, <vscale x 16 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a3, a1, 3
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vl8re64.v v24, (a3)
; CHECK-NEXT: sub a3, a2, a1
; CHECK-NEXT: sltu a4, a2, a3
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a3, a4, a3
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: vmfeq.vv v7, v24, v24
; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
; CHECK-NEXT: vfmax.vv v8, v16, v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a2, a1, .LBB29_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB29_2:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v7, v8, v8
; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %v = call <vscale x 16 x double> @llvm.vp.maximum.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x double> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}