; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
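
; llvm.vp.maximum propagates NaNs (IEEE 754-2019 maximum), whereas the RVV
; vfmax.vv instruction returns the non-NaN operand (maximumNumber). The checks
; below therefore expect the NaN lanes of each operand to be merged into the
; other operand first: vmfeq.vv of a register against itself selects its
; non-NaN lanes, and vmerge.vvm builds two temporaries that both hold the NaN
; wherever either input was NaN, so vfmax.vv of the temporaries returns NaN
; there. The masked variants additionally save the incoming mask (vmv1r.v)
; because v0 is clobbered by the intermediate compares. Under +zvfhmin (without
; +zvfh), half vectors are widened to f32 with vfwcvt.f.f.v, processed at e32,
; and narrowed back with vfncvt.f.f.w.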

declare <2 x half> @llvm.vp.maximum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)

define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v2f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_v2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
  %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v2f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
  %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.maximum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32)

define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v4f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_v4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
  %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v4f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
  %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.maximum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32)

define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
  %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v8f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v10, v9, v9
; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT: vmv.v.v v0, v10
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v11
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0
; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
  %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.maximum.v16f16(<16 x half>, <16 x half>, <16 x i1>, i32)

define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t
; ZVFH-NEXT: vmv1r.v v0, v13
; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t
; ZVFH-NEXT: vmv1r.v v0, v13
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vfmax.vv v8, v8, v14, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vmv1r.v v12, v0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vfmax.vv v12, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
  %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v16f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v12, v10, v10
; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT: vfmax.vv v8, v8, v14
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16
; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0
; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
  %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

declare <2 x float> @llvm.vp.maximum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i32)

define <2 x float> @vfmax_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT: ret
  %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
  %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.maximum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)

define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT: ret
  %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
  %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.maximum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)

define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t
; CHECK-NEXT: ret
  %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: ret
  %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.maximum.v16f32(<16 x float>, <16 x float>, <16 x i1>, i32)

define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t
; CHECK-NEXT: ret
  %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v20
; CHECK-NEXT: ret
  %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.maximum.v2f64(<2 x double>, <2 x double>, <2 x i1>, i32)

define <2 x double> @vfmax_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT: ret
  %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfmax_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v10, v9, v9
; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v11
; CHECK-NEXT: ret
  %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.maximum.v4f64(<4 x double>, <4 x double>, <4 x i1>, i32)

define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t
; CHECK-NEXT: ret
  %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v12, v10, v10
; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v14
; CHECK-NEXT: ret
  %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.maximum.v8f64(<8 x double>, <8 x double>, <8 x i1>, i32)

define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t
; CHECK-NEXT: ret
  %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v16, v12, v12
; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v20
; CHECK-NEXT: ret
  %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

declare <16 x double> @llvm.vp.maximum.v16f64(<16 x double>, <16 x double>, <16 x i1>, i32)
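
; With m8 operands (16 x double) there is no spare register group for the
; first merge result in the masked case, so it is expected to be spilled to
; the stack with vs8r.v and reloaded with vl8r.v before the final masked
; vfmax.vv.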

define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: ret
  %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

declare <32 x double> @llvm.vp.maximum.v32f64(<32 x double>, <32 x double>, <32 x i1>, i32)
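
; A 32 x double operand does not fit in one m8 register group: %vb is loaded
; from memory (vle64.v at a0 and a0+128), the operation is split into two
; 16-element halves with the EVL clamped to 16 for the low half and set to
; max(EVL-16, 0) for the high half, and the masked form extracts the upper
; mask bits with vslidedown.vi.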

define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v6, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB24_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB24_2:
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %v = call <32 x double> @llvm.vp.maximum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB25_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v24, v24
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v7, v8, v8
; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
; CHECK-NEXT: vfmax.vv v16, v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %v = call <32 x double> @llvm.vp.maximum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}