; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

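; Code generation for fixed-length llvm.vp.maximum. The intrinsic must
; propagate NaN, so each operand is first compared against itself
; (vmfeq.vv is false exactly on NaN lanes) and NaN lanes are merged
; across to the other operand, so the final vfmax sees NaN in both
; sources whenever either input lane was NaN. With Zvfhmin, which lacks
; f16 arithmetic, the f16 tests are additionally promoted to f32 around
; the same sequence with vfwcvt/vfncvt.
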
declare <2 x half> @llvm.vp.maximum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)

define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v8, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v11, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

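; The _unmasked variants pass an all-true mask, built as a splat of i1
; true via insertelement + shufflevector, so the same select sequence is
; emitted with unmasked compares and an unmasked vfmax.
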
define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v11, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v11, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %head = insertelement <2 x i1> poison, i1 true, i32 0
  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.maximum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32)

define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v8, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v11, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v11, v0
; ZVFHMIN-NEXT:    vmv.v.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v11, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %head = insertelement <4 x i1> poison, i1 true, i32 0
  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.maximum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32)

define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vfmax.vv v8, v8, v11, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v14, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv.v.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT:    vmerge.vvm v14, v10, v12, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %head = insertelement <8 x i1> poison, i1 true, i32 0
  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.maximum.v16f16(<16 x half>, <16 x half>, <16 x i1>, i32)

define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vmv1r.v v12, v0
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v0, v13
; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT:    vmv1r.v v0, v12
; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
; ZVFH-NEXT:    vmv1r.v v0, v13
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vmv1r.v v0, v12
; ZVFH-NEXT:    vfmax.vv v8, v8, v14, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vmv1r.v v12, v0
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v20, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v12
; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v12
; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v24, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i32 zeroext %evl) {
; ZVFH-LABEL: vfmax_vv_v16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT:    vmv1r.v v0, v12
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v14
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16
; ZVFHMIN-NEXT:    vmerge.vvm v20, v12, v16, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v20
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %head = insertelement <16 x i1> poison, i1 true, i32 0
  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

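; For f32 and f64 element types the ZVFH and ZVFHMIN configurations
; lower identically, so the remaining tests share the common CHECK
; prefix.
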
declare <2 x float> @llvm.vp.maximum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i32)

define <2 x float> @vfmax_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %head = insertelement <2 x i1> poison, i1 true, i32 0
  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.maximum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)

define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv.v.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %head = insertelement <4 x i1> poison, i1 true, i32 0
  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.maximum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)

define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v13
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v13
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v12, v10, v10
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v14
; CHECK-NEXT:    ret
  %head = insertelement <8 x i1> poison, i1 true, i32 0
  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.maximum.v16f32(<16 x float>, <16 x float>, <16 x i1>, i32)

define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v16, v12, v12
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v20
; CHECK-NEXT:    ret
  %head = insertelement <16 x i1> poison, i1 true, i32 0
  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.maximum.v2f64(<2 x double>, <2 x double>, <2 x i1>, i32)

define <2 x double> @vfmax_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmfeq.vv v0, v9, v9, v0.t
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vfmax.vv v8, v8, v11, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfmax_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv.v.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %head = insertelement <2 x i1> poison, i1 true, i32 0
  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.maximum.v4f64(<4 x double>, <4 x double>, <4 x i1>, i32)

define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v13
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v13
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v12, v10, v10
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v14
; CHECK-NEXT:    ret
  %head = insertelement <4 x i1> poison, i1 true, i32 0
  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.maximum.v8f64(<8 x double>, <8 x double>, <8 x i1>, i32)

define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v16, v12, v12
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v20
; CHECK-NEXT:    ret
  %head = insertelement <8 x i1> poison, i1 true, i32 0
  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

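; v16f64 operands occupy full m8 register groups, so the first merge
; result no longer fits in registers and is spilled to an 8*vlenb stack
; slot across the second compare-and-merge.
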
declare <16 x double> @llvm.vp.maximum.v16f64(<16 x double>, <16 x double>, <16 x i1>, i32)

define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vmv1r.v v1, v0
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v1, v16, v16
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v24
; CHECK-NEXT:    ret
  %head = insertelement <16 x i1> poison, i1 true, i32 0
  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

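; v32f64 does not fit in a single m8 register group, so the lowering
; splits the operation in two: the low 16 elements run with
; EVL = min(evl, 16) and the high 16 with EVL = evl - 16 clamped to
; zero, with both operand halves spilled around the first half's
; computation.
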
declare <32 x double> @llvm.vp.maximum.v32f64(<32 x double>, <32 x double>, <32 x i1>, i32)

define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    vmv1r.v v2, v0
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v1, v0, 2
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    mv a0, a2
; CHECK-NEXT:    bltu a2, a1, .LBB24_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB24_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v0, v2
; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v26
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v2
; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
; CHECK-NEXT:    vmv1r.v v0, v26
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v2
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a2, -16
; CHECK-NEXT:    sltu a1, a2, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v16, v16, v8, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.maximum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) {
; CHECK-LABEL: vfmax_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    mv a0, a2
; CHECK-NEXT:    bltu a2, a1, .LBB25_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB25_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v1, v24, v24
; CHECK-NEXT:    vmv8r.v v16, v24
; CHECK-NEXT:    vmerge.vvm v24, v8, v24, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v8, v8, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a2, -16
; CHECK-NEXT:    sltu a1, a2, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmfeq.vv v0, v16, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmfeq.vv v1, v8, v8
; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
; CHECK-NEXT:    vfmax.vv v16, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %head = insertelement <32 x i1> poison, i1 true, i32 0
  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
  %v = call <32 x double> @llvm.vp.maximum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}