; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)

define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
; ZVFH-LABEL: vfmax_nxv1f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_nxv1f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT:    vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
  ret <vscale x 1 x half> %v
}
declare <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)

define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; ZVFH-LABEL: vfmax_nxv2f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_nxv2f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT:    vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT:    vmv.v.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %v
}
declare <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)

define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; ZVFH-LABEL: vfmax_nxv4f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv.v.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_nxv4f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT:    vmerge.vvm v14, v12, v10, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %v
}
declare <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)

define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; ZVFH-LABEL: vfmax_nxv8f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT:    vmv1r.v v0, v12
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v14
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_nxv8f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT:    vmerge.vvm v20, v16, v12, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v20
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %v
}
declare <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)

define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; ZVFH-LABEL: vfmax_nxv16f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v16, v12, v12
; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
; ZVFH-NEXT:    vmv1r.v v0, v16
; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v20
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_nxv16f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24
; ZVFHMIN-NEXT:    vmfeq.vv v1, v16, v16
; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv1r.v v0, v1
; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
  ret <vscale x 16 x half> %v
}
declare <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>)

define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
; ZVFH-LABEL: vfmax_nxv32f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v1, v16, v16
; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
; ZVFH-NEXT:    vmv1r.v v0, v1
; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v24
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_nxv32f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT:    vmfeq.vv v1, v24, v24
; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v1
; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
; ZVFHMIN-NEXT:    vfmax.vv v24, v8, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT:    vmfeq.vv v1, v8, v8
; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v8, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv1r.v v0, v1
; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v16, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
  ret <vscale x 32 x half> %v
}
declare <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>)

define <vscale x 1 x float> @vfmax_nxv1f32_vv(<vscale x 1 x float> %a, <vscale x 1 x float> %b) {
; CHECK-LABEL: vfmax_nxv1f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x float> %b)
  ret <vscale x 1 x float> %v
}
declare <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)

define <vscale x 2 x float> @vfmax_nxv2f32_vv(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: vfmax_nxv2f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv.v.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %v
}
declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)

define <vscale x 4 x float> @vfmax_nxv4f32_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: vfmax_nxv4f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v12, v10, v10
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v14
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %v
}
declare <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)

define <vscale x 8 x float> @vfmax_nxv8f32_vv(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
; CHECK-LABEL: vfmax_nxv8f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v16, v12, v12
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v20
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
  ret <vscale x 8 x float> %v
}
declare <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>)

define <vscale x 16 x float> @vfmax_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
; CHECK-LABEL: vfmax_nxv16f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v1, v16, v16
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v24
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
  ret <vscale x 16 x float> %v
}
declare <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>)

define <vscale x 1 x double> @vfmax_nxv1f64_vv(<vscale x 1 x double> %a, <vscale x 1 x double> %b) {
; CHECK-LABEL: vfmax_nxv1f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv.v.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b)
  ret <vscale x 1 x double> %v
}
declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 2 x double> @vfmax_nxv2f64_vv(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: vfmax_nxv2f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v12, v10, v10
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v14
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %v
}
declare <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)

define <vscale x 4 x double> @vfmax_nxv4f64_vv(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: vfmax_nxv4f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v16, v12, v12
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v20
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
  ret <vscale x 4 x double> %v
}
declare <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>)

define <vscale x 8 x double> @vfmax_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
; CHECK-LABEL: vfmax_nxv8f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v1, v16, v16
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v1
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v24
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
  ret <vscale x 8 x double> %v
}
define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
; ZVFH-LABEL: vfmax_nxv1f16_vv_nnan:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmax.vv v8, v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnan:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call nnan <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
  ret <vscale x 1 x half> %v
}
define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
; ZVFH-LABEL: vfmax_nxv1f16_vv_nnana:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmv1r.v v10, v9
; ZVFH-NEXT:    vfadd.vv v10, v8, v8, v0.t
; ZVFH-NEXT:    vfmax.vv v8, v10, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnana:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v9, v9
; ZVFHMIN-NEXT:    vmerge.vvm v10, v11, v9, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %c = fadd nnan <vscale x 1 x half> %a, %a
  %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
  ret <vscale x 1 x half> %v
}
485 define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
486 ; ZVFH-LABEL: vfmax_nxv1f16_vv_nnanb:
488 ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
489 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8
490 ; ZVFH-NEXT: vmv1r.v v10, v8
491 ; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t
492 ; ZVFH-NEXT: vfmax.vv v8, v8, v10
495 ; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnanb:
497 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
498 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
499 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
500 ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10
501 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
502 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
503 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
504 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
505 ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
506 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
507 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
508 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
509 ; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
510 ; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0
511 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
512 ; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0
513 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
514 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
515 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
517 %c = fadd nnan <vscale x 1 x half> %b, %b
518 %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
519 ret <vscale x 1 x half> %v