1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
5 ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
6 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
8 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
10 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
11 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
12 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
13 ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
14 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
15 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
16 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
17 ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; llvm.maximum on <vscale x 1 x half>: @vfmax_nxv1f16_vv forwards %a and %b to
; the intrinsic and returns the result.
; The ZVFH checks expect two vmfeq.vv self-compares, two vmerge.vvm and one
; vfmax.vv at e16/mf4. The ZVFHMIN checks expect the same sequence at e32/mf2,
; preceded by vfwcvt.f.f.v on both operands and followed by vfncvt.f.f.w.
19 declare <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
21 define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
22 ; ZVFH-LABEL: vfmax_nxv1f16_vv:
24 ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
25 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8
26 ; ZVFH-NEXT: vmfeq.vv v10, v9, v9
27 ; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
28 ; ZVFH-NEXT: vmv1r.v v0, v10
29 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
30 ; ZVFH-NEXT: vfmax.vv v8, v8, v11
33 ; ZVFHMIN-LABEL: vfmax_nxv1f16_vv:
35 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
36 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
37 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
38 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
39 ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
40 ; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
41 ; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
42 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
43 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
44 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
45 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
46 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
48 %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
49 ret <vscale x 1 x half> %v
; llvm.maximum on <vscale x 2 x half>: @vfmax_nxv2f16_vv forwards %a and %b to
; the intrinsic and returns the result.
; The ZVFH checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence at
; e16/mf2. The ZVFHMIN checks widen both operands with vfwcvt.f.f.v, run the
; sequence at e32/m1 (the mask copy there is vmv.v.v rather than vmv1r.v), and
; narrow the result with vfncvt.f.f.w.
52 declare <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
54 define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
55 ; ZVFH-LABEL: vfmax_nxv2f16_vv:
57 ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
58 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8
59 ; ZVFH-NEXT: vmfeq.vv v10, v9, v9
60 ; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
61 ; ZVFH-NEXT: vmv1r.v v0, v10
62 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
63 ; ZVFH-NEXT: vfmax.vv v8, v8, v11
66 ; ZVFHMIN-LABEL: vfmax_nxv2f16_vv:
68 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
69 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
70 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
71 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
72 ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
73 ; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
74 ; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
75 ; ZVFHMIN-NEXT: vmv.v.v v0, v8
76 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
77 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
78 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
79 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
81 %v = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
82 ret <vscale x 2 x half> %v
; llvm.maximum on <vscale x 4 x half>: @vfmax_nxv4f16_vv forwards %a and %b to
; the intrinsic and returns the result.
; The ZVFH checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence at
; e16/m1. The ZVFHMIN checks widen both operands with vfwcvt.f.f.v, run the
; sequence at e32/m2, and narrow the result with vfncvt.f.f.w.
85 declare <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
87 define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
88 ; ZVFH-LABEL: vfmax_nxv4f16_vv:
90 ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
91 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8
92 ; ZVFH-NEXT: vmfeq.vv v10, v9, v9
93 ; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
94 ; ZVFH-NEXT: vmv.v.v v0, v10
95 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
96 ; ZVFH-NEXT: vfmax.vv v8, v8, v11
99 ; ZVFHMIN-LABEL: vfmax_nxv4f16_vv:
101 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
102 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
103 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
104 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
105 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
106 ; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
107 ; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0
108 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
109 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
110 ; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14
111 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
112 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
114 %v = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
115 ret <vscale x 4 x half> %v
; llvm.maximum on <vscale x 8 x half>: @vfmax_nxv8f16_vv forwards %a and %b to
; the intrinsic and returns the result.
; The ZVFH checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence at
; e16/m2. The ZVFHMIN checks widen both operands with vfwcvt.f.f.v, run the
; sequence at e32/m4, and narrow the result with vfncvt.f.f.w.
118 declare <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
120 define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
121 ; ZVFH-LABEL: vfmax_nxv8f16_vv:
123 ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
124 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8
125 ; ZVFH-NEXT: vmfeq.vv v12, v10, v10
126 ; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
127 ; ZVFH-NEXT: vmv1r.v v0, v12
128 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
129 ; ZVFH-NEXT: vfmax.vv v8, v8, v14
132 ; ZVFHMIN-LABEL: vfmax_nxv8f16_vv:
134 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
135 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
136 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
137 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
138 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
139 ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
140 ; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0
141 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
142 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
143 ; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20
144 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
145 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
147 %v = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
148 ret <vscale x 8 x half> %v
; llvm.maximum on <vscale x 16 x half>: @vfmax_nxv16f16_vv forwards %a and %b
; to the intrinsic and returns the result.
; The ZVFH checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence at
; e16/m4. The ZVFHMIN checks run the widened sequence at e32/m8 and additionally
; expect a stack frame sized from vlenb (csrr/slli/sub on sp), a .cfi_escape
; describing it, and one vs8r.v / vl8r.v spill-reload pair around the second
; vmerge.vvm. This function is not marked nounwind, and CFI directives appear in
; its checks.
151 declare <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
153 define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
154 ; ZVFH-LABEL: vfmax_nxv16f16_vv:
156 ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
157 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8
158 ; ZVFH-NEXT: vmfeq.vv v16, v12, v12
159 ; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0
160 ; ZVFH-NEXT: vmv1r.v v0, v16
161 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0
162 ; ZVFH-NEXT: vfmax.vv v8, v8, v20
165 ; ZVFHMIN-LABEL: vfmax_nxv16f16_vv:
167 ; ZVFHMIN-NEXT: addi sp, sp, -16
168 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
169 ; ZVFHMIN-NEXT: csrr a0, vlenb
170 ; ZVFHMIN-NEXT: slli a0, a0, 3
171 ; ZVFHMIN-NEXT: sub sp, sp, a0
172 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
173 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
174 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
175 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
176 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
177 ; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24
178 ; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16
179 ; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0
180 ; ZVFHMIN-NEXT: addi a0, sp, 16
181 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
182 ; ZVFHMIN-NEXT: vmv1r.v v0, v7
183 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0
184 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
185 ; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16
186 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
187 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
188 ; ZVFHMIN-NEXT: csrr a0, vlenb
189 ; ZVFHMIN-NEXT: slli a0, a0, 3
190 ; ZVFHMIN-NEXT: add sp, sp, a0
191 ; ZVFHMIN-NEXT: addi sp, sp, 16
193 %v = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
194 ret <vscale x 16 x half> %v
; llvm.maximum on <vscale x 32 x half>: @vfmax_nxv32f16_vv forwards %a and %b
; to the intrinsic and returns the result. The function is marked nounwind, and
; no .cfi directives appear in its checks.
; The ZVFH checks expect a single vmfeq.vv / vmerge.vvm / vfmax.vv sequence at
; e16/m8. The ZVFHMIN checks expect two separate e32/m8 sequences, each fed by
; vfwcvt.f.f.v executed at e16/m4, with vs8r.v / vl8r.v spills and reloads
; between them, and two vfncvt.f.f.w instructions (into v8 and v12) at the end.
197 declare <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>)
199 define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
200 ; ZVFH-LABEL: vfmax_nxv32f16_vv:
202 ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
203 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8
204 ; ZVFH-NEXT: vmfeq.vv v7, v16, v16
205 ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0
206 ; ZVFH-NEXT: vmv1r.v v0, v7
207 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0
208 ; ZVFH-NEXT: vfmax.vv v8, v8, v24
211 ; ZVFHMIN-LABEL: vfmax_nxv32f16_vv:
213 ; ZVFHMIN-NEXT: addi sp, sp, -16
214 ; ZVFHMIN-NEXT: csrr a0, vlenb
215 ; ZVFHMIN-NEXT: slli a0, a0, 4
216 ; ZVFHMIN-NEXT: sub sp, sp, a0
217 ; ZVFHMIN-NEXT: addi a0, sp, 16
218 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
219 ; ZVFHMIN-NEXT: vmv8r.v v0, v8
220 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
221 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
222 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
223 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
224 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
225 ; ZVFHMIN-NEXT: vmfeq.vv v3, v24, v24
226 ; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0
227 ; ZVFHMIN-NEXT: vmv1r.v v0, v3
228 ; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0
229 ; ZVFHMIN-NEXT: vfmax.vv v8, v8, v16
230 ; ZVFHMIN-NEXT: csrr a0, vlenb
231 ; ZVFHMIN-NEXT: slli a0, a0, 3
232 ; ZVFHMIN-NEXT: add a0, sp, a0
233 ; ZVFHMIN-NEXT: addi a0, a0, 16
234 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
235 ; ZVFHMIN-NEXT: addi a0, sp, 16
236 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
237 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
238 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
239 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
240 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
241 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
242 ; ZVFHMIN-NEXT: vmfeq.vv v7, v8, v8
243 ; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v8, v0
244 ; ZVFHMIN-NEXT: vmv1r.v v0, v7
245 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
246 ; ZVFHMIN-NEXT: vfmax.vv v16, v8, v24
247 ; ZVFHMIN-NEXT: csrr a0, vlenb
248 ; ZVFHMIN-NEXT: slli a0, a0, 3
249 ; ZVFHMIN-NEXT: add a0, sp, a0
250 ; ZVFHMIN-NEXT: addi a0, a0, 16
251 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
252 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
253 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
254 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
255 ; ZVFHMIN-NEXT: csrr a0, vlenb
256 ; ZVFHMIN-NEXT: slli a0, a0, 4
257 ; ZVFHMIN-NEXT: add sp, sp, a0
258 ; ZVFHMIN-NEXT: addi sp, sp, 16
260 %v = call <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
261 ret <vscale x 32 x half> %v
; llvm.maximum on <vscale x 1 x float>: @vfmax_nxv1f32_vv forwards %a and %b to
; the intrinsic and returns the result.
; A single set of checks under the common prefix covers every configuration:
; two vmfeq.vv self-compares, two vmerge.vvm and one vfmax.vv at e32/mf2.
264 declare <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>)
266 define <vscale x 1 x float> @vfmax_nxv1f32_vv(<vscale x 1 x float> %a, <vscale x 1 x float> %b) {
267 ; CHECK-LABEL: vfmax_nxv1f32_vv:
269 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
270 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
271 ; CHECK-NEXT: vmfeq.vv v10, v9, v9
272 ; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
273 ; CHECK-NEXT: vmv1r.v v0, v10
274 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
275 ; CHECK-NEXT: vfmax.vv v8, v8, v11
277 %v = call <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x float> %b)
278 ret <vscale x 1 x float> %v
; llvm.maximum on <vscale x 2 x float>: @vfmax_nxv2f32_vv forwards %a and %b to
; the intrinsic and returns the result.
; The common-prefix checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence
; at e32/m1, with the mask copied into v0 by vmv.v.v.
281 declare <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
283 define <vscale x 2 x float> @vfmax_nxv2f32_vv(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
284 ; CHECK-LABEL: vfmax_nxv2f32_vv:
286 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
287 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
288 ; CHECK-NEXT: vmfeq.vv v10, v9, v9
289 ; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
290 ; CHECK-NEXT: vmv.v.v v0, v10
291 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
292 ; CHECK-NEXT: vfmax.vv v8, v8, v11
294 %v = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
295 ret <vscale x 2 x float> %v
; llvm.maximum on <vscale x 4 x float>: @vfmax_nxv4f32_vv forwards %a and %b to
; the intrinsic and returns the result.
; The common-prefix checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence
; at e32/m2.
298 declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
300 define <vscale x 4 x float> @vfmax_nxv4f32_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
301 ; CHECK-LABEL: vfmax_nxv4f32_vv:
303 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
304 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
305 ; CHECK-NEXT: vmfeq.vv v12, v10, v10
306 ; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
307 ; CHECK-NEXT: vmv1r.v v0, v12
308 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
309 ; CHECK-NEXT: vfmax.vv v8, v8, v14
311 %v = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
312 ret <vscale x 4 x float> %v
; llvm.maximum on <vscale x 8 x float>: @vfmax_nxv8f32_vv forwards %a and %b to
; the intrinsic and returns the result.
; The common-prefix checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence
; at e32/m4.
315 declare <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
317 define <vscale x 8 x float> @vfmax_nxv8f32_vv(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
318 ; CHECK-LABEL: vfmax_nxv8f32_vv:
320 ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
321 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
322 ; CHECK-NEXT: vmfeq.vv v16, v12, v12
323 ; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
324 ; CHECK-NEXT: vmv1r.v v0, v16
325 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
326 ; CHECK-NEXT: vfmax.vv v8, v8, v20
328 %v = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
329 ret <vscale x 8 x float> %v
; llvm.maximum on <vscale x 16 x float>: @vfmax_nxv16f32_vv forwards %a and %b
; to the intrinsic and returns the result. The function is marked nounwind.
; The common-prefix checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence
; at e32/m8, with the second compare result held in v7 and no stack traffic.
332 declare <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>)
334 define <vscale x 16 x float> @vfmax_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
335 ; CHECK-LABEL: vfmax_nxv16f32_vv:
337 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
338 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
339 ; CHECK-NEXT: vmfeq.vv v7, v16, v16
340 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
341 ; CHECK-NEXT: vmv1r.v v0, v7
342 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
343 ; CHECK-NEXT: vfmax.vv v8, v8, v24
345 %v = call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
346 ret <vscale x 16 x float> %v
; llvm.maximum on <vscale x 1 x double>: @vfmax_nxv1f64_vv forwards %a and %b
; to the intrinsic and returns the result.
; The common-prefix checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence
; at e64/m1, with the mask copied into v0 by vmv.v.v.
349 declare <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>)
351 define <vscale x 1 x double> @vfmax_nxv1f64_vv(<vscale x 1 x double> %a, <vscale x 1 x double> %b) {
352 ; CHECK-LABEL: vfmax_nxv1f64_vv:
354 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
355 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
356 ; CHECK-NEXT: vmfeq.vv v10, v9, v9
357 ; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
358 ; CHECK-NEXT: vmv.v.v v0, v10
359 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
360 ; CHECK-NEXT: vfmax.vv v8, v8, v11
362 %v = call <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b)
363 ret <vscale x 1 x double> %v
; llvm.maximum on <vscale x 2 x double>: @vfmax_nxv2f64_vv forwards %a and %b
; to the intrinsic and returns the result.
; The common-prefix checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence
; at e64/m2.
366 declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
368 define <vscale x 2 x double> @vfmax_nxv2f64_vv(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
369 ; CHECK-LABEL: vfmax_nxv2f64_vv:
371 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
372 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
373 ; CHECK-NEXT: vmfeq.vv v12, v10, v10
374 ; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
375 ; CHECK-NEXT: vmv1r.v v0, v12
376 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
377 ; CHECK-NEXT: vfmax.vv v8, v8, v14
379 %v = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
380 ret <vscale x 2 x double> %v
; llvm.maximum on <vscale x 4 x double>: @vfmax_nxv4f64_vv forwards %a and %b
; to the intrinsic and returns the result.
; The common-prefix checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence
; at e64/m4.
383 declare <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
385 define <vscale x 4 x double> @vfmax_nxv4f64_vv(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
386 ; CHECK-LABEL: vfmax_nxv4f64_vv:
388 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
389 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
390 ; CHECK-NEXT: vmfeq.vv v16, v12, v12
391 ; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0
392 ; CHECK-NEXT: vmv1r.v v0, v16
393 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
394 ; CHECK-NEXT: vfmax.vv v8, v8, v20
396 %v = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
397 ret <vscale x 4 x double> %v
; llvm.maximum on <vscale x 8 x double>: @vfmax_nxv8f64_vv forwards %a and %b
; to the intrinsic and returns the result. The function is marked nounwind.
; The common-prefix checks expect the vmfeq.vv / vmerge.vvm / vfmax.vv sequence
; at e64/m8, with the second compare result held in v7 and no stack traffic.
400 declare <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>)
402 define <vscale x 8 x double> @vfmax_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
403 ; CHECK-LABEL: vfmax_nxv8f64_vv:
405 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
406 ; CHECK-NEXT: vmfeq.vv v0, v8, v8
407 ; CHECK-NEXT: vmfeq.vv v7, v16, v16
408 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
409 ; CHECK-NEXT: vmv1r.v v0, v7
410 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
411 ; CHECK-NEXT: vfmax.vv v8, v8, v24
413 %v = call <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
414 ret <vscale x 8 x double> %v
; Same operation as @vfmax_nxv1f16_vv, but the intrinsic call carries the nnan
; fast-math flag.
; With the flag present the checks contain no vmfeq.vv or vmerge.vvm at all:
; the ZVFH checks expect a lone vfmax.vv at e16/mf4, and the ZVFHMIN checks
; expect only widen (vfwcvt.f.f.v), vfmax.vv at e32/mf2, and narrow
; (vfncvt.f.f.w).
417 define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
418 ; ZVFH-LABEL: vfmax_nxv1f16_vv_nnan:
420 ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
421 ; ZVFH-NEXT: vfmax.vv v8, v8, v9
424 ; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnan:
426 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
427 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
428 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
429 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
430 ; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
431 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
432 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
434 %v = call nnan <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
435 ret <vscale x 1 x half> %v
; Variant where only the first intrinsic operand is produced by an nnan
; instruction: %c = fadd nnan %a, %a is passed as operand 0 and plain %b as
; operand 1. The intrinsic call itself has no fast-math flags.
; The ZVFH checks expect a single vmfeq.vv, a vfadd.vv executed under that mask
; (v0.t, with a mask-undisturbed vsetvli) and one vfmax.vv; no vmerge.vvm
; appears. The ZVFHMIN checks still expect two vmfeq.vv and two vmerge.vvm after
; the fadd result has been narrowed to e16 and widened again.
438 define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
439 ; ZVFH-LABEL: vfmax_nxv1f16_vv_nnana:
441 ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
442 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9
443 ; ZVFH-NEXT: vmv1r.v v10, v9
444 ; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t
445 ; ZVFH-NEXT: vfmax.vv v8, v10, v9
448 ; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnana:
450 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
451 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
452 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
453 ; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
454 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
455 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
456 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
457 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
458 ; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11
459 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
460 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
461 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
462 ; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9
463 ; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0
464 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
465 ; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0
466 ; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
467 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
468 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
470 %c = fadd nnan <vscale x 1 x half> %a, %a
471 %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
472 ret <vscale x 1 x half> %v
; Mirror of @vfmax_nxv1f16_vv_nnana: here the second intrinsic operand is the
; nnan value (%c = fadd nnan %b, %b) and the first is plain %a. The intrinsic
; call itself has no fast-math flags.
; The ZVFH checks expect a single vmfeq.vv, a vfadd.vv executed under that mask
; (v0.t, with a mask-undisturbed vsetvli) and one vfmax.vv; no vmerge.vvm
; appears. The ZVFHMIN checks still expect two vmfeq.vv and two vmerge.vvm after
; the fadd result has been narrowed to e16 and widened again.
475 define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
476 ; ZVFH-LABEL: vfmax_nxv1f16_vv_nnanb:
478 ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
479 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8
480 ; ZVFH-NEXT: vmv1r.v v10, v8
481 ; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t
482 ; ZVFH-NEXT: vfmax.vv v8, v8, v10
485 ; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnanb:
487 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
488 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
489 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
490 ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10
491 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
492 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
493 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
494 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
495 ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
496 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
497 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
498 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
499 ; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
500 ; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0
501 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
502 ; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0
503 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
504 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
505 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
507 %c = fadd nnan <vscale x 1 x half> %b, %b
508 %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
509 ret <vscale x 1 x half> %v