; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
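
; llvm.maximum propagates NaN: a NaN in either input lane yields NaN in that
; result lane. RVV's vfmax.vv instead implements IEEE-754 maximumNumber,
; returning the non-NaN operand, so the lowering below must canonicalize NaN
; operands first. The ZVFH runs compute on f16 directly; ZVFHMIN provides
; only f16<->f32 conversions, so those runs widen with vfwcvt.f.f.v, compute
; at e32, and narrow the result back with vfncvt.f.f.w.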
declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>)

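; The NaN canonicalization below computes, per lane (sketch):
;   x   = isNaN(%b) ? %b : %a   ; vmerge under the "%b == %b" mask
;   y   = isNaN(%a) ? %a : %b   ; vmerge under the "%a == %a" mask
;   res = vfmax(x, y)           ; both operands are NaN if either input was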
define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT:    vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %v
}

declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)

define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFH-LABEL: vfmax_v4f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v4f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT:    vmerge.vvm v11, v9, v10, v0
; ZVFHMIN-NEXT:    vmv.v.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v9, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %v
}

declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)

define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFH-LABEL: vfmax_v8f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v10, v9, v9
; ZVFH-NEXT:    vmerge.vvm v11, v8, v9, v0
; ZVFH-NEXT:    vmv.v.v v0, v10
; ZVFH-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v8f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
; ZVFHMIN-NEXT:    vmerge.vvm v14, v12, v10, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v14
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %v
}

declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)

define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFH-LABEL: vfmax_v16f16_vv:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
; ZVFH-NEXT:    vmv1r.v v0, v12
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v14
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v16f16_vv:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
; ZVFHMIN-NEXT:    vmerge.vvm v20, v16, v12, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v20
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b)
  ret <16 x half> %v
}

declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)

define <2 x float> @vfmax_v2f32_vv(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: vfmax_v2f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
  ret <2 x float> %v
}

declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)

define <4 x float> @vfmax_v4f32_vv(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: vfmax_v4f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv.v.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %v
}

declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)

define <8 x float> @vfmax_v8f32_vv(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: vfmax_v8f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v12, v10, v10
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v14
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.maximum.v8f32(<8 x float> %a, <8 x float> %b)
  ret <8 x float> %v
}

declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)

define <16 x float> @vfmax_v16f32_vv(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: vfmax_v16f32_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v16, v12, v12
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v20
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %a, <16 x float> %b)
  ret <16 x float> %v
}

declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)

define <2 x double> @vfmax_v2f64_vv(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: vfmax_v2f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v10, v9, v9
; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
; CHECK-NEXT:    vmv.v.v v0, v10
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v11
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %v
}

declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)

define <4 x double> @vfmax_v4f64_vv(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: vfmax_v4f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v12, v10, v10
; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v14
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %a, <4 x double> %b)
  ret <4 x double> %v
}

declare <8 x double> @llvm.maximum.v8f64(<8 x double>, <8 x double>)

define <8 x double> @vfmax_v8f64_vv(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: vfmax_v8f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v16, v12, v12
; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v20
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.maximum.v8f64(<8 x double> %a, <8 x double> %b)
  ret <8 x double> %v
}

declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)

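; Mask values occupy a single vector register regardless of LMUL, so even
; with the m8 sources tied up in v8 and v16, the second compare result can
; live in v7.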
define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind {
; CHECK-LABEL: vfmax_v16f64_vv:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v7, v16, v16
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v24
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.maximum.v16f64(<16 x double> %a, <16 x double> %b)
  ret <16 x double> %v
}

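; With nnan on the call, the NaN canonicalization is unnecessary and the
; intrinsic lowers to a bare vfmax.vv.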
define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnan:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmax.vv v8, v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %v
}

; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnana:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vfadd.vv v10, v8, v8
; ZVFH-NEXT:    vmfeq.vv v0, v9, v9
; ZVFH-NEXT:    vmfeq.vv v8, v10, v10
; ZVFH-NEXT:    vmerge.vvm v11, v9, v10, v0
; ZVFH-NEXT:    vmv1r.v v0, v8
; ZVFH-NEXT:    vmerge.vvm v8, v10, v9, v0
; ZVFH-NEXT:    vfmax.vv v8, v11, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v9, v9
; ZVFHMIN-NEXT:    vmerge.vvm v10, v11, v9, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v11, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %c = fadd nnan <2 x half> %a, %a
  %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b)
  ret <2 x half> %v
}

; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnanb:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vfadd.vv v10, v9, v9
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmfeq.vv v9, v10, v10
; ZVFH-NEXT:    vmerge.vvm v11, v8, v10, v0
; ZVFH-NEXT:    vmv1r.v v0, v9
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v11
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnanb:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v8, v11, v11
; ZVFHMIN-NEXT:    vmerge.vvm v10, v9, v11, v0
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vmerge.vvm v8, v11, v9, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %c = fadd nnan <2 x half> %b, %b
  %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
  ret <2 x half> %v
}