1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2
3 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2
4 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32
5 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64
6 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2
7 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2
8 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32
9 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64
11 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32
12 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64
13 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32
14 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64
15 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32
16 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64
17 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32
18 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64
; Element-wise fadd of two <8 x half> vectors: loads %x and %y, adds them,
; and stores the sum back through %x.
; The Zvfh expectations perform the add directly at e16 with one vfadd.vv.
; The Zvfhmin expectations widen both operands with vfwcvt.f.f.v, add at
; e32, then narrow with vfncvt.f.f.w before the e16 store.
20 define void @fadd_v8f16(ptr %x, ptr %y) {
21 ; ZVFH-LABEL: fadd_v8f16:
23 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
24 ; ZVFH-NEXT: vle16.v v8, (a0)
25 ; ZVFH-NEXT: vle16.v v9, (a1)
26 ; ZVFH-NEXT: vfadd.vv v8, v8, v9
27 ; ZVFH-NEXT: vse16.v v8, (a0)
30 ; ZVFHMIN-LABEL: fadd_v8f16:
32 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
33 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
34 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
35 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
36 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
37 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
38 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10
39 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
40 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
41 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
43 %a = load <8 x half>, ptr %x
44 %b = load <8 x half>, ptr %y
45 %c = fadd <8 x half> %a, %b
46 store <8 x half> %c, ptr %x
; Element-wise fadd of two <6 x half> vectors (a non-power-of-two length),
; stored back through %x.
; The Zvfh expectations load and store with VL=6 but run vfadd.vv with VL=8.
; The Zvfhmin expectations widen/add/narrow with VL=8 and then write the
; result with two stores, split differently on RV32 and RV64:
;   RV32 - vse32.v of the vslidedown.vi result to %x+8, then a 4-element
;          vse16.v to %x.
;   RV64 - vse64.v of one element to %x, then vse32.v of the vslidedown.vi
;          result to %x+8.
; The LMUL-max=1 and LMUL-max=2 Zvfhmin expectations are identical here.
50 define void @fadd_v6f16(ptr %x, ptr %y) {
51 ; ZVFH-LABEL: fadd_v6f16:
53 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
54 ; ZVFH-NEXT: vle16.v v8, (a0)
55 ; ZVFH-NEXT: vle16.v v9, (a1)
56 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
57 ; ZVFH-NEXT: vfadd.vv v8, v8, v9
58 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
59 ; ZVFH-NEXT: vse16.v v8, (a0)
62 ; ZVFHMINLMULMAX2-RV32-LABEL: fadd_v6f16:
63 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
64 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
65 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
66 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
67 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8
68 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
69 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
70 ; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v10
71 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
72 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
73 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
74 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
75 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
76 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
77 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
78 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
79 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
81 ; ZVFHMINLMULMAX2-RV64-LABEL: fadd_v6f16:
82 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
83 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
84 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
85 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
86 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8
87 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
88 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
89 ; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v10
90 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
91 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
92 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
93 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
94 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
95 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
96 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
97 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
99 ; ZVFHMINLMULMAX1-RV32-LABEL: fadd_v6f16:
100 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
101 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
102 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
103 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
104 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8
105 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
106 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
107 ; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v10
108 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
109 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
110 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
111 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
112 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
113 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
114 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
115 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
116 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
118 ; ZVFHMINLMULMAX1-RV64-LABEL: fadd_v6f16:
119 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
120 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
121 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
122 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
123 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8
124 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
125 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
126 ; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v10
127 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
128 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
129 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
130 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
131 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
132 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
133 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
134 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
135 %a = load <6 x half>, ptr %x
136 %b = load <6 x half>, ptr %y
137 %c = fadd <6 x half> %a, %b
138 store <6 x half> %c, ptr %x
; Element-wise fadd of two <4 x float> vectors, stored back through %x.
; Both prefix groups expect the same load / vfadd.vv / store sequence at e32;
; they differ only in the LMUL of the vsetivli (m1 for Zvfh, mf2 for Zvfhmin).
142 define void @fadd_v4f32(ptr %x, ptr %y) {
143 ; ZVFH-LABEL: fadd_v4f32:
145 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
146 ; ZVFH-NEXT: vle32.v v8, (a0)
147 ; ZVFH-NEXT: vle32.v v9, (a1)
148 ; ZVFH-NEXT: vfadd.vv v8, v8, v9
149 ; ZVFH-NEXT: vse32.v v8, (a0)
152 ; ZVFHMIN-LABEL: fadd_v4f32:
154 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
155 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
156 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
157 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
158 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
160 %a = load <4 x float>, ptr %x
161 %b = load <4 x float>, ptr %y
162 %c = fadd <4 x float> %a, %b
163 store <4 x float> %c, ptr %x
; Element-wise fadd of two <2 x double> vectors, stored back through %x.
; One shared expectation covers every llc configuration: e64/m1 loads, a
; single vfadd.vv, and an e64 store.
167 define void @fadd_v2f64(ptr %x, ptr %y) {
168 ; CHECK-LABEL: fadd_v2f64:
170 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
171 ; CHECK-NEXT: vle64.v v8, (a0)
172 ; CHECK-NEXT: vle64.v v9, (a1)
173 ; CHECK-NEXT: vfadd.vv v8, v8, v9
174 ; CHECK-NEXT: vse64.v v8, (a0)
176 %a = load <2 x double>, ptr %x
177 %b = load <2 x double>, ptr %y
178 %c = fadd <2 x double> %a, %b
179 store <2 x double> %c, ptr %x
; Element-wise fsub of two <8 x half> vectors: computes %x - %y per lane and
; stores the difference back through %x.
; The Zvfh expectations subtract directly at e16 with one vfsub.vv.
; The Zvfhmin expectations widen both operands with vfwcvt.f.f.v, subtract
; at e32, then narrow with vfncvt.f.f.w before the e16 store.
183 define void @fsub_v8f16(ptr %x, ptr %y) {
184 ; ZVFH-LABEL: fsub_v8f16:
186 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
187 ; ZVFH-NEXT: vle16.v v8, (a0)
188 ; ZVFH-NEXT: vle16.v v9, (a1)
189 ; ZVFH-NEXT: vfsub.vv v8, v8, v9
190 ; ZVFH-NEXT: vse16.v v8, (a0)
193 ; ZVFHMIN-LABEL: fsub_v8f16:
195 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
196 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
197 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
198 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
199 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
200 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
201 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10
202 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
203 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
204 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
206 %a = load <8 x half>, ptr %x
207 %b = load <8 x half>, ptr %y
208 %c = fsub <8 x half> %a, %b
209 store <8 x half> %c, ptr %x
; Element-wise fsub of two <6 x half> vectors (a non-power-of-two length),
; stored back through %x.
; The Zvfh expectations load and store with VL=6 but run vfsub.vv with VL=8.
; The Zvfhmin expectations widen/subtract/narrow with VL=8 and then write the
; result with two stores, split differently on RV32 and RV64:
;   RV32 - vse32.v of the vslidedown.vi result to %x+8, then a 4-element
;          vse16.v to %x.
;   RV64 - vse64.v of one element to %x, then vse32.v of the vslidedown.vi
;          result to %x+8.
; The LMUL-max=1 and LMUL-max=2 Zvfhmin expectations are identical here.
213 define void @fsub_v6f16(ptr %x, ptr %y) {
214 ; ZVFH-LABEL: fsub_v6f16:
216 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
217 ; ZVFH-NEXT: vle16.v v8, (a0)
218 ; ZVFH-NEXT: vle16.v v9, (a1)
219 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
220 ; ZVFH-NEXT: vfsub.vv v8, v8, v9
221 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
222 ; ZVFH-NEXT: vse16.v v8, (a0)
225 ; ZVFHMINLMULMAX2-RV32-LABEL: fsub_v6f16:
226 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
227 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
228 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
229 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
230 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8
231 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
232 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
233 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v10
234 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
235 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
236 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
237 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
238 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
239 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
240 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
241 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
242 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
244 ; ZVFHMINLMULMAX2-RV64-LABEL: fsub_v6f16:
245 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
246 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
247 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
248 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
249 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8
250 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
251 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
252 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v10
253 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
254 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
255 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
256 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
257 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
258 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
259 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
260 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
262 ; ZVFHMINLMULMAX1-RV32-LABEL: fsub_v6f16:
263 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
264 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
265 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
266 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
267 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8
268 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
269 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
270 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v10
271 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
272 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
273 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
274 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
275 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
276 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
277 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
278 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
279 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
281 ; ZVFHMINLMULMAX1-RV64-LABEL: fsub_v6f16:
282 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
283 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
284 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
285 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
286 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8
287 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
288 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
289 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v10
290 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
291 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
292 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
293 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
294 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
295 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
296 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
297 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
298 %a = load <6 x half>, ptr %x
299 %b = load <6 x half>, ptr %y
300 %c = fsub <6 x half> %a, %b
301 store <6 x half> %c, ptr %x
; Element-wise fsub of two <4 x float> vectors (%x - %y), stored back
; through %x.
; Both prefix groups expect the same load / vfsub.vv / store sequence at e32;
; they differ only in the LMUL of the vsetivli (m1 for Zvfh, mf2 for Zvfhmin).
305 define void @fsub_v4f32(ptr %x, ptr %y) {
306 ; ZVFH-LABEL: fsub_v4f32:
308 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
309 ; ZVFH-NEXT: vle32.v v8, (a0)
310 ; ZVFH-NEXT: vle32.v v9, (a1)
311 ; ZVFH-NEXT: vfsub.vv v8, v8, v9
312 ; ZVFH-NEXT: vse32.v v8, (a0)
315 ; ZVFHMIN-LABEL: fsub_v4f32:
317 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
318 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
319 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
320 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
321 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
323 %a = load <4 x float>, ptr %x
324 %b = load <4 x float>, ptr %y
325 %c = fsub <4 x float> %a, %b
326 store <4 x float> %c, ptr %x
; Element-wise fsub of two <2 x double> vectors (%x - %y), stored back
; through %x.
; One shared expectation covers every llc configuration: e64/m1 loads, a
; single vfsub.vv, and an e64 store.
330 define void @fsub_v2f64(ptr %x, ptr %y) {
331 ; CHECK-LABEL: fsub_v2f64:
333 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
334 ; CHECK-NEXT: vle64.v v8, (a0)
335 ; CHECK-NEXT: vle64.v v9, (a1)
336 ; CHECK-NEXT: vfsub.vv v8, v8, v9
337 ; CHECK-NEXT: vse64.v v8, (a0)
339 %a = load <2 x double>, ptr %x
340 %b = load <2 x double>, ptr %y
341 %c = fsub <2 x double> %a, %b
342 store <2 x double> %c, ptr %x
; Element-wise fmul of two <8 x half> vectors: loads %x and %y, multiplies
; them, and stores the product back through %x.
; The Zvfh expectations multiply directly at e16 with one vfmul.vv.
; The Zvfhmin expectations widen both operands with vfwcvt.f.f.v, multiply
; at e32, then narrow with vfncvt.f.f.w before the e16 store.
346 define void @fmul_v8f16(ptr %x, ptr %y) {
347 ; ZVFH-LABEL: fmul_v8f16:
349 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
350 ; ZVFH-NEXT: vle16.v v8, (a0)
351 ; ZVFH-NEXT: vle16.v v9, (a1)
352 ; ZVFH-NEXT: vfmul.vv v8, v8, v9
353 ; ZVFH-NEXT: vse16.v v8, (a0)
356 ; ZVFHMIN-LABEL: fmul_v8f16:
358 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
359 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
360 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
361 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
362 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
363 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
364 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10
365 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
366 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
367 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
369 %a = load <8 x half>, ptr %x
370 %b = load <8 x half>, ptr %y
371 %c = fmul <8 x half> %a, %b
372 store <8 x half> %c, ptr %x
; Element-wise fmul of two <6 x half> vectors (a non-power-of-two length),
; stored back through %x.
; The Zvfh expectations load and store with VL=6 but run vfmul.vv with VL=8.
; The Zvfhmin expectations widen/multiply/narrow with VL=8 and then write the
; result with two stores, split differently on RV32 and RV64:
;   RV32 - vse32.v of the vslidedown.vi result to %x+8, then a 4-element
;          vse16.v to %x.
;   RV64 - vse64.v of one element to %x, then vse32.v of the vslidedown.vi
;          result to %x+8.
; The LMUL-max=1 and LMUL-max=2 Zvfhmin expectations are identical here.
376 define void @fmul_v6f16(ptr %x, ptr %y) {
377 ; ZVFH-LABEL: fmul_v6f16:
379 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
380 ; ZVFH-NEXT: vle16.v v8, (a0)
381 ; ZVFH-NEXT: vle16.v v9, (a1)
382 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
383 ; ZVFH-NEXT: vfmul.vv v8, v8, v9
384 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
385 ; ZVFH-NEXT: vse16.v v8, (a0)
388 ; ZVFHMINLMULMAX2-RV32-LABEL: fmul_v6f16:
389 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
390 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
391 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
392 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
393 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8
394 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
395 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
396 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v10
397 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
398 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
399 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
400 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
401 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
402 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
403 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
404 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
405 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
407 ; ZVFHMINLMULMAX2-RV64-LABEL: fmul_v6f16:
408 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
409 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
410 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
411 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
412 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8
413 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
414 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
415 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v10
416 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
417 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
418 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
419 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
420 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
421 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
422 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
423 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
425 ; ZVFHMINLMULMAX1-RV32-LABEL: fmul_v6f16:
426 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
427 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
428 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
429 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
430 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8
431 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
432 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
433 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v10
434 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
435 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
436 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
437 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
438 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
439 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
440 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
441 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
442 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
444 ; ZVFHMINLMULMAX1-RV64-LABEL: fmul_v6f16:
445 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
446 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
447 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
448 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
449 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8
450 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
451 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
452 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v10
453 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
454 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
455 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
456 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
457 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
458 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
459 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
460 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
461 %a = load <6 x half>, ptr %x
462 %b = load <6 x half>, ptr %y
463 %c = fmul <6 x half> %a, %b
464 store <6 x half> %c, ptr %x
; Element-wise fmul of two <4 x float> vectors, stored back through %x.
; Both prefix groups expect the same load / vfmul.vv / store sequence at e32;
; they differ only in the LMUL of the vsetivli (m1 for Zvfh, mf2 for Zvfhmin).
468 define void @fmul_v4f32(ptr %x, ptr %y) {
469 ; ZVFH-LABEL: fmul_v4f32:
471 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
472 ; ZVFH-NEXT: vle32.v v8, (a0)
473 ; ZVFH-NEXT: vle32.v v9, (a1)
474 ; ZVFH-NEXT: vfmul.vv v8, v8, v9
475 ; ZVFH-NEXT: vse32.v v8, (a0)
478 ; ZVFHMIN-LABEL: fmul_v4f32:
480 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
481 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
482 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
483 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9
484 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
486 %a = load <4 x float>, ptr %x
487 %b = load <4 x float>, ptr %y
488 %c = fmul <4 x float> %a, %b
489 store <4 x float> %c, ptr %x
; Element-wise fmul of two <2 x double> vectors, stored back through %x.
; One shared expectation covers every llc configuration: e64/m1 loads, a
; single vfmul.vv, and an e64 store.
493 define void @fmul_v2f64(ptr %x, ptr %y) {
494 ; CHECK-LABEL: fmul_v2f64:
496 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
497 ; CHECK-NEXT: vle64.v v8, (a0)
498 ; CHECK-NEXT: vle64.v v9, (a1)
499 ; CHECK-NEXT: vfmul.vv v8, v8, v9
500 ; CHECK-NEXT: vse64.v v8, (a0)
502 %a = load <2 x double>, ptr %x
503 %b = load <2 x double>, ptr %y
504 %c = fmul <2 x double> %a, %b
505 store <2 x double> %c, ptr %x
; Element-wise fdiv of two <8 x half> vectors: computes %x / %y per lane and
; stores the quotient back through %x.
; The Zvfh expectations divide directly at e16 with one vfdiv.vv.
; The Zvfhmin expectations widen both operands with vfwcvt.f.f.v, divide at
; e32, then narrow with vfncvt.f.f.w before the e16 store.
509 define void @fdiv_v8f16(ptr %x, ptr %y) {
510 ; ZVFH-LABEL: fdiv_v8f16:
512 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
513 ; ZVFH-NEXT: vle16.v v8, (a0)
514 ; ZVFH-NEXT: vle16.v v9, (a1)
515 ; ZVFH-NEXT: vfdiv.vv v8, v8, v9
516 ; ZVFH-NEXT: vse16.v v8, (a0)
519 ; ZVFHMIN-LABEL: fdiv_v8f16:
521 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
522 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
523 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
524 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
525 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
526 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
527 ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10
528 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
529 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
530 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
532 %a = load <8 x half>, ptr %x
533 %b = load <8 x half>, ptr %y
534 %c = fdiv <8 x half> %a, %b
535 store <8 x half> %c, ptr %x
; Element-wise fdiv of two <6 x half> vectors (a non-power-of-two length),
; stored back through %x.
; The Zvfh expectations load and store with VL=6 but run vfdiv.vv with VL=8.
; The Zvfhmin expectations widen/divide/narrow with VL=8 and then write the
; result with two stores, split differently on RV32 and RV64:
;   RV32 - vse32.v of the vslidedown.vi result to %x+8, then a 4-element
;          vse16.v to %x.
;   RV64 - vse64.v of one element to %x, then vse32.v of the vslidedown.vi
;          result to %x+8.
; The LMUL-max=1 and LMUL-max=2 Zvfhmin expectations are identical here.
539 define void @fdiv_v6f16(ptr %x, ptr %y) {
540 ; ZVFH-LABEL: fdiv_v6f16:
542 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
543 ; ZVFH-NEXT: vle16.v v8, (a0)
544 ; ZVFH-NEXT: vle16.v v9, (a1)
545 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
546 ; ZVFH-NEXT: vfdiv.vv v8, v8, v9
547 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
548 ; ZVFH-NEXT: vse16.v v8, (a0)
551 ; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_v6f16:
552 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
553 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
554 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
555 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
556 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8
557 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
558 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
559 ; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v8, v10
560 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
561 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
562 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
563 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
564 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
565 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
566 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
567 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
568 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
570 ; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_v6f16:
571 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
572 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
573 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
574 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
575 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8
576 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
577 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
578 ; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v8, v10
579 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
580 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
581 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
582 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
583 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
584 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
585 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
586 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
588 ; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_v6f16:
589 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
590 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
591 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
592 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
593 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8
594 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
595 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
596 ; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v10
597 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
598 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
599 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
600 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
601 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
602 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
603 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
604 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
605 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
607 ; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_v6f16:
608 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
609 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
610 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
611 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
612 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8
613 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
614 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
615 ; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v10
616 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
617 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
618 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
619 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
620 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
621 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
622 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
623 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
624 %a = load <6 x half>, ptr %x
625 %b = load <6 x half>, ptr %y
626 %c = fdiv <6 x half> %a, %b
627 store <6 x half> %c, ptr %x
; Element-wise fdiv of two <4 x float> vectors (%x / %y), stored back
; through %x.
; Both prefix groups expect the same load / vfdiv.vv / store sequence at e32;
; they differ only in the LMUL of the vsetivli (m1 for Zvfh, mf2 for Zvfhmin).
631 define void @fdiv_v4f32(ptr %x, ptr %y) {
632 ; ZVFH-LABEL: fdiv_v4f32:
634 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
635 ; ZVFH-NEXT: vle32.v v8, (a0)
636 ; ZVFH-NEXT: vle32.v v9, (a1)
637 ; ZVFH-NEXT: vfdiv.vv v8, v8, v9
638 ; ZVFH-NEXT: vse32.v v8, (a0)
641 ; ZVFHMIN-LABEL: fdiv_v4f32:
643 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
644 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
645 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
646 ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
647 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
649 %a = load <4 x float>, ptr %x
650 %b = load <4 x float>, ptr %y
651 %c = fdiv <4 x float> %a, %b
652 store <4 x float> %c, ptr %x
; Element-wise fdiv of two <2 x double> vectors (%x / %y), stored back
; through %x.
; One shared expectation covers every llc configuration: e64/m1 loads, a
; single vfdiv.vv, and an e64 store.
656 define void @fdiv_v2f64(ptr %x, ptr %y) {
657 ; CHECK-LABEL: fdiv_v2f64:
659 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
660 ; CHECK-NEXT: vle64.v v8, (a0)
661 ; CHECK-NEXT: vle64.v v9, (a1)
662 ; CHECK-NEXT: vfdiv.vv v8, v8, v9
663 ; CHECK-NEXT: vse64.v v8, (a0)
665 %a = load <2 x double>, ptr %x
666 %b = load <2 x double>, ptr %y
667 %c = fdiv <2 x double> %a, %b
668 store <2 x double> %c, ptr %x
; Element-wise fneg of an <8 x half> vector loaded from %x, stored back
; through %x.
; The Zvfh expectations negate directly at e16 with one vfneg.v.
; The Zvfhmin expectations widen the operand with vfwcvt.f.f.v, negate at
; e32, then narrow with vfncvt.f.f.w before the e16 store.
672 define void @fneg_v8f16(ptr %x) {
673 ; ZVFH-LABEL: fneg_v8f16:
675 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
676 ; ZVFH-NEXT: vle16.v v8, (a0)
677 ; ZVFH-NEXT: vfneg.v v8, v8
678 ; ZVFH-NEXT: vse16.v v8, (a0)
681 ; ZVFHMIN-LABEL: fneg_v8f16:
683 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
684 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
685 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
686 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
687 ; ZVFHMIN-NEXT: vfneg.v v8, v9
688 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
689 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
690 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
692 %a = load <8 x half>, ptr %x
693 %b = fneg <8 x half> %a
694 store <8 x half> %b, ptr %x
; Element-wise fneg of a <6 x half> vector (a non-power-of-two length),
; stored back through %x.
; The Zvfh expectations load and store with VL=6 but run vfneg.v with VL=8.
; The Zvfhmin expectations widen/negate/narrow with VL=8 and then write the
; result with two stores, split differently on RV32 and RV64:
;   RV32 - vse32.v of the vslidedown.vi result to %x+8, then a 4-element
;          vse16.v to %x.
;   RV64 - vse64.v of one element to %x, then vse32.v of the vslidedown.vi
;          result to %x+8.
; The LMUL-max=1 and LMUL-max=2 Zvfhmin expectations are identical here.
698 define void @fneg_v6f16(ptr %x) {
699 ; ZVFH-LABEL: fneg_v6f16:
701 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
702 ; ZVFH-NEXT: vle16.v v8, (a0)
703 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
704 ; ZVFH-NEXT: vfneg.v v8, v8
705 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
706 ; ZVFH-NEXT: vse16.v v8, (a0)
709 ; ZVFHMINLMULMAX2-RV32-LABEL: fneg_v6f16:
710 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
711 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
712 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
713 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
714 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
715 ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v9
716 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
717 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
718 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
719 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
720 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
721 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
722 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
723 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
724 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
726 ; ZVFHMINLMULMAX2-RV64-LABEL: fneg_v6f16:
727 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
728 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
729 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
730 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
731 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
732 ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v9
733 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
734 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
735 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
736 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
737 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
738 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
739 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
740 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
742 ; ZVFHMINLMULMAX1-RV32-LABEL: fneg_v6f16:
743 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
744 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
745 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
746 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
747 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
748 ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v9
749 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
750 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
751 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
752 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
753 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
754 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
755 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
756 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
757 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
759 ; ZVFHMINLMULMAX1-RV64-LABEL: fneg_v6f16:
760 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
761 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
762 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
763 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
764 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
765 ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v9
766 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
767 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
768 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
769 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
770 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
771 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
772 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
773 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
774 %a = load <6 x half>, ptr %x
775 %b = fneg <6 x half> %a
776 store <6 x half> %b, ptr %x
; fneg on <4 x float>, performed in place on the memory at %x. Neither the
; Zvfh nor the Zvfhmin expectations involve any f16 conversion here: both want
; a single e32 vfneg.v, and they differ only in the LMUL of the vsetivli
; (m1 vs. mf2).
780 define void @fneg_v4f32(ptr %x) {
781 ; ZVFH-LABEL: fneg_v4f32:
783 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
784 ; ZVFH-NEXT: vle32.v v8, (a0)
785 ; ZVFH-NEXT: vfneg.v v8, v8
786 ; ZVFH-NEXT: vse32.v v8, (a0)
789 ; ZVFHMIN-LABEL: fneg_v4f32:
791 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
792 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
793 ; ZVFHMIN-NEXT: vfneg.v v8, v8
794 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
796 %a = load <4 x float>, ptr %x
797 %b = fneg <4 x float> %a
798 store <4 x float> %b, ptr %x
; fneg on <2 x double>, performed in place on the memory at %x. One shared set
; of expectations is used: an e64/m1 load, a single vfneg.v, and the store.
802 define void @fneg_v2f64(ptr %x) {
803 ; CHECK-LABEL: fneg_v2f64:
805 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
806 ; CHECK-NEXT: vle64.v v8, (a0)
807 ; CHECK-NEXT: vfneg.v v8, v8
808 ; CHECK-NEXT: vse64.v v8, (a0)
810 %a = load <2 x double>, ptr %x
811 %b = fneg <2 x double> %a
812 store <2 x double> %b, ptr %x
; llvm.fabs on <8 x half>, performed in place on the memory at %x.
; With Zvfh the expectation is a direct e16 vfabs.v. With only Zvfhmin the
; input is widened to f32 (vfwcvt.f.f.v), vfabs.v runs at e32, and the result
; is narrowed back to f16 (vfncvt.f.f.w) before the store.
816 define void @fabs_v8f16(ptr %x) {
817 ; ZVFH-LABEL: fabs_v8f16:
819 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
820 ; ZVFH-NEXT: vle16.v v8, (a0)
821 ; ZVFH-NEXT: vfabs.v v8, v8
822 ; ZVFH-NEXT: vse16.v v8, (a0)
825 ; ZVFHMIN-LABEL: fabs_v8f16:
827 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
828 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
829 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
830 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
831 ; ZVFHMIN-NEXT: vfabs.v v8, v9
832 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
833 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
834 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
836 %a = load <8 x half>, ptr %x
837 %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
838 store <8 x half> %b, ptr %x
; Intrinsic declaration for the <8 x half> fabs call in fabs_v8f16.
841 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
; llvm.fabs on the non-power-of-two vector <6 x half>, in place at %x.
; With Zvfh the load and store use VL=6 while the vfabs.v itself runs with
; VL=8. With only Zvfhmin the value is widened to f32, vfabs.v runs at e32 and
; the result is narrowed back; the 12-byte result is then stored in two pieces,
; and the pieces differ per target (hence the separate RV32/RV64 prefixes):
;   RV32: the last two halves as one 32-bit element at %x+8 (after a slide down
;         by 2), and the first four halves with an e16 store of VL=4 at %x.
;   RV64: the first four halves as one 64-bit element at %x, and the last two
;         halves as one 32-bit element at %x+8.
843 define void @fabs_v6f16(ptr %x) {
844 ; ZVFH-LABEL: fabs_v6f16:
846 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
847 ; ZVFH-NEXT: vle16.v v8, (a0)
848 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
849 ; ZVFH-NEXT: vfabs.v v8, v8
850 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
851 ; ZVFH-NEXT: vse16.v v8, (a0)
854 ; ZVFHMINLMULMAX2-RV32-LABEL: fabs_v6f16:
855 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
856 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
857 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
858 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
859 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
860 ; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9
861 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
862 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
863 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
864 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
865 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
866 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
867 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
868 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
869 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
871 ; ZVFHMINLMULMAX2-RV64-LABEL: fabs_v6f16:
872 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
873 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
874 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
875 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
876 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
877 ; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9
878 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
879 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
880 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
881 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
882 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
883 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
884 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
885 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
887 ; ZVFHMINLMULMAX1-RV32-LABEL: fabs_v6f16:
888 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
889 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
890 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
891 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
892 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
893 ; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9
894 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
895 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
896 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
897 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
898 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
899 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
900 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
901 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
902 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
904 ; ZVFHMINLMULMAX1-RV64-LABEL: fabs_v6f16:
905 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
906 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
907 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
908 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
909 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
910 ; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9
911 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
912 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
913 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
914 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
915 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
916 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
917 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
918 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
919 %a = load <6 x half>, ptr %x
920 %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
921 store <6 x half> %b, ptr %x
; Intrinsic declaration for the <6 x half> fabs call in fabs_v6f16.
924 declare <6 x half> @llvm.fabs.v6f16(<6 x half>)
; llvm.fabs on <4 x float>, performed in place on the memory at %x. Both the
; Zvfh and the Zvfhmin expectations want a single e32 vfabs.v; they differ only
; in the LMUL of the vsetivli (m1 vs. mf2).
926 define void @fabs_v4f32(ptr %x) {
927 ; ZVFH-LABEL: fabs_v4f32:
929 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
930 ; ZVFH-NEXT: vle32.v v8, (a0)
931 ; ZVFH-NEXT: vfabs.v v8, v8
932 ; ZVFH-NEXT: vse32.v v8, (a0)
935 ; ZVFHMIN-LABEL: fabs_v4f32:
937 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
938 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
939 ; ZVFHMIN-NEXT: vfabs.v v8, v8
940 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
942 %a = load <4 x float>, ptr %x
943 %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
944 store <4 x float> %b, ptr %x
; Intrinsic declaration for the <4 x float> fabs call in fabs_v4f32.
947 declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
; llvm.fabs on <2 x double>, performed in place on the memory at %x. One shared
; set of expectations is used: an e64/m1 load, a single vfabs.v, and the store.
949 define void @fabs_v2f64(ptr %x) {
950 ; CHECK-LABEL: fabs_v2f64:
952 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
953 ; CHECK-NEXT: vle64.v v8, (a0)
954 ; CHECK-NEXT: vfabs.v v8, v8
955 ; CHECK-NEXT: vse64.v v8, (a0)
957 %a = load <2 x double>, ptr %x
958 %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
959 store <2 x double> %b, ptr %x
; Intrinsic declaration for the <2 x double> fabs call in fabs_v2f64.
962 declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
; llvm.copysign on <8 x half>: magnitude comes from the vector at %x, sign from
; the vector at %y, and the result is stored back to %x.
; With Zvfh the expectation is a single e16 vfsgnj.vv. With only Zvfhmin both
; operands are widened to f32 (vfwcvt.f.f.v), vfsgnj.vv runs at e32, and the
; result is narrowed back to f16 (vfncvt.f.f.w) before the store.
964 define void @copysign_v8f16(ptr %x, ptr %y) {
965 ; ZVFH-LABEL: copysign_v8f16:
967 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
968 ; ZVFH-NEXT: vle16.v v8, (a0)
969 ; ZVFH-NEXT: vle16.v v9, (a1)
970 ; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
971 ; ZVFH-NEXT: vse16.v v8, (a0)
974 ; ZVFHMIN-LABEL: copysign_v8f16:
976 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
977 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
978 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
979 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
980 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
981 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
982 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v10
983 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
984 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
985 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
987 %a = load <8 x half>, ptr %x
988 %b = load <8 x half>, ptr %y
989 %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
990 store <8 x half> %c, ptr %x
; Intrinsic declaration for the <8 x half> copysign call in copysign_v8f16.
993 declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
; llvm.copysign on the non-power-of-two vector <6 x half>: magnitude from the
; vector at %x, sign from the vector at %y, result stored back to %x.
; With Zvfh the loads and store use VL=6 while the vfsgnj.vv runs with VL=8.
; With only Zvfhmin both operands are widened to f32, vfsgnj.vv runs at e32 and
; the result is narrowed back; the 12-byte result is then stored in two pieces
; that differ per target (hence the separate RV32/RV64 prefixes):
;   RV32: the last two halves as one 32-bit element at %x+8 (after a slide down
;         by 2), and the first four halves with an e16 store of VL=4 at %x.
;   RV64: the first four halves as one 64-bit element at %x, and the last two
;         halves as one 32-bit element at %x+8.
995 define void @copysign_v6f16(ptr %x, ptr %y) {
996 ; ZVFH-LABEL: copysign_v6f16:
998 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
999 ; ZVFH-NEXT: vle16.v v8, (a0)
1000 ; ZVFH-NEXT: vle16.v v9, (a1)
1001 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1002 ; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
1003 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1004 ; ZVFH-NEXT: vse16.v v8, (a0)
1007 ; ZVFHMINLMULMAX2-RV32-LABEL: copysign_v6f16:
1008 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
1009 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1010 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
1011 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
1012 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8
1013 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
1014 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1015 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v8, v10
1016 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1017 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
1018 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1019 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
1020 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
1021 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
1022 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1023 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
1024 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
1026 ; ZVFHMINLMULMAX2-RV64-LABEL: copysign_v6f16:
1027 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
1028 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1029 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
1030 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
1031 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8
1032 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
1033 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1034 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v8, v10
1035 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1036 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
1037 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1038 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
1039 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
1040 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
1041 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
1042 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
1044 ; ZVFHMINLMULMAX1-RV32-LABEL: copysign_v6f16:
1045 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
1046 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1047 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
1048 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
1049 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8
1050 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
1051 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1052 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v10
1053 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1054 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
1055 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1056 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
1057 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
1058 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
1059 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1060 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
1061 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
1063 ; ZVFHMINLMULMAX1-RV64-LABEL: copysign_v6f16:
1064 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
1065 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1066 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
1067 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
1068 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8
1069 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
1070 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1071 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v10
1072 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1073 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
1074 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1075 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
1076 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
1077 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
1078 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
1079 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
1080 %a = load <6 x half>, ptr %x
1081 %b = load <6 x half>, ptr %y
1082 %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
1083 store <6 x half> %c, ptr %x
; Intrinsic declaration for the <6 x half> copysign call in copysign_v6f16.
1086 declare <6 x half> @llvm.copysign.v6f16(<6 x half>, <6 x half>)
; llvm.copysign on <4 x float>: magnitude from the vector at %x, sign from the
; vector at %y, result stored back to %x. Both the Zvfh and the Zvfhmin
; expectations want a single e32 vfsgnj.vv; they differ only in the LMUL of
; the vsetivli (m1 vs. mf2).
1088 define void @copysign_v4f32(ptr %x, ptr %y) {
1089 ; ZVFH-LABEL: copysign_v4f32:
1091 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1092 ; ZVFH-NEXT: vle32.v v8, (a0)
1093 ; ZVFH-NEXT: vle32.v v9, (a1)
1094 ; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
1095 ; ZVFH-NEXT: vse32.v v8, (a0)
1098 ; ZVFHMIN-LABEL: copysign_v4f32:
1100 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
1101 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
1102 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1103 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v9
1104 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
1106 %a = load <4 x float>, ptr %x
1107 %b = load <4 x float>, ptr %y
1108 %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
1109 store <4 x float> %c, ptr %x
; Intrinsic declaration for the <4 x float> copysign call in copysign_v4f32.
1112 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
; llvm.copysign on <2 x double>: magnitude from the vector at %x, sign from the
; vector at %y, result stored back to %x. One shared set of expectations is
; used: two e64/m1 loads, a single vfsgnj.vv, and the store.
1114 define void @copysign_v2f64(ptr %x, ptr %y) {
1115 ; CHECK-LABEL: copysign_v2f64:
1117 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1118 ; CHECK-NEXT: vle64.v v8, (a0)
1119 ; CHECK-NEXT: vle64.v v9, (a1)
1120 ; CHECK-NEXT: vfsgnj.vv v8, v8, v9
1121 ; CHECK-NEXT: vse64.v v8, (a0)
1123 %a = load <2 x double>, ptr %x
1124 %b = load <2 x double>, ptr %y
1125 %c = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
1126 store <2 x double> %c, ptr %x
; Intrinsic declaration for the <2 x double> copysign call in copysign_v2f64.
1129 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
; llvm.copysign on <8 x half> where the sign operand is a splat of the scalar
; half %y (insertelement into lane 0 + zero-mask shufflevector); the magnitude
; is the vector at %x and the result is stored back to %x.
; With Zvfh the splat is folded into a vector-scalar vfsgnj.vf taking fa0.
; With only Zvfhmin the scalar is converted to f32 (fcvt.s.h), splatted at e32
; (vfmv.v.f) and narrowed to an f16 vector; then both operands are widened to
; f32, vfsgnj.vv runs at e32, and the result is narrowed before the store.
1131 define void @copysign_vf_v8f16(ptr %x, half %y) {
1132 ; ZVFH-LABEL: copysign_vf_v8f16:
1134 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1135 ; ZVFH-NEXT: vle16.v v8, (a0)
1136 ; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0
1137 ; ZVFH-NEXT: vse16.v v8, (a0)
1140 ; ZVFHMIN-LABEL: copysign_vf_v8f16:
1142 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1143 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
1144 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
1145 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1146 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
1147 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1148 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
1149 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1150 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
1151 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
1152 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1153 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8
1154 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1155 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
1156 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
1158 %a = load <8 x half>, ptr %x
1159 %b = insertelement <8 x half> poison, half %y, i32 0
1160 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
1161 %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
1162 store <8 x half> %d, ptr %x
; llvm.copysign on the non-power-of-two vector <6 x half> where the sign
; operand is a splat of the scalar half %y; the magnitude is the vector at %x
; and the result is stored back to %x.
; With Zvfh the load and store use VL=6 while the vfsgnj.vf (taking fa0) runs
; with VL=8. With only Zvfhmin the scalar is converted to f32, splatted at e32
; and narrowed to f16; both operands are then widened, vfsgnj.vv runs at e32,
; and the narrowed 12-byte result is stored in two pieces that differ per
; target (hence the separate RV32/RV64 prefixes):
;   RV32: the last two halves as one 32-bit element at %x+8 (after a slide down
;         by 2), and the first four halves with an e16 store of VL=4 at %x.
;   RV64: the first four halves as one 64-bit element at %x, and the last two
;         halves as one 32-bit element at %x+8.
1166 define void @copysign_vf_v6f16(ptr %x, half %y) {
1167 ; ZVFH-LABEL: copysign_vf_v6f16:
1169 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1170 ; ZVFH-NEXT: vle16.v v8, (a0)
1171 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1172 ; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0
1173 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1174 ; ZVFH-NEXT: vse16.v v8, (a0)
1177 ; ZVFHMINLMULMAX2-RV32-LABEL: copysign_vf_v6f16:
1178 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
1179 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1180 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
1181 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
1182 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1183 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
1184 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1185 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
1186 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1187 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
1188 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
1189 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1190 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v9, v8
1191 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1192 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
1193 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1194 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
1195 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
1196 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
1197 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1198 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
1199 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
1201 ; ZVFHMINLMULMAX2-RV64-LABEL: copysign_vf_v6f16:
1202 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
1203 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1204 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
1205 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
1206 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1207 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
1208 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1209 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
1210 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1211 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
1212 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
1213 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1214 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v9, v8
1215 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1216 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
1217 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1218 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
1219 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
1220 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
1221 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
1222 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
1224 ; ZVFHMINLMULMAX1-RV32-LABEL: copysign_vf_v6f16:
1225 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
1226 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1227 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
1228 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
1229 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1230 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
1231 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1232 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
1233 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1234 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
1235 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
1236 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1237 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8
1238 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1239 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
1240 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1241 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
1242 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
1243 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
1244 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1245 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
1246 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
1248 ; ZVFHMINLMULMAX1-RV64-LABEL: copysign_vf_v6f16:
1249 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
1250 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1251 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
1252 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
1253 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1254 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
1255 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1256 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
1257 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1258 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
1259 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
1260 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1261 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8
1262 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1263 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
1264 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1265 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
1266 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
1267 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
1268 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
1269 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
1270 %a = load <6 x half>, ptr %x
1271 %b = insertelement <6 x half> poison, half %y, i32 0
1272 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
1273 %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
1274 store <6 x half> %d, ptr %x
; llvm.copysign on <4 x float> where the sign operand is a splat of the scalar
; float %y; the magnitude is the vector at %x and the result is stored back to
; %x. Both the Zvfh and the Zvfhmin expectations fold the splat into a single
; e32 vfsgnj.vf taking fa0; they differ only in the LMUL of the vsetivli
; (m1 vs. mf2).
1278 define void @copysign_vf_v4f32(ptr %x, float %y) {
1279 ; ZVFH-LABEL: copysign_vf_v4f32:
1281 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1282 ; ZVFH-NEXT: vle32.v v8, (a0)
1283 ; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0
1284 ; ZVFH-NEXT: vse32.v v8, (a0)
1287 ; ZVFHMIN-LABEL: copysign_vf_v4f32:
1289 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
1290 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
1291 ; ZVFHMIN-NEXT: vfsgnj.vf v8, v8, fa0
1292 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
1294 %a = load <4 x float>, ptr %x
1295 %b = insertelement <4 x float> poison, float %y, i32 0
1296 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
1297 %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
1298 store <4 x float> %d, ptr %x
; llvm.copysign on <2 x double> where the sign operand is a splat of the scalar
; double %y; the magnitude is the vector at %x and the result is stored back to
; %x. One shared set of expectations is used: the splat is folded into a single
; e64/m1 vfsgnj.vf taking fa0.
1302 define void @copysign_vf_v2f64(ptr %x, double %y) {
1303 ; CHECK-LABEL: copysign_vf_v2f64:
1305 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1306 ; CHECK-NEXT: vle64.v v8, (a0)
1307 ; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
1308 ; CHECK-NEXT: vse64.v v8, (a0)
1310 %a = load <2 x double>, ptr %x
1311 %b = insertelement <2 x double> poison, double %y, i32 0
1312 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
1313 %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
1314 store <2 x double> %d, ptr %x
; llvm.copysign on <8 x half> where the sign operand is the fneg of the vector
; at %y; the magnitude is the vector at %x and the result is stored back to %x.
; With Zvfh the negation is folded into a single e16 vfsgnjn.vv.
; With only Zvfhmin there is no such fold in the expected output: the sign
; operand is widened to f32, negated with vfneg.v and narrowed back to f16;
; then both operands are widened again, a plain vfsgnj.vv runs at e32, and the
; result is narrowed before the store.
1318 define void @copysign_neg_v8f16(ptr %x, ptr %y) {
1319 ; ZVFH-LABEL: copysign_neg_v8f16:
1321 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1322 ; ZVFH-NEXT: vle16.v v8, (a0)
1323 ; ZVFH-NEXT: vle16.v v9, (a1)
1324 ; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9
1325 ; ZVFH-NEXT: vse16.v v8, (a0)
1328 ; ZVFHMIN-LABEL: copysign_neg_v8f16:
1330 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1331 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1332 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1333 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
1334 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1335 ; ZVFHMIN-NEXT: vfneg.v v8, v10
1336 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1337 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1338 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
1339 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
1340 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1341 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v9
1342 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1343 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
1344 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
1346 %a = load <8 x half>, ptr %x
1347 %b = load <8 x half>, ptr %y
1348 %c = fneg <8 x half> %b
1349 %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
1350 store <8 x half> %d, ptr %x
; llvm.copysign on the non-power-of-two vector <6 x half> where the sign
; operand is the fneg of the vector at %y; the magnitude is the vector at %x
; and the result is stored back to %x.
; With Zvfh the loads and store use VL=6 while a single vfsgnjn.vv (negation
; folded in) runs with VL=8. With only Zvfhmin the sign operand is widened,
; negated with vfneg.v and narrowed; both operands are then widened, a plain
; vfsgnj.vv runs at e32, and the narrowed 12-byte result is stored in two
; pieces that differ per target (hence the separate RV32/RV64 prefixes):
;   RV32: the last two halves as one 32-bit element at %x+8 (after a slide down
;         by 2), and the first four halves with an e16 store of VL=4 at %x.
;   RV64: the first four halves as one 64-bit element at %x, and the last two
;         halves as one 32-bit element at %x+8.
1354 define void @copysign_neg_v6f16(ptr %x, ptr %y) {
1355 ; ZVFH-LABEL: copysign_neg_v6f16:
1357 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1358 ; ZVFH-NEXT: vle16.v v8, (a0)
1359 ; ZVFH-NEXT: vle16.v v9, (a1)
1360 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1361 ; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9
1362 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1363 ; ZVFH-NEXT: vse16.v v8, (a0)
1366 ; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_v6f16:
1367 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
1368 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1369 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
1370 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
1371 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8
1372 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1373 ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v10
1374 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1375 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v8
1376 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
1377 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10
1378 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1379 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v8, v9
1380 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1381 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
1382 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1383 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
1384 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
1385 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
1386 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1387 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
1388 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
1390 ; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_v6f16:
1391 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
1392 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1393 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
1394 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
1395 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8
1396 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1397 ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v10
1398 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1399 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v8
1400 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
1401 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10
1402 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1403 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v8, v9
1404 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1405 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
1406 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1407 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
1408 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
1409 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
1410 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
1411 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
1413 ; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_v6f16:
1414 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
1415 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1416 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
1417 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
1418 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8
1419 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1420 ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v10
1421 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1422 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8
1423 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
1424 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10
1425 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1426 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v9
1427 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1428 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
1429 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1430 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
1431 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
1432 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
1433 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1434 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
1435 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
1437 ; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_v6f16:
1438 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
1439 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1440 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
1441 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
1442 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8
1443 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1444 ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v10
1445 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1446 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v8
1447 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
1448 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10
1449 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1450 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9
1451 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1452 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
1453 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1454 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
1455 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
1456 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
1457 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
1458 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
1459 %a = load <6 x half>, ptr %x
1460 %b = load <6 x half>, ptr %y
1461 %c = fneg <6 x half> %b
1462 %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
1463 store <6 x half> %d, ptr %x
; llvm.copysign on <4 x float> where the sign operand is the fneg of the vector
; at %y; the magnitude is the vector at %x and the result is stored back to %x.
; Both the Zvfh and the Zvfhmin expectations fold the negation into a single
; e32 vfsgnjn.vv; they differ only in the LMUL of the vsetivli (m1 vs. mf2).
1467 define void @copysign_neg_v4f32(ptr %x, ptr %y) {
1468 ; ZVFH-LABEL: copysign_neg_v4f32:
1470 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1471 ; ZVFH-NEXT: vle32.v v8, (a0)
1472 ; ZVFH-NEXT: vle32.v v9, (a1)
1473 ; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9
1474 ; ZVFH-NEXT: vse32.v v8, (a0)
1477 ; ZVFHMIN-LABEL: copysign_neg_v4f32:
1479 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
1480 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
1481 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1482 ; ZVFHMIN-NEXT: vfsgnjn.vv v8, v8, v9
1483 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
1485 %a = load <4 x float>, ptr %x
1486 %b = load <4 x float>, ptr %y
1487 %c = fneg <4 x float> %b
1488 %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
1489 store <4 x float> %d, ptr %x
; llvm.copysign on <2 x double> where the sign operand is the fneg of the
; vector at %y; the magnitude is the vector at %x and the result is stored back
; to %x. One shared set of expectations is used: the negation is folded into a
; single e64/m1 vfsgnjn.vv.
1493 define void @copysign_neg_v2f64(ptr %x, ptr %y) {
1494 ; CHECK-LABEL: copysign_neg_v2f64:
1496 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1497 ; CHECK-NEXT: vle64.v v8, (a0)
1498 ; CHECK-NEXT: vle64.v v9, (a1)
1499 ; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
1500 ; CHECK-NEXT: vse64.v v8, (a0)
1502 %a = load <2 x double>, ptr %x
1503 %b = load <2 x double>, ptr %y
1504 %c = fneg <2 x double> %b
1505 %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
1506 store <2 x double> %d, ptr %x
; llvm.copysign on <4 x half> with a mixed-width sign operand: the magnitude is
; the <4 x half> at %x, and the sign comes from the <4 x float> at %y, which
; the IR negates (fneg) and then truncates to half (fptrunc). The result is
; stored back to %x.
; With Zvfh the f32 sign vector is narrowed once (vfncvt.f.f.w) and the
; negation is folded into an e16 vfsgnjn.vv.
; With only Zvfhmin the expected sequence narrows the sign vector to f16
; first, then widens it, applies vfneg.v at e32 and narrows it again; finally
; the magnitude and sign are combined by a plain vfsgnj.vv at e32 and the
; result is narrowed before the store.
1510 define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
1511 ; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32:
1513 ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1514 ; ZVFH-NEXT: vle32.v v8, (a1)
1515 ; ZVFH-NEXT: vle16.v v9, (a0)
1516 ; ZVFH-NEXT: vfncvt.f.f.w v10, v8
1517 ; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10
1518 ; ZVFH-NEXT: vse16.v v8, (a0)
1521 ; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
1523 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1524 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
1525 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1526 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
1527 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
1528 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
1529 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1530 ; ZVFHMIN-NEXT: vfneg.v v8, v9
1531 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1532 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
1533 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
1534 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1535 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8
1536 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1537 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
1538 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
1540 %a = load <4 x half>, ptr %x
1541 %b = load <4 x float>, ptr %y
1542 %c = fneg <4 x float> %b
1543 %d = fptrunc <4 x float> %c to <4 x half>
1544 %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d)
1545 store <4 x half> %e, ptr %x
; Intrinsic declaration for the <4 x half> copysign call in
; copysign_neg_trunc_v4f16_v4f32.
1548 declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
; Three-element (non-power-of-two) variant of the negate + truncate +
; copysign pattern: <3 x half> magnitude, <3 x float> sign source.
; With ZVFH the expected code loads and stores with VL=3 but performs the
; narrowing convert and vfsgnjn.vv with VL=4.
; The ZVFHMIN variants expect the e32-promoted sequence (separate vfneg.v
; and vfsgnj.vv) at VL=4, followed by a store through a 16-byte stack
; frame: the vector is written to sp+8, the third half element is copied
; with flh 12(sp) / fsh 4(a0), and the first two halves are stored as a
; single e32 element. The RV64 variants additionally copy the magnitude
; operand through the stack with one 64-bit vector load/store before
; reloading it as e16.
1550 define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
1551 ; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
1553 ; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
1554 ; ZVFH-NEXT: vle32.v v8, (a1)
1555 ; ZVFH-NEXT: vle16.v v9, (a0)
1556 ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1557 ; ZVFH-NEXT: vfncvt.f.f.w v10, v8
1558 ; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10
1559 ; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
1560 ; ZVFH-NEXT: vse16.v v8, (a0)
1563 ; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
1564 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
1565 ; ZVFHMINLMULMAX2-RV32-NEXT: addi sp, sp, -16
1566 ; ZVFHMINLMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16
1567 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1568 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
1569 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
1570 ; ZVFHMINLMULMAX2-RV32-NEXT: vle32.v v9, (a1)
1571 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1572 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8
1573 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9
1574 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
1575 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1576 ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v9
1577 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1578 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
1579 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
1580 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1581 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v10, v8
1582 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1583 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
1584 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, sp, 8
1585 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a1)
1586 ; ZVFHMINLMULMAX2-RV32-NEXT: flh fa5, 12(sp)
1587 ; ZVFHMINLMULMAX2-RV32-NEXT: fsh fa5, 4(a0)
1588 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1589 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a0)
1590 ; ZVFHMINLMULMAX2-RV32-NEXT: addi sp, sp, 16
1591 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
1593 ; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
1594 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
1595 ; ZVFHMINLMULMAX2-RV64-NEXT: addi sp, sp, -16
1596 ; ZVFHMINLMULMAX2-RV64-NEXT: .cfi_def_cfa_offset 16
1597 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1598 ; ZVFHMINLMULMAX2-RV64-NEXT: vle64.v v8, (a0)
1599 ; ZVFHMINLMULMAX2-RV64-NEXT: mv a2, sp
1600 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a2)
1601 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1602 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2)
1603 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
1604 ; ZVFHMINLMULMAX2-RV64-NEXT: vle32.v v9, (a1)
1605 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1606 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8
1607 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9
1608 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
1609 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1610 ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v9
1611 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1612 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
1613 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
1614 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1615 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v10, v8
1616 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1617 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
1618 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a1, sp, 8
1619 ; ZVFHMINLMULMAX2-RV64-NEXT: vse16.v v9, (a1)
1620 ; ZVFHMINLMULMAX2-RV64-NEXT: flh fa5, 12(sp)
1621 ; ZVFHMINLMULMAX2-RV64-NEXT: fsh fa5, 4(a0)
1622 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1623 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v9, (a0)
1624 ; ZVFHMINLMULMAX2-RV64-NEXT: addi sp, sp, 16
1625 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
1627 ; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
1628 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
1629 ; ZVFHMINLMULMAX1-RV32-NEXT: addi sp, sp, -16
1630 ; ZVFHMINLMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16
1631 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1632 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
1633 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
1634 ; ZVFHMINLMULMAX1-RV32-NEXT: vle32.v v9, (a1)
1635 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1636 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8
1637 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9
1638 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
1639 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1640 ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v9
1641 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1642 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
1643 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
1644 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1645 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v10, v8
1646 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1647 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
1648 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, sp, 8
1649 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a1)
1650 ; ZVFHMINLMULMAX1-RV32-NEXT: flh fa5, 12(sp)
1651 ; ZVFHMINLMULMAX1-RV32-NEXT: fsh fa5, 4(a0)
1652 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1653 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a0)
1654 ; ZVFHMINLMULMAX1-RV32-NEXT: addi sp, sp, 16
1655 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
1657 ; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
1658 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
1659 ; ZVFHMINLMULMAX1-RV64-NEXT: addi sp, sp, -16
1660 ; ZVFHMINLMULMAX1-RV64-NEXT: .cfi_def_cfa_offset 16
1661 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1662 ; ZVFHMINLMULMAX1-RV64-NEXT: vle64.v v8, (a0)
1663 ; ZVFHMINLMULMAX1-RV64-NEXT: mv a2, sp
1664 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a2)
1665 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1666 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2)
1667 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
1668 ; ZVFHMINLMULMAX1-RV64-NEXT: vle32.v v9, (a1)
1669 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1670 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8
1671 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9
1672 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
1673 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1674 ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v9
1675 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1676 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
1677 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
1678 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1679 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v10, v8
1680 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
1681 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
1682 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a1, sp, 8
1683 ; ZVFHMINLMULMAX1-RV64-NEXT: vse16.v v9, (a1)
1684 ; ZVFHMINLMULMAX1-RV64-NEXT: flh fa5, 12(sp)
1685 ; ZVFHMINLMULMAX1-RV64-NEXT: fsh fa5, 4(a0)
1686 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1687 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v9, (a0)
1688 ; ZVFHMINLMULMAX1-RV64-NEXT: addi sp, sp, 16
1689 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
; IR under test: negate the float vector, truncate to half, use it as the
; sign operand of copysign, and store the result back through %x.
1690 %a = load <3 x half>, ptr %x
1691 %b = load <3 x float>, ptr %y
1692 %c = fneg <3 x float> %b
1693 %d = fptrunc <3 x float> %c to <3 x half>
1694 %e = call <3 x half> @llvm.copysign.v3f16(<3 x half> %a, <3 x half> %d)
1695 store <3 x half> %e, ptr %x
1698 declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>)
; copysign of a <2 x double> magnitude whose sign operand is a <2 x float>
; that is negated and then extended to double. Every configuration is
; expected to widen the float operand with vfwcvt.f.f.v and fold the negate
; into vfsgnjn.vv at e64.
1700 define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) {
1701 ; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
1703 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1704 ; CHECK-NEXT: vle32.v v8, (a1)
1705 ; CHECK-NEXT: vle64.v v9, (a0)
1706 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8
1707 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1708 ; CHECK-NEXT: vfsgnjn.vv v8, v9, v10
1709 ; CHECK-NEXT: vse64.v v8, (a0)
1711 %a = load <2 x double>, ptr %x
1712 %b = load <2 x float>, ptr %y
1713 %c = fneg <2 x float> %b
1714 %d = fpext <2 x float> %c to <2 x double>
1715 %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d)
1716 store <2 x double> %e, ptr %x
; llvm.sqrt on <8 x half>, loaded from and stored back to %x.
; With ZVFH the expected code is a single vfsqrt.v at e16.
; With ZVFHMIN the expected code widens to e32 (vfwcvt.f.f.v), takes the
; square root there, and narrows back with vfncvt.f.f.w.
1720 define void @sqrt_v8f16(ptr %x) {
1721 ; ZVFH-LABEL: sqrt_v8f16:
1723 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1724 ; ZVFH-NEXT: vle16.v v8, (a0)
1725 ; ZVFH-NEXT: vfsqrt.v v8, v8
1726 ; ZVFH-NEXT: vse16.v v8, (a0)
1729 ; ZVFHMIN-LABEL: sqrt_v8f16:
1731 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1732 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
1733 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
1734 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1735 ; ZVFHMIN-NEXT: vfsqrt.v v8, v9
1736 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1737 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
1738 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
1740 %a = load <8 x half>, ptr %x
1741 %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
1742 store <8 x half> %b, ptr %x
1745 declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
; llvm.sqrt on <6 x half> (non-power-of-two element count).
; With ZVFH the expected code loads and stores with VL=6 and runs
; vfsqrt.v with VL=8.
; The ZVFHMIN variants expect the e32-promoted sequence at VL=8 and then a
; split store of the 12-byte result: the RV32 variants store one e32 element
; (slid down by 2) at offset 8 and four e16 elements at offset 0; the RV64
; variants store the first 8 bytes with vse64.v and one e32 element
; (slid down by 2) at offset 8.
1747 define void @sqrt_v6f16(ptr %x) {
1748 ; ZVFH-LABEL: sqrt_v6f16:
1750 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1751 ; ZVFH-NEXT: vle16.v v8, (a0)
1752 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1753 ; ZVFH-NEXT: vfsqrt.v v8, v8
1754 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1755 ; ZVFH-NEXT: vse16.v v8, (a0)
1758 ; ZVFHMINLMULMAX2-RV32-LABEL: sqrt_v6f16:
1759 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
1760 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1761 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
1762 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
1763 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1764 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsqrt.v v8, v9
1765 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1766 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
1767 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
1768 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1769 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
1770 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
1771 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1772 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
1773 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
1775 ; ZVFHMINLMULMAX2-RV64-LABEL: sqrt_v6f16:
1776 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
1777 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1778 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
1779 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
1780 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1781 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsqrt.v v8, v9
1782 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1783 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
1784 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1785 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
1786 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
1787 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
1788 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
1789 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
1791 ; ZVFHMINLMULMAX1-RV32-LABEL: sqrt_v6f16:
1792 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
1793 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1794 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
1795 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
1796 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1797 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsqrt.v v8, v9
1798 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1799 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
1800 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
1801 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1802 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
1803 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
1804 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1805 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
1806 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
1808 ; ZVFHMINLMULMAX1-RV64-LABEL: sqrt_v6f16:
1809 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
1810 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1811 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
1812 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
1813 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1814 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsqrt.v v8, v9
1815 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1816 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
1817 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1818 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
1819 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
1820 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
1821 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
1822 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
; IR under test: load, sqrt, store back through %x.
1823 %a = load <6 x half>, ptr %x
1824 %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
1825 store <6 x half> %b, ptr %x
1828 declare <6 x half> @llvm.sqrt.v6f16(<6 x half>)
; llvm.sqrt on <4 x float>. Both prefixes expect a single vfsqrt.v at e32;
; the expected sequences differ only in the LMUL chosen by vsetivli
; (m1 under ZVFH, mf2 under ZVFHMIN).
1830 define void @sqrt_v4f32(ptr %x) {
1831 ; ZVFH-LABEL: sqrt_v4f32:
1833 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1834 ; ZVFH-NEXT: vle32.v v8, (a0)
1835 ; ZVFH-NEXT: vfsqrt.v v8, v8
1836 ; ZVFH-NEXT: vse32.v v8, (a0)
1839 ; ZVFHMIN-LABEL: sqrt_v4f32:
1841 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
1842 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
1843 ; ZVFHMIN-NEXT: vfsqrt.v v8, v8
1844 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
1846 %a = load <4 x float>, ptr %x
1847 %b = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
1848 store <4 x float> %b, ptr %x
1851 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
; llvm.sqrt on <2 x double>. Every configuration is expected to emit a
; single vfsqrt.v at e64/m1 between the load and the store.
1853 define void @sqrt_v2f64(ptr %x) {
1854 ; CHECK-LABEL: sqrt_v2f64:
1856 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1857 ; CHECK-NEXT: vle64.v v8, (a0)
1858 ; CHECK-NEXT: vfsqrt.v v8, v8
1859 ; CHECK-NEXT: vse64.v v8, (a0)
1861 %a = load <2 x double>, ptr %x
1862 %b = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
1863 store <2 x double> %b, ptr %x
1866 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
; llvm.fma(%a, %b, %c) on <8 x half>; the three operands come from %x, %y
; and %z and the result is stored back through %x.
; With ZVFH the expected code is one vfmacc.vv at e16 that accumulates into
; the register holding the %z operand.
; With ZVFHMIN the expected code widens all three operands to e32, uses
; vfmadd.vv there, and narrows the result with vfncvt.f.f.w.
1868 define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
1869 ; ZVFH-LABEL: fma_v8f16:
1871 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1872 ; ZVFH-NEXT: vle16.v v8, (a0)
1873 ; ZVFH-NEXT: vle16.v v9, (a1)
1874 ; ZVFH-NEXT: vle16.v v10, (a2)
1875 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
1876 ; ZVFH-NEXT: vse16.v v10, (a0)
1879 ; ZVFHMIN-LABEL: fma_v8f16:
1881 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1882 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
1883 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1884 ; ZVFHMIN-NEXT: vle16.v v10, (a1)
1885 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
1886 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
1887 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
1888 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1889 ; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11
1890 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1891 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
1892 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
1894 %a = load <8 x half>, ptr %x
1895 %b = load <8 x half>, ptr %y
1896 %c = load <8 x half>, ptr %z
1897 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
1898 store <8 x half> %d, ptr %x
1901 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
; llvm.fma on <6 x half> (non-power-of-two element count).
; With ZVFH the expected code loads and stores with VL=6 and runs
; vfmacc.vv with VL=8.
; The ZVFHMIN variants expect the e32-promoted sequence (three widening
; converts, vfmadd.vv, one narrowing convert) at VL=8 and then a split
; store of the 12-byte result: RV32 stores one e32 element at offset 8 plus
; four e16 elements at offset 0; RV64 stores the first 8 bytes with vse64.v
; plus one e32 element at offset 8.
1903 define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
1904 ; ZVFH-LABEL: fma_v6f16:
1906 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1907 ; ZVFH-NEXT: vle16.v v8, (a0)
1908 ; ZVFH-NEXT: vle16.v v9, (a1)
1909 ; ZVFH-NEXT: vle16.v v10, (a2)
1910 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1911 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
1912 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1913 ; ZVFH-NEXT: vse16.v v10, (a0)
1916 ; ZVFHMINLMULMAX2-RV32-LABEL: fma_v6f16:
1917 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
1918 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1919 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a2)
1920 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
1921 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a1)
1922 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8
1923 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
1924 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10
1925 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1926 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v9, v8, v11
1927 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1928 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9
1929 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1930 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
1931 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
1932 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
1933 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1934 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
1935 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
1937 ; ZVFHMINLMULMAX2-RV64-LABEL: fma_v6f16:
1938 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
1939 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1940 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2)
1941 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
1942 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a1)
1943 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8
1944 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
1945 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10
1946 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1947 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v9, v8, v11
1948 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1949 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9
1950 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1951 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
1952 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
1953 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
1954 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
1955 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
1957 ; ZVFHMINLMULMAX1-RV32-LABEL: fma_v6f16:
1958 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
1959 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1960 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a2)
1961 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
1962 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a1)
1963 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8
1964 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
1965 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10
1966 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1967 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v9, v8, v11
1968 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1969 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9
1970 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1971 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
1972 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
1973 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
1974 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1975 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
1976 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
1978 ; ZVFHMINLMULMAX1-RV64-LABEL: fma_v6f16:
1979 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
1980 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1981 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2)
1982 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
1983 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a1)
1984 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8
1985 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
1986 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10
1987 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1988 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v9, v8, v11
1989 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1990 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9
1991 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1992 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
1993 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
1994 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
1995 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
1996 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
; IR under test: three loads, one fma call, store back through %x.
1997 %a = load <6 x half>, ptr %x
1998 %b = load <6 x half>, ptr %y
1999 %c = load <6 x half>, ptr %z
2000 %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
2001 store <6 x half> %d, ptr %x
2004 declare <6 x half> @llvm.fma.v6f16(<6 x half>, <6 x half>, <6 x half>)
; llvm.fma on <4 x float>. Both prefixes expect a single vfmacc.vv at e32
; accumulating into the %z operand; the expected sequences differ only in
; the LMUL chosen by vsetivli (m1 under ZVFH, mf2 under ZVFHMIN).
2006 define void @fma_v4f32(ptr %x, ptr %y, ptr %z) {
2007 ; ZVFH-LABEL: fma_v4f32:
2009 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2010 ; ZVFH-NEXT: vle32.v v8, (a0)
2011 ; ZVFH-NEXT: vle32.v v9, (a1)
2012 ; ZVFH-NEXT: vle32.v v10, (a2)
2013 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
2014 ; ZVFH-NEXT: vse32.v v10, (a0)
2017 ; ZVFHMIN-LABEL: fma_v4f32:
2019 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
2020 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2021 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
2022 ; ZVFHMIN-NEXT: vle32.v v10, (a2)
2023 ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
2024 ; ZVFHMIN-NEXT: vse32.v v10, (a0)
2026 %a = load <4 x float>, ptr %x
2027 %b = load <4 x float>, ptr %y
2028 %c = load <4 x float>, ptr %z
2029 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
2030 store <4 x float> %d, ptr %x
2033 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
; llvm.fma on <2 x double>. Every configuration is expected to emit a
; single vfmacc.vv at e64/m1 accumulating into the %z operand.
2035 define void @fma_v2f64(ptr %x, ptr %y, ptr %z) {
2036 ; CHECK-LABEL: fma_v2f64:
2038 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2039 ; CHECK-NEXT: vle64.v v8, (a0)
2040 ; CHECK-NEXT: vle64.v v9, (a1)
2041 ; CHECK-NEXT: vle64.v v10, (a2)
2042 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
2043 ; CHECK-NEXT: vse64.v v10, (a0)
2045 %a = load <2 x double>, ptr %x
2046 %b = load <2 x double>, ptr %y
2047 %c = load <2 x double>, ptr %z
2048 %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
2049 store <2 x double> %d, ptr %x
2052 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
; fma(%a, %b, fneg(%c)) on <8 x half>, i.e. a*b - c.
; With ZVFH the expected code folds the negated addend into one vfmsac.vv
; at e16.
; With ZVFHMIN the expected code does not fold the negate: %c is widened,
; negated with vfneg.v at e32 and narrowed again, then all three operands
; are widened for a plain vfmacc.vv at e32 and the result is narrowed.
2054 define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
2055 ; ZVFH-LABEL: fmsub_v8f16:
2057 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2058 ; ZVFH-NEXT: vle16.v v8, (a0)
2059 ; ZVFH-NEXT: vle16.v v9, (a1)
2060 ; ZVFH-NEXT: vle16.v v10, (a2)
2061 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
2062 ; ZVFH-NEXT: vse16.v v10, (a0)
2065 ; ZVFHMIN-LABEL: fmsub_v8f16:
2067 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2068 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
2069 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
2070 ; ZVFHMIN-NEXT: vle16.v v10, (a1)
2071 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
2072 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2073 ; ZVFHMIN-NEXT: vfneg.v v8, v11
2074 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2075 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
2076 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
2077 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
2078 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
2079 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2080 ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
2081 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2082 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
2083 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
2085 %a = load <8 x half>, ptr %x
2086 %b = load <8 x half>, ptr %y
2087 %c = load <8 x half>, ptr %z
2088 %neg = fneg <8 x half> %c
2089 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
2090 store <8 x half> %d, ptr %x
; fma(%a, %b, fneg(%c)) on <6 x half> (non-power-of-two element count).
; With ZVFH the expected code loads and stores with VL=6 and runs
; vfmsac.vv with VL=8.
; The ZVFHMIN variants expect the unfolded e32-promoted sequence at VL=8
; (separate vfneg.v of the addend, then vfmacc.vv) and then a split store
; of the 12-byte result: RV32 stores one e32 element at offset 8 plus four
; e16 elements at offset 0; RV64 stores the first 8 bytes with vse64.v plus
; one e32 element at offset 8.
2094 define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
2095 ; ZVFH-LABEL: fmsub_v6f16:
2097 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2098 ; ZVFH-NEXT: vle16.v v8, (a0)
2099 ; ZVFH-NEXT: vle16.v v9, (a1)
2100 ; ZVFH-NEXT: vle16.v v10, (a2)
2101 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2102 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
2103 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2104 ; ZVFH-NEXT: vse16.v v10, (a0)
2107 ; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_v6f16:
2108 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
2109 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2110 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a2)
2111 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
2112 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a1)
2113 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8
2114 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2115 ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v11
2116 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2117 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v8
2118 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
2119 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10
2120 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v11
2121 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2122 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmacc.vv v10, v8, v9
2123 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2124 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v10
2125 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2126 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
2127 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
2128 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
2129 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
2130 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
2131 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
2133 ; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_v6f16:
2134 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
2135 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2136 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2)
2137 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
2138 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a1)
2139 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8
2140 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2141 ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v11
2142 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2143 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v8
2144 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
2145 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10
2146 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v11
2147 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2148 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmacc.vv v10, v8, v9
2149 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2150 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v10
2151 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2152 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
2153 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
2154 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
2155 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
2156 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
2158 ; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_v6f16:
2159 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
2160 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2161 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a2)
2162 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
2163 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a1)
2164 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8
2165 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2166 ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v11
2167 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2168 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v8
2169 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
2170 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10
2171 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v11
2172 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2173 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9
2174 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2175 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v10
2176 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2177 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
2178 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
2179 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
2180 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
2181 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
2182 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
2184 ; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_v6f16:
2185 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
2186 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2187 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2)
2188 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
2189 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a1)
2190 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8
2191 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2192 ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v11
2193 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2194 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v8
2195 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
2196 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10
2197 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v11
2198 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2199 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9
2200 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2201 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v10
2202 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2203 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
2204 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
2205 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
2206 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
2207 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
; IR under test: the addend %c is negated before the fma call.
2208 %a = load <6 x half>, ptr %x
2209 %b = load <6 x half>, ptr %y
2210 %c = load <6 x half>, ptr %z
2211 %neg = fneg <6 x half> %c
2212 %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
2213 store <6 x half> %d, ptr %x
; fma(fneg(%a), %b, %c) on <4 x float>, i.e. -(a*b) + c. Both prefixes
; expect the negated multiplicand to fold into a single vfnmsac.vv at e32;
; the expected sequences differ only in LMUL (m1 under ZVFH, mf2 under
; ZVFHMIN).
2217 define void @fnmsub_v4f32(ptr %x, ptr %y, ptr %z) {
2218 ; ZVFH-LABEL: fnmsub_v4f32:
2220 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2221 ; ZVFH-NEXT: vle32.v v8, (a0)
2222 ; ZVFH-NEXT: vle32.v v9, (a1)
2223 ; ZVFH-NEXT: vle32.v v10, (a2)
2224 ; ZVFH-NEXT: vfnmsac.vv v10, v8, v9
2225 ; ZVFH-NEXT: vse32.v v10, (a0)
2228 ; ZVFHMIN-LABEL: fnmsub_v4f32:
2230 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
2231 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2232 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
2233 ; ZVFHMIN-NEXT: vle32.v v10, (a2)
2234 ; ZVFHMIN-NEXT: vfnmsac.vv v10, v8, v9
2235 ; ZVFHMIN-NEXT: vse32.v v10, (a0)
2237 %a = load <4 x float>, ptr %x
2238 %b = load <4 x float>, ptr %y
2239 %c = load <4 x float>, ptr %z
2240 %neg = fneg <4 x float> %a
2241 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
2242 store <4 x float> %d, ptr %x
; fma(%a, fneg(%b), fneg(%c)) on <2 x double>, i.e. -(a*b) - c. Every
; configuration is expected to fold both negations into a single
; vfnmacc.vv at e64/m1.
2246 define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) {
2247 ; CHECK-LABEL: fnmadd_v2f64:
2249 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2250 ; CHECK-NEXT: vle64.v v8, (a0)
2251 ; CHECK-NEXT: vle64.v v9, (a1)
2252 ; CHECK-NEXT: vle64.v v10, (a2)
2253 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
2254 ; CHECK-NEXT: vse64.v v10, (a0)
2256 %a = load <2 x double>, ptr %x
2257 %b = load <2 x double>, ptr %y
2258 %c = load <2 x double>, ptr %z
2259 %neg = fneg <2 x double> %b
2260 %neg2 = fneg <2 x double> %c
2261 %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
2262 store <2 x double> %d, ptr %x
; fadd on <16 x half> (32 bytes of data per operand).
; LMULMAX2 expects a single m2 vfadd.vv with VL=16.
; LMULMAX1-RV32 / LMULMAX1-RV64 expect the operation split into two m1
; halves of 8 elements each, with the upper half addressed at byte offset
; 16; the two variants differ in load order, scratch-register use and the
; operand order of the upper-half vfadd.vv.
; ZVFHMINLMULMAX2 expects both operands widened to e32/m2, one vfadd.vv at
; e32, and a narrowing convert before the store.
2266 define void @fadd_v16f16(ptr %x, ptr %y) {
2267 ; LMULMAX2-LABEL: fadd_v16f16:
2268 ; LMULMAX2: # %bb.0:
2269 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2270 ; LMULMAX2-NEXT: vle16.v v8, (a0)
2271 ; LMULMAX2-NEXT: vle16.v v10, (a1)
2272 ; LMULMAX2-NEXT: vfadd.vv v8, v8, v10
2273 ; LMULMAX2-NEXT: vse16.v v8, (a0)
2274 ; LMULMAX2-NEXT: ret
2276 ; LMULMAX1-RV32-LABEL: fadd_v16f16:
2277 ; LMULMAX1-RV32: # %bb.0:
2278 ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2279 ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
2280 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2281 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
2282 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2283 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
2284 ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
2285 ; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10
2286 ; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11
2287 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
2288 ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
2289 ; LMULMAX1-RV32-NEXT: ret
2291 ; LMULMAX1-RV64-LABEL: fadd_v16f16:
2292 ; LMULMAX1-RV64: # %bb.0:
2293 ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2294 ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
2295 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2296 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
2297 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2298 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
2299 ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
2300 ; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9
2301 ; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11
2302 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
2303 ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
2304 ; LMULMAX1-RV64-NEXT: ret
2306 ; ZVFHMINLMULMAX2-LABEL: fadd_v16f16:
2307 ; ZVFHMINLMULMAX2: # %bb.0:
2308 ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma
2309 ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1)
2310 ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0)
2311 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8
2312 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9
2313 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2314 ; ZVFHMINLMULMAX2-NEXT: vfadd.vv v8, v12, v10
2315 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2316 ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8
2317 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0)
2318 ; ZVFHMINLMULMAX2-NEXT: ret
2319 %a = load <16 x half>, ptr %x
2320 %b = load <16 x half>, ptr %y
2321 %c = fadd <16 x half> %a, %b
2322 store <16 x half> %c, ptr %x
; fadd_v8f32 - element-wise fadd of two <8 x float> vectors loaded from %x and
; %y; the sum is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e32/m2 vfadd.vv with VL=8.
;   LMULMAX1-RV32/64  two e32/m1 vfadd.vv with VL=4; the upper half is
;                     addressed at byte offset 16.
;   ZVFHMIN           one e32/m1 vfadd.vv with VL=8 (presumably possible
;                     because those configurations enable zvl256b - confirm
;                     against the RUN lines at the top of the file).
2326 define void @fadd_v8f32(ptr %x, ptr %y) {
2327 ; LMULMAX2-LABEL: fadd_v8f32:
2328 ; LMULMAX2: # %bb.0:
2329 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2330 ; LMULMAX2-NEXT: vle32.v v8, (a0)
2331 ; LMULMAX2-NEXT: vle32.v v10, (a1)
2332 ; LMULMAX2-NEXT: vfadd.vv v8, v8, v10
2333 ; LMULMAX2-NEXT: vse32.v v8, (a0)
2334 ; LMULMAX2-NEXT: ret
2336 ; LMULMAX1-RV32-LABEL: fadd_v8f32:
2337 ; LMULMAX1-RV32: # %bb.0:
2338 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2339 ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
2340 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2341 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
2342 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2343 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
2344 ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
2345 ; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10
2346 ; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11
2347 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
2348 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
2349 ; LMULMAX1-RV32-NEXT: ret
2351 ; LMULMAX1-RV64-LABEL: fadd_v8f32:
2352 ; LMULMAX1-RV64: # %bb.0:
2353 ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2354 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
2355 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2356 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
2357 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2358 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
2359 ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
2360 ; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9
2361 ; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11
2362 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
2363 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
2364 ; LMULMAX1-RV64-NEXT: ret
2366 ; ZVFHMIN-LABEL: fadd_v8f32:
2368 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
2369 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2370 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
2371 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
2372 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2374 %a = load <8 x float>, ptr %x
2375 %b = load <8 x float>, ptr %y
2376 %c = fadd <8 x float> %a, %b
2377 store <8 x float> %c, ptr %x
; fadd_v4f64 - element-wise fadd of two <4 x double> vectors loaded from %x
; and %y; the sum is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e64/m2 vfadd.vv with VL=4.
;   LMULMAX1-RV32/64  two e64/m1 vfadd.vv with VL=2; the upper half is
;                     addressed at byte offset 16.
;   ZVFHMIN           one e64/m1 vfadd.vv with VL=4 (presumably possible
;                     because those configurations enable zvl256b - confirm
;                     against the RUN lines at the top of the file).
2381 define void @fadd_v4f64(ptr %x, ptr %y) {
2382 ; LMULMAX2-LABEL: fadd_v4f64:
2383 ; LMULMAX2: # %bb.0:
2384 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2385 ; LMULMAX2-NEXT: vle64.v v8, (a0)
2386 ; LMULMAX2-NEXT: vle64.v v10, (a1)
2387 ; LMULMAX2-NEXT: vfadd.vv v8, v8, v10
2388 ; LMULMAX2-NEXT: vse64.v v8, (a0)
2389 ; LMULMAX2-NEXT: ret
2391 ; LMULMAX1-RV32-LABEL: fadd_v4f64:
2392 ; LMULMAX1-RV32: # %bb.0:
2393 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2394 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
2395 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2396 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
2397 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2398 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
2399 ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
2400 ; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10
2401 ; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11
2402 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
2403 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
2404 ; LMULMAX1-RV32-NEXT: ret
2406 ; LMULMAX1-RV64-LABEL: fadd_v4f64:
2407 ; LMULMAX1-RV64: # %bb.0:
2408 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2409 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
2410 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2411 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
2412 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2413 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
2414 ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
2415 ; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9
2416 ; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11
2417 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
2418 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
2419 ; LMULMAX1-RV64-NEXT: ret
2421 ; ZVFHMIN-LABEL: fadd_v4f64:
2423 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
2424 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
2425 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
2426 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
2427 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
2429 %a = load <4 x double>, ptr %x
2430 %b = load <4 x double>, ptr %y
2431 %c = fadd <4 x double> %a, %b
2432 store <4 x double> %c, ptr %x
; fsub_v16f16 - element-wise fsub (%x value minus %y value) of two
; <16 x half> vectors; the difference is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e16/m2 vfsub.vv with VL=16.
;   LMULMAX1-RV32/64  two e16/m1 vfsub.vv with VL=8; the upper half is
;                     addressed at byte offset 16. The two targets differ in
;                     scratch address-register usage and in load order.
;   ZVFHMINLMULMAX2   both inputs are widened with vfwcvt.f.f.v, the subtract
;                     runs at e32/m2, and the result is narrowed with
;                     vfncvt.f.f.w before the e16 store.
2436 define void @fsub_v16f16(ptr %x, ptr %y) {
2437 ; LMULMAX2-LABEL: fsub_v16f16:
2438 ; LMULMAX2: # %bb.0:
2439 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2440 ; LMULMAX2-NEXT: vle16.v v8, (a0)
2441 ; LMULMAX2-NEXT: vle16.v v10, (a1)
2442 ; LMULMAX2-NEXT: vfsub.vv v8, v8, v10
2443 ; LMULMAX2-NEXT: vse16.v v8, (a0)
2444 ; LMULMAX2-NEXT: ret
2446 ; LMULMAX1-RV32-LABEL: fsub_v16f16:
2447 ; LMULMAX1-RV32: # %bb.0:
2448 ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2449 ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
2450 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2451 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
2452 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2453 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
2454 ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
2455 ; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10
2456 ; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11
2457 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
2458 ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
2459 ; LMULMAX1-RV32-NEXT: ret
2461 ; LMULMAX1-RV64-LABEL: fsub_v16f16:
2462 ; LMULMAX1-RV64: # %bb.0:
2463 ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2464 ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
2465 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2466 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
2467 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2468 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
2469 ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
2470 ; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9
2471 ; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11
2472 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
2473 ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
2474 ; LMULMAX1-RV64-NEXT: ret
2476 ; ZVFHMINLMULMAX2-LABEL: fsub_v16f16:
2477 ; ZVFHMINLMULMAX2: # %bb.0:
2478 ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma
2479 ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1)
2480 ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0)
2481 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8
2482 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9
2483 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2484 ; ZVFHMINLMULMAX2-NEXT: vfsub.vv v8, v12, v10
2485 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2486 ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8
2487 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0)
2488 ; ZVFHMINLMULMAX2-NEXT: ret
2489 %a = load <16 x half>, ptr %x
2490 %b = load <16 x half>, ptr %y
2491 %c = fsub <16 x half> %a, %b
2492 store <16 x half> %c, ptr %x
; fsub_v8f32 - element-wise fsub (%x value minus %y value) of two <8 x float>
; vectors; the difference is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e32/m2 vfsub.vv with VL=8.
;   LMULMAX1-RV32/64  two e32/m1 vfsub.vv with VL=4; the upper half is
;                     addressed at byte offset 16.
;   ZVFHMIN           one e32/m1 vfsub.vv with VL=8 (presumably possible
;                     because those configurations enable zvl256b - confirm
;                     against the RUN lines at the top of the file).
2496 define void @fsub_v8f32(ptr %x, ptr %y) {
2497 ; LMULMAX2-LABEL: fsub_v8f32:
2498 ; LMULMAX2: # %bb.0:
2499 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2500 ; LMULMAX2-NEXT: vle32.v v8, (a0)
2501 ; LMULMAX2-NEXT: vle32.v v10, (a1)
2502 ; LMULMAX2-NEXT: vfsub.vv v8, v8, v10
2503 ; LMULMAX2-NEXT: vse32.v v8, (a0)
2504 ; LMULMAX2-NEXT: ret
2506 ; LMULMAX1-RV32-LABEL: fsub_v8f32:
2507 ; LMULMAX1-RV32: # %bb.0:
2508 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2509 ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
2510 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2511 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
2512 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2513 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
2514 ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
2515 ; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10
2516 ; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11
2517 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
2518 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
2519 ; LMULMAX1-RV32-NEXT: ret
2521 ; LMULMAX1-RV64-LABEL: fsub_v8f32:
2522 ; LMULMAX1-RV64: # %bb.0:
2523 ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2524 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
2525 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2526 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
2527 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2528 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
2529 ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
2530 ; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9
2531 ; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11
2532 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
2533 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
2534 ; LMULMAX1-RV64-NEXT: ret
2536 ; ZVFHMIN-LABEL: fsub_v8f32:
2538 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
2539 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2540 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
2541 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
2542 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2544 %a = load <8 x float>, ptr %x
2545 %b = load <8 x float>, ptr %y
2546 %c = fsub <8 x float> %a, %b
2547 store <8 x float> %c, ptr %x
; fsub_v4f64 - element-wise fsub (%x value minus %y value) of two
; <4 x double> vectors; the difference is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e64/m2 vfsub.vv with VL=4.
;   LMULMAX1-RV32/64  two e64/m1 vfsub.vv with VL=2; the upper half is
;                     addressed at byte offset 16.
;   ZVFHMIN           one e64/m1 vfsub.vv with VL=4 (presumably possible
;                     because those configurations enable zvl256b - confirm
;                     against the RUN lines at the top of the file).
2551 define void @fsub_v4f64(ptr %x, ptr %y) {
2552 ; LMULMAX2-LABEL: fsub_v4f64:
2553 ; LMULMAX2: # %bb.0:
2554 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2555 ; LMULMAX2-NEXT: vle64.v v8, (a0)
2556 ; LMULMAX2-NEXT: vle64.v v10, (a1)
2557 ; LMULMAX2-NEXT: vfsub.vv v8, v8, v10
2558 ; LMULMAX2-NEXT: vse64.v v8, (a0)
2559 ; LMULMAX2-NEXT: ret
2561 ; LMULMAX1-RV32-LABEL: fsub_v4f64:
2562 ; LMULMAX1-RV32: # %bb.0:
2563 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2564 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
2565 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2566 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
2567 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2568 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
2569 ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
2570 ; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10
2571 ; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11
2572 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
2573 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
2574 ; LMULMAX1-RV32-NEXT: ret
2576 ; LMULMAX1-RV64-LABEL: fsub_v4f64:
2577 ; LMULMAX1-RV64: # %bb.0:
2578 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2579 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
2580 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2581 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
2582 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2583 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
2584 ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
2585 ; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9
2586 ; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11
2587 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
2588 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
2589 ; LMULMAX1-RV64-NEXT: ret
2591 ; ZVFHMIN-LABEL: fsub_v4f64:
2593 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
2594 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
2595 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
2596 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
2597 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
2599 %a = load <4 x double>, ptr %x
2600 %b = load <4 x double>, ptr %y
2601 %c = fsub <4 x double> %a, %b
2602 store <4 x double> %c, ptr %x
; fmul_v16f16 - element-wise fmul of two <16 x half> vectors loaded from %x
; and %y; the product is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e16/m2 vfmul.vv with VL=16.
;   LMULMAX1-RV32/64  two e16/m1 vfmul.vv with VL=8; the upper half is
;                     addressed at byte offset 16. The two targets differ in
;                     scratch address-register usage and in load order.
;   ZVFHMINLMULMAX2   both inputs are widened with vfwcvt.f.f.v, the multiply
;                     runs at e32/m2, and the result is narrowed with
;                     vfncvt.f.f.w before the e16 store.
2606 define void @fmul_v16f16(ptr %x, ptr %y) {
2607 ; LMULMAX2-LABEL: fmul_v16f16:
2608 ; LMULMAX2: # %bb.0:
2609 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2610 ; LMULMAX2-NEXT: vle16.v v8, (a0)
2611 ; LMULMAX2-NEXT: vle16.v v10, (a1)
2612 ; LMULMAX2-NEXT: vfmul.vv v8, v8, v10
2613 ; LMULMAX2-NEXT: vse16.v v8, (a0)
2614 ; LMULMAX2-NEXT: ret
2616 ; LMULMAX1-RV32-LABEL: fmul_v16f16:
2617 ; LMULMAX1-RV32: # %bb.0:
2618 ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2619 ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
2620 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2621 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
2622 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2623 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
2624 ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
2625 ; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10
2626 ; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11
2627 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
2628 ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
2629 ; LMULMAX1-RV32-NEXT: ret
2631 ; LMULMAX1-RV64-LABEL: fmul_v16f16:
2632 ; LMULMAX1-RV64: # %bb.0:
2633 ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2634 ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
2635 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2636 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
2637 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2638 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
2639 ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
2640 ; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9
2641 ; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11
2642 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
2643 ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
2644 ; LMULMAX1-RV64-NEXT: ret
2646 ; ZVFHMINLMULMAX2-LABEL: fmul_v16f16:
2647 ; ZVFHMINLMULMAX2: # %bb.0:
2648 ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma
2649 ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1)
2650 ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0)
2651 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8
2652 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9
2653 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2654 ; ZVFHMINLMULMAX2-NEXT: vfmul.vv v8, v12, v10
2655 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2656 ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8
2657 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0)
2658 ; ZVFHMINLMULMAX2-NEXT: ret
2659 %a = load <16 x half>, ptr %x
2660 %b = load <16 x half>, ptr %y
2661 %c = fmul <16 x half> %a, %b
2662 store <16 x half> %c, ptr %x
; fmul_v8f32 - element-wise fmul of two <8 x float> vectors loaded from %x and
; %y; the product is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e32/m2 vfmul.vv with VL=8.
;   LMULMAX1-RV32/64  two e32/m1 vfmul.vv with VL=4; the upper half is
;                     addressed at byte offset 16.
;   ZVFHMIN           one e32/m1 vfmul.vv with VL=8 (presumably possible
;                     because those configurations enable zvl256b - confirm
;                     against the RUN lines at the top of the file).
2666 define void @fmul_v8f32(ptr %x, ptr %y) {
2667 ; LMULMAX2-LABEL: fmul_v8f32:
2668 ; LMULMAX2: # %bb.0:
2669 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2670 ; LMULMAX2-NEXT: vle32.v v8, (a0)
2671 ; LMULMAX2-NEXT: vle32.v v10, (a1)
2672 ; LMULMAX2-NEXT: vfmul.vv v8, v8, v10
2673 ; LMULMAX2-NEXT: vse32.v v8, (a0)
2674 ; LMULMAX2-NEXT: ret
2676 ; LMULMAX1-RV32-LABEL: fmul_v8f32:
2677 ; LMULMAX1-RV32: # %bb.0:
2678 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2679 ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
2680 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2681 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
2682 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2683 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
2684 ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
2685 ; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10
2686 ; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11
2687 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
2688 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
2689 ; LMULMAX1-RV32-NEXT: ret
2691 ; LMULMAX1-RV64-LABEL: fmul_v8f32:
2692 ; LMULMAX1-RV64: # %bb.0:
2693 ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2694 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
2695 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2696 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
2697 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2698 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
2699 ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
2700 ; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9
2701 ; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11
2702 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
2703 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
2704 ; LMULMAX1-RV64-NEXT: ret
2706 ; ZVFHMIN-LABEL: fmul_v8f32:
2708 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
2709 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2710 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
2711 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9
2712 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2714 %a = load <8 x float>, ptr %x
2715 %b = load <8 x float>, ptr %y
2716 %c = fmul <8 x float> %a, %b
2717 store <8 x float> %c, ptr %x
; fmul_v4f64 - element-wise fmul of two <4 x double> vectors loaded from %x
; and %y; the product is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e64/m2 vfmul.vv with VL=4.
;   LMULMAX1-RV32/64  two e64/m1 vfmul.vv with VL=2; the upper half is
;                     addressed at byte offset 16.
;   ZVFHMIN           one e64/m1 vfmul.vv with VL=4 (presumably possible
;                     because those configurations enable zvl256b - confirm
;                     against the RUN lines at the top of the file).
2721 define void @fmul_v4f64(ptr %x, ptr %y) {
2722 ; LMULMAX2-LABEL: fmul_v4f64:
2723 ; LMULMAX2: # %bb.0:
2724 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2725 ; LMULMAX2-NEXT: vle64.v v8, (a0)
2726 ; LMULMAX2-NEXT: vle64.v v10, (a1)
2727 ; LMULMAX2-NEXT: vfmul.vv v8, v8, v10
2728 ; LMULMAX2-NEXT: vse64.v v8, (a0)
2729 ; LMULMAX2-NEXT: ret
2731 ; LMULMAX1-RV32-LABEL: fmul_v4f64:
2732 ; LMULMAX1-RV32: # %bb.0:
2733 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2734 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
2735 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2736 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
2737 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2738 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
2739 ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
2740 ; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10
2741 ; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11
2742 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
2743 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
2744 ; LMULMAX1-RV32-NEXT: ret
2746 ; LMULMAX1-RV64-LABEL: fmul_v4f64:
2747 ; LMULMAX1-RV64: # %bb.0:
2748 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2749 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
2750 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2751 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
2752 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2753 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
2754 ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
2755 ; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9
2756 ; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11
2757 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
2758 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
2759 ; LMULMAX1-RV64-NEXT: ret
2761 ; ZVFHMIN-LABEL: fmul_v4f64:
2763 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
2764 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
2765 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
2766 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9
2767 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
2769 %a = load <4 x double>, ptr %x
2770 %b = load <4 x double>, ptr %y
2771 %c = fmul <4 x double> %a, %b
2772 store <4 x double> %c, ptr %x
; fdiv_v16f16 - element-wise fdiv (%x value divided by %y value) of two
; <16 x half> vectors; the quotient is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e16/m2 vfdiv.vv with VL=16.
;   LMULMAX1-RV32/64  two e16/m1 vfdiv.vv with VL=8; the upper half is
;                     addressed at byte offset 16. The two targets differ in
;                     scratch address-register usage and in load order.
;   ZVFHMINLMULMAX2   both inputs are widened with vfwcvt.f.f.v, the divide
;                     runs at e32/m2, and the result is narrowed with
;                     vfncvt.f.f.w before the e16 store.
2776 define void @fdiv_v16f16(ptr %x, ptr %y) {
2777 ; LMULMAX2-LABEL: fdiv_v16f16:
2778 ; LMULMAX2: # %bb.0:
2779 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2780 ; LMULMAX2-NEXT: vle16.v v8, (a0)
2781 ; LMULMAX2-NEXT: vle16.v v10, (a1)
2782 ; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10
2783 ; LMULMAX2-NEXT: vse16.v v8, (a0)
2784 ; LMULMAX2-NEXT: ret
2786 ; LMULMAX1-RV32-LABEL: fdiv_v16f16:
2787 ; LMULMAX1-RV32: # %bb.0:
2788 ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2789 ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
2790 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2791 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
2792 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2793 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
2794 ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
2795 ; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10
2796 ; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11
2797 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
2798 ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
2799 ; LMULMAX1-RV32-NEXT: ret
2801 ; LMULMAX1-RV64-LABEL: fdiv_v16f16:
2802 ; LMULMAX1-RV64: # %bb.0:
2803 ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2804 ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
2805 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2806 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
2807 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2808 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
2809 ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
2810 ; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9
2811 ; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11
2812 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
2813 ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
2814 ; LMULMAX1-RV64-NEXT: ret
2816 ; ZVFHMINLMULMAX2-LABEL: fdiv_v16f16:
2817 ; ZVFHMINLMULMAX2: # %bb.0:
2818 ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma
2819 ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1)
2820 ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0)
2821 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8
2822 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9
2823 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2824 ; ZVFHMINLMULMAX2-NEXT: vfdiv.vv v8, v12, v10
2825 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2826 ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8
2827 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0)
2828 ; ZVFHMINLMULMAX2-NEXT: ret
2829 %a = load <16 x half>, ptr %x
2830 %b = load <16 x half>, ptr %y
2831 %c = fdiv <16 x half> %a, %b
2832 store <16 x half> %c, ptr %x
; fdiv_v8f32 - element-wise fdiv (%x value divided by %y value) of two
; <8 x float> vectors; the quotient is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e32/m2 vfdiv.vv with VL=8.
;   LMULMAX1-RV32/64  two e32/m1 vfdiv.vv with VL=4; the upper half is
;                     addressed at byte offset 16.
;   ZVFHMIN           one e32/m1 vfdiv.vv with VL=8 (presumably possible
;                     because those configurations enable zvl256b - confirm
;                     against the RUN lines at the top of the file).
2836 define void @fdiv_v8f32(ptr %x, ptr %y) {
2837 ; LMULMAX2-LABEL: fdiv_v8f32:
2838 ; LMULMAX2: # %bb.0:
2839 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2840 ; LMULMAX2-NEXT: vle32.v v8, (a0)
2841 ; LMULMAX2-NEXT: vle32.v v10, (a1)
2842 ; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10
2843 ; LMULMAX2-NEXT: vse32.v v8, (a0)
2844 ; LMULMAX2-NEXT: ret
2846 ; LMULMAX1-RV32-LABEL: fdiv_v8f32:
2847 ; LMULMAX1-RV32: # %bb.0:
2848 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2849 ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
2850 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2851 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
2852 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2853 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
2854 ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
2855 ; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10
2856 ; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11
2857 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
2858 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
2859 ; LMULMAX1-RV32-NEXT: ret
2861 ; LMULMAX1-RV64-LABEL: fdiv_v8f32:
2862 ; LMULMAX1-RV64: # %bb.0:
2863 ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2864 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
2865 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2866 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
2867 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2868 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
2869 ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
2870 ; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9
2871 ; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11
2872 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
2873 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
2874 ; LMULMAX1-RV64-NEXT: ret
2876 ; ZVFHMIN-LABEL: fdiv_v8f32:
2878 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
2879 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2880 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
2881 ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
2882 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2884 %a = load <8 x float>, ptr %x
2885 %b = load <8 x float>, ptr %y
2886 %c = fdiv <8 x float> %a, %b
2887 store <8 x float> %c, ptr %x
; fdiv_v4f64 - element-wise fdiv (%x value divided by %y value) of two
; <4 x double> vectors; the quotient is stored back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2          one e64/m2 vfdiv.vv with VL=4.
;   LMULMAX1-RV32/64  two e64/m1 vfdiv.vv with VL=2; the upper half is
;                     addressed at byte offset 16.
;   ZVFHMIN           one e64/m1 vfdiv.vv with VL=4 (presumably possible
;                     because those configurations enable zvl256b - confirm
;                     against the RUN lines at the top of the file).
2891 define void @fdiv_v4f64(ptr %x, ptr %y) {
2892 ; LMULMAX2-LABEL: fdiv_v4f64:
2893 ; LMULMAX2: # %bb.0:
2894 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2895 ; LMULMAX2-NEXT: vle64.v v8, (a0)
2896 ; LMULMAX2-NEXT: vle64.v v10, (a1)
2897 ; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10
2898 ; LMULMAX2-NEXT: vse64.v v8, (a0)
2899 ; LMULMAX2-NEXT: ret
2901 ; LMULMAX1-RV32-LABEL: fdiv_v4f64:
2902 ; LMULMAX1-RV32: # %bb.0:
2903 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2904 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
2905 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
2906 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
2907 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
2908 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
2909 ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
2910 ; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10
2911 ; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11
2912 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
2913 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
2914 ; LMULMAX1-RV32-NEXT: ret
2916 ; LMULMAX1-RV64-LABEL: fdiv_v4f64:
2917 ; LMULMAX1-RV64: # %bb.0:
2918 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2919 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
2920 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
2921 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
2922 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
2923 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
2924 ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
2925 ; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9
2926 ; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11
2927 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
2928 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
2929 ; LMULMAX1-RV64-NEXT: ret
2931 ; ZVFHMIN-LABEL: fdiv_v4f64:
2933 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
2934 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
2935 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
2936 ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
2937 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
2939 %a = load <4 x double>, ptr %x
2940 %b = load <4 x double>, ptr %y
2941 %c = fdiv <4 x double> %a, %b
2942 store <4 x double> %c, ptr %x
; fneg_v16f16 - negates a <16 x half> vector loaded from %x and stores the
; result back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2         one e16/m2 vfneg.v with VL=16.
;   LMULMAX1         two e16/m1 vfneg.v with VL=8; the upper half is
;                    addressed at byte offset 16. RV32 and RV64 share this
;                    prefix for this function.
;   ZVFHMINLMULMAX2  the input is widened with vfwcvt.f.f.v, negated at
;                    e32/m2, and narrowed with vfncvt.f.f.w before the e16
;                    store.
2946 define void @fneg_v16f16(ptr %x) {
2947 ; LMULMAX2-LABEL: fneg_v16f16:
2948 ; LMULMAX2: # %bb.0:
2949 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2950 ; LMULMAX2-NEXT: vle16.v v8, (a0)
2951 ; LMULMAX2-NEXT: vfneg.v v8, v8
2952 ; LMULMAX2-NEXT: vse16.v v8, (a0)
2953 ; LMULMAX2-NEXT: ret
2955 ; LMULMAX1-LABEL: fneg_v16f16:
2956 ; LMULMAX1: # %bb.0:
2957 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2958 ; LMULMAX1-NEXT: addi a1, a0, 16
2959 ; LMULMAX1-NEXT: vle16.v v8, (a1)
2960 ; LMULMAX1-NEXT: vle16.v v9, (a0)
2961 ; LMULMAX1-NEXT: vfneg.v v8, v8
2962 ; LMULMAX1-NEXT: vfneg.v v9, v9
2963 ; LMULMAX1-NEXT: vse16.v v9, (a0)
2964 ; LMULMAX1-NEXT: vse16.v v8, (a1)
2965 ; LMULMAX1-NEXT: ret
2967 ; ZVFHMINLMULMAX2-LABEL: fneg_v16f16:
2968 ; ZVFHMINLMULMAX2: # %bb.0:
2969 ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma
2970 ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a0)
2971 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8
2972 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2973 ; ZVFHMINLMULMAX2-NEXT: vfneg.v v8, v10
2974 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2975 ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8
2976 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0)
2977 ; ZVFHMINLMULMAX2-NEXT: ret
2978 %a = load <16 x half>, ptr %x
2979 %b = fneg <16 x half> %a
2980 store <16 x half> %b, ptr %x
; fneg_v8f32 - negates a <8 x float> vector loaded from %x and stores the
; result back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2  one e32/m2 vfneg.v with VL=8.
;   LMULMAX1  two e32/m1 vfneg.v with VL=4; the upper half is addressed at
;             byte offset 16.
;   ZVFHMIN   one e32/m1 vfneg.v with VL=8 (presumably possible because those
;             configurations enable zvl256b - confirm against the RUN lines
;             at the top of the file).
2984 define void @fneg_v8f32(ptr %x) {
2985 ; LMULMAX2-LABEL: fneg_v8f32:
2986 ; LMULMAX2: # %bb.0:
2987 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2988 ; LMULMAX2-NEXT: vle32.v v8, (a0)
2989 ; LMULMAX2-NEXT: vfneg.v v8, v8
2990 ; LMULMAX2-NEXT: vse32.v v8, (a0)
2991 ; LMULMAX2-NEXT: ret
2993 ; LMULMAX1-LABEL: fneg_v8f32:
2994 ; LMULMAX1: # %bb.0:
2995 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2996 ; LMULMAX1-NEXT: addi a1, a0, 16
2997 ; LMULMAX1-NEXT: vle32.v v8, (a1)
2998 ; LMULMAX1-NEXT: vle32.v v9, (a0)
2999 ; LMULMAX1-NEXT: vfneg.v v8, v8
3000 ; LMULMAX1-NEXT: vfneg.v v9, v9
3001 ; LMULMAX1-NEXT: vse32.v v9, (a0)
3002 ; LMULMAX1-NEXT: vse32.v v8, (a1)
3003 ; LMULMAX1-NEXT: ret
3005 ; ZVFHMIN-LABEL: fneg_v8f32:
3007 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
3008 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
3009 ; ZVFHMIN-NEXT: vfneg.v v8, v8
3010 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
3012 %a = load <8 x float>, ptr %x
3013 %b = fneg <8 x float> %a
3014 store <8 x float> %b, ptr %x
; fneg_v4f64 - negates a <4 x double> vector loaded from %x and stores the
; result back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2  one e64/m2 vfneg.v with VL=4.
;   LMULMAX1  two e64/m1 vfneg.v with VL=2; the upper half is addressed at
;             byte offset 16.
;   ZVFHMIN   one e64/m1 vfneg.v with VL=4 (presumably possible because those
;             configurations enable zvl256b - confirm against the RUN lines
;             at the top of the file).
3018 define void @fneg_v4f64(ptr %x) {
3019 ; LMULMAX2-LABEL: fneg_v4f64:
3020 ; LMULMAX2: # %bb.0:
3021 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3022 ; LMULMAX2-NEXT: vle64.v v8, (a0)
3023 ; LMULMAX2-NEXT: vfneg.v v8, v8
3024 ; LMULMAX2-NEXT: vse64.v v8, (a0)
3025 ; LMULMAX2-NEXT: ret
3027 ; LMULMAX1-LABEL: fneg_v4f64:
3028 ; LMULMAX1: # %bb.0:
3029 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3030 ; LMULMAX1-NEXT: addi a1, a0, 16
3031 ; LMULMAX1-NEXT: vle64.v v8, (a1)
3032 ; LMULMAX1-NEXT: vle64.v v9, (a0)
3033 ; LMULMAX1-NEXT: vfneg.v v8, v8
3034 ; LMULMAX1-NEXT: vfneg.v v9, v9
3035 ; LMULMAX1-NEXT: vse64.v v9, (a0)
3036 ; LMULMAX1-NEXT: vse64.v v8, (a1)
3037 ; LMULMAX1-NEXT: ret
3039 ; ZVFHMIN-LABEL: fneg_v4f64:
3041 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
3042 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
3043 ; ZVFHMIN-NEXT: vfneg.v v8, v8
3044 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
3046 %a = load <4 x double>, ptr %x
3047 %b = fneg <4 x double> %a
3048 store <4 x double> %b, ptr %x
; fma_v16f16 - loads three <16 x half> vectors from %x, %y and %z and stores
; llvm.fma(%a, %b, %c) back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2         one e16/m2 vfmacc.vv with VL=16, accumulating into the
;                    register loaded from %z.
;   LMULMAX1         two e16/m1 vfmacc.vv with VL=8; the upper halves of all
;                    three operands are addressed at byte offset 16.
;   ZVFHMINLMULMAX2  all three inputs are widened with vfwcvt.f.f.v, a
;                    vfmadd.vv runs at e32/m2, and the result is narrowed
;                    with vfncvt.f.f.w before the e16 store.
3052 define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
3053 ; LMULMAX2-LABEL: fma_v16f16:
3054 ; LMULMAX2: # %bb.0:
3055 ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
3056 ; LMULMAX2-NEXT: vle16.v v8, (a0)
3057 ; LMULMAX2-NEXT: vle16.v v10, (a1)
3058 ; LMULMAX2-NEXT: vle16.v v12, (a2)
3059 ; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10
3060 ; LMULMAX2-NEXT: vse16.v v12, (a0)
3061 ; LMULMAX2-NEXT: ret
3063 ; LMULMAX1-LABEL: fma_v16f16:
3064 ; LMULMAX1: # %bb.0:
3065 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3066 ; LMULMAX1-NEXT: vle16.v v8, (a0)
3067 ; LMULMAX1-NEXT: addi a3, a0, 16
3068 ; LMULMAX1-NEXT: vle16.v v9, (a3)
3069 ; LMULMAX1-NEXT: vle16.v v10, (a1)
3070 ; LMULMAX1-NEXT: addi a1, a1, 16
3071 ; LMULMAX1-NEXT: vle16.v v11, (a1)
3072 ; LMULMAX1-NEXT: addi a1, a2, 16
3073 ; LMULMAX1-NEXT: vle16.v v12, (a1)
3074 ; LMULMAX1-NEXT: vle16.v v13, (a2)
3075 ; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11
3076 ; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10
3077 ; LMULMAX1-NEXT: vse16.v v13, (a0)
3078 ; LMULMAX1-NEXT: vse16.v v12, (a3)
3079 ; LMULMAX1-NEXT: ret
3081 ; ZVFHMINLMULMAX2-LABEL: fma_v16f16:
3082 ; ZVFHMINLMULMAX2: # %bb.0:
3083 ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma
3084 ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a2)
3085 ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0)
3086 ; ZVFHMINLMULMAX2-NEXT: vle16.v v10, (a1)
3087 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v8
3088 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v14, v9
3089 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v8, v10
3090 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3091 ; ZVFHMINLMULMAX2-NEXT: vfmadd.vv v8, v14, v12
3092 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3093 ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8
3094 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0)
3095 ; ZVFHMINLMULMAX2-NEXT: ret
3096 %a = load <16 x half>, ptr %x
3097 %b = load <16 x half>, ptr %y
3098 %c = load <16 x half>, ptr %z
3099 %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
3100 store <16 x half> %d, ptr %x
; Declaration of the <16 x half> fused multiply-add intrinsic called above.
3103 declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
; fma_v8f32 - loads three <8 x float> vectors from %x, %y and %z and stores
; llvm.fma(%a, %b, %c) back to %x.
; Expected lowering per check prefix, as recorded in the assertions below
;   LMULMAX2  one e32/m2 vfmacc.vv with VL=8, accumulating into the register
;             loaded from %z.
;   LMULMAX1  two e32/m1 vfmacc.vv with VL=4; the upper halves of all three
;             operands are addressed at byte offset 16.
;   ZVFHMIN   one e32/m1 vfmacc.vv with VL=8 (presumably possible because
;             those configurations enable zvl256b - confirm against the RUN
;             lines at the top of the file).
3105 define void @fma_v8f32(ptr %x, ptr %y, ptr %z) {
3106 ; LMULMAX2-LABEL: fma_v8f32:
3107 ; LMULMAX2: # %bb.0:
3108 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3109 ; LMULMAX2-NEXT: vle32.v v8, (a0)
3110 ; LMULMAX2-NEXT: vle32.v v10, (a1)
3111 ; LMULMAX2-NEXT: vle32.v v12, (a2)
3112 ; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10
3113 ; LMULMAX2-NEXT: vse32.v v12, (a0)
3114 ; LMULMAX2-NEXT: ret
3116 ; LMULMAX1-LABEL: fma_v8f32:
3117 ; LMULMAX1: # %bb.0:
3118 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3119 ; LMULMAX1-NEXT: vle32.v v8, (a0)
3120 ; LMULMAX1-NEXT: addi a3, a0, 16
3121 ; LMULMAX1-NEXT: vle32.v v9, (a3)
3122 ; LMULMAX1-NEXT: vle32.v v10, (a1)
3123 ; LMULMAX1-NEXT: addi a1, a1, 16
3124 ; LMULMAX1-NEXT: vle32.v v11, (a1)
3125 ; LMULMAX1-NEXT: addi a1, a2, 16
3126 ; LMULMAX1-NEXT: vle32.v v12, (a1)
3127 ; LMULMAX1-NEXT: vle32.v v13, (a2)
3128 ; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11
3129 ; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10
3130 ; LMULMAX1-NEXT: vse32.v v13, (a0)
3131 ; LMULMAX1-NEXT: vse32.v v12, (a3)
3132 ; LMULMAX1-NEXT: ret
3134 ; ZVFHMIN-LABEL: fma_v8f32:
3136 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
3137 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
3138 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
3139 ; ZVFHMIN-NEXT: vle32.v v10, (a2)
3140 ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
3141 ; ZVFHMIN-NEXT: vse32.v v10, (a0)
3143 %a = load <8 x float>, ptr %x
3144 %b = load <8 x float>, ptr %y
3145 %c = load <8 x float>, ptr %z
3146 %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
3147 store <8 x float> %d, ptr %x
; Declaration of the <8 x float> fused multiply-add intrinsic called above.
3150 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
; Fused multiply-add on <4 x double>: loads the three operands from %x, %y and
; %z, calls llvm.fma.v4f64 and stores the result back to %x.
; The expectations below show one m2 vfmacc.vv for the lmul-max=2 runs, two m1
; halves (byte offsets 0 and 16) for the lmul-max=1 runs, and one m1 vfmacc.vv
; for the zvfhmin runs.
3152 define void @fma_v4f64(ptr %x, ptr %y, ptr %z) {
3153 ; LMULMAX2-LABEL: fma_v4f64:
3154 ; LMULMAX2: # %bb.0:
3155 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3156 ; LMULMAX2-NEXT: vle64.v v8, (a0)
3157 ; LMULMAX2-NEXT: vle64.v v10, (a1)
3158 ; LMULMAX2-NEXT: vle64.v v12, (a2)
3159 ; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10
3160 ; LMULMAX2-NEXT: vse64.v v12, (a0)
3161 ; LMULMAX2-NEXT: ret
3163 ; LMULMAX1-LABEL: fma_v4f64:
3164 ; LMULMAX1: # %bb.0:
3165 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3166 ; LMULMAX1-NEXT: vle64.v v8, (a0)
3167 ; LMULMAX1-NEXT: addi a3, a0, 16
3168 ; LMULMAX1-NEXT: vle64.v v9, (a3)
3169 ; LMULMAX1-NEXT: vle64.v v10, (a1)
3170 ; LMULMAX1-NEXT: addi a1, a1, 16
3171 ; LMULMAX1-NEXT: vle64.v v11, (a1)
3172 ; LMULMAX1-NEXT: addi a1, a2, 16
3173 ; LMULMAX1-NEXT: vle64.v v12, (a1)
3174 ; LMULMAX1-NEXT: vle64.v v13, (a2)
3175 ; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11
3176 ; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10
3177 ; LMULMAX1-NEXT: vse64.v v13, (a0)
3178 ; LMULMAX1-NEXT: vse64.v v12, (a3)
3179 ; LMULMAX1-NEXT: ret
3181 ; ZVFHMIN-LABEL: fma_v4f64:
3183 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
3184 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
3185 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
3186 ; ZVFHMIN-NEXT: vle64.v v10, (a2)
3187 ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
3188 ; ZVFHMIN-NEXT: vse64.v v10, (a0)
3190 %a = load <4 x double>, ptr %x
3191 %b = load <4 x double>, ptr %y
3192 %c = load <4 x double>, ptr %z
3193 %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
3194 store <4 x double> %d, ptr %x
; Declaration of the fused multiply-add intrinsic on <4 x double>, called by
; fma_v4f64.
3197 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
; Vector + scalar add on <8 x half>: %y is splatted (insertelement at index 0
; followed by a zero-mask shufflevector) and added to the vector loaded from
; %x, with the vector as the first fadd operand. The sum is stored back to %x.
; The zvfh runs expect a single vfadd.vf. The zvfhmin runs expect the scalar to
; be converted to f32, splatted and narrowed to f16, after which both operands
; are widened, added at e32 and narrowed back to e16 for the store.
3199 define void @fadd_vf_v8f16(ptr %x, half %y) {
3200 ; ZVFH-LABEL: fadd_vf_v8f16:
3202 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3203 ; ZVFH-NEXT: vle16.v v8, (a0)
3204 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
3205 ; ZVFH-NEXT: vse16.v v8, (a0)
3208 ; ZVFHMIN-LABEL: fadd_vf_v8f16:
3210 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3211 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3212 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
3213 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3214 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
3215 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3216 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
3217 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3218 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
3219 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3220 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3221 ; ZVFHMIN-NEXT: vfadd.vv v8, v9, v8
3222 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3223 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
3224 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
3226 %a = load <8 x half>, ptr %x
3227 %b = insertelement <8 x half> poison, half %y, i32 0
3228 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3229 %d = fadd <8 x half> %a, %c
3230 store <8 x half> %d, ptr %x
; Vector + scalar add on the non-power-of-two type <6 x half>: a splat of %y is
; added to the vector loaded from %x (vector operand first) and the sum is
; stored back to %x.
; The zvfh runs expect the load and store to use VL=6 while the vfadd.vf runs
; with VL=8. The zvfhmin runs expect the add to be done at e32 after widening,
; and the narrowed result to be stored in two pieces: on riscv32 a 32-bit
; element slid down from index 2 goes to offset 8 and four e16 elements go to
; offset 0; on riscv64 a vse64.v writes offset 0 and a vse32.v of the slid-down
; element writes offset 8.
3234 define void @fadd_vf_v6f16(ptr %x, half %y) {
3235 ; ZVFH-LABEL: fadd_vf_v6f16:
3237 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3238 ; ZVFH-NEXT: vle16.v v8, (a0)
3239 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3240 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
3241 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3242 ; ZVFH-NEXT: vse16.v v8, (a0)
3245 ; ZVFHMINLMULMAX2-RV32-LABEL: fadd_vf_v6f16:
3246 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
3247 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3248 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
3249 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
3250 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3251 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
3252 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3253 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
3254 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3255 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
3256 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
3257 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3258 ; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v9, v8
3259 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3260 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
3261 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3262 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
3263 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
3264 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
3265 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3266 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
3267 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
3269 ; ZVFHMINLMULMAX2-RV64-LABEL: fadd_vf_v6f16:
3270 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
3271 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3272 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
3273 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
3274 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3275 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
3276 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3277 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
3278 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3279 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
3280 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
3281 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3282 ; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v9, v8
3283 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3284 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
3285 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3286 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
3287 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
3288 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
3289 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
3290 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
3292 ; ZVFHMINLMULMAX1-RV32-LABEL: fadd_vf_v6f16:
3293 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
3294 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3295 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
3296 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
3297 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3298 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
3299 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3300 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
3301 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3302 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
3303 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
3304 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3305 ; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v9, v8
3306 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3307 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
3308 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3309 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
3310 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
3311 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
3312 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3313 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
3314 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
3316 ; ZVFHMINLMULMAX1-RV64-LABEL: fadd_vf_v6f16:
3317 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
3318 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3319 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
3320 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
3321 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3322 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
3323 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3324 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
3325 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3326 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
3327 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
3328 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3329 ; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v9, v8
3330 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3331 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
3332 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3333 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
3334 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
3335 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
3336 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
3337 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
3338 %a = load <6 x half>, ptr %x
3339 %b = insertelement <6 x half> poison, half %y, i32 0
3340 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3341 %d = fadd <6 x half> %a, %c
3342 store <6 x half> %d, ptr %x
; Vector + scalar add on <4 x float>: a splat of %y is added to the vector
; loaded from %x (vector operand first) and the sum is stored back to %x.
; Both the zvfh and zvfhmin runs expect a single vfadd.vf; the two expectations
; differ only in the LMUL of the vsetivli (m1 versus mf2).
3346 define void @fadd_vf_v4f32(ptr %x, float %y) {
3347 ; ZVFH-LABEL: fadd_vf_v4f32:
3349 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3350 ; ZVFH-NEXT: vle32.v v8, (a0)
3351 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
3352 ; ZVFH-NEXT: vse32.v v8, (a0)
3355 ; ZVFHMIN-LABEL: fadd_vf_v4f32:
3357 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
3358 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
3359 ; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0
3360 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
3362 %a = load <4 x float>, ptr %x
3363 %b = insertelement <4 x float> poison, float %y, i32 0
3364 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3365 %d = fadd <4 x float> %a, %c
3366 store <4 x float> %d, ptr %x
; Vector + scalar add on <2 x double>: a splat of %y is added to the vector
; loaded from %x (vector operand first) and the sum is stored back to %x.
; A single expectation under the common prefix covers this function: one
; vfadd.vf at e64, m1.
3370 define void @fadd_vf_v2f64(ptr %x, double %y) {
3371 ; CHECK-LABEL: fadd_vf_v2f64:
3373 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3374 ; CHECK-NEXT: vle64.v v8, (a0)
3375 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
3376 ; CHECK-NEXT: vse64.v v8, (a0)
3378 %a = load <2 x double>, ptr %x
3379 %b = insertelement <2 x double> poison, double %y, i32 0
3380 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3381 %d = fadd <2 x double> %a, %c
3382 store <2 x double> %d, ptr %x
; Scalar + vector add on <8 x half>: same as the vector-first form except that
; the splat of %y is the first fadd operand and the vector loaded from %x is
; the second. The sum is stored back to %x.
; The zvfh runs still expect vfadd.vf. The zvfhmin runs expect the widened
; e32 vfadd.vv with the splat register as the first source operand.
3386 define void @fadd_fv_v8f16(ptr %x, half %y) {
3387 ; ZVFH-LABEL: fadd_fv_v8f16:
3389 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3390 ; ZVFH-NEXT: vle16.v v8, (a0)
3391 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
3392 ; ZVFH-NEXT: vse16.v v8, (a0)
3395 ; ZVFHMIN-LABEL: fadd_fv_v8f16:
3397 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3398 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3399 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
3400 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3401 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
3402 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3403 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
3404 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3405 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
3406 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3407 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3408 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
3409 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3410 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
3411 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
3413 %a = load <8 x half>, ptr %x
3414 %b = insertelement <8 x half> poison, half %y, i32 0
3415 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3416 %d = fadd <8 x half> %c, %a
3417 store <8 x half> %d, ptr %x
; Scalar + vector add on the non-power-of-two type <6 x half>: the splat of %y
; is the first fadd operand, the vector loaded from %x is the second, and the
; sum is stored back to %x.
; The zvfh runs expect the load and store to use VL=6 while the vfadd.vf runs
; with VL=8. The zvfhmin runs expect the add to be done at e32 after widening
; (splat register first), and the narrowed result to be stored in two pieces:
; on riscv32 a 32-bit element slid down from index 2 goes to offset 8 and four
; e16 elements go to offset 0; on riscv64 a vse64.v writes offset 0 and a
; vse32.v of the slid-down element writes offset 8.
3421 define void @fadd_fv_v6f16(ptr %x, half %y) {
3422 ; ZVFH-LABEL: fadd_fv_v6f16:
3424 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3425 ; ZVFH-NEXT: vle16.v v8, (a0)
3426 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3427 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
3428 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3429 ; ZVFH-NEXT: vse16.v v8, (a0)
3432 ; ZVFHMINLMULMAX2-RV32-LABEL: fadd_fv_v6f16:
3433 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
3434 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3435 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
3436 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
3437 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3438 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
3439 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3440 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
3441 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3442 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
3443 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
3444 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3445 ; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v9
3446 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3447 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
3448 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3449 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
3450 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
3451 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
3452 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3453 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
3454 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
3456 ; ZVFHMINLMULMAX2-RV64-LABEL: fadd_fv_v6f16:
3457 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
3458 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3459 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
3460 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
3461 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3462 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
3463 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3464 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
3465 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3466 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
3467 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
3468 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3469 ; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v9
3470 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3471 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
3472 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3473 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
3474 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
3475 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
3476 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
3477 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
3479 ; ZVFHMINLMULMAX1-RV32-LABEL: fadd_fv_v6f16:
3480 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
3481 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3482 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
3483 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
3484 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3485 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
3486 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3487 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
3488 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3489 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
3490 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
3491 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3492 ; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9
3493 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3494 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
3495 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3496 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
3497 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
3498 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
3499 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3500 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
3501 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
3503 ; ZVFHMINLMULMAX1-RV64-LABEL: fadd_fv_v6f16:
3504 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
3505 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3506 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
3507 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
3508 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3509 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
3510 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3511 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
3512 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3513 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
3514 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
3515 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3516 ; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9
3517 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3518 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
3519 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3520 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
3521 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
3522 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
3523 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
3524 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
3525 %a = load <6 x half>, ptr %x
3526 %b = insertelement <6 x half> poison, half %y, i32 0
3527 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3528 %d = fadd <6 x half> %c, %a
3529 store <6 x half> %d, ptr %x
; Scalar + vector add on <4 x float>: the splat of %y is the first fadd
; operand and the vector loaded from %x is the second; the sum is stored back
; to %x.
; Both the zvfh and zvfhmin runs expect a single vfadd.vf; the two expectations
; differ only in the LMUL of the vsetivli (m1 versus mf2).
3533 define void @fadd_fv_v4f32(ptr %x, float %y) {
3534 ; ZVFH-LABEL: fadd_fv_v4f32:
3536 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3537 ; ZVFH-NEXT: vle32.v v8, (a0)
3538 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
3539 ; ZVFH-NEXT: vse32.v v8, (a0)
3542 ; ZVFHMIN-LABEL: fadd_fv_v4f32:
3544 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
3545 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
3546 ; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0
3547 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
3549 %a = load <4 x float>, ptr %x
3550 %b = insertelement <4 x float> poison, float %y, i32 0
3551 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3552 %d = fadd <4 x float> %c, %a
3553 store <4 x float> %d, ptr %x
; Scalar + vector add on <2 x double>: the splat of %y is the first fadd
; operand and the vector loaded from %x is the second; the sum is stored back
; to %x.
; A single expectation under the common prefix covers this function: one
; vfadd.vf at e64, m1.
3557 define void @fadd_fv_v2f64(ptr %x, double %y) {
3558 ; CHECK-LABEL: fadd_fv_v2f64:
3560 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3561 ; CHECK-NEXT: vle64.v v8, (a0)
3562 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
3563 ; CHECK-NEXT: vse64.v v8, (a0)
3565 %a = load <2 x double>, ptr %x
3566 %b = insertelement <2 x double> poison, double %y, i32 0
3567 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3568 %d = fadd <2 x double> %c, %a
3569 store <2 x double> %d, ptr %x
; Vector - scalar subtract on <8 x half>: a splat of %y is subtracted from the
; vector loaded from %x and the difference is stored back to %x.
; The zvfh runs expect a single vfsub.vf. The zvfhmin runs expect the scalar to
; be converted to f32, splatted and narrowed to f16, after which both operands
; are widened, subtracted at e32 (loaded vector minus splat) and narrowed back
; to e16 for the store.
3573 define void @fsub_vf_v8f16(ptr %x, half %y) {
3574 ; ZVFH-LABEL: fsub_vf_v8f16:
3576 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3577 ; ZVFH-NEXT: vle16.v v8, (a0)
3578 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
3579 ; ZVFH-NEXT: vse16.v v8, (a0)
3582 ; ZVFHMIN-LABEL: fsub_vf_v8f16:
3584 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3585 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3586 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
3587 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3588 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
3589 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3590 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
3591 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3592 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
3593 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3594 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3595 ; ZVFHMIN-NEXT: vfsub.vv v8, v9, v8
3596 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3597 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
3598 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
3600 %a = load <8 x half>, ptr %x
3601 %b = insertelement <8 x half> poison, half %y, i32 0
3602 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3603 %d = fsub <8 x half> %a, %c
3604 store <8 x half> %d, ptr %x
; Vector - scalar subtract on the non-power-of-two type <6 x half>: a splat of
; %y is subtracted from the vector loaded from %x and the difference is stored
; back to %x.
; The zvfh runs expect the load and store to use VL=6 while the vfsub.vf runs
; with VL=8. The zvfhmin runs expect the subtract to be done at e32 after
; widening (loaded vector minus splat), and the narrowed result to be stored in
; two pieces: on riscv32 a 32-bit element slid down from index 2 goes to offset
; 8 and four e16 elements go to offset 0; on riscv64 a vse64.v writes offset 0
; and a vse32.v of the slid-down element writes offset 8.
3608 define void @fsub_vf_v6f16(ptr %x, half %y) {
3609 ; ZVFH-LABEL: fsub_vf_v6f16:
3611 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3612 ; ZVFH-NEXT: vle16.v v8, (a0)
3613 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3614 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
3615 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3616 ; ZVFH-NEXT: vse16.v v8, (a0)
3619 ; ZVFHMINLMULMAX2-RV32-LABEL: fsub_vf_v6f16:
3620 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
3621 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3622 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
3623 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
3624 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3625 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
3626 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3627 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
3628 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3629 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
3630 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
3631 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3632 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v9, v8
3633 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3634 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
3635 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3636 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
3637 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
3638 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
3639 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3640 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
3641 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
3643 ; ZVFHMINLMULMAX2-RV64-LABEL: fsub_vf_v6f16:
3644 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
3645 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3646 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
3647 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
3648 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3649 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
3650 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3651 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
3652 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3653 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
3654 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
3655 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3656 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v9, v8
3657 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3658 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
3659 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3660 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
3661 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
3662 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
3663 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
3664 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
3666 ; ZVFHMINLMULMAX1-RV32-LABEL: fsub_vf_v6f16:
3667 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
3668 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3669 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
3670 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
3671 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3672 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
3673 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3674 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
3675 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3676 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
3677 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
3678 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3679 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v9, v8
3680 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3681 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
3682 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3683 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
3684 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
3685 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
3686 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3687 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
3688 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
3690 ; ZVFHMINLMULMAX1-RV64-LABEL: fsub_vf_v6f16:
3691 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
3692 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3693 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
3694 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
3695 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3696 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
3697 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3698 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
3699 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3700 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
3701 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
3702 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3703 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v9, v8
3704 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3705 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
3706 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3707 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
3708 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
3709 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
3710 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
3711 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
3712 %a = load <6 x half>, ptr %x
3713 %b = insertelement <6 x half> poison, half %y, i32 0
3714 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3715 %d = fsub <6 x half> %a, %c
3716 store <6 x half> %d, ptr %x
; Vector - scalar subtract on <4 x float>: a splat of %y is subtracted from the
; vector loaded from %x and the difference is stored back to %x.
; Both the zvfh and zvfhmin runs expect a single vfsub.vf; the two expectations
; differ only in the LMUL of the vsetivli (m1 versus mf2).
3720 define void @fsub_vf_v4f32(ptr %x, float %y) {
3721 ; ZVFH-LABEL: fsub_vf_v4f32:
3723 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3724 ; ZVFH-NEXT: vle32.v v8, (a0)
3725 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
3726 ; ZVFH-NEXT: vse32.v v8, (a0)
3729 ; ZVFHMIN-LABEL: fsub_vf_v4f32:
3731 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
3732 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
3733 ; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0
3734 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
3736 %a = load <4 x float>, ptr %x
3737 %b = insertelement <4 x float> poison, float %y, i32 0
3738 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3739 %d = fsub <4 x float> %a, %c
3740 store <4 x float> %d, ptr %x
; Vector - scalar subtract on <2 x double>: a splat of %y is subtracted from
; the vector loaded from %x and the difference is stored back to %x.
; A single expectation under the common prefix covers this function: one
; vfsub.vf at e64, m1.
3744 define void @fsub_vf_v2f64(ptr %x, double %y) {
3745 ; CHECK-LABEL: fsub_vf_v2f64:
3747 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3748 ; CHECK-NEXT: vle64.v v8, (a0)
3749 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
3750 ; CHECK-NEXT: vse64.v v8, (a0)
3752 %a = load <2 x double>, ptr %x
3753 %b = insertelement <2 x double> poison, double %y, i32 0
3754 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3755 %d = fsub <2 x double> %a, %c
3756 store <2 x double> %d, ptr %x
; Scalar - vector subtract on <8 x half>: the vector loaded from %x is
; subtracted from a splat of %y and the difference is stored back to %x.
; The zvfh runs expect the reversed-operand form vfrsub.vf. The zvfhmin runs
; expect the scalar to be converted to f32, splatted and narrowed to f16, after
; which both operands are widened, subtracted at e32 (splat minus loaded
; vector) and narrowed back to e16 for the store.
3760 define void @fsub_fv_v8f16(ptr %x, half %y) {
3761 ; ZVFH-LABEL: fsub_fv_v8f16:
3763 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3764 ; ZVFH-NEXT: vle16.v v8, (a0)
3765 ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
3766 ; ZVFH-NEXT: vse16.v v8, (a0)
3769 ; ZVFHMIN-LABEL: fsub_fv_v8f16:
3771 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3772 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3773 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
3774 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3775 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
3776 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3777 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
3778 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3779 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
3780 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3781 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3782 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
3783 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3784 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
3785 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
3787 %a = load <8 x half>, ptr %x
3788 %b = insertelement <8 x half> poison, half %y, i32 0
3789 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3790 %d = fsub <8 x half> %c, %a
3791 store <8 x half> %d, ptr %x
; Scalar - vector subtract on the non-power-of-two type <6 x half>: the vector
; loaded from %x is subtracted from a splat of %y and the difference is stored
; back to %x.
; The zvfh runs expect the load and store to use VL=6 while the vfrsub.vf runs
; with VL=8. The zvfhmin runs expect the subtract to be done at e32 after
; widening (splat minus loaded vector), and the narrowed result to be stored in
; two pieces: on riscv32 a 32-bit element slid down from index 2 goes to offset
; 8 and four e16 elements go to offset 0; on riscv64 a vse64.v writes offset 0
; and a vse32.v of the slid-down element writes offset 8.
3795 define void @fsub_fv_v6f16(ptr %x, half %y) {
3796 ; ZVFH-LABEL: fsub_fv_v6f16:
3798 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3799 ; ZVFH-NEXT: vle16.v v8, (a0)
3800 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3801 ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
3802 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3803 ; ZVFH-NEXT: vse16.v v8, (a0)
3806 ; ZVFHMINLMULMAX2-RV32-LABEL: fsub_fv_v6f16:
3807 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
3808 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3809 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
3810 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
3811 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3812 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
3813 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3814 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
3815 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3816 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
3817 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
3818 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3819 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v9
3820 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3821 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
3822 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3823 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
3824 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
3825 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
3826 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3827 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
3828 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
3830 ; ZVFHMINLMULMAX2-RV64-LABEL: fsub_fv_v6f16:
3831 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
3832 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3833 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
3834 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
3835 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3836 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
3837 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3838 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
3839 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3840 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
3841 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
3842 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3843 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v9
3844 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3845 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
3846 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3847 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
3848 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
3849 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
3850 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
3851 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
3853 ; ZVFHMINLMULMAX1-RV32-LABEL: fsub_fv_v6f16:
3854 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
3855 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3856 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
3857 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
3858 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3859 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
3860 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3861 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
3862 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3863 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
3864 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
3865 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3866 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9
3867 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3868 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
3869 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3870 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
3871 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
3872 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
3873 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3874 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
3875 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
3877 ; ZVFHMINLMULMAX1-RV64-LABEL: fsub_fv_v6f16:
3878 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
3879 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3880 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
3881 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
3882 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3883 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
3884 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3885 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
3886 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3887 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
3888 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
3889 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3890 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9
3891 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3892 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
3893 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3894 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
3895 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
3896 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
3897 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
3898 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
3899 %a = load <6 x half>, ptr %x
3900 %b = insertelement <6 x half> poison, half %y, i32 0
3901 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3902 %d = fsub <6 x half> %c, %a
3903 store <6 x half> %d, ptr %x
; fsub on <4 x float> with the splatted scalar %y as the LEFT operand
; (%c - %a); the result is stored back to %x.
; Both expectation groups fold the splat into a single reversed-subtract
; vfrsub.vf; they differ only in the LMUL of the vsetivli (m1 vs mf2).
3907 define void @fsub_fv_v4f32(ptr %x, float %y) {
3908 ; ZVFH-LABEL: fsub_fv_v4f32:
3910 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3911 ; ZVFH-NEXT: vle32.v v8, (a0)
3912 ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
3913 ; ZVFH-NEXT: vse32.v v8, (a0)
3916 ; ZVFHMIN-LABEL: fsub_fv_v4f32:
3918 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
3919 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
3920 ; ZVFHMIN-NEXT: vfrsub.vf v8, v8, fa0
3921 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
3923 %a = load <4 x float>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
3924 %b = insertelement <4 x float> poison, float %y, i32 0
3925 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3926 %d = fsub <4 x float> %c, %a
3927 store <4 x float> %d, ptr %x
; fsub on <2 x double> with the splatted scalar %y as the LEFT operand
; (%c - %a); the result is stored back to %x.
; A single expectation under the common CHECK prefix covers this function:
; the splat is folded into a reversed-subtract vfrsub.vf at e64, m1.
3931 define void @fsub_fv_v2f64(ptr %x, double %y) {
3932 ; CHECK-LABEL: fsub_fv_v2f64:
3934 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3935 ; CHECK-NEXT: vle64.v v8, (a0)
3936 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0
3937 ; CHECK-NEXT: vse64.v v8, (a0)
3939 %a = load <2 x double>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
3940 %b = insertelement <2 x double> poison, double %y, i32 0
3941 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3942 %d = fsub <2 x double> %c, %a
3943 store <2 x double> %d, ptr %x
; fmul on <8 x half> with the splatted scalar %y as the RIGHT operand
; (%a * %c); the result is stored back to %x.
; ZVFH expectation: the splat folds into a single e16 vfmul.vf.
; ZVFHMIN expectation: no e16 arithmetic is used. The scalar is converted
; with fcvt.s.h, splatted at e32 and narrowed to e16; then both operands are
; widened to e32 (vfwcvt), multiplied with vfmul.vv, and the product is
; narrowed back to e16 (vfncvt) before the store.
3947 define void @fmul_vf_v8f16(ptr %x, half %y) {
3948 ; ZVFH-LABEL: fmul_vf_v8f16:
3950 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3951 ; ZVFH-NEXT: vle16.v v8, (a0)
3952 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
3953 ; ZVFH-NEXT: vse16.v v8, (a0)
3956 ; ZVFHMIN-LABEL: fmul_vf_v8f16:
3958 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3959 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3960 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
3961 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3962 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
3963 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3964 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
3965 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3966 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
3967 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3968 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3969 ; ZVFHMIN-NEXT: vfmul.vv v8, v9, v8
3970 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3971 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
3972 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
3974 %a = load <8 x half>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
3975 %b = insertelement <8 x half> poison, half %y, i32 0
3976 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3977 %d = fmul <8 x half> %a, %c
3978 store <8 x half> %d, ptr %x
; fmul on the non-power-of-two type <6 x half> with the splatted scalar %y
; as the RIGHT operand (%a * %c); the result is stored back to %x.
; ZVFH expectation: load and store use VL=6, while vfmul.vf runs at VL=8.
; ZVFHMIN* expectations: the multiply is done at e32 (vfwcvt, vfmul.vv,
; vfncvt) and the 12-byte result is written with two stores:
;   - RV32 variants: a 32-bit element (slid down from index 2) to %x+8,
;     then four e16 elements to %x.
;   - RV64 variants: one 64-bit element to %x, then a 32-bit element
;     (slid down from index 2) to %x+8.
3982 define void @fmul_vf_v6f16(ptr %x, half %y) {
3983 ; ZVFH-LABEL: fmul_vf_v6f16:
3985 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3986 ; ZVFH-NEXT: vle16.v v8, (a0)
3987 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3988 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
3989 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3990 ; ZVFH-NEXT: vse16.v v8, (a0)
3993 ; ZVFHMINLMULMAX2-RV32-LABEL: fmul_vf_v6f16:
3994 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
3995 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3996 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
3997 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
3998 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3999 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
4000 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4001 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
4002 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4003 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
4004 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
4005 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4006 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v9, v8
4007 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4008 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
4009 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4010 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
4011 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
4012 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
4013 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4014 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
4015 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
4017 ; ZVFHMINLMULMAX2-RV64-LABEL: fmul_vf_v6f16:
4018 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
4019 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4020 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
4021 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
4022 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4023 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
4024 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4025 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
4026 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4027 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
4028 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
4029 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4030 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v9, v8
4031 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4032 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
4033 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4034 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
4035 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
4036 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
4037 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
4038 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
4040 ; ZVFHMINLMULMAX1-RV32-LABEL: fmul_vf_v6f16:
4041 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
4042 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4043 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
4044 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
4045 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4046 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
4047 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4048 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
4049 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4050 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
4051 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
4052 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4053 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v9, v8
4054 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4055 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
4056 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4057 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
4058 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
4059 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
4060 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4061 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
4062 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
4064 ; ZVFHMINLMULMAX1-RV64-LABEL: fmul_vf_v6f16:
4065 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
4066 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4067 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
4068 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
4069 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4070 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
4071 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4072 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
4073 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4074 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
4075 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
4076 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4077 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v9, v8
4078 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4079 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
4080 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4081 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
4082 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
4083 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
4084 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
4085 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
4086 %a = load <6 x half>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4087 %b = insertelement <6 x half> poison, half %y, i32 0
4088 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
4089 %d = fmul <6 x half> %a, %c
4090 store <6 x half> %d, ptr %x
; fmul on <4 x float> with the splatted scalar %y as the RIGHT operand
; (%a * %c); the result is stored back to %x.
; Both expectation groups fold the splat into a single vfmul.vf; they
; differ only in the LMUL of the vsetivli (m1 vs mf2).
4094 define void @fmul_vf_v4f32(ptr %x, float %y) {
4095 ; ZVFH-LABEL: fmul_vf_v4f32:
4097 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4098 ; ZVFH-NEXT: vle32.v v8, (a0)
4099 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
4100 ; ZVFH-NEXT: vse32.v v8, (a0)
4103 ; ZVFHMIN-LABEL: fmul_vf_v4f32:
4105 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
4106 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
4107 ; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0
4108 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
4110 %a = load <4 x float>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4111 %b = insertelement <4 x float> poison, float %y, i32 0
4112 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
4113 %d = fmul <4 x float> %a, %c
4114 store <4 x float> %d, ptr %x
; fmul on <2 x double> with the splatted scalar %y as the RIGHT operand
; (%a * %c); the result is stored back to %x.
; A single expectation under the common CHECK prefix covers this function:
; the splat is folded into vfmul.vf at e64, m1.
4118 define void @fmul_vf_v2f64(ptr %x, double %y) {
4119 ; CHECK-LABEL: fmul_vf_v2f64:
4121 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4122 ; CHECK-NEXT: vle64.v v8, (a0)
4123 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
4124 ; CHECK-NEXT: vse64.v v8, (a0)
4126 %a = load <2 x double>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4127 %b = insertelement <2 x double> poison, double %y, i32 0
4128 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
4129 %d = fmul <2 x double> %a, %c
4130 store <2 x double> %d, ptr %x
; fmul on <8 x half> with the splatted scalar %y as the LEFT operand
; (%c * %a); the result is stored back to %x.
; ZVFH expectation: the same vfmul.vf form as the vector-on-left test is
; expected, i.e. the operands are commuted.
; ZVFHMIN expectation: the multiply is done at e32 (fcvt.s.h + splat +
; narrow for the scalar, vfwcvt of both operands, vfmul.vv, vfncvt); the
; vfmul.vv source order here is splat first, loaded vector second.
4134 define void @fmul_fv_v8f16(ptr %x, half %y) {
4135 ; ZVFH-LABEL: fmul_fv_v8f16:
4137 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4138 ; ZVFH-NEXT: vle16.v v8, (a0)
4139 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
4140 ; ZVFH-NEXT: vse16.v v8, (a0)
4143 ; ZVFHMIN-LABEL: fmul_fv_v8f16:
4145 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4146 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4147 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
4148 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4149 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
4150 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4151 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
4152 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4153 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
4154 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
4155 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4156 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9
4157 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4158 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
4159 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
4161 %a = load <8 x half>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4162 %b = insertelement <8 x half> poison, half %y, i32 0
4163 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
4164 %d = fmul <8 x half> %c, %a
4165 store <8 x half> %d, ptr %x
; fmul on the non-power-of-two type <6 x half> with the splatted scalar %y
; as the LEFT operand (%c * %a); the result is stored back to %x.
; ZVFH expectation: load and store use VL=6, while vfmul.vf runs at VL=8.
; ZVFHMIN* expectations: the multiply is done at e32 (vfwcvt, vfmul.vv with
; the splat as first source, vfncvt) and the 12-byte result is written with
; two stores:
;   - RV32 variants: a 32-bit element (slid down from index 2) to %x+8,
;     then four e16 elements to %x.
;   - RV64 variants: one 64-bit element to %x, then a 32-bit element
;     (slid down from index 2) to %x+8.
4169 define void @fmul_fv_v6f16(ptr %x, half %y) {
4170 ; ZVFH-LABEL: fmul_fv_v6f16:
4172 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4173 ; ZVFH-NEXT: vle16.v v8, (a0)
4174 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4175 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
4176 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4177 ; ZVFH-NEXT: vse16.v v8, (a0)
4180 ; ZVFHMINLMULMAX2-RV32-LABEL: fmul_fv_v6f16:
4181 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
4182 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4183 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
4184 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
4185 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4186 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
4187 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4188 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
4189 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4190 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
4191 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
4192 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4193 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v9
4194 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4195 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
4196 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4197 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
4198 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
4199 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
4200 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4201 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
4202 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
4204 ; ZVFHMINLMULMAX2-RV64-LABEL: fmul_fv_v6f16:
4205 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
4206 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4207 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
4208 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
4209 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4210 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
4211 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4212 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
4213 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4214 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
4215 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
4216 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4217 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v9
4218 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4219 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
4220 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4221 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
4222 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
4223 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
4224 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
4225 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
4227 ; ZVFHMINLMULMAX1-RV32-LABEL: fmul_fv_v6f16:
4228 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
4229 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4230 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
4231 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
4232 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4233 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
4234 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4235 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
4236 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4237 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
4238 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
4239 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4240 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v9
4241 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4242 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
4243 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4244 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
4245 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
4246 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
4247 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4248 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
4249 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
4251 ; ZVFHMINLMULMAX1-RV64-LABEL: fmul_fv_v6f16:
4252 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
4253 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4254 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
4255 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
4256 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4257 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
4258 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4259 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
4260 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4261 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
4262 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
4263 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4264 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9
4265 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4266 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
4267 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4268 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
4269 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
4270 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
4271 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
4272 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
4273 %a = load <6 x half>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4274 %b = insertelement <6 x half> poison, half %y, i32 0
4275 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
4276 %d = fmul <6 x half> %c, %a
4277 store <6 x half> %d, ptr %x
; fmul on <4 x float> with the splatted scalar %y as the LEFT operand
; (%c * %a); the result is stored back to %x.
; Both expectation groups use the same vfmul.vf form as the
; vector-on-left test; they differ only in the LMUL of the vsetivli
; (m1 vs mf2).
4281 define void @fmul_fv_v4f32(ptr %x, float %y) {
4282 ; ZVFH-LABEL: fmul_fv_v4f32:
4284 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4285 ; ZVFH-NEXT: vle32.v v8, (a0)
4286 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
4287 ; ZVFH-NEXT: vse32.v v8, (a0)
4290 ; ZVFHMIN-LABEL: fmul_fv_v4f32:
4292 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
4293 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
4294 ; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0
4295 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
4297 %a = load <4 x float>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4298 %b = insertelement <4 x float> poison, float %y, i32 0
4299 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
4300 %d = fmul <4 x float> %c, %a
4301 store <4 x float> %d, ptr %x
; fmul on <2 x double> with the splatted scalar %y as the LEFT operand
; (%c * %a); the result is stored back to %x.
; A single expectation under the common CHECK prefix covers this function:
; the same vfmul.vf form as the vector-on-left test, at e64, m1.
4305 define void @fmul_fv_v2f64(ptr %x, double %y) {
4306 ; CHECK-LABEL: fmul_fv_v2f64:
4308 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4309 ; CHECK-NEXT: vle64.v v8, (a0)
4310 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
4311 ; CHECK-NEXT: vse64.v v8, (a0)
4313 %a = load <2 x double>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4314 %b = insertelement <2 x double> poison, double %y, i32 0
4315 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
4316 %d = fmul <2 x double> %c, %a
4317 store <2 x double> %d, ptr %x
; fdiv on <8 x half> with the splatted scalar %y as the divisor (%a / %c);
; the result is stored back to %x.
; ZVFH expectation: the splat folds into a single e16 vfdiv.vf.
; ZVFHMIN expectation: no e16 arithmetic is used. The scalar is converted
; with fcvt.s.h, splatted at e32 and narrowed to e16; then both operands are
; widened to e32 (vfwcvt), divided with vfdiv.vv (loaded vector first, splat
; second), and the quotient is narrowed back to e16 (vfncvt).
4321 define void @fdiv_vf_v8f16(ptr %x, half %y) {
4322 ; ZVFH-LABEL: fdiv_vf_v8f16:
4324 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4325 ; ZVFH-NEXT: vle16.v v8, (a0)
4326 ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
4327 ; ZVFH-NEXT: vse16.v v8, (a0)
4330 ; ZVFHMIN-LABEL: fdiv_vf_v8f16:
4332 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4333 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4334 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
4335 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4336 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
4337 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4338 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
4339 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4340 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
4341 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
4342 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4343 ; ZVFHMIN-NEXT: vfdiv.vv v8, v9, v8
4344 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4345 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
4346 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
4348 %a = load <8 x half>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4349 %b = insertelement <8 x half> poison, half %y, i32 0
4350 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
4351 %d = fdiv <8 x half> %a, %c
4352 store <8 x half> %d, ptr %x
; fdiv on the non-power-of-two type <6 x half> with the splatted scalar %y
; as the divisor (%a / %c); the result is stored back to %x.
; ZVFH expectation: load and store use VL=6, while vfdiv.vf runs at VL=8.
; ZVFHMIN* expectations: the divide is done at e32 (vfwcvt, vfdiv.vv with
; the loaded vector as first source, vfncvt) and the 12-byte result is
; written with two stores:
;   - RV32 variants: a 32-bit element (slid down from index 2) to %x+8,
;     then four e16 elements to %x.
;   - RV64 variants: one 64-bit element to %x, then a 32-bit element
;     (slid down from index 2) to %x+8.
4356 define void @fdiv_vf_v6f16(ptr %x, half %y) {
4357 ; ZVFH-LABEL: fdiv_vf_v6f16:
4359 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4360 ; ZVFH-NEXT: vle16.v v8, (a0)
4361 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4362 ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
4363 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4364 ; ZVFH-NEXT: vse16.v v8, (a0)
4367 ; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_vf_v6f16:
4368 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
4369 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4370 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
4371 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
4372 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4373 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
4374 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4375 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
4376 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4377 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
4378 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
4379 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4380 ; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v9, v8
4381 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4382 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
4383 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4384 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
4385 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
4386 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
4387 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4388 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
4389 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
4391 ; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_vf_v6f16:
4392 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
4393 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4394 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
4395 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
4396 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4397 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
4398 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4399 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
4400 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4401 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
4402 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
4403 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4404 ; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v9, v8
4405 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4406 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
4407 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4408 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
4409 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
4410 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
4411 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
4412 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
4414 ; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_vf_v6f16:
4415 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
4416 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4417 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
4418 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
4419 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4420 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
4421 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4422 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
4423 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4424 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
4425 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
4426 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4427 ; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v9, v8
4428 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4429 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
4430 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4431 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
4432 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
4433 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
4434 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4435 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
4436 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
4438 ; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_vf_v6f16:
4439 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
4440 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4441 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
4442 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
4443 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4444 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
4445 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4446 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
4447 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4448 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
4449 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
4450 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4451 ; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v9, v8
4452 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4453 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
4454 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4455 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
4456 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
4457 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
4458 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
4459 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
4460 %a = load <6 x half>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4461 %b = insertelement <6 x half> poison, half %y, i32 0
4462 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
4463 %d = fdiv <6 x half> %a, %c
4464 store <6 x half> %d, ptr %x
; fdiv on <4 x float> with the splatted scalar %y as the divisor (%a / %c);
; the result is stored back to %x.
; Both expectation groups fold the splat into a single vfdiv.vf; they
; differ only in the LMUL of the vsetivli (m1 vs mf2).
4468 define void @fdiv_vf_v4f32(ptr %x, float %y) {
4469 ; ZVFH-LABEL: fdiv_vf_v4f32:
4471 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4472 ; ZVFH-NEXT: vle32.v v8, (a0)
4473 ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
4474 ; ZVFH-NEXT: vse32.v v8, (a0)
4477 ; ZVFHMIN-LABEL: fdiv_vf_v4f32:
4479 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
4480 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
4481 ; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0
4482 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
4484 %a = load <4 x float>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4485 %b = insertelement <4 x float> poison, float %y, i32 0
4486 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
4487 %d = fdiv <4 x float> %a, %c
4488 store <4 x float> %d, ptr %x
; fdiv on <2 x double> with the splatted scalar %y as the divisor
; (%a / %c); the result is stored back to %x.
; A single expectation under the common CHECK prefix covers this function:
; the splat is folded into vfdiv.vf at e64, m1.
4492 define void @fdiv_vf_v2f64(ptr %x, double %y) {
4493 ; CHECK-LABEL: fdiv_vf_v2f64:
4495 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4496 ; CHECK-NEXT: vle64.v v8, (a0)
4497 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
4498 ; CHECK-NEXT: vse64.v v8, (a0)
4500 %a = load <2 x double>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4501 %b = insertelement <2 x double> poison, double %y, i32 0
4502 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
4503 %d = fdiv <2 x double> %a, %c
4504 store <2 x double> %d, ptr %x
; fdiv on <8 x half> with the splatted scalar %y as the dividend (%c / %a);
; the result is stored back to %x.
; ZVFH expectation: the splat folds into a single reversed-divide
; vfrdiv.vf at e16.
; ZVFHMIN expectation: the divide is done at e32 (fcvt.s.h + splat + narrow
; for the scalar, vfwcvt of both operands, vfdiv.vv, vfncvt); the vfdiv.vv
; source order here is splat first, loaded vector second.
4508 define void @fdiv_fv_v8f16(ptr %x, half %y) {
4509 ; ZVFH-LABEL: fdiv_fv_v8f16:
4511 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4512 ; ZVFH-NEXT: vle16.v v8, (a0)
4513 ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
4514 ; ZVFH-NEXT: vse16.v v8, (a0)
4517 ; ZVFHMIN-LABEL: fdiv_fv_v8f16:
4519 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4520 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4521 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
4522 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4523 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
4524 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4525 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
4526 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4527 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
4528 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
4529 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4530 ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
4531 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4532 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
4533 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
4535 %a = load <8 x half>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4536 %b = insertelement <8 x half> poison, half %y, i32 0
4537 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
4538 %d = fdiv <8 x half> %c, %a
4539 store <8 x half> %d, ptr %x
; fdiv on the non-power-of-two type <6 x half> with the splatted scalar %y
; as the dividend (%c / %a); the result is stored back to %x.
; ZVFH expectation: load and store use VL=6, while the reversed-divide
; vfrdiv.vf runs at VL=8.
; ZVFHMIN* expectations: the divide is done at e32 (vfwcvt, vfdiv.vv with
; the splat as first source, vfncvt) and the 12-byte result is written with
; two stores:
;   - RV32 variants: a 32-bit element (slid down from index 2) to %x+8,
;     then four e16 elements to %x.
;   - RV64 variants: one 64-bit element to %x, then a 32-bit element
;     (slid down from index 2) to %x+8.
4543 define void @fdiv_fv_v6f16(ptr %x, half %y) {
4544 ; ZVFH-LABEL: fdiv_fv_v6f16:
4546 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4547 ; ZVFH-NEXT: vle16.v v8, (a0)
4548 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4549 ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
4550 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4551 ; ZVFH-NEXT: vse16.v v8, (a0)
4554 ; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_fv_v6f16:
4555 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
4556 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4557 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
4558 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
4559 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4560 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
4561 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4562 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
4563 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4564 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
4565 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
4566 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4567 ; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v8, v9
4568 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4569 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
4570 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4571 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
4572 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
4573 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
4574 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4575 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
4576 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
4578 ; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_fv_v6f16:
4579 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
4580 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4581 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
4582 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
4583 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4584 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
4585 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4586 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
4587 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4588 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
4589 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
4590 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4591 ; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v8, v9
4592 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4593 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
4594 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4595 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
4596 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
4597 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
4598 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
4599 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
4601 ; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_fv_v6f16:
4602 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
4603 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4604 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
4605 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
4606 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4607 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
4608 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4609 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
4610 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4611 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
4612 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
4613 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4614 ; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v9
4615 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4616 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
4617 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4618 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
4619 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
4620 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
4621 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4622 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
4623 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
4625 ; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_fv_v6f16:
4626 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
4627 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4628 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
4629 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
4630 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4631 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
4632 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4633 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
4634 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4635 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
4636 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
4637 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4638 ; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9
4639 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4640 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
4641 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4642 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
4643 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
4644 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
4645 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
4646 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
4647 %a = load <6 x half>, ptr %x
; Splat %y: insert into lane 0, then shuffle with an all-zero mask.
4648 %b = insertelement <6 x half> poison, half %y, i32 0
4649 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
4650 %d = fdiv <6 x half> %c, %a
4651 store <6 x half> %d, ptr %x
; fdiv_fv_v4f32: loads a <4 x float> from %x, splats the scalar %y across all
; lanes, divides the splat by the loaded vector (the scalar is the dividend)
; and stores the quotient back to %x.
; Both check prefixes below expect a single vfrdiv.vf; they differ only in the
; LMUL field of the vsetivli (m1 versus mf2).
4655 define void @fdiv_fv_v4f32(ptr %x, float %y) {
4656 ; ZVFH-LABEL: fdiv_fv_v4f32:
4658 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4659 ; ZVFH-NEXT: vle32.v v8, (a0)
4660 ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
4661 ; ZVFH-NEXT: vse32.v v8, (a0)
4664 ; ZVFHMIN-LABEL: fdiv_fv_v4f32:
4666 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
4667 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
4668 ; ZVFHMIN-NEXT: vfrdiv.vf v8, v8, fa0
4669 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
4671 %a = load <4 x float>, ptr %x
; Splat idiom: insert %y into lane 0, then shuffle with an all-zero mask.
4672 %b = insertelement <4 x float> poison, float %y, i32 0
4673 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
; Operand order matters: the splat %c is the dividend, %a the divisor.
4674 %d = fdiv <4 x float> %c, %a
4675 store <4 x float> %d, ptr %x
; fdiv_fv_v2f64: loads a <2 x double> from %x, splats the scalar %y across both
; lanes, divides the splat by the loaded vector (the scalar is the dividend)
; and stores the quotient back to %x.
; All configurations share one set of checks, which expect a single vfrdiv.vf
; at e64/m1.
4679 define void @fdiv_fv_v2f64(ptr %x, double %y) {
4680 ; CHECK-LABEL: fdiv_fv_v2f64:
4682 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4683 ; CHECK-NEXT: vle64.v v8, (a0)
4684 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
4685 ; CHECK-NEXT: vse64.v v8, (a0)
4687 %a = load <2 x double>, ptr %x
; Splat idiom: insert %y into lane 0, then shuffle with an all-zero mask.
4688 %b = insertelement <2 x double> poison, double %y, i32 0
4689 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
; Operand order matters: the splat %c is the dividend, %a the divisor.
4690 %d = fdiv <2 x double> %c, %a
4691 store <2 x double> %d, ptr %x
; fma_vf_v8f16: computes fma(%a, splat(%z), %b) on <8 x half>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. The splat is
; the second multiplicand here.
; The ZVFH checks expect a single vfmacc.vf. The ZVFHMIN checks instead expect
; the scalar to go through fcvt.s.h / vfmv.v.f / vfncvt.f.f.w, all three
; operands to be widened with vfwcvt.f.f.v, the multiply-add to run as
; vfmadd.vv at e32, and the result to be narrowed with vfncvt.f.f.w.
4695 define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
4696 ; ZVFH-LABEL: fma_vf_v8f16:
4698 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4699 ; ZVFH-NEXT: vle16.v v8, (a0)
4700 ; ZVFH-NEXT: vle16.v v9, (a1)
4701 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
4702 ; ZVFH-NEXT: vse16.v v9, (a0)
4705 ; ZVFHMIN-LABEL: fma_vf_v8f16:
4707 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4708 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4709 ; ZVFHMIN-NEXT: vle16.v v9, (a1)
4710 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
4711 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4712 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
4713 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4714 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
4715 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4716 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
4717 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
4718 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
4719 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4720 ; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10
4721 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4722 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
4723 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
4725 %a = load <8 x half>, ptr %x
4726 %b = load <8 x half>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
4727 %c = insertelement <8 x half> poison, half %z, i32 0
4728 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
; Vector first, splat second, addend %b last.
4729 %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %b)
4730 store <8 x half> %e, ptr %x
; fma_vf_v6f16: computes fma(%a, splat(%z), %b) on the non-power-of-two type
; <6 x half>, where %a is loaded from %x and %b from %y, and stores the result
; back to %x. The splat is the second multiplicand here.
; The ZVFH checks expect the loads and the store to use vl=6 while the
; vfmacc.vf itself runs with vl=8. The four ZVFHMIN check groups expect the
; multiply-add to run as vfmadd.vv at e32 between vfwcvt/vfncvt conversions,
; and they differ only in how the result is stored: the RV32 groups expect a
; vse32.v to %x+8 followed by a 4-element vse16.v to %x, while the RV64 groups
; expect a vse64.v to %x followed by a vse32.v to %x+8.
4734 define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
4735 ; ZVFH-LABEL: fma_vf_v6f16:
4737 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4738 ; ZVFH-NEXT: vle16.v v8, (a0)
4739 ; ZVFH-NEXT: vle16.v v9, (a1)
4740 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4741 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
4742 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4743 ; ZVFH-NEXT: vse16.v v9, (a0)
4746 ; ZVFHMINLMULMAX2-RV32-LABEL: fma_vf_v6f16:
4747 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
4748 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4749 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
4750 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1)
4751 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
4752 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4753 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5
4754 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4755 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10
4756 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4757 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9
4758 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
4759 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11
4760 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4761 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v8, v9, v10
4762 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4763 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
4764 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4765 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
4766 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
4767 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
4768 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4769 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
4770 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
4772 ; ZVFHMINLMULMAX2-RV64-LABEL: fma_vf_v6f16:
4773 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
4774 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4775 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
4776 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1)
4777 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
4778 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4779 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5
4780 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4781 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10
4782 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4783 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9
4784 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
4785 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11
4786 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4787 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v8, v9, v10
4788 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4789 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
4790 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4791 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
4792 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
4793 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
4794 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
4795 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
4797 ; ZVFHMINLMULMAX1-RV32-LABEL: fma_vf_v6f16:
4798 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
4799 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4800 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
4801 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1)
4802 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
4803 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4804 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5
4805 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4806 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10
4807 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4808 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9
4809 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
4810 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11
4811 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4812 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v8, v9, v10
4813 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4814 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
4815 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4816 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
4817 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
4818 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
4819 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4820 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
4821 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
4823 ; ZVFHMINLMULMAX1-RV64-LABEL: fma_vf_v6f16:
4824 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
4825 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4826 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
4827 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1)
4828 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
4829 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4830 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5
4831 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4832 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10
4833 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4834 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9
4835 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
4836 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11
4837 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4838 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v8, v9, v10
4839 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4840 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
4841 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4842 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
4843 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
4844 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
4845 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
4846 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
4847 %a = load <6 x half>, ptr %x
4848 %b = load <6 x half>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
4849 %c = insertelement <6 x half> poison, half %z, i32 0
4850 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
; Vector first, splat second, addend %b last.
4851 %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %b)
4852 store <6 x half> %e, ptr %x
; fma_vf_v4f32: computes fma(%a, splat(%z), %b) on <4 x float>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. The splat is
; the second multiplicand here.
; Both check prefixes below expect a single vfmacc.vf; they differ only in the
; LMUL field of the vsetivli (m1 versus mf2).
4856 define void @fma_vf_v4f32(ptr %x, ptr %y, float %z) {
4857 ; ZVFH-LABEL: fma_vf_v4f32:
4859 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4860 ; ZVFH-NEXT: vle32.v v8, (a0)
4861 ; ZVFH-NEXT: vle32.v v9, (a1)
4862 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
4863 ; ZVFH-NEXT: vse32.v v9, (a0)
4866 ; ZVFHMIN-LABEL: fma_vf_v4f32:
4868 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
4869 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
4870 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
4871 ; ZVFHMIN-NEXT: vfmacc.vf v9, fa0, v8
4872 ; ZVFHMIN-NEXT: vse32.v v9, (a0)
4874 %a = load <4 x float>, ptr %x
4875 %b = load <4 x float>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
4876 %c = insertelement <4 x float> poison, float %z, i32 0
4877 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
; Vector first, splat second, addend %b last.
4878 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %d, <4 x float> %b)
4879 store <4 x float> %e, ptr %x
; fma_vf_v2f64: computes fma(%a, splat(%z), %b) on <2 x double>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. The splat is
; the second multiplicand here.
; All configurations share one set of checks, which expect a single vfmacc.vf
; at e64/m1.
4883 define void @fma_vf_v2f64(ptr %x, ptr %y, double %z) {
4884 ; CHECK-LABEL: fma_vf_v2f64:
4886 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4887 ; CHECK-NEXT: vle64.v v8, (a0)
4888 ; CHECK-NEXT: vle64.v v9, (a1)
4889 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
4890 ; CHECK-NEXT: vse64.v v9, (a0)
4892 %a = load <2 x double>, ptr %x
4893 %b = load <2 x double>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
4894 %c = insertelement <2 x double> poison, double %z, i32 0
4895 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
; Vector first, splat second, addend %b last.
4896 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %d, <2 x double> %b)
4897 store <2 x double> %e, ptr %x
; fma_fv_v8f16: computes fma(splat(%z), %a, %b) on <8 x half>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. Unlike the
; fma_vf_* tests, the splat is the first multiplicand.
; The ZVFH checks expect a single vfmacc.vf. The ZVFHMIN checks instead expect
; the scalar to go through fcvt.s.h / vfmv.v.f / vfncvt.f.f.w, all three
; operands to be widened with vfwcvt.f.f.v, the multiply-add to run as
; vfmadd.vv at e32, and the result to be narrowed with vfncvt.f.f.w.
4901 define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
4902 ; ZVFH-LABEL: fma_fv_v8f16:
4904 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4905 ; ZVFH-NEXT: vle16.v v8, (a0)
4906 ; ZVFH-NEXT: vle16.v v9, (a1)
4907 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
4908 ; ZVFH-NEXT: vse16.v v9, (a0)
4911 ; ZVFHMIN-LABEL: fma_fv_v8f16:
4913 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4914 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4915 ; ZVFHMIN-NEXT: vle16.v v9, (a1)
4916 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
4917 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4918 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
4919 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4920 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
4921 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4922 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
4923 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
4924 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
4925 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4926 ; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10
4927 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4928 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
4929 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
4931 %a = load <8 x half>, ptr %x
4932 %b = load <8 x half>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
4933 %c = insertelement <8 x half> poison, half %z, i32 0
4934 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
; Splat first, vector second, addend %b last.
4935 %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %d, <8 x half> %a, <8 x half> %b)
4936 store <8 x half> %e, ptr %x
; fma_fv_v6f16: computes fma(splat(%z), %a, %b) on the non-power-of-two type
; <6 x half>, where %a is loaded from %x and %b from %y, and stores the result
; back to %x. Unlike the fma_vf_* tests, the splat is the first multiplicand.
; The ZVFH checks expect the loads and the store to use vl=6 while the
; vfmacc.vf itself runs with vl=8. The four ZVFHMIN check groups expect the
; multiply-add to run as vfmadd.vv at e32 between vfwcvt/vfncvt conversions,
; and they differ only in how the result is stored: the RV32 groups expect a
; vse32.v to %x+8 followed by a 4-element vse16.v to %x, while the RV64 groups
; expect a vse64.v to %x followed by a vse32.v to %x+8.
4940 define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
4941 ; ZVFH-LABEL: fma_fv_v6f16:
4943 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4944 ; ZVFH-NEXT: vle16.v v8, (a0)
4945 ; ZVFH-NEXT: vle16.v v9, (a1)
4946 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4947 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
4948 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4949 ; ZVFH-NEXT: vse16.v v9, (a0)
4952 ; ZVFHMINLMULMAX2-RV32-LABEL: fma_fv_v6f16:
4953 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
4954 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4955 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
4956 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1)
4957 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
4958 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4959 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5
4960 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4961 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10
4962 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4963 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9
4964 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
4965 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11
4966 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4967 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v8, v9, v10
4968 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4969 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
4970 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4971 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
4972 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
4973 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
4974 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
4975 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
4976 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
4978 ; ZVFHMINLMULMAX2-RV64-LABEL: fma_fv_v6f16:
4979 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
4980 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4981 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
4982 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1)
4983 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
4984 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
4985 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5
4986 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4987 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10
4988 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
4989 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9
4990 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
4991 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11
4992 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4993 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v8, v9, v10
4994 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4995 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
4996 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
4997 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
4998 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
4999 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
5000 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
5001 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
5003 ; ZVFHMINLMULMAX1-RV32-LABEL: fma_fv_v6f16:
5004 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
5005 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5006 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
5007 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1)
5008 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
5009 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
5010 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5
5011 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5012 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10
5013 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5014 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9
5015 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
5016 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11
5017 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5018 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v8, v9, v10
5019 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5020 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
5021 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5022 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
5023 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
5024 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
5025 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
5026 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
5027 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
5029 ; ZVFHMINLMULMAX1-RV64-LABEL: fma_fv_v6f16:
5030 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
5031 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5032 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
5033 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1)
5034 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
5035 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
5036 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5
5037 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5038 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10
5039 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5040 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9
5041 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
5042 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11
5043 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5044 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v8, v9, v10
5045 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5046 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
5047 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5048 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
5049 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
5050 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
5051 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
5052 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
5053 %a = load <6 x half>, ptr %x
5054 %b = load <6 x half>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
5055 %c = insertelement <6 x half> poison, half %z, i32 0
5056 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
; Splat first, vector second, addend %b last.
5057 %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %d, <6 x half> %a, <6 x half> %b)
5058 store <6 x half> %e, ptr %x
; fma_fv_v4f32: computes fma(splat(%z), %a, %b) on <4 x float>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. Unlike the
; fma_vf_* tests, the splat is the first multiplicand.
; Both check prefixes below expect a single vfmacc.vf; they differ only in the
; LMUL field of the vsetivli (m1 versus mf2).
5062 define void @fma_fv_v4f32(ptr %x, ptr %y, float %z) {
5063 ; ZVFH-LABEL: fma_fv_v4f32:
5065 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5066 ; ZVFH-NEXT: vle32.v v8, (a0)
5067 ; ZVFH-NEXT: vle32.v v9, (a1)
5068 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
5069 ; ZVFH-NEXT: vse32.v v9, (a0)
5072 ; ZVFHMIN-LABEL: fma_fv_v4f32:
5074 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
5075 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
5076 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
5077 ; ZVFHMIN-NEXT: vfmacc.vf v9, fa0, v8
5078 ; ZVFHMIN-NEXT: vse32.v v9, (a0)
5080 %a = load <4 x float>, ptr %x
5081 %b = load <4 x float>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
5082 %c = insertelement <4 x float> poison, float %z, i32 0
5083 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
; Splat first, vector second, addend %b last.
5084 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %d, <4 x float> %a, <4 x float> %b)
5085 store <4 x float> %e, ptr %x
; fma_fv_v2f64: computes fma(splat(%z), %a, %b) on <2 x double>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. Unlike the
; fma_vf_* tests, the splat is the first multiplicand.
; All configurations share one set of checks, which expect a single vfmacc.vf
; at e64/m1.
5089 define void @fma_fv_v2f64(ptr %x, ptr %y, double %z) {
5090 ; CHECK-LABEL: fma_fv_v2f64:
5092 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5093 ; CHECK-NEXT: vle64.v v8, (a0)
5094 ; CHECK-NEXT: vle64.v v9, (a1)
5095 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
5096 ; CHECK-NEXT: vse64.v v9, (a0)
5098 %a = load <2 x double>, ptr %x
5099 %b = load <2 x double>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
5100 %c = insertelement <2 x double> poison, double %z, i32 0
5101 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
; Splat first, vector second, addend %b last.
5102 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %d, <2 x double> %a, <2 x double> %b)
5103 store <2 x double> %e, ptr %x
; fmsub_vf_v8f16: computes fma(%a, splat(%z), -%b) on <8 x half>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. Only the
; addend is negated.
; The ZVFH checks expect a single vfmsac.vf. The ZVFHMIN checks instead expect
; the negation to be done as a separate vfneg.v at e32 (with a narrow and
; re-widen around it), followed by vfmacc.vv at e32 and a final vfncvt.f.f.w.
5107 define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
5108 ; ZVFH-LABEL: fmsub_vf_v8f16:
5110 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5111 ; ZVFH-NEXT: vle16.v v8, (a0)
5112 ; ZVFH-NEXT: vle16.v v9, (a1)
5113 ; ZVFH-NEXT: vfmsac.vf v9, fa0, v8
5114 ; ZVFH-NEXT: vse16.v v9, (a0)
5117 ; ZVFHMIN-LABEL: fmsub_vf_v8f16:
5119 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5120 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
5121 ; ZVFHMIN-NEXT: vle16.v v9, (a1)
5122 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
5123 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
5124 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
5125 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5126 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
5127 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5128 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
5129 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5130 ; ZVFHMIN-NEXT: vfneg.v v9, v10
5131 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5132 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
5133 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
5134 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
5135 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
5136 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5137 ; ZVFHMIN-NEXT: vfmacc.vv v11, v9, v8
5138 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5139 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11
5140 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
5142 %a = load <8 x half>, ptr %x
5143 %b = load <8 x half>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
5144 %c = insertelement <8 x half> poison, half %z, i32 0
5145 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
; Negating only the addend turns the fma into a multiply-subtract.
5146 %neg = fneg <8 x half> %b
5147 %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %neg)
5148 store <8 x half> %e, ptr %x
; fmsub_vf_v6f16: computes fma(%a, splat(%z), -%b) on the non-power-of-two type
; <6 x half>, where %a is loaded from %x and %b from %y, and stores the result
; back to %x. Only the addend is negated.
; The ZVFH checks expect the loads and the store to use vl=6 while the
; vfmsac.vf itself runs with vl=8. The four ZVFHMIN check groups expect a
; separate vfneg.v at e32 followed by vfmacc.vv at e32, and they differ only in
; how the result is stored: the RV32 groups expect a vse32.v to %x+8 followed
; by a 4-element vse16.v to %x, while the RV64 groups expect a vse64.v to %x
; followed by a vse32.v to %x+8.
5152 define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
5153 ; ZVFH-LABEL: fmsub_vf_v6f16:
5155 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5156 ; ZVFH-NEXT: vle16.v v8, (a0)
5157 ; ZVFH-NEXT: vle16.v v9, (a1)
5158 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5159 ; ZVFH-NEXT: vfmsac.vf v9, fa0, v8
5160 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5161 ; ZVFH-NEXT: vse16.v v9, (a0)
5164 ; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_vf_v6f16:
5165 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
5166 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5167 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
5168 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1)
5169 ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
5170 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
5171 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5
5172 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5173 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10
5174 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5175 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9
5176 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5177 ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v9, v10
5178 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5179 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
5180 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
5181 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11
5182 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v10
5183 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5184 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmacc.vv v11, v9, v8
5185 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5186 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v11
5187 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5188 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
5189 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
5190 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
5191 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
5192 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
5193 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
5195 ; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_vf_v6f16:
5196 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
5197 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5198 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
5199 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1)
5200 ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
5201 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
5202 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5
5203 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5204 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10
5205 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5206 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9
5207 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5208 ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v9, v10
5209 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5210 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
5211 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
5212 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11
5213 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v10
5214 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5215 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmacc.vv v11, v9, v8
5216 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5217 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v11
5218 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5219 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
5220 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
5221 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
5222 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
5223 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
5225 ; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_vf_v6f16:
5226 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
5227 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5228 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
5229 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1)
5230 ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
5231 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
5232 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5
5233 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5234 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10
5235 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5236 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9
5237 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5238 ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v9, v10
5239 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5240 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
5241 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
5242 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11
5243 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v10
5244 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5245 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmacc.vv v11, v9, v8
5246 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5247 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v11
5248 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5249 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
5250 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
5251 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
5252 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
5253 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
5254 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
5256 ; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_vf_v6f16:
5257 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
5258 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5259 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
5260 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1)
5261 ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
5262 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
5263 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5
5264 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5265 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10
5266 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5267 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9
5268 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5269 ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v9, v10
5270 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5271 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
5272 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
5273 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11
5274 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v10
5275 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5276 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmacc.vv v11, v9, v8
5277 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5278 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v11
5279 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5280 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
5281 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
5282 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
5283 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
5284 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
5285 %a = load <6 x half>, ptr %x
5286 %b = load <6 x half>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
5287 %c = insertelement <6 x half> poison, half %z, i32 0
5288 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
; Negating only the addend turns the fma into a multiply-subtract.
5289 %neg = fneg <6 x half> %b
5290 %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %neg)
5291 store <6 x half> %e, ptr %x
; fnmsub_vf_v4f32: computes fma(-%a, splat(%z), %b) on <4 x float>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. The negated
; operand is the loaded vector; the addend is left as is.
; Both check prefixes below expect a single vfnmsac.vf; they differ only in the
; LMUL field of the vsetivli (m1 versus mf2).
5295 define void @fnmsub_vf_v4f32(ptr %x, ptr %y, float %z) {
5296 ; ZVFH-LABEL: fnmsub_vf_v4f32:
5298 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5299 ; ZVFH-NEXT: vle32.v v8, (a0)
5300 ; ZVFH-NEXT: vle32.v v9, (a1)
5301 ; ZVFH-NEXT: vfnmsac.vf v9, fa0, v8
5302 ; ZVFH-NEXT: vse32.v v9, (a0)
5305 ; ZVFHMIN-LABEL: fnmsub_vf_v4f32:
5307 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
5308 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
5309 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
5310 ; ZVFHMIN-NEXT: vfnmsac.vf v9, fa0, v8
5311 ; ZVFHMIN-NEXT: vse32.v v9, (a0)
5313 %a = load <4 x float>, ptr %x
5314 %b = load <4 x float>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
5315 %c = insertelement <4 x float> poison, float %z, i32 0
5316 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
; Negate the vector multiplicand only.
5317 %neg = fneg <4 x float> %a
5318 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %d, <4 x float> %b)
5319 store <4 x float> %e, ptr %x
; fnmadd_vf_v2f64: computes fma(-%a, splat(%z), -%b) on <2 x double>, where %a
; is loaded from %x and %b from %y, and stores the result back to %x. Both the
; vector multiplicand and the addend are negated.
; All configurations share one set of checks, which expect a single vfnmacc.vf
; at e64/m1.
5323 define void @fnmadd_vf_v2f64(ptr %x, ptr %y, double %z) {
5324 ; CHECK-LABEL: fnmadd_vf_v2f64:
5326 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5327 ; CHECK-NEXT: vle64.v v8, (a0)
5328 ; CHECK-NEXT: vle64.v v9, (a1)
5329 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
5330 ; CHECK-NEXT: vse64.v v9, (a0)
5332 %a = load <2 x double>, ptr %x
5333 %b = load <2 x double>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
5334 %c = insertelement <2 x double> poison, double %z, i32 0
5335 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
; Negate the vector multiplicand (%neg) and the addend (%neg2).
5336 %neg = fneg <2 x double> %a
5337 %neg2 = fneg <2 x double> %b
5338 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %d, <2 x double> %neg2)
5339 store <2 x double> %e, ptr %x
; fnmsub_fv_v4f32: computes fma(-splat(%z), %a, %b) on <4 x float>, where %a is
; loaded from %x and %b from %y, and stores the result back to %x. Here the
; negated operand is the splat, which is also the first multiplicand.
; Both check prefixes below expect a single vfnmsac.vf; they differ only in the
; LMUL field of the vsetivli (m1 versus mf2).
5343 define void @fnmsub_fv_v4f32(ptr %x, ptr %y, float %z) {
5344 ; ZVFH-LABEL: fnmsub_fv_v4f32:
5346 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5347 ; ZVFH-NEXT: vle32.v v8, (a0)
5348 ; ZVFH-NEXT: vle32.v v9, (a1)
5349 ; ZVFH-NEXT: vfnmsac.vf v9, fa0, v8
5350 ; ZVFH-NEXT: vse32.v v9, (a0)
5353 ; ZVFHMIN-LABEL: fnmsub_fv_v4f32:
5355 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
5356 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
5357 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
5358 ; ZVFHMIN-NEXT: vfnmsac.vf v9, fa0, v8
5359 ; ZVFHMIN-NEXT: vse32.v v9, (a0)
5361 %a = load <4 x float>, ptr %x
5362 %b = load <4 x float>, ptr %y
; Splat idiom: insert %z into lane 0, then shuffle with an all-zero mask.
5363 %c = insertelement <4 x float> poison, float %z, i32 0
5364 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
; Negate the splat itself rather than the loaded vector.
5365 %neg = fneg <4 x float> %d
5366 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %a, <4 x float> %b)
5367 store <4 x float> %e, ptr %x
; fnmadd, scalar-times-vector form: computes fma(-splat(z), a, -b) on
; <2 x double>, with a loaded from %x and b from %y; the result overwrites %x.
; The fneg is on the splat (first fma operand) and on the addend. All RUN
; configurations share one expectation: a single vfnmacc.vf into v9.
5371 define void @fnmadd_fv_v2f64(ptr %x, ptr %y, double %z) {
5372 ; CHECK-LABEL: fnmadd_fv_v2f64:
5374 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5375 ; CHECK-NEXT: vle64.v v8, (a0)
5376 ; CHECK-NEXT: vle64.v v9, (a1)
5377 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
5378 ; CHECK-NEXT: vse64.v v9, (a0)
5380 %a = load <2 x double>, ptr %x
5381 %b = load <2 x double>, ptr %y
; Splat %z into both lanes.
5382 %c = insertelement <2 x double> poison, double %z, i32 0
5383 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
; Negate the splat and the addend.
5384 %neg = fneg <2 x double> %d
5385 %neg2 = fneg <2 x double> %b
5386 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %a, <2 x double> %neg2)
5387 store <2 x double> %e, ptr %x
; llvm.trunc on <8 x half>, applied in place to the vector at %x.
; Expected sequence for the ZVFH runs:
;   - vfabs.v + vmflt.vf against a half constant loaded from the constant
;     pool build the mask v0 of lanes whose magnitude is below that constant
;     (presumably the bound above which every half is already integral;
;     the constant's value is not visible here - TODO confirm);
;   - masked vfcvt.rtz.x.f.v / vfcvt.f.x.v round-trip through integer with
;     static round-toward-zero, so frm is not touched;
;   - the vsetvli switches to mask-undisturbed (mu) so the masked vfsgnj.vv
;     copies the original sign onto converted lanes and leaves the other
;     lanes of v8 holding the unmodified input.
; NOTE(review): no assertions for the ZVFHMIN runs are visible for this
; function - confirm whether that is intentional.
5391 define void @trunc_v8f16(ptr %x) {
5392 ; ZVFH-LABEL: trunc_v8f16:
5394 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5395 ; ZVFH-NEXT: vle16.v v8, (a0)
5396 ; ZVFH-NEXT: lui a1, %hi(.LCPI115_0)
5397 ; ZVFH-NEXT: flh fa5, %lo(.LCPI115_0)(a1)
5398 ; ZVFH-NEXT: vfabs.v v9, v8
5399 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5400 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
5401 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5402 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
5403 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5404 ; ZVFH-NEXT: vse16.v v8, (a0)
5406 %a = load <8 x half>, ptr %x
5407 %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
5408 store <8 x half> %b, ptr %x
5411 declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
; llvm.trunc on <6 x half> (non-power-of-two element count), applied in place
; to the vector at %x.
; Expected sequence for the ZVFH runs: the load and the store use VL=6, while
; the rounding sequence in between runs at VL=8. The rounding itself is the
; same mask / vfcvt.rtz.x.f.v / vfcvt.f.x.v / masked vfsgnj.vv (mu policy)
; pattern used for the 8-element case, with static round-toward-zero.
; NOTE(review): no assertions for the ZVFHMIN runs are visible for this
; function - confirm whether that is intentional.
5413 define void @trunc_v6f16(ptr %x) {
5414 ; ZVFH-LABEL: trunc_v6f16:
5416 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5417 ; ZVFH-NEXT: vle16.v v8, (a0)
5418 ; ZVFH-NEXT: lui a1, %hi(.LCPI116_0)
5419 ; ZVFH-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
5420 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5421 ; ZVFH-NEXT: vfabs.v v9, v8
5422 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5423 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
5424 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5425 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
5426 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5427 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5428 ; ZVFH-NEXT: vse16.v v8, (a0)
5430 %a = load <6 x half>, ptr %x
5431 %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
5432 store <6 x half> %b, ptr %x
5435 declare <6 x half> @llvm.trunc.v6f16(<6 x half>)
; llvm.trunc on <4 x float>, applied in place to the vector at %x.
; Expected sequence (both prefix groups, differing only in LMUL m1 vs mf2):
;   - the threshold is materialised without a constant pool: lui a1, 307200
;     yields the bit pattern 0x4B000000, which fmv.w.x moves into fa5 as the
;     float 2^23;
;   - vfabs.v + vmflt.vf build the mask v0 of lanes with |x| < 2^23;
;   - masked vfcvt.rtz.x.f.v / vfcvt.f.x.v round-trip through integer with
;     static round-toward-zero;
;   - under mask-undisturbed policy, the masked vfsgnj.vv restores the
;     original sign on converted lanes and leaves the other lanes untouched.
5437 define void @trunc_v4f32(ptr %x) {
5438 ; ZVFH-LABEL: trunc_v4f32:
5440 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5441 ; ZVFH-NEXT: vle32.v v8, (a0)
5442 ; ZVFH-NEXT: vfabs.v v9, v8
5443 ; ZVFH-NEXT: lui a1, 307200
5444 ; ZVFH-NEXT: fmv.w.x fa5, a1
5445 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5446 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
5447 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5448 ; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5449 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5450 ; ZVFH-NEXT: vse32.v v8, (a0)
5453 ; ZVFHMIN-LABEL: trunc_v4f32:
5455 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
5456 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
5457 ; ZVFHMIN-NEXT: vfabs.v v9, v8
5458 ; ZVFHMIN-NEXT: lui a1, 307200
5459 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
5460 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
5461 ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
5462 ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
5463 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
5464 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5465 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
5467 %a = load <4 x float>, ptr %x
5468 %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
5469 store <4 x float> %b, ptr %x
5472 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
; llvm.trunc on <2 x double>, applied in place to the vector at %x.
; All RUN configurations share one expectation (common CHECK prefix):
;   - the double threshold is loaded from the constant pool with fld
;     (its value is not visible here);
;   - vfabs.v + vmflt.vf build the mask v0 of lanes below that threshold;
;   - masked vfcvt.rtz.x.f.v / vfcvt.f.x.v round-trip through integer with
;     static round-toward-zero;
;   - under mask-undisturbed policy, the masked vfsgnj.vv restores the
;     original sign on converted lanes and leaves the other lanes untouched.
5474 define void @trunc_v2f64(ptr %x) {
5475 ; CHECK-LABEL: trunc_v2f64:
5477 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5478 ; CHECK-NEXT: vle64.v v8, (a0)
5479 ; CHECK-NEXT: lui a1, %hi(.LCPI118_0)
5480 ; CHECK-NEXT: fld fa5, %lo(.LCPI118_0)(a1)
5481 ; CHECK-NEXT: vfabs.v v9, v8
5482 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
5483 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
5484 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
5485 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
5486 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5487 ; CHECK-NEXT: vse64.v v8, (a0)
5489 %a = load <2 x double>, ptr %x
5490 %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
5491 store <2 x double> %b, ptr %x
5494 declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
; llvm.ceil on <8 x half>, applied in place to the vector at %x.
; In both prefix groups the rounding direction comes from the dynamic
; rounding mode: fsrmi a1, 3 sets frm to 3 (RUP, round up) and keeps the old
; value in a1, and fsrm a1 restores it right after the float-to-int convert.
; ZVFH runs: the mask / vfcvt.x.f.v / vfcvt.f.x.v / masked vfsgnj.vv (mu
; policy) sequence operates directly at e16, with the threshold loaded from
; the constant pool.
; ZVFHMIN runs: the input is first widened to f32 (vfwcvt.f.f.v), rounded at
; e32 against 2^23 (lui 307200 gives the bit pattern 0x4B000000), and then
; narrowed back to f16 (vfncvt.f.f.w) before the store.
5496 define void @ceil_v8f16(ptr %x) {
5497 ; ZVFH-LABEL: ceil_v8f16:
5499 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5500 ; ZVFH-NEXT: vle16.v v8, (a0)
5501 ; ZVFH-NEXT: lui a1, %hi(.LCPI119_0)
5502 ; ZVFH-NEXT: flh fa5, %lo(.LCPI119_0)(a1)
5503 ; ZVFH-NEXT: vfabs.v v9, v8
5504 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5505 ; ZVFH-NEXT: fsrmi a1, 3
5506 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
5507 ; ZVFH-NEXT: fsrm a1
5508 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5509 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
5510 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5511 ; ZVFH-NEXT: vse16.v v8, (a0)
5514 ; ZVFHMIN-LABEL: ceil_v8f16:
5516 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5517 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
5518 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
5519 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5520 ; ZVFHMIN-NEXT: vfabs.v v8, v9
5521 ; ZVFHMIN-NEXT: lui a1, 307200
5522 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
5523 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
5524 ; ZVFHMIN-NEXT: fsrmi a1, 3
5525 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
5526 ; ZVFHMIN-NEXT: fsrm a1
5527 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
5528 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5529 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5530 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5531 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
5532 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
5534 %a = load <8 x half>, ptr %x
5535 %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
5536 store <8 x half> %b, ptr %x
5539 declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
; llvm.ceil on <6 x half> (non-power-of-two element count), applied in place
; to the vector at %x. Rounding uses frm = 3 (RUP, round up), set with fsrmi
; and restored with fsrm around the float-to-int convert.
; ZVFH runs: the load and the store use VL=6, the rounding sequence runs at
; VL=8 directly on e16 elements.
; ZVFHMIN runs: the load uses VL=8; the value is widened to f32
; (vfwcvt.f.f.v), rounded at e32 against 2^23 (lui 307200 gives the bit
; pattern 0x4B000000), and narrowed back (vfncvt.f.f.w). The result is then
; stored in two pieces, and that split is the only RV32/RV64 difference:
;   - on RV32, 32-bit element 2 is slid down and stored to %x+8, then four
;     e16 elements are stored to %x;
;   - on RV64, one 64-bit element is stored to %x, then 32-bit element 2 is
;     slid down and stored to %x+8.
; The LMULMAX1 and LMULMAX2 expectations are identical for a given XLEN.
5541 define void @ceil_v6f16(ptr %x) {
5542 ; ZVFH-LABEL: ceil_v6f16:
5544 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5545 ; ZVFH-NEXT: vle16.v v8, (a0)
5546 ; ZVFH-NEXT: lui a1, %hi(.LCPI120_0)
5547 ; ZVFH-NEXT: flh fa5, %lo(.LCPI120_0)(a1)
5548 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5549 ; ZVFH-NEXT: vfabs.v v9, v8
5550 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5551 ; ZVFH-NEXT: fsrmi a1, 3
5552 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
5553 ; ZVFH-NEXT: fsrm a1
5554 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5555 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
5556 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5557 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5558 ; ZVFH-NEXT: vse16.v v8, (a0)
5561 ; ZVFHMINLMULMAX2-RV32-LABEL: ceil_v6f16:
5562 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
5563 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5564 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
5565 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
5566 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5567 ; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9
5568 ; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200
5569 ; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1
5570 ; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5
5571 ; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 3
5572 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
5573 ; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1
5574 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
5575 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5576 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5577 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5578 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9
5579 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5580 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
5581 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
5582 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
5583 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
5584 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
5585 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
5587 ; ZVFHMINLMULMAX2-RV64-LABEL: ceil_v6f16:
5588 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
5589 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5590 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
5591 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
5592 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5593 ; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9
5594 ; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200
5595 ; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1
5596 ; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5
5597 ; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 3
5598 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
5599 ; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1
5600 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
5601 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5602 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5603 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5604 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9
5605 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5606 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
5607 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
5608 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
5609 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
5610 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
5612 ; ZVFHMINLMULMAX1-RV32-LABEL: ceil_v6f16:
5613 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
5614 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5615 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
5616 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
5617 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5618 ; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9
5619 ; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200
5620 ; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1
5621 ; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5
5622 ; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 3
5623 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
5624 ; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1
5625 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
5626 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5627 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5628 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5629 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9
5630 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5631 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
5632 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
5633 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
5634 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
5635 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
5636 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
5638 ; ZVFHMINLMULMAX1-RV64-LABEL: ceil_v6f16:
5639 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
5640 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5641 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
5642 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
5643 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5644 ; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9
5645 ; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200
5646 ; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1
5647 ; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5
5648 ; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 3
5649 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
5650 ; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1
5651 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
5652 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5653 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5654 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5655 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9
5656 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5657 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
5658 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
5659 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
5660 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
5661 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
5662 %a = load <6 x half>, ptr %x
5663 %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
5664 store <6 x half> %b, ptr %x
5667 declare <6 x half> @llvm.ceil.v6f16(<6 x half>)
; llvm.ceil on <4 x float>, applied in place to the vector at %x.
; Expected sequence (both prefix groups, differing only in LMUL m1 vs mf2):
;   - lui a1, 307200 + fmv.w.x materialise the float 2^23 (bit pattern
;     0x4B000000) as the threshold; vfabs.v + vmflt.vf build the mask v0;
;   - fsrmi a1, 3 sets frm to 3 (RUP, round up) and keeps the old value in
;     a1; fsrm a1 restores it right after the masked vfcvt.x.f.v;
;   - masked vfcvt.f.x.v converts back, and under mask-undisturbed policy the
;     masked vfsgnj.vv restores the original sign, leaving the other lanes
;     of v8 untouched.
5669 define void @ceil_v4f32(ptr %x) {
5670 ; ZVFH-LABEL: ceil_v4f32:
5672 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5673 ; ZVFH-NEXT: vle32.v v8, (a0)
5674 ; ZVFH-NEXT: vfabs.v v9, v8
5675 ; ZVFH-NEXT: lui a1, 307200
5676 ; ZVFH-NEXT: fmv.w.x fa5, a1
5677 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5678 ; ZVFH-NEXT: fsrmi a1, 3
5679 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
5680 ; ZVFH-NEXT: fsrm a1
5681 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5682 ; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5683 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5684 ; ZVFH-NEXT: vse32.v v8, (a0)
5687 ; ZVFHMIN-LABEL: ceil_v4f32:
5689 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
5690 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
5691 ; ZVFHMIN-NEXT: vfabs.v v9, v8
5692 ; ZVFHMIN-NEXT: lui a1, 307200
5693 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
5694 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
5695 ; ZVFHMIN-NEXT: fsrmi a1, 3
5696 ; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
5697 ; ZVFHMIN-NEXT: fsrm a1
5698 ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
5699 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
5700 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5701 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
5703 %a = load <4 x float>, ptr %x
5704 %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
5705 store <4 x float> %b, ptr %x
5708 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
; llvm.ceil on <2 x double>, applied in place to the vector at %x.
; All RUN configurations share one expectation (common CHECK prefix):
;   - the double threshold is loaded from the constant pool with fld (its
;     value is not visible here); vfabs.v + vmflt.vf build the mask v0;
;   - fsrmi a1, 3 sets frm to 3 (RUP, round up) and keeps the old value in
;     a1; fsrm a1 restores it right after the masked vfcvt.x.f.v;
;   - masked vfcvt.f.x.v converts back, and under mask-undisturbed policy the
;     masked vfsgnj.vv restores the original sign, leaving the other lanes
;     of v8 untouched.
5710 define void @ceil_v2f64(ptr %x) {
5711 ; CHECK-LABEL: ceil_v2f64:
5713 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5714 ; CHECK-NEXT: vle64.v v8, (a0)
5715 ; CHECK-NEXT: lui a1, %hi(.LCPI122_0)
5716 ; CHECK-NEXT: fld fa5, %lo(.LCPI122_0)(a1)
5717 ; CHECK-NEXT: vfabs.v v9, v8
5718 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
5719 ; CHECK-NEXT: fsrmi a1, 3
5720 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
5721 ; CHECK-NEXT: fsrm a1
5722 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
5723 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
5724 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5725 ; CHECK-NEXT: vse64.v v8, (a0)
5727 %a = load <2 x double>, ptr %x
5728 %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
5729 store <2 x double> %b, ptr %x
5732 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
; llvm.floor on <8 x half>, applied in place to the vector at %x.
; In both prefix groups the rounding direction comes from the dynamic
; rounding mode: fsrmi a1, 2 sets frm to 2 (RDN, round down) and keeps the
; old value in a1, and fsrm a1 restores it right after the float-to-int
; convert.
; ZVFH runs: the mask / vfcvt.x.f.v / vfcvt.f.x.v / masked vfsgnj.vv (mu
; policy) sequence operates directly at e16, with the threshold loaded from
; the constant pool.
; ZVFHMIN runs: the input is first widened to f32 (vfwcvt.f.f.v), rounded at
; e32 against 2^23 (lui 307200 gives the bit pattern 0x4B000000), and then
; narrowed back to f16 (vfncvt.f.f.w) before the store.
5734 define void @floor_v8f16(ptr %x) {
5735 ; ZVFH-LABEL: floor_v8f16:
5737 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5738 ; ZVFH-NEXT: vle16.v v8, (a0)
5739 ; ZVFH-NEXT: lui a1, %hi(.LCPI123_0)
5740 ; ZVFH-NEXT: flh fa5, %lo(.LCPI123_0)(a1)
5741 ; ZVFH-NEXT: vfabs.v v9, v8
5742 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5743 ; ZVFH-NEXT: fsrmi a1, 2
5744 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
5745 ; ZVFH-NEXT: fsrm a1
5746 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5747 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
5748 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5749 ; ZVFH-NEXT: vse16.v v8, (a0)
5752 ; ZVFHMIN-LABEL: floor_v8f16:
5754 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5755 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
5756 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
5757 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5758 ; ZVFHMIN-NEXT: vfabs.v v8, v9
5759 ; ZVFHMIN-NEXT: lui a1, 307200
5760 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
5761 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
5762 ; ZVFHMIN-NEXT: fsrmi a1, 2
5763 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
5764 ; ZVFHMIN-NEXT: fsrm a1
5765 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
5766 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5767 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5768 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5769 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
5770 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
5772 %a = load <8 x half>, ptr %x
5773 %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
5774 store <8 x half> %b, ptr %x
5777 declare <8 x half> @llvm.floor.v8f16(<8 x half>)
; llvm.floor on <6 x half> (non-power-of-two element count), applied in place
; to the vector at %x. Rounding uses frm = 2 (RDN, round down), set with
; fsrmi and restored with fsrm around the float-to-int convert.
; ZVFH runs: the load and the store use VL=6, the rounding sequence runs at
; VL=8 directly on e16 elements.
; ZVFHMIN runs: the load uses VL=8; the value is widened to f32
; (vfwcvt.f.f.v), rounded at e32 against 2^23 (lui 307200 gives the bit
; pattern 0x4B000000), and narrowed back (vfncvt.f.f.w). The result is then
; stored in two pieces, and that split is the only RV32/RV64 difference:
;   - on RV32, 32-bit element 2 is slid down and stored to %x+8, then four
;     e16 elements are stored to %x;
;   - on RV64, one 64-bit element is stored to %x, then 32-bit element 2 is
;     slid down and stored to %x+8.
; The LMULMAX1 and LMULMAX2 expectations are identical for a given XLEN.
5779 define void @floor_v6f16(ptr %x) {
5780 ; ZVFH-LABEL: floor_v6f16:
5782 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5783 ; ZVFH-NEXT: vle16.v v8, (a0)
5784 ; ZVFH-NEXT: lui a1, %hi(.LCPI124_0)
5785 ; ZVFH-NEXT: flh fa5, %lo(.LCPI124_0)(a1)
5786 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5787 ; ZVFH-NEXT: vfabs.v v9, v8
5788 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5789 ; ZVFH-NEXT: fsrmi a1, 2
5790 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
5791 ; ZVFH-NEXT: fsrm a1
5792 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5793 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
5794 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5795 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5796 ; ZVFH-NEXT: vse16.v v8, (a0)
5799 ; ZVFHMINLMULMAX2-RV32-LABEL: floor_v6f16:
5800 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
5801 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5802 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
5803 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
5804 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5805 ; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9
5806 ; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200
5807 ; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1
5808 ; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5
5809 ; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 2
5810 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
5811 ; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1
5812 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
5813 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5814 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5815 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5816 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9
5817 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5818 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
5819 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
5820 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
5821 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
5822 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
5823 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
5825 ; ZVFHMINLMULMAX2-RV64-LABEL: floor_v6f16:
5826 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
5827 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5828 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
5829 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
5830 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5831 ; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9
5832 ; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200
5833 ; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1
5834 ; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5
5835 ; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 2
5836 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
5837 ; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1
5838 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
5839 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5840 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5841 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5842 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9
5843 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5844 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
5845 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
5846 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
5847 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
5848 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
5850 ; ZVFHMINLMULMAX1-RV32-LABEL: floor_v6f16:
5851 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
5852 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5853 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
5854 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
5855 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5856 ; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9
5857 ; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200
5858 ; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1
5859 ; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5
5860 ; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 2
5861 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
5862 ; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1
5863 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
5864 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5865 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5866 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5867 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9
5868 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5869 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
5870 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
5871 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
5872 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
5873 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
5874 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
5876 ; ZVFHMINLMULMAX1-RV64-LABEL: floor_v6f16:
5877 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
5878 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5879 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
5880 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
5881 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5882 ; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9
5883 ; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200
5884 ; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1
5885 ; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5
5886 ; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 2
5887 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
5888 ; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1
5889 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
5890 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5891 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
5892 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5893 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9
5894 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
5895 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
5896 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
5897 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
5898 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
5899 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
5900 %a = load <6 x half>, ptr %x
5901 %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
5902 store <6 x half> %b, ptr %x
5905 declare <6 x half> @llvm.floor.v6f16(<6 x half>)
; llvm.floor on <4 x float>, applied in place to the vector at %x.
; Expected sequence (both prefix groups, differing only in LMUL m1 vs mf2):
;   - lui a1, 307200 + fmv.w.x materialise the float 2^23 (bit pattern
;     0x4B000000) as the threshold; vfabs.v + vmflt.vf build the mask v0;
;   - fsrmi a1, 2 sets frm to 2 (RDN, round down) and keeps the old value in
;     a1; fsrm a1 restores it right after the masked vfcvt.x.f.v;
;   - masked vfcvt.f.x.v converts back, and under mask-undisturbed policy the
;     masked vfsgnj.vv restores the original sign, leaving the other lanes
;     of v8 untouched.
5907 define void @floor_v4f32(ptr %x) {
5908 ; ZVFH-LABEL: floor_v4f32:
5910 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5911 ; ZVFH-NEXT: vle32.v v8, (a0)
5912 ; ZVFH-NEXT: vfabs.v v9, v8
5913 ; ZVFH-NEXT: lui a1, 307200
5914 ; ZVFH-NEXT: fmv.w.x fa5, a1
5915 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5916 ; ZVFH-NEXT: fsrmi a1, 2
5917 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
5918 ; ZVFH-NEXT: fsrm a1
5919 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5920 ; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
5921 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5922 ; ZVFH-NEXT: vse32.v v8, (a0)
5925 ; ZVFHMIN-LABEL: floor_v4f32:
5927 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
5928 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
5929 ; ZVFHMIN-NEXT: vfabs.v v9, v8
5930 ; ZVFHMIN-NEXT: lui a1, 307200
5931 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
5932 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
5933 ; ZVFHMIN-NEXT: fsrmi a1, 2
5934 ; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
5935 ; ZVFHMIN-NEXT: fsrm a1
5936 ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
5937 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
5938 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5939 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
5941 %a = load <4 x float>, ptr %x
5942 %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
5943 store <4 x float> %b, ptr %x
5946 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
; llvm.floor on <2 x double>, applied in place to the vector at %x.
; All RUN configurations share one expectation (common CHECK prefix):
;   - the double threshold is loaded from the constant pool with fld (its
;     value is not visible here); vfabs.v + vmflt.vf build the mask v0;
;   - fsrmi a1, 2 sets frm to 2 (RDN, round down) and keeps the old value in
;     a1; fsrm a1 restores it right after the masked vfcvt.x.f.v;
;   - masked vfcvt.f.x.v converts back, and under mask-undisturbed policy the
;     masked vfsgnj.vv restores the original sign, leaving the other lanes
;     of v8 untouched.
5948 define void @floor_v2f64(ptr %x) {
5949 ; CHECK-LABEL: floor_v2f64:
5951 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5952 ; CHECK-NEXT: vle64.v v8, (a0)
5953 ; CHECK-NEXT: lui a1, %hi(.LCPI126_0)
5954 ; CHECK-NEXT: fld fa5, %lo(.LCPI126_0)(a1)
5955 ; CHECK-NEXT: vfabs.v v9, v8
5956 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
5957 ; CHECK-NEXT: fsrmi a1, 2
5958 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
5959 ; CHECK-NEXT: fsrm a1
5960 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
5961 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
5962 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5963 ; CHECK-NEXT: vse64.v v8, (a0)
5965 %a = load <2 x double>, ptr %x
5966 %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
5967 store <2 x double> %b, ptr %x
5970 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
; llvm.round on <8 x half>, applied in place to the vector at %x.
; In both prefix groups the rounding direction comes from the dynamic
; rounding mode: fsrmi a1, 4 sets frm to 4 (RMM, round to nearest with ties
; to max magnitude) and keeps the old value in a1, and fsrm a1 restores it
; right after the float-to-int convert.
; ZVFH runs: the mask / vfcvt.x.f.v / vfcvt.f.x.v / masked vfsgnj.vv (mu
; policy) sequence operates directly at e16, with the threshold loaded from
; the constant pool.
; ZVFHMIN runs: the input is first widened to f32 (vfwcvt.f.f.v), rounded at
; e32 against 2^23 (lui 307200 gives the bit pattern 0x4B000000), and then
; narrowed back to f16 (vfncvt.f.f.w) before the store.
5972 define void @round_v8f16(ptr %x) {
5973 ; ZVFH-LABEL: round_v8f16:
5975 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5976 ; ZVFH-NEXT: vle16.v v8, (a0)
5977 ; ZVFH-NEXT: lui a1, %hi(.LCPI127_0)
5978 ; ZVFH-NEXT: flh fa5, %lo(.LCPI127_0)(a1)
5979 ; ZVFH-NEXT: vfabs.v v9, v8
5980 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
5981 ; ZVFH-NEXT: fsrmi a1, 4
5982 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
5983 ; ZVFH-NEXT: fsrm a1
5984 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
5985 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
5986 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
5987 ; ZVFH-NEXT: vse16.v v8, (a0)
5990 ; ZVFHMIN-LABEL: round_v8f16:
5992 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
5993 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
5994 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
5995 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5996 ; ZVFHMIN-NEXT: vfabs.v v8, v9
5997 ; ZVFHMIN-NEXT: lui a1, 307200
5998 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
5999 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
6000 ; ZVFHMIN-NEXT: fsrmi a1, 4
6001 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
6002 ; ZVFHMIN-NEXT: fsrm a1
6003 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
6004 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6005 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
6006 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6007 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
6008 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
6010 %a = load <8 x half>, ptr %x
6011 %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
6012 store <8 x half> %b, ptr %x
6015 declare <8 x half> @llvm.round.v8f16(<8 x half>)
; llvm.round on <6 x half> (non-power-of-two element count), applied in place
; to the vector at %x. Rounding uses frm = 4 (RMM, round to nearest with ties
; to max magnitude), set with fsrmi and restored with fsrm around the
; float-to-int convert.
; ZVFH runs: the load and the store use VL=6, the rounding sequence runs at
; VL=8 directly on e16 elements.
; ZVFHMIN runs: the load uses VL=8; the value is widened to f32
; (vfwcvt.f.f.v), rounded at e32 against 2^23 (lui 307200 gives the bit
; pattern 0x4B000000), and narrowed back (vfncvt.f.f.w). The result is then
; stored in two pieces, and that split is the only RV32/RV64 difference:
;   - on RV32, 32-bit element 2 is slid down and stored to %x+8, then four
;     e16 elements are stored to %x;
;   - on RV64, one 64-bit element is stored to %x, then 32-bit element 2 is
;     slid down and stored to %x+8.
; The LMULMAX1 and LMULMAX2 expectations are identical for a given XLEN.
6017 define void @round_v6f16(ptr %x) {
6018 ; ZVFH-LABEL: round_v6f16:
6020 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
6021 ; ZVFH-NEXT: vle16.v v8, (a0)
6022 ; ZVFH-NEXT: lui a1, %hi(.LCPI128_0)
6023 ; ZVFH-NEXT: flh fa5, %lo(.LCPI128_0)(a1)
6024 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6025 ; ZVFH-NEXT: vfabs.v v9, v8
6026 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
6027 ; ZVFH-NEXT: fsrmi a1, 4
6028 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
6029 ; ZVFH-NEXT: fsrm a1
6030 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
6031 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
6032 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6033 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
6034 ; ZVFH-NEXT: vse16.v v8, (a0)
6037 ; ZVFHMINLMULMAX2-RV32-LABEL: round_v6f16:
6038 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
6039 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6040 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
6041 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
6042 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6043 ; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9
6044 ; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200
6045 ; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1
6046 ; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5
6047 ; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 4
6048 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
6049 ; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1
6050 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
6051 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6052 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
6053 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6054 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9
6055 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6056 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
6057 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
6058 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
6059 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
6060 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
6061 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
6063 ; ZVFHMINLMULMAX2-RV64-LABEL: round_v6f16:
6064 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
6065 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6066 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
6067 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
6068 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6069 ; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9
6070 ; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200
6071 ; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1
6072 ; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5
6073 ; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 4
6074 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
6075 ; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1
6076 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
6077 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6078 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
6079 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6080 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9
6081 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6082 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
6083 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
6084 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
6085 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
6086 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
6088 ; ZVFHMINLMULMAX1-RV32-LABEL: round_v6f16:
6089 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
6090 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6091 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
6092 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
6093 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6094 ; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9
6095 ; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200
6096 ; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1
6097 ; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5
6098 ; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 4
6099 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
6100 ; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1
6101 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
6102 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6103 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
6104 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6105 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9
6106 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6107 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
6108 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
6109 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
6110 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
6111 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
6112 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
6114 ; ZVFHMINLMULMAX1-RV64-LABEL: round_v6f16:
6115 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
6116 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6117 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
6118 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
6119 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6120 ; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9
6121 ; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200
6122 ; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1
6123 ; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5
6124 ; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 4
6125 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
6126 ; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1
6127 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
6128 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6129 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
6130 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6131 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9
6132 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6133 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
6134 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
6135 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
6136 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
6137 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
6138 %a = load <6 x half>, ptr %x
6139 %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
6140 store <6 x half> %b, ptr %x
6143 declare <6 x half> @llvm.round.v6f16(<6 x half>)
; round_v4f32: loads <4 x float> from %x, applies llvm.round and stores the
; result back to %x.
; The two check groups expect the same instruction sequence and differ only in
; LMUL (m1 under the ZVFH prefix, mf2 under the ZVFHMIN prefix):
;   - lui a1, 307200 builds 0x4B000000, the f32 bit pattern of 2^23; vmflt.vf
;     then sets the mask for lanes whose magnitude is below that threshold.
;   - the masked vfcvt.x.f.v is bracketed by fsrmi a1, 4 / fsrm a1, so it runs
;     with frm temporarily set to 4 and the previous value restored afterwards.
;     NOTE(review) - frm value 4 should be RMM (round to nearest, ties to max
;     magnitude) per the RISC-V F extension; confirm against the spec.
;   - the final vfsgnj.vv runs mask-undisturbed (mu), so lanes outside the mask
;     keep the originally loaded value in v8.
6145 define void @round_v4f32(ptr %x) {
6146 ; ZVFH-LABEL: round_v4f32:
6148 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6149 ; ZVFH-NEXT: vle32.v v8, (a0)
6150 ; ZVFH-NEXT: vfabs.v v9, v8
6151 ; ZVFH-NEXT: lui a1, 307200
6152 ; ZVFH-NEXT: fmv.w.x fa5, a1
6153 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
6154 ; ZVFH-NEXT: fsrmi a1, 4
6155 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
6156 ; ZVFH-NEXT: fsrm a1
6157 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
6158 ; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6159 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6160 ; ZVFH-NEXT: vse32.v v8, (a0)
6163 ; ZVFHMIN-LABEL: round_v4f32:
6165 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
6166 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
6167 ; ZVFHMIN-NEXT: vfabs.v v9, v8
6168 ; ZVFHMIN-NEXT: lui a1, 307200
6169 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
6170 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
6171 ; ZVFHMIN-NEXT: fsrmi a1, 4
6172 ; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
6173 ; ZVFHMIN-NEXT: fsrm a1
6174 ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
6175 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
6176 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6177 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
6179 %a = load <4 x float>, ptr %x
6180 %b = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
6181 store <4 x float> %b, ptr %x
6184 declare <4 x float> @llvm.round.v4f32(<4 x float>)
; round_v2f64: loads <2 x double> from %x, applies llvm.round and stores the
; result back to %x. A single expectation under the common CHECK prefix covers
; every configuration.
; The magnitude threshold is loaded with fld from constant-pool entry
; .LCPI130_0 (its value is not visible in the expected assembly). Lanes whose
; absolute value is below it are converted to integer and back under mask, with
; the vfcvt.x.f.v bracketed by fsrmi a1, 4 / fsrm a1; the final mask-undisturbed
; vfsgnj.vv leaves the remaining lanes of v8 as loaded.
6186 define void @round_v2f64(ptr %x) {
6187 ; CHECK-LABEL: round_v2f64:
6189 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
6190 ; CHECK-NEXT: vle64.v v8, (a0)
6191 ; CHECK-NEXT: lui a1, %hi(.LCPI130_0)
6192 ; CHECK-NEXT: fld fa5, %lo(.LCPI130_0)(a1)
6193 ; CHECK-NEXT: vfabs.v v9, v8
6194 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
6195 ; CHECK-NEXT: fsrmi a1, 4
6196 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
6197 ; CHECK-NEXT: fsrm a1
6198 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
6199 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
6200 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6201 ; CHECK-NEXT: vse64.v v8, (a0)
6203 %a = load <2 x double>, ptr %x
6204 %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
6205 store <2 x double> %b, ptr %x
6208 declare <2 x double> @llvm.round.v2f64(<2 x double>)
; rint_v8f16: loads <8 x half> from %x, applies llvm.rint and stores the result
; back to %x.
; Under the ZVFH prefix the work is done directly at e16: the threshold comes
; from constant-pool entry .LCPI131_0 via flh, and the masked
; vfcvt.x.f.v / vfcvt.f.x.v pair is followed by a mask-undisturbed vfsgnj.vv.
; Under the ZVFHMIN prefix the input is first widened to f32 (vfwcvt.f.f.v),
; the same sequence runs at e32 with the threshold built by lui a1, 307200
; (0x4B000000, the f32 bit pattern of 2^23), and the result is narrowed back
; with vfncvt.f.f.w before the store.
; Neither expectation contains an fsrmi/fsrm pair: no rounding-mode CSR write
; is expected around the conversions.
6210 define void @rint_v8f16(ptr %x) {
6211 ; ZVFH-LABEL: rint_v8f16:
6213 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6214 ; ZVFH-NEXT: vle16.v v8, (a0)
6215 ; ZVFH-NEXT: lui a1, %hi(.LCPI131_0)
6216 ; ZVFH-NEXT: flh fa5, %lo(.LCPI131_0)(a1)
6217 ; ZVFH-NEXT: vfabs.v v9, v8
6218 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
6219 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
6220 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
6221 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
6222 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6223 ; ZVFH-NEXT: vse16.v v8, (a0)
6226 ; ZVFHMIN-LABEL: rint_v8f16:
6228 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6229 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
6230 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
6231 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6232 ; ZVFHMIN-NEXT: vfabs.v v8, v9
6233 ; ZVFHMIN-NEXT: lui a1, 307200
6234 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
6235 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
6236 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
6237 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
6238 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6239 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
6240 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6241 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
6242 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
6244 %a = load <8 x half>, ptr %x
6245 %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
6246 store <8 x half> %b, ptr %x
6249 declare <8 x half> @llvm.rint.v8f16(<8 x half>)
; rint_v4f32: loads <4 x float> from %x, applies llvm.rint and stores the
; result back to %x.
; The two check groups expect the same sequence and differ only in LMUL (m1
; under the ZVFH prefix, mf2 under the ZVFHMIN prefix). lui a1, 307200 builds
; 0x4B000000 (the f32 bit pattern of 2^23) as the magnitude threshold for the
; vmflt.vf mask; the masked vfcvt.x.f.v / vfcvt.f.x.v pair has no fsrmi/fsrm
; around it, and the mask-undisturbed vfsgnj.vv leaves lanes outside the mask
; as loaded.
6251 define void @rint_v4f32(ptr %x) {
6252 ; ZVFH-LABEL: rint_v4f32:
6254 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6255 ; ZVFH-NEXT: vle32.v v8, (a0)
6256 ; ZVFH-NEXT: vfabs.v v9, v8
6257 ; ZVFH-NEXT: lui a1, 307200
6258 ; ZVFH-NEXT: fmv.w.x fa5, a1
6259 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
6260 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
6261 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
6262 ; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6263 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6264 ; ZVFH-NEXT: vse32.v v8, (a0)
6267 ; ZVFHMIN-LABEL: rint_v4f32:
6269 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
6270 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
6271 ; ZVFHMIN-NEXT: vfabs.v v9, v8
6272 ; ZVFHMIN-NEXT: lui a1, 307200
6273 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
6274 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
6275 ; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
6276 ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
6277 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
6278 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6279 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
6281 %a = load <4 x float>, ptr %x
6282 %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
6283 store <4 x float> %b, ptr %x
6286 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
; rint_v2f64: loads <2 x double> from %x, applies llvm.rint and stores the
; result back to %x. A single expectation under the common CHECK prefix covers
; every configuration.
; The magnitude threshold is loaded with fld from constant-pool entry
; .LCPI133_0 (its value is not visible in the expected assembly). The masked
; vfcvt.x.f.v / vfcvt.f.x.v pair has no fsrmi/fsrm around it, and the
; mask-undisturbed vfsgnj.vv leaves lanes outside the mask as loaded.
6288 define void @rint_v2f64(ptr %x) {
6289 ; CHECK-LABEL: rint_v2f64:
6291 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
6292 ; CHECK-NEXT: vle64.v v8, (a0)
6293 ; CHECK-NEXT: lui a1, %hi(.LCPI133_0)
6294 ; CHECK-NEXT: fld fa5, %lo(.LCPI133_0)(a1)
6295 ; CHECK-NEXT: vfabs.v v9, v8
6296 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
6297 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
6298 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
6299 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
6300 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6301 ; CHECK-NEXT: vse64.v v8, (a0)
6303 %a = load <2 x double>, ptr %x
6304 %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
6305 store <2 x double> %b, ptr %x
6308 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
; nearbyint_v8f16: loads <8 x half> from %x, applies llvm.nearbyint and stores
; the result back to %x.
; Compared with the rint expectations, both check groups additionally wrap the
; masked vfcvt.x.f.v / vfcvt.f.x.v pair in frflags a1 ... fsflags a1, i.e. the
; FP exception flags are read before the conversions and written back after.
; Under the ZVFH prefix the sequence runs at e16 with the threshold loaded by
; flh from constant-pool entry .LCPI134_0. Under the ZVFHMIN prefix the input
; is widened to f32 first, the threshold is built by lui a1, 307200
; (0x4B000000, the f32 bit pattern of 2^23), and the result is narrowed with
; vfncvt.f.f.w before the store.
6310 define void @nearbyint_v8f16(ptr %x) {
6311 ; ZVFH-LABEL: nearbyint_v8f16:
6313 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6314 ; ZVFH-NEXT: vle16.v v8, (a0)
6315 ; ZVFH-NEXT: lui a1, %hi(.LCPI134_0)
6316 ; ZVFH-NEXT: flh fa5, %lo(.LCPI134_0)(a1)
6317 ; ZVFH-NEXT: vfabs.v v9, v8
6318 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
6319 ; ZVFH-NEXT: frflags a1
6320 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
6321 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
6322 ; ZVFH-NEXT: fsflags a1
6323 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
6324 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6325 ; ZVFH-NEXT: vse16.v v8, (a0)
6328 ; ZVFHMIN-LABEL: nearbyint_v8f16:
6330 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6331 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
6332 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
6333 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6334 ; ZVFHMIN-NEXT: vfabs.v v8, v9
6335 ; ZVFHMIN-NEXT: lui a1, 307200
6336 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
6337 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
6338 ; ZVFHMIN-NEXT: frflags a1
6339 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
6340 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
6341 ; ZVFHMIN-NEXT: fsflags a1
6342 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6343 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
6344 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6345 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
6346 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
6348 %a = load <8 x half>, ptr %x
6349 %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
6350 store <8 x half> %b, ptr %x
6353 declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
; nearbyint_v4f32: loads <4 x float> from %x, applies llvm.nearbyint and stores
; the result back to %x.
; The two check groups expect the same sequence and differ only in LMUL (m1
; under the ZVFH prefix, mf2 under the ZVFHMIN prefix). lui a1, 307200 builds
; 0x4B000000 (the f32 bit pattern of 2^23) as the magnitude threshold for the
; vmflt.vf mask. The masked vfcvt.x.f.v / vfcvt.f.x.v pair is wrapped in
; frflags a1 ... fsflags a1, so the FP exception flags are read before the
; conversions and written back after them.
6355 define void @nearbyint_v4f32(ptr %x) {
6356 ; ZVFH-LABEL: nearbyint_v4f32:
6358 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6359 ; ZVFH-NEXT: vle32.v v8, (a0)
6360 ; ZVFH-NEXT: vfabs.v v9, v8
6361 ; ZVFH-NEXT: lui a1, 307200
6362 ; ZVFH-NEXT: fmv.w.x fa5, a1
6363 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
6364 ; ZVFH-NEXT: frflags a1
6365 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
6366 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
6367 ; ZVFH-NEXT: fsflags a1
6368 ; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
6369 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6370 ; ZVFH-NEXT: vse32.v v8, (a0)
6373 ; ZVFHMIN-LABEL: nearbyint_v4f32:
6375 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
6376 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
6377 ; ZVFHMIN-NEXT: vfabs.v v9, v8
6378 ; ZVFHMIN-NEXT: lui a1, 307200
6379 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
6380 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
6381 ; ZVFHMIN-NEXT: frflags a1
6382 ; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
6383 ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
6384 ; ZVFHMIN-NEXT: fsflags a1
6385 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
6386 ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6387 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
6389 %a = load <4 x float>, ptr %x
6390 %b = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
6391 store <4 x float> %b, ptr %x
6394 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
; nearbyint_v2f64: loads <2 x double> from %x, applies llvm.nearbyint and
; stores the result back to %x. A single expectation under the common CHECK
; prefix covers every configuration.
; The magnitude threshold is loaded with fld from constant-pool entry
; .LCPI136_0 (its value is not visible in the expected assembly). The masked
; vfcvt.x.f.v / vfcvt.f.x.v pair is wrapped in frflags a1 ... fsflags a1, so the
; FP exception flags are read before the conversions and written back after.
6396 define void @nearbyint_v2f64(ptr %x) {
6397 ; CHECK-LABEL: nearbyint_v2f64:
6399 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
6400 ; CHECK-NEXT: vle64.v v8, (a0)
6401 ; CHECK-NEXT: lui a1, %hi(.LCPI136_0)
6402 ; CHECK-NEXT: fld fa5, %lo(.LCPI136_0)(a1)
6403 ; CHECK-NEXT: vfabs.v v9, v8
6404 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
6405 ; CHECK-NEXT: frflags a1
6406 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
6407 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
6408 ; CHECK-NEXT: fsflags a1
6409 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
6410 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
6411 ; CHECK-NEXT: vse64.v v8, (a0)
6413 %a = load <2 x double>, ptr %x
6414 %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
6415 store <2 x double> %b, ptr %x
6418 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
; fmuladd_v8f16: loads <8 x half> vectors from %x, %y and %z, computes
; llvm.fmuladd(%x, %y, %z) and stores the result to %x.
; Under the ZVFH prefix a single vfmacc.vv at e16 is expected.
; Under the ZVFHMIN prefix the expected code is unfused and goes through f32:
; both multiplicands are widened (vfwcvt.f.f.v), multiplied with vfmul.vv and
; narrowed back to f16 (vfncvt.f.f.w); that product and the addend are then
; widened again, added with vfadd.vv and narrowed once more before the store.
6420 define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
6421 ; ZVFH-LABEL: fmuladd_v8f16:
6423 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6424 ; ZVFH-NEXT: vle16.v v8, (a0)
6425 ; ZVFH-NEXT: vle16.v v9, (a1)
6426 ; ZVFH-NEXT: vle16.v v10, (a2)
6427 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
6428 ; ZVFH-NEXT: vse16.v v10, (a0)
6431 ; ZVFHMIN-LABEL: fmuladd_v8f16:
6433 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6434 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
6435 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
6436 ; ZVFHMIN-NEXT: vle16.v v10, (a2)
6437 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
6438 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
6439 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6440 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11
6441 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6442 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
6443 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
6444 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
6445 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6446 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
6447 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6448 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
6449 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
6451 %a = load <8 x half>, ptr %x
6452 %b = load <8 x half>, ptr %y
6453 %c = load <8 x half>, ptr %z
6454 %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
6455 store <8 x half> %d, ptr %x
6458 declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
; fmuladd_v6f16: loads <6 x half> vectors from %x, %y and %z, computes
; llvm.fmuladd(%x, %y, %z) and stores the result to %x. The element count is
; not a power of two.
; Under the ZVFH prefix the loads and the store use VL=6, while the vfmacc.vv
; itself is expected to run with VL=8.
; The four ZVFHMIN-derived prefixes (LMULMAX2/LMULMAX1 crossed with RV32/RV64)
; all expect the unfused f32 path: widen, vfmul.vv, narrow, widen, vfadd.vv,
; narrow. They differ only in how the 12-byte result is stored:
;   - RV32: the upper 4 bytes are extracted with vslidedown.vi by 2 at e32 and
;     stored with vse32.v at offset 8, then the low 4 halves are stored with
;     vse16.v at VL=4.
;   - RV64: the low 8 bytes are stored with a single vse64.v, then the upper
;     4 bytes are extracted with vslidedown.vi and stored with vse32.v at
;     offset 8.
; For a given XLEN the LMULMAX2 and LMULMAX1 expectations below are identical.
6460 define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
6461 ; ZVFH-LABEL: fmuladd_v6f16:
6463 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
6464 ; ZVFH-NEXT: vle16.v v8, (a0)
6465 ; ZVFH-NEXT: vle16.v v9, (a1)
6466 ; ZVFH-NEXT: vle16.v v10, (a2)
6467 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6468 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
6469 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
6470 ; ZVFH-NEXT: vse16.v v10, (a0)
6473 ; ZVFHMINLMULMAX2-RV32-LABEL: fmuladd_v6f16:
6474 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
6475 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6476 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
6477 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
6478 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a2)
6479 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8
6480 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
6481 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6482 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v11
6483 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6484 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
6485 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
6486 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10
6487 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6488 ; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v9
6489 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6490 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
6491 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6492 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
6493 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
6494 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
6495 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
6496 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
6497 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
6499 ; ZVFHMINLMULMAX2-RV64-LABEL: fmuladd_v6f16:
6500 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
6501 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6502 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
6503 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
6504 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a2)
6505 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8
6506 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
6507 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6508 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v11
6509 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6510 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
6511 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
6512 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10
6513 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6514 ; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v9
6515 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6516 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
6517 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6518 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
6519 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
6520 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
6521 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
6522 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
6524 ; ZVFHMINLMULMAX1-RV32-LABEL: fmuladd_v6f16:
6525 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
6526 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6527 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
6528 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
6529 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a2)
6530 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8
6531 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
6532 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6533 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11
6534 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6535 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
6536 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
6537 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10
6538 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6539 ; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9
6540 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6541 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
6542 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6543 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
6544 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
6545 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
6546 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
6547 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
6548 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
6550 ; ZVFHMINLMULMAX1-RV64-LABEL: fmuladd_v6f16:
6551 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
6552 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6553 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
6554 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
6555 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a2)
6556 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8
6557 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
6558 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6559 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11
6560 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6561 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
6562 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
6563 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10
6564 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6565 ; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9
6566 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6567 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
6568 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6569 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
6570 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
6571 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
6572 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
6573 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
6574 %a = load <6 x half>, ptr %x
6575 %b = load <6 x half>, ptr %y
6576 %c = load <6 x half>, ptr %z
6577 %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
6578 store <6 x half> %d, ptr %x
6581 declare <6 x half> @llvm.fmuladd.v6f16(<6 x half>, <6 x half>, <6 x half>)
; fmuladd_v4f32: loads <4 x float> vectors from %x, %y and %z, computes
; llvm.fmuladd(%x, %y, %z) and stores the result to %x.
; Both check groups expect a single vfmacc.vv accumulating into the register
; loaded from %z; they differ only in LMUL (m1 under the ZVFH prefix, mf2
; under the ZVFHMIN prefix).
6583 define void @fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
6584 ; ZVFH-LABEL: fmuladd_v4f32:
6586 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6587 ; ZVFH-NEXT: vle32.v v8, (a0)
6588 ; ZVFH-NEXT: vle32.v v9, (a1)
6589 ; ZVFH-NEXT: vle32.v v10, (a2)
6590 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
6591 ; ZVFH-NEXT: vse32.v v10, (a0)
6594 ; ZVFHMIN-LABEL: fmuladd_v4f32:
6596 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
6597 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
6598 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
6599 ; ZVFHMIN-NEXT: vle32.v v10, (a2)
6600 ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
6601 ; ZVFHMIN-NEXT: vse32.v v10, (a0)
6603 %a = load <4 x float>, ptr %x
6604 %b = load <4 x float>, ptr %y
6605 %c = load <4 x float>, ptr %z
6606 %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
6607 store <4 x float> %d, ptr %x
6610 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
; fmuladd_v2f64: loads <2 x double> vectors from %x, %y and %z, computes
; llvm.fmuladd(%x, %y, %z) and stores the result to %x.
; A single expectation under the common CHECK prefix covers every
; configuration: one vfmacc.vv at e64/m1 accumulating into the register loaded
; from %z.
6612 define void @fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
6613 ; CHECK-LABEL: fmuladd_v2f64:
6615 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
6616 ; CHECK-NEXT: vle64.v v8, (a0)
6617 ; CHECK-NEXT: vle64.v v9, (a1)
6618 ; CHECK-NEXT: vle64.v v10, (a2)
6619 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
6620 ; CHECK-NEXT: vse64.v v10, (a0)
6622 %a = load <2 x double>, ptr %x
6623 %b = load <2 x double>, ptr %y
6624 %c = load <2 x double>, ptr %z
6625 %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
6626 store <2 x double> %d, ptr %x
6629 declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
; fmsub_fmuladd_v8f16: loads <8 x half> vectors from %x, %y and %z, negates the
; addend (%neg = fneg %c), computes llvm.fmuladd(%a, %b, %neg) and stores the
; result to %x.
; Under the ZVFH prefix the fneg is expected to fold into a single vfmsac.vv.
; Under the ZVFHMIN prefix the expected code is unfused and goes through f32:
; widen, vfmul.vv, narrow to f16, widen again, then vfsub.vv (the negated
; addend becomes a subtraction) and a final narrow before the store.
6631 define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
6632 ; ZVFH-LABEL: fmsub_fmuladd_v8f16:
6634 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6635 ; ZVFH-NEXT: vle16.v v8, (a0)
6636 ; ZVFH-NEXT: vle16.v v9, (a1)
6637 ; ZVFH-NEXT: vle16.v v10, (a2)
6638 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
6639 ; ZVFH-NEXT: vse16.v v10, (a0)
6642 ; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
6644 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6645 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
6646 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
6647 ; ZVFHMIN-NEXT: vle16.v v10, (a2)
6648 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
6649 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
6650 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6651 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11
6652 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6653 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
6654 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
6655 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
6656 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6657 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
6658 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6659 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
6660 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
6662 %a = load <8 x half>, ptr %x
6663 %b = load <8 x half>, ptr %y
6664 %c = load <8 x half>, ptr %z
6665 %neg = fneg <8 x half> %c
6666 %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
6667 store <8 x half> %d, ptr %x
; fmsub_fmuladd_v6f16: loads <6 x half> vectors from %x, %y and %z, negates the
; addend (%neg = fneg %c), computes llvm.fmuladd(%a, %b, %neg) and stores the
; result to %x. The element count is not a power of two.
; Under the ZVFH prefix the loads and the store use VL=6, while the single
; vfmsac.vv is expected to run with VL=8.
; The four ZVFHMIN-derived prefixes (LMULMAX2/LMULMAX1 crossed with RV32/RV64)
; all expect the unfused f32 path: widen, vfmul.vv, narrow, widen, vfsub.vv,
; narrow. They differ only in how the 12-byte result is stored:
;   - RV32: the upper 4 bytes are extracted with vslidedown.vi by 2 at e32 and
;     stored with vse32.v at offset 8, then the low 4 halves are stored with
;     vse16.v at VL=4.
;   - RV64: the low 8 bytes are stored with a single vse64.v, then the upper
;     4 bytes are extracted with vslidedown.vi and stored with vse32.v at
;     offset 8.
; For a given XLEN the LMULMAX2 and LMULMAX1 expectations below are identical.
6671 define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
6672 ; ZVFH-LABEL: fmsub_fmuladd_v6f16:
6674 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
6675 ; ZVFH-NEXT: vle16.v v8, (a0)
6676 ; ZVFH-NEXT: vle16.v v9, (a1)
6677 ; ZVFH-NEXT: vle16.v v10, (a2)
6678 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6679 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
6680 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
6681 ; ZVFH-NEXT: vse16.v v10, (a0)
6684 ; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_fmuladd_v6f16:
6685 ; ZVFHMINLMULMAX2-RV32: # %bb.0:
6686 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6687 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
6688 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
6689 ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a2)
6690 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8
6691 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
6692 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6693 ; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v11
6694 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6695 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
6696 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
6697 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10
6698 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6699 ; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v9
6700 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6701 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
6702 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6703 ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
6704 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
6705 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
6706 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
6707 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
6708 ; ZVFHMINLMULMAX2-RV32-NEXT: ret
6710 ; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_fmuladd_v6f16:
6711 ; ZVFHMINLMULMAX2-RV64: # %bb.0:
6712 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6713 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
6714 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
6715 ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a2)
6716 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8
6717 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
6718 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6719 ; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v11
6720 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6721 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
6722 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
6723 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10
6724 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6725 ; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v9
6726 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6727 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
6728 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6729 ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
6730 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
6731 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
6732 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
6733 ; ZVFHMINLMULMAX2-RV64-NEXT: ret
6735 ; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16:
6736 ; ZVFHMINLMULMAX1-RV32: # %bb.0:
6737 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6738 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
6739 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
6740 ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a2)
6741 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8
6742 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
6743 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6744 ; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11
6745 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6746 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
6747 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
6748 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10
6749 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6750 ; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9
6751 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6752 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
6753 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6754 ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
6755 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
6756 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
6757 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
6758 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
6759 ; ZVFHMINLMULMAX1-RV32-NEXT: ret
6761 ; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16:
6762 ; ZVFHMINLMULMAX1-RV64: # %bb.0:
6763 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
6764 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
6765 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
6766 ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a2)
6767 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8
6768 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
6769 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6770 ; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11
6771 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6772 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
6773 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
6774 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10
6775 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6776 ; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9
6777 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
6778 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
6779 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6780 ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
6781 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
6782 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
6783 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
6784 ; ZVFHMINLMULMAX1-RV64-NEXT: ret
6785 %a = load <6 x half>, ptr %x
6786 %b = load <6 x half>, ptr %y
6787 %c = load <6 x half>, ptr %z
6788 %neg = fneg <6 x half> %c
6789 %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
6790 store <6 x half> %d, ptr %x
; fnmsub_fmuladd_v4f32: loads <4 x float> vectors from %x, %y and %z, negates
; the first multiplicand (%neg = fneg %a), computes llvm.fmuladd(%neg, %b, %c)
; and stores the result to %x.
; Both check groups expect the fneg to fold into a single vfnmsac.vv
; accumulating into the register loaded from %z; they differ only in LMUL (m1
; under the ZVFH prefix, mf2 under the ZVFHMIN prefix).
6794 define void @fnmsub_fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
6795 ; ZVFH-LABEL: fnmsub_fmuladd_v4f32:
6797 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6798 ; ZVFH-NEXT: vle32.v v8, (a0)
6799 ; ZVFH-NEXT: vle32.v v9, (a1)
6800 ; ZVFH-NEXT: vle32.v v10, (a2)
6801 ; ZVFH-NEXT: vfnmsac.vv v10, v8, v9
6802 ; ZVFH-NEXT: vse32.v v10, (a0)
6805 ; ZVFHMIN-LABEL: fnmsub_fmuladd_v4f32:
6807 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
6808 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
6809 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
6810 ; ZVFHMIN-NEXT: vle32.v v10, (a2)
6811 ; ZVFHMIN-NEXT: vfnmsac.vv v10, v8, v9
6812 ; ZVFHMIN-NEXT: vse32.v v10, (a0)
6814 %a = load <4 x float>, ptr %x
6815 %b = load <4 x float>, ptr %y
6816 %c = load <4 x float>, ptr %z
6817 %neg = fneg <4 x float> %a
6818 %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
6819 store <4 x float> %d, ptr %x
; fnmadd_fmuladd_v2f64: loads <2 x double> vectors from %x, %y and %z, negates
; both the second multiplicand (%neg = fneg %b) and the addend
; (%neg2 = fneg %c), computes llvm.fmuladd(%a, %neg, %neg2) and stores the
; result to %x.
; A single expectation under the common CHECK prefix covers every
; configuration: both negations are expected to fold into one vfnmacc.vv at
; e64/m1 accumulating into the register loaded from %z.
6823 define void @fnmadd_fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
6824 ; CHECK-LABEL: fnmadd_fmuladd_v2f64:
6826 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
6827 ; CHECK-NEXT: vle64.v v8, (a0)
6828 ; CHECK-NEXT: vle64.v v9, (a1)
6829 ; CHECK-NEXT: vle64.v v10, (a2)
6830 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
6831 ; CHECK-NEXT: vse64.v v10, (a0)
6833 %a = load <2 x double>, ptr %x
6834 %b = load <2 x double>, ptr %y
6835 %c = load <2 x double>, ptr %z
6836 %neg = fneg <2 x double> %b
6837 %neg2 = fneg <2 x double> %c
6838 %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
6839 store <2 x double> %d, ptr %x