1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3 ; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4 ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5 ; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
; Scalar double llvm.fma. The checks use the shared CHECK prefix, so every RUN
; configuration is expected to emit the same single fmadd on d registers.
7 define double @fma_f64(double %a, double %b, double %c) {
8 ; CHECK-LABEL: fma_f64:
9 ; CHECK: // %bb.0: // %entry
10 ; CHECK-NEXT: fmadd d0, d0, d1, d2
13 %d = call double @llvm.fma.f64(double %a, double %b, double %c)
; Scalar float llvm.fma. The checks use the shared CHECK prefix, so every RUN
; configuration is expected to emit the same single fmadd on s registers.
17 define float @fma_f32(float %a, float %b, float %c) {
18 ; CHECK-LABEL: fma_f32:
19 ; CHECK: // %bb.0: // %entry
20 ; CHECK-NEXT: fmadd s0, s0, s1, s2
23 %d = call float @llvm.fma.f32(float %a, float %b, float %c)
; Scalar half llvm.fma. With +fullfp16 both selectors are expected to emit one
; half-precision fmadd. Without it, the three operands are converted to single
; precision (fcvt), combined with a single-precision fmadd, and the result is
; converted back to half. The SelectionDAG and GlobalISel expectations differ
; only in the order of the three operand fcvt instructions.
27 define half @fma_f16(half %a, half %b, half %c) {
28 ; CHECK-SD-NOFP16-LABEL: fma_f16:
29 ; CHECK-SD-NOFP16: // %bb.0: // %entry
30 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
31 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
32 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
33 ; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
34 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
35 ; CHECK-SD-NOFP16-NEXT: ret
37 ; CHECK-SD-FP16-LABEL: fma_f16:
38 ; CHECK-SD-FP16: // %bb.0: // %entry
39 ; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
40 ; CHECK-SD-FP16-NEXT: ret
42 ; CHECK-GI-NOFP16-LABEL: fma_f16:
43 ; CHECK-GI-NOFP16: // %bb.0: // %entry
44 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
45 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
46 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
47 ; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s1, s2
48 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
49 ; CHECK-GI-NOFP16-NEXT: ret
51 ; CHECK-GI-FP16-LABEL: fma_f16:
52 ; CHECK-GI-FP16: // %bb.0: // %entry
53 ; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
54 ; CHECK-GI-FP16-NEXT: ret
56 %d = call half @llvm.fma.f16(half %a, half %b, half %c)
; <2 x double> llvm.fma. All RUN configurations share the expectation: one 2d
; fmla that accumulates into v2, followed by a move of v2 into v0.
60 define <2 x double> @fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
61 ; CHECK-LABEL: fma_v2f64:
62 ; CHECK: // %bb.0: // %entry
63 ; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d
64 ; CHECK-NEXT: mov v0.16b, v2.16b
67 %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; <3 x double> llvm.fma. The expectations are split per selector (CHECK-SD vs
; CHECK-GI) but not per fullfp16 setting. Both pack pairs of d registers into
; 2d vectors with lane moves and load one d operand from [sp]. SelectionDAG is
; expected to use two 2d fmla instructions; GlobalISel is expected to use one
; 2d fmla plus a scalar fmadd on d2/d5.
71 define <3 x double> @fma_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
72 ; CHECK-SD-LABEL: fma_v3f64:
73 ; CHECK-SD: // %bb.0: // %entry
74 ; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
75 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
76 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
77 ; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
78 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
79 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
80 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
81 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
82 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
83 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
84 ; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
85 ; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
86 ; CHECK-SD-NEXT: ldr d3, [sp]
87 ; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
88 ; CHECK-SD-NEXT: fmov d0, d6
89 ; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
90 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
91 ; CHECK-SD-NEXT: fmov d2, d3
94 ; CHECK-GI-LABEL: fma_v3f64:
95 ; CHECK-GI: // %bb.0: // %entry
96 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
97 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
98 ; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
99 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
100 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
101 ; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
102 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
103 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
104 ; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
105 ; CHECK-GI-NEXT: fmla v6.2d, v3.2d, v0.2d
106 ; CHECK-GI-NEXT: ldr d0, [sp]
107 ; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
108 ; CHECK-GI-NEXT: mov d1, v6.d[1]
109 ; CHECK-GI-NEXT: fmov d0, d6
112 %d = call <3 x double> @llvm.fma.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c)
; <4 x double> llvm.fma. All RUN configurations share the expectation: two 2d
; fmla instructions accumulating into v4 and v5, then moves into v0 and v1.
116 define <4 x double> @fma_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
117 ; CHECK-LABEL: fma_v4f64:
118 ; CHECK: // %bb.0: // %entry
119 ; CHECK-NEXT: fmla v4.2d, v2.2d, v0.2d
120 ; CHECK-NEXT: fmla v5.2d, v3.2d, v1.2d
121 ; CHECK-NEXT: mov v0.16b, v4.16b
122 ; CHECK-NEXT: mov v1.16b, v5.16b
125 %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
; <2 x float> llvm.fma. All RUN configurations share the expectation: one 2s
; fmla that accumulates into v2, followed by an fmov of d2 into d0.
129 define <2 x float> @fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
130 ; CHECK-LABEL: fma_v2f32:
131 ; CHECK: // %bb.0: // %entry
132 ; CHECK-NEXT: fmla v2.2s, v1.2s, v0.2s
133 ; CHECK-NEXT: fmov d0, d2
136 %d = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
; <3 x float> llvm.fma. All RUN configurations share the expectation: the
; three-element operation is emitted as one four-lane (4s) fmla accumulating
; into v2, followed by a move of v2 into v0.
140 define <3 x float> @fma_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
141 ; CHECK-LABEL: fma_v3f32:
142 ; CHECK: // %bb.0: // %entry
143 ; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
144 ; CHECK-NEXT: mov v0.16b, v2.16b
147 %d = call <3 x float> @llvm.fma.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
; <4 x float> llvm.fma. All RUN configurations share the expectation: one 4s
; fmla that accumulates into v2, followed by a move of v2 into v0.
151 define <4 x float> @fma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
152 ; CHECK-LABEL: fma_v4f32:
153 ; CHECK: // %bb.0: // %entry
154 ; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
155 ; CHECK-NEXT: mov v0.16b, v2.16b
158 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; <8 x float> llvm.fma. All RUN configurations share the expectation: two 4s
; fmla instructions accumulating into v4 and v5, then moves into v0 and v1.
162 define <8 x float> @fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
163 ; CHECK-LABEL: fma_v8f32:
164 ; CHECK: // %bb.0: // %entry
165 ; CHECK-NEXT: fmla v4.4s, v2.4s, v0.4s
166 ; CHECK-NEXT: fmla v5.4s, v3.4s, v1.4s
167 ; CHECK-NEXT: mov v0.16b, v4.16b
168 ; CHECK-NEXT: mov v1.16b, v5.16b
171 %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
; <7 x half> llvm.fma. With +fullfp16 both selectors are expected to emit one
; eight-lane (8h) fmla plus a register move. Without fullfp16:
;  - SelectionDAG works lane by lane: h-lane extracts (mov), fcvt to single,
;    scalar single-precision fmadd, fcvt back to half, and lane inserts into
;    v3, which is finally moved to v0.
;  - GlobalISel widens with fcvtl, uses two 4s fmla instructions, narrows with
;    fcvtn, and shuffles h lanes with mov before and after.
175 define <7 x half> @fma_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
176 ; CHECK-SD-NOFP16-LABEL: fma_v7f16:
177 ; CHECK-SD-NOFP16: // %bb.0: // %entry
178 ; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
179 ; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
180 ; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
181 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
182 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
183 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
184 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
185 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
186 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
187 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
188 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
189 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
190 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
191 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
192 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
193 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
194 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
195 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3]
196 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3
197 ; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3]
198 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
199 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7
200 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
201 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h18
202 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h19
203 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
204 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
205 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
206 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
207 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
208 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
209 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
210 ; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0]
211 ; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5]
212 ; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5
213 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
214 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5]
215 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
216 ; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0]
217 ; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6]
218 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
219 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
220 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
221 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
222 ; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
223 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
224 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
225 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
226 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
227 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
228 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4
229 ; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0]
230 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
231 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h18
232 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
233 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
234 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
235 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
236 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
237 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
238 ; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5
239 ; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0]
240 ; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
241 ; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
242 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
243 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
244 ; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
245 ; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
246 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
247 ; CHECK-SD-NOFP16-NEXT: ret
249 ; CHECK-SD-FP16-LABEL: fma_v7f16:
250 ; CHECK-SD-FP16: // %bb.0: // %entry
251 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
252 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
253 ; CHECK-SD-FP16-NEXT: ret
255 ; CHECK-GI-NOFP16-LABEL: fma_v7f16:
256 ; CHECK-GI-NOFP16: // %bb.0: // %entry
257 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[4]
258 ; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[5]
259 ; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[4]
260 ; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[5]
261 ; CHECK-GI-NOFP16-NEXT: mov h5, v2.h[4]
262 ; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[5]
263 ; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v0.4h
264 ; CHECK-GI-NOFP16-NEXT: fcvtl v18.4s, v1.4h
265 ; CHECK-GI-NOFP16-NEXT: fcvtl v19.4s, v2.4h
266 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
267 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
268 ; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[6]
269 ; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v6.h[0]
270 ; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v7.h[0]
271 ; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v16.h[0]
272 ; CHECK-GI-NOFP16-NEXT: fmla v19.4s, v18.4s, v17.4s
273 ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v0.h[0]
274 ; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v1.h[0]
275 ; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[0]
276 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v19.4s
277 ; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v0.h[0]
278 ; CHECK-GI-NOFP16-NEXT: mov v4.h[3], v0.h[0]
279 ; CHECK-GI-NOFP16-NEXT: mov v5.h[3], v0.h[0]
280 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
281 ; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[3]
282 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
283 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v4.4h
284 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v5.4h
285 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[2]
286 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
287 ; CHECK-GI-NOFP16-NEXT: fmla v4.4s, v3.4s, v2.4s
288 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v5.h[0]
289 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v4.4s
290 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v6.h[0]
291 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
292 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
293 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
294 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
295 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
296 ; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
297 ; CHECK-GI-NOFP16-NEXT: ret
299 ; CHECK-GI-FP16-LABEL: fma_v7f16:
300 ; CHECK-GI-FP16: // %bb.0: // %entry
301 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
302 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
303 ; CHECK-GI-FP16-NEXT: ret
305 %d = call <7 x half> @llvm.fma.v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c)
; <4 x half> llvm.fma. With +fullfp16 both selectors are expected to emit one
; 4h fmla plus an fmov of d2 into d0. Without fullfp16:
;  - SelectionDAG works lane by lane: h-lane extracts, fcvt to single, scalar
;    single-precision fmadd, fcvt back to half, and lane inserts into v0.
;  - GlobalISel widens all three operands with fcvtl, uses one 4s fmla, and
;    narrows the result with fcvtn.
309 define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
310 ; CHECK-SD-NOFP16-LABEL: fma_v4f16:
311 ; CHECK-SD-NOFP16: // %bb.0: // %entry
312 ; CHECK-SD-NOFP16-NEXT: // kill: def $d2 killed $d2 def $q2
313 ; CHECK-SD-NOFP16-NEXT: // kill: def $d1 killed $d1 def $q1
314 ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
315 ; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
316 ; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
317 ; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
318 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
319 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
320 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
321 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
322 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
323 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
324 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[3]
325 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3]
326 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
327 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
328 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
329 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
330 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[3]
331 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
332 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
333 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
334 ; CHECK-SD-NOFP16-NEXT: fmadd s3, s5, s4, s3
335 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h17
336 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h18
337 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s6
338 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s7, s5, s4
339 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
340 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h16
341 ; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v3.h[0]
342 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
343 ; CHECK-SD-NOFP16-NEXT: fmadd s1, s5, s1, s2
344 ; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v3.h[0]
345 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
346 ; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
347 ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
348 ; CHECK-SD-NOFP16-NEXT: ret
350 ; CHECK-SD-FP16-LABEL: fma_v4f16:
351 ; CHECK-SD-FP16: // %bb.0: // %entry
352 ; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
353 ; CHECK-SD-FP16-NEXT: fmov d0, d2
354 ; CHECK-SD-FP16-NEXT: ret
356 ; CHECK-GI-NOFP16-LABEL: fma_v4f16:
357 ; CHECK-GI-NOFP16: // %bb.0: // %entry
358 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
359 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
360 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
361 ; CHECK-GI-NOFP16-NEXT: fmla v2.4s, v1.4s, v0.4s
362 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
363 ; CHECK-GI-NOFP16-NEXT: ret
365 ; CHECK-GI-FP16-LABEL: fma_v4f16:
366 ; CHECK-GI-FP16: // %bb.0: // %entry
367 ; CHECK-GI-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
368 ; CHECK-GI-FP16-NEXT: fmov d0, d2
369 ; CHECK-GI-FP16-NEXT: ret
371 %d = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
; <8 x half> llvm.fma. With +fullfp16 both selectors are expected to emit one
; 8h fmla plus a register move. Without fullfp16:
;  - SelectionDAG works lane by lane: h-lane extracts, fcvt to single, scalar
;    single-precision fmadd, fcvt back to half, and lane inserts into v3,
;    which is finally moved to v0.
;  - GlobalISel widens both halves of each operand (fcvtl / fcvtl2), uses two
;    4s fmla instructions, and narrows with fcvtn / fcvtn2.
375 define <8 x half> @fma_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
376 ; CHECK-SD-NOFP16-LABEL: fma_v8f16:
377 ; CHECK-SD-NOFP16: // %bb.0: // %entry
378 ; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
379 ; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
380 ; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
381 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
382 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
383 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
384 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
385 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
386 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
387 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
388 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
389 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
390 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
391 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
392 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
393 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
394 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
395 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3]
396 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3
397 ; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3]
398 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
399 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7
400 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
401 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h18
402 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h19
403 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
404 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
405 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
406 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
407 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
408 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
409 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
410 ; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0]
411 ; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5]
412 ; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5
413 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
414 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5]
415 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
416 ; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0]
417 ; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6]
418 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
419 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
420 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
421 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
422 ; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
423 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
424 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
425 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
426 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
427 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
428 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4
429 ; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0]
430 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
431 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h18
432 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
433 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
434 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
435 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
436 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
437 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
438 ; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5
439 ; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0]
440 ; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
441 ; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
442 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
443 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
444 ; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
445 ; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
446 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
447 ; CHECK-SD-NOFP16-NEXT: ret
449 ; CHECK-SD-FP16-LABEL: fma_v8f16:
450 ; CHECK-SD-FP16: // %bb.0: // %entry
451 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
452 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
453 ; CHECK-SD-FP16-NEXT: ret
455 ; CHECK-GI-NOFP16-LABEL: fma_v8f16:
456 ; CHECK-GI-NOFP16: // %bb.0: // %entry
457 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
458 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
459 ; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v2.4h
460 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
461 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
462 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
463 ; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v4.4s, v3.4s
464 ; CHECK-GI-NOFP16-NEXT: fmla v2.4s, v1.4s, v0.4s
465 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v5.4s
466 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
467 ; CHECK-GI-NOFP16-NEXT: ret
469 ; CHECK-GI-FP16-LABEL: fma_v8f16:
470 ; CHECK-GI-FP16: // %bb.0: // %entry
471 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
472 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
473 ; CHECK-GI-FP16-NEXT: ret
475 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
; <16 x half> llvm.fma. With +fullfp16 both selectors are expected to emit two
; 8h fmla instructions plus two register moves. Without fullfp16:
;  - SelectionDAG works lane by lane: h-lane extracts, fcvt to single, scalar
;    single-precision fmadd, fcvt back to half, and lane inserts into v6 and
;    v7, which are finally moved to v0 and v1.
;  - GlobalISel widens both halves of each operand (fcvtl / fcvtl2), uses four
;    4s fmla instructions, and narrows with fcvtn / fcvtn2.
479 define <16 x half> @fma_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
480 ; CHECK-SD-NOFP16-LABEL: fma_v16f16:
481 ; CHECK-SD-NOFP16: // %bb.0: // %entry
482 ; CHECK-SD-NOFP16-NEXT: mov h6, v4.h[1]
483 ; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[1]
484 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[1]
485 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h4
486 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h2
487 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h0
488 ; CHECK-SD-NOFP16-NEXT: mov h20, v4.h[2]
489 ; CHECK-SD-NOFP16-NEXT: mov h21, v2.h[2]
490 ; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[2]
491 ; CHECK-SD-NOFP16-NEXT: mov h23, v4.h[3]
492 ; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[3]
493 ; CHECK-SD-NOFP16-NEXT: mov h25, v0.h[3]
494 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
495 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
496 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
497 ; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
498 ; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[1]
499 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h5
500 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h20
501 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h21
502 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
503 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h23
504 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h24
505 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h25
506 ; CHECK-SD-NOFP16-NEXT: fmadd s7, s16, s7, s6
507 ; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[1]
508 ; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[1]
509 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
510 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h3
511 ; CHECK-SD-NOFP16-NEXT: fcvt s29, h1
512 ; CHECK-SD-NOFP16-NEXT: fmadd s19, s20, s19, s18
513 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h26
514 ; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[4]
515 ; CHECK-SD-NOFP16-NEXT: fmadd s21, s23, s22, s21
516 ; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[2]
517 ; CHECK-SD-NOFP16-NEXT: mov h23, v1.h[2]
518 ; CHECK-SD-NOFP16-NEXT: fcvt h20, s7
519 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h24
520 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h25
521 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
522 ; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4]
523 ; CHECK-SD-NOFP16-NEXT: mov h7, v4.h[5]
524 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
525 ; CHECK-SD-NOFP16-NEXT: mov h30, v2.h[5]
526 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
527 ; CHECK-SD-NOFP16-NEXT: fcvt h21, s21
528 ; CHECK-SD-NOFP16-NEXT: mov h31, v1.h[4]
529 ; CHECK-SD-NOFP16-NEXT: fmadd s24, s26, s25, s24
530 ; CHECK-SD-NOFP16-NEXT: fmadd s25, s29, s28, s27
531 ; CHECK-SD-NOFP16-NEXT: mov v6.h[1], v20.h[0]
532 ; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[2]
533 ; CHECK-SD-NOFP16-NEXT: mov h26, v5.h[3]
534 ; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[3]
535 ; CHECK-SD-NOFP16-NEXT: mov h28, v1.h[3]
536 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
537 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
538 ; CHECK-SD-NOFP16-NEXT: fcvt s29, h7
539 ; CHECK-SD-NOFP16-NEXT: fcvt s30, h30
540 ; CHECK-SD-NOFP16-NEXT: mov v6.h[2], v19.h[0]
541 ; CHECK-SD-NOFP16-NEXT: fcvt h24, s24
542 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s25
543 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h20
544 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
545 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h23
546 ; CHECK-SD-NOFP16-NEXT: fmadd s16, s18, s17, s16
547 ; CHECK-SD-NOFP16-NEXT: mov h23, v0.h[5]
548 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
549 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
550 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
551 ; CHECK-SD-NOFP16-NEXT: mov h18, v4.h[6]
552 ; CHECK-SD-NOFP16-NEXT: mov v6.h[3], v21.h[0]
553 ; CHECK-SD-NOFP16-NEXT: mov v7.h[1], v24.h[0]
554 ; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[5]
555 ; CHECK-SD-NOFP16-NEXT: fmadd s19, s22, s20, s19
556 ; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[4]
557 ; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[4]
558 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h23
559 ; CHECK-SD-NOFP16-NEXT: mov h28, v0.h[6]
560 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
561 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
562 ; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7]
563 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
564 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
565 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h22
566 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h31
567 ; CHECK-SD-NOFP16-NEXT: fmadd s17, s23, s30, s29
568 ; CHECK-SD-NOFP16-NEXT: fmadd s23, s27, s26, s25
569 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
570 ; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[5]
571 ; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[5]
572 ; CHECK-SD-NOFP16-NEXT: mov h27, v2.h[6]
573 ; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[6]
574 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
575 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
576 ; CHECK-SD-NOFP16-NEXT: fmadd s20, s22, s21, s20
577 ; CHECK-SD-NOFP16-NEXT: mov h21, v5.h[6]
578 ; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[6]
579 ; CHECK-SD-NOFP16-NEXT: mov v7.h[2], v19.h[0]
580 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s23
581 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h24
582 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h25
583 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
584 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
585 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
586 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h29
587 ; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7]
588 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
589 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h22
590 ; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[7]
591 ; CHECK-SD-NOFP16-NEXT: mov v7.h[3], v19.h[0]
592 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s20
593 ; CHECK-SD-NOFP16-NEXT: mov v6.h[4], v16.h[0]
594 ; CHECK-SD-NOFP16-NEXT: fmadd s20, s25, s24, s23
595 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
596 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
597 ; CHECK-SD-NOFP16-NEXT: fmadd s18, s27, s26, s18
598 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
599 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
600 ; CHECK-SD-NOFP16-NEXT: fmadd s21, s28, s22, s21
601 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
602 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
603 ; CHECK-SD-NOFP16-NEXT: mov v7.h[4], v19.h[0]
604 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
605 ; CHECK-SD-NOFP16-NEXT: fcvt h17, s20
606 ; CHECK-SD-NOFP16-NEXT: mov v6.h[5], v16.h[0]
607 ; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s2, s4
608 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s18
609 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s21
610 ; CHECK-SD-NOFP16-NEXT: fmadd s1, s1, s3, s5
611 ; CHECK-SD-NOFP16-NEXT: mov v7.h[5], v17.h[0]
612 ; CHECK-SD-NOFP16-NEXT: mov v6.h[6], v2.h[0]
613 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
614 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
615 ; CHECK-SD-NOFP16-NEXT: mov v7.h[6], v4.h[0]
616 ; CHECK-SD-NOFP16-NEXT: mov v6.h[7], v0.h[0]
617 ; CHECK-SD-NOFP16-NEXT: mov v7.h[7], v1.h[0]
618 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b
619 ; CHECK-SD-NOFP16-NEXT: mov v1.16b, v7.16b
620 ; CHECK-SD-NOFP16-NEXT: ret
622 ; CHECK-SD-FP16-LABEL: fma_v16f16:
623 ; CHECK-SD-FP16: // %bb.0: // %entry
624 ; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
625 ; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
626 ; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
627 ; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
628 ; CHECK-SD-FP16-NEXT: ret
630 ; CHECK-GI-NOFP16-LABEL: fma_v16f16:
631 ; CHECK-GI-NOFP16: // %bb.0: // %entry
632 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
633 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v1.4h
634 ; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v2.4h
635 ; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
636 ; CHECK-GI-NOFP16-NEXT: fcvtl v18.4s, v4.4h
637 ; CHECK-GI-NOFP16-NEXT: fcvtl v19.4s, v5.4h
638 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
639 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
640 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
641 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
642 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
643 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
644 ; CHECK-GI-NOFP16-NEXT: fmla v18.4s, v16.4s, v6.4s
645 ; CHECK-GI-NOFP16-NEXT: fmla v19.4s, v17.4s, v7.4s
646 ; CHECK-GI-NOFP16-NEXT: fmla v4.4s, v2.4s, v0.4s
647 ; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v3.4s, v1.4s
648 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v18.4s
649 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v19.4s
650 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
651 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
652 ; CHECK-GI-NOFP16-NEXT: ret
654 ; CHECK-GI-FP16-LABEL: fma_v16f16:
655 ; CHECK-GI-FP16: // %bb.0: // %entry
656 ; CHECK-GI-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
657 ; CHECK-GI-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
658 ; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
659 ; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
660 ; CHECK-GI-FP16-NEXT: ret
662 %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
; Scalar double llvm.fmuladd. The checks use the shared CHECK prefix, so every
; RUN configuration is expected to emit a single fused fmadd on d registers.
666 define double @fmuladd_f64(double %a, double %b, double %c) {
667 ; CHECK-LABEL: fmuladd_f64:
668 ; CHECK: // %bb.0: // %entry
669 ; CHECK-NEXT: fmadd d0, d0, d1, d2
672 %d = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
; Scalar float llvm.fmuladd. The checks use the shared CHECK prefix, so every
; RUN configuration is expected to emit a single fused fmadd on s registers.
676 define float @fmuladd_f32(float %a, float %b, float %c) {
677 ; CHECK-LABEL: fmuladd_f32:
678 ; CHECK: // %bb.0: // %entry
679 ; CHECK-NEXT: fmadd s0, s0, s1, s2
682 %d = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; Scalar half llvm.fmuladd. With +fullfp16 both selectors are expected to emit
; one half-precision fmadd. Without it, the expected code is not fused: a
; single-precision fmul, a round trip of the product through half (fcvt h0, s0
; then fcvt s0, h0), a single-precision fadd, and a final fcvt back to half.
; The SelectionDAG and GlobalISel expectations differ only in the order of the
; first two fcvt instructions.
686 define half @fmuladd_f16(half %a, half %b, half %c) {
687 ; CHECK-SD-NOFP16-LABEL: fmuladd_f16:
688 ; CHECK-SD-NOFP16: // %bb.0: // %entry
689 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
690 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
691 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
692 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h2
693 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
694 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
695 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
696 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
697 ; CHECK-SD-NOFP16-NEXT: ret
699 ; CHECK-SD-FP16-LABEL: fmuladd_f16:
700 ; CHECK-SD-FP16: // %bb.0: // %entry
701 ; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
702 ; CHECK-SD-FP16-NEXT: ret
704 ; CHECK-GI-NOFP16-LABEL: fmuladd_f16:
705 ; CHECK-GI-NOFP16: // %bb.0: // %entry
706 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
707 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
708 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
709 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h2
710 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
711 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
712 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
713 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
714 ; CHECK-GI-NOFP16-NEXT: ret
716 ; CHECK-GI-FP16-LABEL: fmuladd_f16:
717 ; CHECK-GI-FP16: // %bb.0: // %entry
718 ; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
719 ; CHECK-GI-FP16-NEXT: ret
721 %d = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
; <2 x double> llvm.fmuladd. All RUN configurations share the expectation: one
; 2d fmla that accumulates into v2, followed by a move of v2 into v0.
725 define <2 x double> @fmuladd_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
726 ; CHECK-LABEL: fmuladd_v2f64:
727 ; CHECK: // %bb.0: // %entry
728 ; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d
729 ; CHECK-NEXT: mov v0.16b, v2.16b
732 %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; <3 x double> llvm.fmuladd. The expectations are split per selector (CHECK-SD
; vs CHECK-GI) but not per fullfp16 setting. Both pack pairs of d registers
; into 2d vectors with lane moves and load one d operand from [sp].
; SelectionDAG is expected to use two 2d fmla instructions; GlobalISel is
; expected to use one 2d fmla plus a scalar fmadd on d2/d5.
736 define <3 x double> @fmuladd_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
737 ; CHECK-SD-LABEL: fmuladd_v3f64:
738 ; CHECK-SD: // %bb.0: // %entry
739 ; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
740 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
741 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
742 ; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
743 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
744 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
745 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
746 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
747 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
748 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
749 ; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
750 ; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
751 ; CHECK-SD-NEXT: ldr d3, [sp]
752 ; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
753 ; CHECK-SD-NEXT: fmov d0, d6
754 ; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
755 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
756 ; CHECK-SD-NEXT: fmov d2, d3
759 ; CHECK-GI-LABEL: fmuladd_v3f64:
760 ; CHECK-GI: // %bb.0: // %entry
761 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
762 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
763 ; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
764 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
765 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
766 ; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
767 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
768 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
769 ; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
770 ; CHECK-GI-NEXT: fmla v6.2d, v3.2d, v0.2d
771 ; CHECK-GI-NEXT: ldr d0, [sp]
772 ; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
773 ; CHECK-GI-NEXT: mov d1, v6.d[1]
774 ; CHECK-GI-NEXT: fmov d0, d6
777 %d = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c)
; <4 x double> llvm.fmuladd. All RUN configurations share the expectation: two
; 2d fmla instructions accumulating into v4 and v5, then moves into v0 and v1.
781 define <4 x double> @fmuladd_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
782 ; CHECK-LABEL: fmuladd_v4f64:
783 ; CHECK: // %bb.0: // %entry
784 ; CHECK-NEXT: fmla v4.2d, v2.2d, v0.2d
785 ; CHECK-NEXT: fmla v5.2d, v3.2d, v1.2d
786 ; CHECK-NEXT: mov v0.16b, v4.16b
787 ; CHECK-NEXT: mov v1.16b, v5.16b
790 %d = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
; <2 x float> llvm.fmuladd. All RUN configurations share the expectation: one
; 2s fmla that accumulates into v2, followed by an fmov of d2 into d0.
794 define <2 x float> @fmuladd_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
795 ; CHECK-LABEL: fmuladd_v2f32:
796 ; CHECK: // %bb.0: // %entry
797 ; CHECK-NEXT: fmla v2.2s, v1.2s, v0.2s
798 ; CHECK-NEXT: fmov d0, d2
801 %d = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
; <3 x float> llvm.fmuladd. All RUN configurations share the expectation: the
; three-element operation is emitted as one four-lane (4s) fmla accumulating
; into v2, followed by a move of v2 into v0.
805 define <3 x float> @fmuladd_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
806 ; CHECK-LABEL: fmuladd_v3f32:
807 ; CHECK: // %bb.0: // %entry
808 ; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
809 ; CHECK-NEXT: mov v0.16b, v2.16b
812 %d = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
; llvm.fmuladd on <4 x float>. Expected under every RUN configuration (shared
; CHECK prefix): one 4-lane fmla accumulating into v2, then a move of v2
; into v0.
816 define <4 x float> @fmuladd_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
817 ; CHECK-LABEL: fmuladd_v4f32:
818 ; CHECK: // %bb.0: // %entry
819 ; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
820 ; CHECK-NEXT: mov v0.16b, v2.16b
823 %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; llvm.fmuladd on <8 x float>. Expected under every RUN configuration (shared
; CHECK prefix): one 4-lane fmla per register (v4 and v5 as accumulators),
; followed by moves of the results into v0/v1.
827 define <8 x float> @fmuladd_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
828 ; CHECK-LABEL: fmuladd_v8f32:
829 ; CHECK: // %bb.0: // %entry
830 ; CHECK-NEXT: fmla v4.4s, v2.4s, v0.4s
831 ; CHECK-NEXT: fmla v5.4s, v3.4s, v1.4s
832 ; CHECK-NEXT: mov v0.16b, v4.16b
833 ; CHECK-NEXT: mov v1.16b, v5.16b
836 %d = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
; llvm.fmuladd on the non-power-of-two type <7 x half>.
;  * With +fullfp16 (SD and GI): a single 8-lane fmla on .8h.
;  * SD without fullfp16: handled lane by lane; each lane is converted to
;    single precision, multiplied, rounded back to half, converted to single
;    again, added, and rounded to half (a separate fmul and fadd, no fmadd).
;  * GI without fullfp16: works on 4-lane pieces via fcvtl / fmul / fcvtn /
;    fcvtl / fadd / fcvtn, with element moves to split and reassemble lanes.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py instead of editing by hand.
840 define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
841 ; CHECK-SD-NOFP16-LABEL: fmuladd_v7f16:
842 ; CHECK-SD-NOFP16: // %bb.0: // %entry
843 ; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
844 ; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1]
845 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h1
846 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h0
847 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
848 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[2]
849 ; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[3]
850 ; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4]
851 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h2
852 ; CHECK-SD-NOFP16-NEXT: mov h20, v2.h[2]
853 ; CHECK-SD-NOFP16-NEXT: mov h21, v1.h[5]
854 ; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[6]
855 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
856 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
857 ; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5
858 ; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[3]
859 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
860 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
861 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
862 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
863 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
864 ; CHECK-SD-NOFP16-NEXT: fmul s3, s4, s3
865 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
866 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
867 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h17
868 ; CHECK-SD-NOFP16-NEXT: fmul s7, s16, s7
869 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[1]
870 ; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[4]
871 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
872 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
873 ; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5
874 ; CHECK-SD-NOFP16-NEXT: mov h6, v0.h[5]
875 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
876 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
877 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
878 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
879 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
880 ; CHECK-SD-NOFP16-NEXT: fadd s4, s4, s19
881 ; CHECK-SD-NOFP16-NEXT: mov h19, v2.h[3]
882 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
883 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
884 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
885 ; CHECK-SD-NOFP16-NEXT: fmul s16, s18, s16
886 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
887 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
888 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
889 ; CHECK-SD-NOFP16-NEXT: fadd s17, s3, s17
890 ; CHECK-SD-NOFP16-NEXT: fmul s6, s6, s21
891 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
892 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
893 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
894 ; CHECK-SD-NOFP16-NEXT: fadd s4, s7, s20
895 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
896 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
897 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
898 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
899 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
900 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
901 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
902 ; CHECK-SD-NOFP16-NEXT: fadd s5, s5, s19
903 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
904 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
905 ; CHECK-SD-NOFP16-NEXT: fmul s18, s20, s18
906 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
907 ; CHECK-SD-NOFP16-NEXT: mov h1, v2.h[6]
908 ; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v7.h[0]
909 ; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[5]
910 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
911 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
912 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
913 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
914 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
915 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
916 ; CHECK-SD-NOFP16-NEXT: fadd s16, s16, s17
917 ; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v4.h[0]
918 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
919 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s18
920 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
921 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
922 ; CHECK-SD-NOFP16-NEXT: fadd s6, s6, s7
923 ; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v4.h[0]
924 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s16
925 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
926 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
927 ; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v4.h[0]
928 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s6
929 ; CHECK-SD-NOFP16-NEXT: fadd s1, s5, s1
930 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
931 ; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
932 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
933 ; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v1.h[0]
934 ; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
935 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
936 ; CHECK-SD-NOFP16-NEXT: ret
938 ; CHECK-SD-FP16-LABEL: fmuladd_v7f16:
939 ; CHECK-SD-FP16: // %bb.0: // %entry
940 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
941 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
942 ; CHECK-SD-FP16-NEXT: ret
944 ; CHECK-GI-NOFP16-LABEL: fmuladd_v7f16:
945 ; CHECK-GI-NOFP16: // %bb.0: // %entry
946 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[4]
947 ; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[5]
948 ; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[4]
949 ; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[5]
950 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v0.4h
951 ; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h
952 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
953 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
954 ; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0]
955 ; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v6.h[0]
956 ; CHECK-GI-NOFP16-NEXT: fmul v4.4s, v7.4s, v16.4s
957 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v2.4h
958 ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v0.h[0]
959 ; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v1.h[0]
960 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v4.4s
961 ; CHECK-GI-NOFP16-NEXT: mov h1, v2.h[4]
962 ; CHECK-GI-NOFP16-NEXT: mov h4, v2.h[5]
963 ; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[6]
964 ; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v0.h[0]
965 ; CHECK-GI-NOFP16-NEXT: mov v5.h[3], v0.h[0]
966 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
967 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v4.h[0]
968 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
969 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v5.4h
970 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v6.4s
971 ; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
972 ; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v3.4s, v4.4s
973 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
974 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
975 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
976 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
977 ; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
978 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
979 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
980 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
981 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[0]
982 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
983 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
984 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
985 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
986 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
987 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
988 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
989 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
990 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
991 ; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
992 ; CHECK-GI-NOFP16-NEXT: ret
994 ; CHECK-GI-FP16-LABEL: fmuladd_v7f16:
995 ; CHECK-GI-FP16: // %bb.0: // %entry
996 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
997 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
998 ; CHECK-GI-FP16-NEXT: ret
1000 %d = call <7 x half> @llvm.fmuladd.v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c)
; llvm.fmuladd on <4 x half>.
;  * With +fullfp16 (SD and GI): a single 4-lane fmla on .4h.
;  * Without fullfp16 (SD and GI): widen to .4s with fcvtl, fmul, narrow the
;    product back to half with fcvtn, widen again, fadd, and narrow the sum,
;    i.e. a separate multiply and add with an intermediate rounding to half.
;    The two selectors differ only in the order of the first two fcvtl.
1004 define <4 x half> @fmuladd_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
1005 ; CHECK-SD-NOFP16-LABEL: fmuladd_v4f16:
1006 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1007 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1008 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1009 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1010 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1011 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1012 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1013 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1014 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1015 ; CHECK-SD-NOFP16-NEXT: ret
1017 ; CHECK-SD-FP16-LABEL: fmuladd_v4f16:
1018 ; CHECK-SD-FP16: // %bb.0: // %entry
1019 ; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
1020 ; CHECK-SD-FP16-NEXT: fmov d0, d2
1021 ; CHECK-SD-FP16-NEXT: ret
1023 ; CHECK-GI-NOFP16-LABEL: fmuladd_v4f16:
1024 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1025 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1026 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1027 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1028 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1029 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1030 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1031 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1032 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1033 ; CHECK-GI-NOFP16-NEXT: ret
1035 ; CHECK-GI-FP16-LABEL: fmuladd_v4f16:
1036 ; CHECK-GI-FP16: // %bb.0: // %entry
1037 ; CHECK-GI-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
1038 ; CHECK-GI-FP16-NEXT: fmov d0, d2
1039 ; CHECK-GI-FP16-NEXT: ret
1041 %d = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
; llvm.fmuladd on <8 x half>.
;  * With +fullfp16 (SD and GI): a single 8-lane fmla on .8h.
;  * SD without fullfp16: handled lane by lane with scalar fcvt / fmul /
;    fcvt / fcvt / fadd / fcvt, then the result is rebuilt with element moves.
;  * GI without fullfp16: low and high halves are widened with fcvtl/fcvtl2,
;    multiplied, narrowed to half, widened again, added, and narrowed with
;    fcvtn/fcvtn2.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py instead of editing by hand.
1045 define <8 x half> @fmuladd_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
1046 ; CHECK-SD-NOFP16-LABEL: fmuladd_v8f16:
1047 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1048 ; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
1049 ; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1]
1050 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h1
1051 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h0
1052 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
1053 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[2]
1054 ; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[3]
1055 ; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4]
1056 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h2
1057 ; CHECK-SD-NOFP16-NEXT: mov h20, v2.h[2]
1058 ; CHECK-SD-NOFP16-NEXT: mov h21, v1.h[5]
1059 ; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[6]
1060 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1061 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1062 ; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5
1063 ; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[3]
1064 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
1065 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
1066 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
1067 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
1068 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
1069 ; CHECK-SD-NOFP16-NEXT: fmul s3, s4, s3
1070 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
1071 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
1072 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h17
1073 ; CHECK-SD-NOFP16-NEXT: fmul s7, s16, s7
1074 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[1]
1075 ; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[4]
1076 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
1077 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1078 ; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5
1079 ; CHECK-SD-NOFP16-NEXT: mov h6, v0.h[5]
1080 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
1081 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
1082 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
1083 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
1084 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1085 ; CHECK-SD-NOFP16-NEXT: fadd s4, s4, s19
1086 ; CHECK-SD-NOFP16-NEXT: mov h19, v2.h[3]
1087 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
1088 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
1089 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
1090 ; CHECK-SD-NOFP16-NEXT: fmul s16, s18, s16
1091 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
1092 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
1093 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1094 ; CHECK-SD-NOFP16-NEXT: fadd s17, s3, s17
1095 ; CHECK-SD-NOFP16-NEXT: fmul s6, s6, s21
1096 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
1097 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
1098 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
1099 ; CHECK-SD-NOFP16-NEXT: fadd s4, s7, s20
1100 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
1101 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
1102 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
1103 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
1104 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
1105 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
1106 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
1107 ; CHECK-SD-NOFP16-NEXT: fadd s5, s5, s19
1108 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
1109 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
1110 ; CHECK-SD-NOFP16-NEXT: fmul s18, s20, s18
1111 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
1112 ; CHECK-SD-NOFP16-NEXT: mov h1, v2.h[6]
1113 ; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v7.h[0]
1114 ; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[5]
1115 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
1116 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
1117 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
1118 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
1119 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1120 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
1121 ; CHECK-SD-NOFP16-NEXT: fadd s16, s16, s17
1122 ; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v4.h[0]
1123 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
1124 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s18
1125 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
1126 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1127 ; CHECK-SD-NOFP16-NEXT: fadd s6, s6, s7
1128 ; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v4.h[0]
1129 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s16
1130 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
1131 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
1132 ; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v4.h[0]
1133 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s6
1134 ; CHECK-SD-NOFP16-NEXT: fadd s1, s5, s1
1135 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1136 ; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
1137 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
1138 ; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v1.h[0]
1139 ; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
1140 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
1141 ; CHECK-SD-NOFP16-NEXT: ret
1143 ; CHECK-SD-FP16-LABEL: fmuladd_v8f16:
1144 ; CHECK-SD-FP16: // %bb.0: // %entry
1145 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
1146 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
1147 ; CHECK-SD-FP16-NEXT: ret
1149 ; CHECK-GI-NOFP16-LABEL: fmuladd_v8f16:
1150 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1151 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
1152 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
1153 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1154 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1155 ; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
1156 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1157 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
1158 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v2.4h
1159 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1160 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1161 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1162 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1163 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s
1164 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s
1165 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
1166 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
1167 ; CHECK-GI-NOFP16-NEXT: ret
1169 ; CHECK-GI-FP16-LABEL: fmuladd_v8f16:
1170 ; CHECK-GI-FP16: // %bb.0: // %entry
1171 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
1172 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
1173 ; CHECK-GI-FP16-NEXT: ret
1175 %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
; llvm.fmuladd on <16 x half>, which spans two vector registers per operand.
;  * With +fullfp16 (SD and GI): two 8-lane fmla on .8h (accumulators v4/v5),
;    then moves of the results into v0/v1.
;  * SD without fullfp16: handled lane by lane with scalar fcvt / fmul /
;    fcvt / fcvt / fadd / fcvt; results are rebuilt in v6/v7 with element moves.
;  * GI without fullfp16: each register is split with fcvtl/fcvtl2, multiplied
;    in .4s, narrowed to half, widened again, added, and narrowed with
;    fcvtn/fcvtn2.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py instead of editing by hand.
1179 define <16 x half> @fmuladd_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
1180 ; CHECK-SD-NOFP16-LABEL: fmuladd_v16f16:
1181 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1182 ; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[1]
1183 ; CHECK-SD-NOFP16-NEXT: mov h7, v0.h[1]
1184 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h2
1185 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h0
1186 ; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[2]
1187 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
1188 ; CHECK-SD-NOFP16-NEXT: mov h20, v2.h[3]
1189 ; CHECK-SD-NOFP16-NEXT: mov h21, v0.h[3]
1190 ; CHECK-SD-NOFP16-NEXT: mov h24, v3.h[1]
1191 ; CHECK-SD-NOFP16-NEXT: mov h25, v1.h[1]
1192 ; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[2]
1193 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h1
1194 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
1195 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
1196 ; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[4]
1197 ; CHECK-SD-NOFP16-NEXT: fmul s16, s17, s16
1198 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
1199 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
1200 ; CHECK-SD-NOFP16-NEXT: mov h17, v4.h[1]
1201 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
1202 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
1203 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h24
1204 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h25
1205 ; CHECK-SD-NOFP16-NEXT: mov h30, v1.h[6]
1206 ; CHECK-SD-NOFP16-NEXT: fmul s6, s7, s6
1207 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h4
1208 ; CHECK-SD-NOFP16-NEXT: fmul s23, s19, s18
1209 ; CHECK-SD-NOFP16-NEXT: fcvt h22, s16
1210 ; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[4]
1211 ; CHECK-SD-NOFP16-NEXT: fmul s20, s21, s20
1212 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
1213 ; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[2]
1214 ; CHECK-SD-NOFP16-NEXT: fmul s24, s25, s24
1215 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
1216 ; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[3]
1217 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
1218 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h22
1219 ; CHECK-SD-NOFP16-NEXT: fcvt h22, s23
1220 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
1221 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
1222 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
1223 ; CHECK-SD-NOFP16-NEXT: fcvt h20, s20
1224 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h6
1225 ; CHECK-SD-NOFP16-NEXT: mov h6, v4.h[3]
1226 ; CHECK-SD-NOFP16-NEXT: fadd s7, s21, s7
1227 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h22
1228 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h3
1229 ; CHECK-SD-NOFP16-NEXT: fmul s18, s19, s18
1230 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h20
1231 ; CHECK-SD-NOFP16-NEXT: fadd s17, s23, s17
1232 ; CHECK-SD-NOFP16-NEXT: mov h23, v3.h[2]
1233 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h6
1234 ; CHECK-SD-NOFP16-NEXT: fmul s22, s27, s22
1235 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s7
1236 ; CHECK-SD-NOFP16-NEXT: fadd s7, s21, s16
1237 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h26
1238 ; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[3]
1239 ; CHECK-SD-NOFP16-NEXT: fcvt h18, s18
1240 ; CHECK-SD-NOFP16-NEXT: mov h27, v0.h[5]
1241 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
1242 ; CHECK-SD-NOFP16-NEXT: mov h17, v4.h[4]
1243 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h23
1244 ; CHECK-SD-NOFP16-NEXT: fadd s19, s19, s28
1245 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
1246 ; CHECK-SD-NOFP16-NEXT: mov h23, v2.h[5]
1247 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
1248 ; CHECK-SD-NOFP16-NEXT: fmul s20, s21, s20
1249 ; CHECK-SD-NOFP16-NEXT: mov v6.h[1], v16.h[0]
1250 ; CHECK-SD-NOFP16-NEXT: mov h16, v5.h[1]
1251 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
1252 ; CHECK-SD-NOFP16-NEXT: fcvt h21, s22
1253 ; CHECK-SD-NOFP16-NEXT: fcvt h22, s24
1254 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h25
1255 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
1256 ; CHECK-SD-NOFP16-NEXT: mov h26, v5.h[2]
1257 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
1258 ; CHECK-SD-NOFP16-NEXT: mov v6.h[2], v7.h[0]
1259 ; CHECK-SD-NOFP16-NEXT: mov h7, v3.h[4]
1260 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h16
1261 ; CHECK-SD-NOFP16-NEXT: fcvt h20, s20
1262 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
1263 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h22
1264 ; CHECK-SD-NOFP16-NEXT: fadd s16, s18, s17
1265 ; CHECK-SD-NOFP16-NEXT: fmul s18, s25, s24
1266 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h5
1267 ; CHECK-SD-NOFP16-NEXT: mov h24, v0.h[6]
1268 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
1269 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
1270 ; CHECK-SD-NOFP16-NEXT: mov v6.h[3], v19.h[0]
1271 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h23
1272 ; CHECK-SD-NOFP16-NEXT: mov h23, v2.h[6]
1273 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
1274 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
1275 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h29
1276 ; CHECK-SD-NOFP16-NEXT: fadd s22, s22, s28
1277 ; CHECK-SD-NOFP16-NEXT: fadd s17, s21, s17
1278 ; CHECK-SD-NOFP16-NEXT: fcvt h18, s18
1279 ; CHECK-SD-NOFP16-NEXT: mov h21, v5.h[3]
1280 ; CHECK-SD-NOFP16-NEXT: mov h28, v3.h[5]
1281 ; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[5]
1282 ; CHECK-SD-NOFP16-NEXT: fmul s19, s26, s19
1283 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h23
1284 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h24
1285 ; CHECK-SD-NOFP16-NEXT: fadd s20, s20, s25
1286 ; CHECK-SD-NOFP16-NEXT: fmul s25, s27, s7
1287 ; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[6]
1288 ; CHECK-SD-NOFP16-NEXT: fcvt h22, s22
1289 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
1290 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
1291 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h21
1292 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h28
1293 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h29
1294 ; CHECK-SD-NOFP16-NEXT: fmul s23, s24, s23
1295 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
1296 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
1297 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h27
1298 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h30
1299 ; CHECK-SD-NOFP16-NEXT: fcvt h20, s20
1300 ; CHECK-SD-NOFP16-NEXT: mov v7.h[1], v22.h[0]
1301 ; CHECK-SD-NOFP16-NEXT: fcvt h22, s25
1302 ; CHECK-SD-NOFP16-NEXT: mov h25, v5.h[4]
1303 ; CHECK-SD-NOFP16-NEXT: fadd s17, s17, s18
1304 ; CHECK-SD-NOFP16-NEXT: fmul s18, s28, s21
1305 ; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[7]
1306 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
1307 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
1308 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
1309 ; CHECK-SD-NOFP16-NEXT: fmul s21, s26, s24
1310 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1311 ; CHECK-SD-NOFP16-NEXT: mov h24, v4.h[5]
1312 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h22
1313 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h25
1314 ; CHECK-SD-NOFP16-NEXT: mov v7.h[2], v20.h[0]
1315 ; CHECK-SD-NOFP16-NEXT: fcvt h17, s17
1316 ; CHECK-SD-NOFP16-NEXT: fcvt h18, s18
1317 ; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[5]
1318 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1319 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
1320 ; CHECK-SD-NOFP16-NEXT: fcvt h23, s23
1321 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2
1322 ; CHECK-SD-NOFP16-NEXT: mov h2, v4.h[6]
1323 ; CHECK-SD-NOFP16-NEXT: fcvt h21, s21
1324 ; CHECK-SD-NOFP16-NEXT: fadd s22, s22, s25
1325 ; CHECK-SD-NOFP16-NEXT: mov h25, v5.h[6]
1326 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
1327 ; CHECK-SD-NOFP16-NEXT: mov v7.h[3], v17.h[0]
1328 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
1329 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h24
1330 ; CHECK-SD-NOFP16-NEXT: fmul s1, s1, s3
1331 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h18
1332 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h20
1333 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h23
1334 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
1335 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
1336 ; CHECK-SD-NOFP16-NEXT: fcvt h22, s22
1337 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h25
1338 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1339 ; CHECK-SD-NOFP16-NEXT: fadd s17, s17, s19
1340 ; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7]
1341 ; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7]
1342 ; CHECK-SD-NOFP16-NEXT: fadd s3, s3, s18
1343 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
1344 ; CHECK-SD-NOFP16-NEXT: mov v6.h[4], v16.h[0]
1345 ; CHECK-SD-NOFP16-NEXT: fadd s2, s20, s2
1346 ; CHECK-SD-NOFP16-NEXT: mov v7.h[4], v22.h[0]
1347 ; CHECK-SD-NOFP16-NEXT: fadd s16, s21, s23
1348 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1349 ; CHECK-SD-NOFP16-NEXT: fcvt h17, s17
1350 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1351 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
1352 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
1353 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
1354 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
1355 ; CHECK-SD-NOFP16-NEXT: mov v6.h[5], v17.h[0]
1356 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s4
1357 ; CHECK-SD-NOFP16-NEXT: mov v7.h[5], v3.h[0]
1358 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s16
1359 ; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s5
1360 ; CHECK-SD-NOFP16-NEXT: mov v6.h[6], v2.h[0]
1361 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1362 ; CHECK-SD-NOFP16-NEXT: mov v7.h[6], v3.h[0]
1363 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
1364 ; CHECK-SD-NOFP16-NEXT: mov v6.h[7], v0.h[0]
1365 ; CHECK-SD-NOFP16-NEXT: mov v7.h[7], v1.h[0]
1366 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b
1367 ; CHECK-SD-NOFP16-NEXT: mov v1.16b, v7.16b
1368 ; CHECK-SD-NOFP16-NEXT: ret
1370 ; CHECK-SD-FP16-LABEL: fmuladd_v16f16:
1371 ; CHECK-SD-FP16: // %bb.0: // %entry
1372 ; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
1373 ; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
1374 ; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
1375 ; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
1376 ; CHECK-SD-FP16-NEXT: ret
1378 ; CHECK-GI-NOFP16-LABEL: fmuladd_v16f16:
1379 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1380 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
1381 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v2.4h
1382 ; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h
1383 ; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
1384 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1385 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1386 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1387 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
1388 ; CHECK-GI-NOFP16-NEXT: fmul v6.4s, v6.4s, v7.4s
1389 ; CHECK-GI-NOFP16-NEXT: fmul v7.4s, v16.4s, v17.4s
1390 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
1391 ; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
1392 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v6.4s
1393 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v4.4h
1394 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
1395 ; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v7.4s
1396 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v5.4h
1397 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
1398 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1399 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1400 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
1401 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
1402 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1403 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1404 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v6.4s
1405 ; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v3.4s, v7.4s
1406 ; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v0.4s, v4.4s
1407 ; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v1.4s, v5.4s
1408 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
1409 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
1410 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
1411 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
1412 ; CHECK-GI-NOFP16-NEXT: ret
1414 ; CHECK-GI-FP16-LABEL: fmuladd_v16f16:
1415 ; CHECK-GI-FP16: // %bb.0: // %entry
1416 ; CHECK-GI-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
1417 ; CHECK-GI-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
1418 ; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
1419 ; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
1420 ; CHECK-GI-FP16-NEXT: ret
1422 %d = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
; Explicit `fmul fast` followed by `fadd fast` on double (no fma/fmuladd
; intrinsic). Every RUN configuration shares the CHECK prefix and is expected
; to combine the pair into a single fmadd.
1426 define double @fmul_f64(double %a, double %b, double %c) {
1427 ; CHECK-LABEL: fmul_f64:
1428 ; CHECK: // %bb.0: // %entry
1429 ; CHECK-NEXT: fmadd d0, d0, d1, d2
1432 %d = fmul fast double %a, %b
1433 %e = fadd fast double %d, %c
; Explicit `fmul fast` followed by `fadd fast` on float. Every RUN
; configuration shares the CHECK prefix and is expected to combine the pair
; into a single fmadd on the s registers.
1437 define float @fmul_f32(float %a, float %b, float %c) {
1438 ; CHECK-LABEL: fmul_f32:
1439 ; CHECK: // %bb.0: // %entry
1440 ; CHECK-NEXT: fmadd s0, s0, s1, s2
1443 %d = fmul fast float %a, %b
1444 %e = fadd fast float %d, %c
; Explicit `fmul fast` followed by `fadd fast` on half.
;  * With +fullfp16 (SD and GI): combined into a single half-precision fmadd.
;  * Without fullfp16 (SD and GI): not fused; the operands are converted to
;    single precision, multiplied, rounded back to half, converted again,
;    added, and rounded to half. SD and GI differ only in the order of the
;    first two fcvt instructions.
1448 define half @fmul_f16(half %a, half %b, half %c) {
1449 ; CHECK-SD-NOFP16-LABEL: fmul_f16:
1450 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1451 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
1452 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1453 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
1454 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h2
1455 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1456 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1457 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
1458 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1459 ; CHECK-SD-NOFP16-NEXT: ret
1461 ; CHECK-SD-FP16-LABEL: fmul_f16:
1462 ; CHECK-SD-FP16: // %bb.0: // %entry
1463 ; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
1464 ; CHECK-SD-FP16-NEXT: ret
1466 ; CHECK-GI-NOFP16-LABEL: fmul_f16:
1467 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1468 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
1469 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
1470 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
1471 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h2
1472 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
1473 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
1474 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
1475 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
1476 ; CHECK-GI-NOFP16-NEXT: ret
1478 ; CHECK-GI-FP16-LABEL: fmul_f16:
1479 ; CHECK-GI-FP16: // %bb.0: // %entry
1480 ; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
1481 ; CHECK-GI-FP16-NEXT: ret
1483 %d = fmul fast half %a, %b
1484 %e = fadd fast half %d, %c
; Explicit `fmul fast` + `fadd fast` on <2 x double>. Both selectors are
; expected to fuse the pair into one fmla; they need separate prefixes only
; because the two multiplicand operands appear in the opposite order
; (SD: v1, v0; GI: v0, v1).
1488 define <2 x double> @fmul_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
1489 ; CHECK-SD-LABEL: fmul_v2f64:
1490 ; CHECK-SD: // %bb.0: // %entry
1491 ; CHECK-SD-NEXT: fmla v2.2d, v1.2d, v0.2d
1492 ; CHECK-SD-NEXT: mov v0.16b, v2.16b
1493 ; CHECK-SD-NEXT: ret
1495 ; CHECK-GI-LABEL: fmul_v2f64:
1496 ; CHECK-GI: // %bb.0: // %entry
1497 ; CHECK-GI-NEXT: fmla v2.2d, v0.2d, v1.2d
1498 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
1499 ; CHECK-GI-NEXT: ret
1501 %d = fmul fast <2 x double> %a, %b
1502 %e = fadd fast <2 x double> %d, %c
; Explicit `fmul fast` + `fadd fast` on the non-power-of-two <3 x double>.
; Both selectors pack the first two lanes of each operand into a .2d register
; (mov vN.d[1], ...) and use one fmla for them; the addend for the third lane
; is loaded from [sp].
;  * SD handles the third lane with a second vector fmla (v3.2d).
;  * GI handles the third lane with a scalar fmadd.
1506 define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
1507 ; CHECK-SD-LABEL: fmul_v3f64:
1508 ; CHECK-SD: // %bb.0: // %entry
1509 ; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
1510 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
1511 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1512 ; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
1513 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
1514 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
1515 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
1516 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
1517 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
1518 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
1519 ; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
1520 ; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
1521 ; CHECK-SD-NEXT: ldr d3, [sp]
1522 ; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
1523 ; CHECK-SD-NEXT: fmov d0, d6
1524 ; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
1525 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
1526 ; CHECK-SD-NEXT: fmov d2, d3
1527 ; CHECK-SD-NEXT: ret
1529 ; CHECK-GI-LABEL: fmul_v3f64:
1530 ; CHECK-GI: // %bb.0: // %entry
1531 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1532 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
1533 ; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
1534 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
1535 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
1536 ; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
1537 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
1538 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
1539 ; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
1540 ; CHECK-GI-NEXT: fmla v6.2d, v0.2d, v3.2d
1541 ; CHECK-GI-NEXT: ldr d0, [sp]
1542 ; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
1543 ; CHECK-GI-NEXT: mov d1, v6.d[1]
1544 ; CHECK-GI-NEXT: fmov d0, d6
1545 ; CHECK-GI-NEXT: ret
1547 %d = fmul fast <3 x double> %a, %b
1548 %e = fadd fast <3 x double> %d, %c
; Explicit `fmul fast` + `fadd fast` on <4 x double>. Both selectors are
; expected to emit one fmla per 2 x double register (accumulators v4/v5) and
; then move the results into v0/v1; SD and GI differ only in the order of
; the two multiplicand operands of each fmla.
1552 define <4 x double> @fmul_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
1553 ; CHECK-SD-LABEL: fmul_v4f64:
1554 ; CHECK-SD: // %bb.0: // %entry
1555 ; CHECK-SD-NEXT: fmla v4.2d, v2.2d, v0.2d
1556 ; CHECK-SD-NEXT: fmla v5.2d, v3.2d, v1.2d
1557 ; CHECK-SD-NEXT: mov v0.16b, v4.16b
1558 ; CHECK-SD-NEXT: mov v1.16b, v5.16b
1559 ; CHECK-SD-NEXT: ret
1561 ; CHECK-GI-LABEL: fmul_v4f64:
1562 ; CHECK-GI: // %bb.0: // %entry
1563 ; CHECK-GI-NEXT: fmla v4.2d, v0.2d, v2.2d
1564 ; CHECK-GI-NEXT: fmla v5.2d, v1.2d, v3.2d
1565 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
1566 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
1567 ; CHECK-GI-NEXT: ret
1569 %d = fmul fast <4 x double> %a, %b
1570 %e = fadd fast <4 x double> %d, %c
; Explicit `fmul fast` + `fadd fast` on <2 x float>. Both selectors are
; expected to fuse the pair into a single 2-lane fmla; SD and GI differ only
; in the order of the two multiplicand operands.
1574 define <2 x float> @fmul_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
1575 ; CHECK-SD-LABEL: fmul_v2f32:
1576 ; CHECK-SD: // %bb.0: // %entry
1577 ; CHECK-SD-NEXT: fmla v2.2s, v1.2s, v0.2s
1578 ; CHECK-SD-NEXT: fmov d0, d2
1579 ; CHECK-SD-NEXT: ret
1581 ; CHECK-GI-LABEL: fmul_v2f32:
1582 ; CHECK-GI: // %bb.0: // %entry
1583 ; CHECK-GI-NEXT: fmla v2.2s, v0.2s, v1.2s
1584 ; CHECK-GI-NEXT: fmov d0, d2
1585 ; CHECK-GI-NEXT: ret
1587 %d = fmul fast <2 x float> %a, %b
1588 %e = fadd fast <2 x float> %d, %c
; Explicit `fmul fast` + `fadd fast` on the non-power-of-two <3 x float>.
; Both selectors are expected to emit a single 4-lane (.4s) fmla, the same
; instruction the <4 x float> test checks for; SD and GI differ only in the
; order of the two multiplicand operands.
1592 define <3 x float> @fmul_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
1593 ; CHECK-SD-LABEL: fmul_v3f32:
1594 ; CHECK-SD: // %bb.0: // %entry
1595 ; CHECK-SD-NEXT: fmla v2.4s, v1.4s, v0.4s
1596 ; CHECK-SD-NEXT: mov v0.16b, v2.16b
1597 ; CHECK-SD-NEXT: ret
1599 ; CHECK-GI-LABEL: fmul_v3f32:
1600 ; CHECK-GI: // %bb.0: // %entry
1601 ; CHECK-GI-NEXT: fmla v2.4s, v0.4s, v1.4s
1602 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
1603 ; CHECK-GI-NEXT: ret
1605 %d = fmul fast <3 x float> %a, %b
1606 %e = fadd fast <3 x float> %d, %c
; <4 x float>: `fast` fmul followed by `fast` fadd on a full 128-bit vector.
; Both selectors are expected to emit a single fmla on the .4s arrangement,
; accumulating into v2, and then move v2 into v0.
1610 define <4 x float> @fmul_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
1611 ; CHECK-SD-LABEL: fmul_v4f32:
1612 ; CHECK-SD: // %bb.0: // %entry
1613 ; CHECK-SD-NEXT: fmla v2.4s, v1.4s, v0.4s
1614 ; CHECK-SD-NEXT: mov v0.16b, v2.16b
1615 ; CHECK-SD-NEXT: ret
1617 ; CHECK-GI-LABEL: fmul_v4f32:
1618 ; CHECK-GI: // %bb.0: // %entry
1619 ; CHECK-GI-NEXT: fmla v2.4s, v0.4s, v1.4s
1620 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
1621 ; CHECK-GI-NEXT: ret
1623 %d = fmul fast <4 x float> %a, %b
1624 %e = fadd fast <4 x float> %d, %c
; <8 x float>: `fast` fmul followed by `fast` fadd on a vector that spans two
; 128-bit registers per operand. Both selectors are expected to emit one fmla
; per .4s half (accumulators v4/v5) and then move the results into v0/v1.
1628 define <8 x float> @fmul_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
1629 ; CHECK-SD-LABEL: fmul_v8f32:
1630 ; CHECK-SD: // %bb.0: // %entry
1631 ; CHECK-SD-NEXT: fmla v4.4s, v2.4s, v0.4s
1632 ; CHECK-SD-NEXT: fmla v5.4s, v3.4s, v1.4s
1633 ; CHECK-SD-NEXT: mov v0.16b, v4.16b
1634 ; CHECK-SD-NEXT: mov v1.16b, v5.16b
1635 ; CHECK-SD-NEXT: ret
1637 ; CHECK-GI-LABEL: fmul_v8f32:
1638 ; CHECK-GI: // %bb.0: // %entry
1639 ; CHECK-GI-NEXT: fmla v4.4s, v0.4s, v2.4s
1640 ; CHECK-GI-NEXT: fmla v5.4s, v1.4s, v3.4s
1641 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
1642 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
1643 ; CHECK-GI-NEXT: ret
1645 %d = fmul fast <8 x float> %a, %b
1646 %e = fadd fast <8 x float> %d, %c
; <7 x half>: `fast` fmul followed by `fast` fadd on a non-power-of-two half
; vector. With +fullfp16 both selectors are expected to emit one fmla on the
; .8h arrangement. Without +fullfp16 the expected code contains no fused
; operation: operands are converted to single precision, multiplied, converted
; back to half, converted to single again, added, and converted back to half.
;
; Default selector, no +fullfp16: every lane is extracted with `mov hN, vM.h[i]`
; and handled with scalar fcvt/fmul/fadd; lane 7 of the registers is processed
; as well, and the result is assembled in v1 before being moved to v0.
1650 define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
1651 ; CHECK-SD-NOFP16-LABEL: fmul_v7f16:
1652 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1653 ; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
1654 ; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1]
1655 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h1
1656 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h0
1657 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
1658 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[2]
1659 ; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[3]
1660 ; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
1661 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
1662 ; CHECK-SD-NOFP16-NEXT: mov h20, v0.h[5]
1663 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h2
1664 ; CHECK-SD-NOFP16-NEXT: mov h22, v2.h[2]
1665 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1666 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1667 ; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5
1668 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h16
1669 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
1670 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
1671 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
1672 ; CHECK-SD-NOFP16-NEXT: fmul s3, s4, s3
1673 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h7
1674 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
1675 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[1]
1676 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
1677 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
1678 ; CHECK-SD-NOFP16-NEXT: fmul s4, s6, s4
1679 ; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[5]
1680 ; CHECK-SD-NOFP16-NEXT: fmul s7, s16, s7
1681 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h17
1682 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
1683 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
1684 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h19
1685 ; CHECK-SD-NOFP16-NEXT: mov h19, v2.h[3]
1686 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1687 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
1688 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
1689 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
1690 ; CHECK-SD-NOFP16-NEXT: fadd s5, s5, s21
1691 ; CHECK-SD-NOFP16-NEXT: mov h21, v0.h[6]
1692 ; CHECK-SD-NOFP16-NEXT: fmul s17, s18, s17
1693 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
1694 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
1695 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
1696 ; CHECK-SD-NOFP16-NEXT: fadd s3, s3, s16
1697 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h22
1698 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1699 ; CHECK-SD-NOFP16-NEXT: fmul s6, s20, s6
1700 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
1701 ; CHECK-SD-NOFP16-NEXT: mov h20, v1.h[7]
1702 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s5
1703 ; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[4]
1704 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1705 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
1706 ; CHECK-SD-NOFP16-NEXT: fadd s4, s4, s16
1707 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
1708 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
1709 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h21
1710 ; CHECK-SD-NOFP16-NEXT: fadd s7, s7, s19
1711 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
1712 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
1713 ; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
1714 ; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[5]
1715 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
1716 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
1717 ; CHECK-SD-NOFP16-NEXT: fmul s17, s18, s17
1718 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h20
1719 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
1720 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
1721 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1722 ; CHECK-SD-NOFP16-NEXT: fadd s5, s16, s5
1723 ; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v4.h[0]
1724 ; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[6]
1725 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s18
1726 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
1727 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
1728 ; CHECK-SD-NOFP16-NEXT: fadd s3, s6, s3
1729 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
1730 ; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v7.h[0]
1731 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1732 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1733 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h16
1734 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
1735 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
1736 ; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v5.h[0]
1737 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1738 ; CHECK-SD-NOFP16-NEXT: fadd s4, s6, s4
1739 ; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
1740 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
1741 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
1742 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1743 ; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v3.h[0]
1744 ; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
1745 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
1746 ; CHECK-SD-NOFP16-NEXT: ret
; Default selector with +fullfp16: a single fmla on .8h.
1748 ; CHECK-SD-FP16-LABEL: fmul_v7f16:
1749 ; CHECK-SD-FP16: // %bb.0: // %entry
1750 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
1751 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
1752 ; CHECK-SD-FP16-NEXT: ret
; -global-isel, no +fullfp16: lanes 0-3 go through fcvtl / fmul .4s / fcvtn /
; fcvtl / fadd .4s / fcvtn directly; lanes 4-6 are first extracted and repacked
; into a separate 4-lane vector and then take the same path. The fourth lane of
; each repacked vector, and lane 7 of the final result, are filled from
; v0.h[0] - presumably a don't-care value for the padding lane (TODO confirm).
1754 ; CHECK-GI-NOFP16-LABEL: fmul_v7f16:
1755 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1756 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[4]
1757 ; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[5]
1758 ; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[4]
1759 ; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[5]
1760 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v0.4h
1761 ; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h
1762 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
1763 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
1764 ; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0]
1765 ; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v6.h[0]
1766 ; CHECK-GI-NOFP16-NEXT: fmul v4.4s, v7.4s, v16.4s
1767 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v2.4h
1768 ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v0.h[0]
1769 ; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v1.h[0]
1770 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v4.4s
1771 ; CHECK-GI-NOFP16-NEXT: mov h1, v2.h[4]
1772 ; CHECK-GI-NOFP16-NEXT: mov h4, v2.h[5]
1773 ; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[6]
1774 ; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v0.h[0]
1775 ; CHECK-GI-NOFP16-NEXT: mov v5.h[3], v0.h[0]
1776 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1777 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v4.h[0]
1778 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
1779 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v5.4h
1780 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v6.4s
1781 ; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
1782 ; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v3.4s, v4.4s
1783 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1784 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
1785 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
1786 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
1787 ; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
1788 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
1789 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1790 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
1791 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[0]
1792 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
1793 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
1794 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1795 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
1796 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
1797 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
1798 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
1799 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
1800 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
1801 ; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
1802 ; CHECK-GI-NOFP16-NEXT: ret
; -global-isel with +fullfp16: a single fmla on .8h.
1804 ; CHECK-GI-FP16-LABEL: fmul_v7f16:
1805 ; CHECK-GI-FP16: // %bb.0: // %entry
1806 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v0.8h, v1.8h
1807 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
1808 ; CHECK-GI-FP16-NEXT: ret
1810 %d = fmul fast <7 x half> %a, %b
1811 %e = fadd fast <7 x half> %d, %c
; <4 x half>: `fast` fmul followed by `fast` fadd on a 64-bit half vector.
; With +fullfp16 both selectors are expected to emit one fmla on .4h and copy
; d2 into d0. Without +fullfp16 both selectors are expected to stay vectorised
; but unfused: fcvtl to .4s, fmul, fcvtn back to .4h, fcvtl again, fadd, fcvtn.
; The two no-fullfp16 sequences differ only in the order of the first two
; fcvtl instructions.
1815 define <4 x half> @fmul_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
1816 ; CHECK-SD-NOFP16-LABEL: fmul_v4f16:
1817 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1818 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1819 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1820 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1821 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1822 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1823 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1824 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1825 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1826 ; CHECK-SD-NOFP16-NEXT: ret
1828 ; CHECK-SD-FP16-LABEL: fmul_v4f16:
1829 ; CHECK-SD-FP16: // %bb.0: // %entry
1830 ; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
1831 ; CHECK-SD-FP16-NEXT: fmov d0, d2
1832 ; CHECK-SD-FP16-NEXT: ret
1834 ; CHECK-GI-NOFP16-LABEL: fmul_v4f16:
1835 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1836 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1837 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1838 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1839 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1840 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1841 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1842 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1843 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1844 ; CHECK-GI-NOFP16-NEXT: ret
1846 ; CHECK-GI-FP16-LABEL: fmul_v4f16:
1847 ; CHECK-GI-FP16: // %bb.0: // %entry
1848 ; CHECK-GI-FP16-NEXT: fmla v2.4h, v0.4h, v1.4h
1849 ; CHECK-GI-FP16-NEXT: fmov d0, d2
1850 ; CHECK-GI-FP16-NEXT: ret
1852 %d = fmul fast <4 x half> %a, %b
1853 %e = fadd fast <4 x half> %d, %c
; <8 x half>: `fast` fmul followed by `fast` fadd on a full 128-bit half
; vector. With +fullfp16 both selectors are expected to emit one fmla on .8h.
; Without +fullfp16 neither selector fuses the operations: the product is
; converted back to half and re-extended to single before the addition.
;
; Default selector, no +fullfp16: each lane is extracted with
; `mov hN, vM.h[i]` and handled with scalar fcvt/fmul/fadd; the result is
; assembled lane by lane in v1 and then moved to v0.
1857 define <8 x half> @fmul_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
1858 ; CHECK-SD-NOFP16-LABEL: fmul_v8f16:
1859 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1860 ; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
1861 ; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[1]
1862 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h1
1863 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h0
1864 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
1865 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[2]
1866 ; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[3]
1867 ; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
1868 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
1869 ; CHECK-SD-NOFP16-NEXT: mov h20, v0.h[5]
1870 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h2
1871 ; CHECK-SD-NOFP16-NEXT: mov h22, v2.h[2]
1872 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1873 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1874 ; CHECK-SD-NOFP16-NEXT: fmul s5, s6, s5
1875 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h16
1876 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
1877 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
1878 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
1879 ; CHECK-SD-NOFP16-NEXT: fmul s3, s4, s3
1880 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h7
1881 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
1882 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[1]
1883 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
1884 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
1885 ; CHECK-SD-NOFP16-NEXT: fmul s4, s6, s4
1886 ; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[5]
1887 ; CHECK-SD-NOFP16-NEXT: fmul s7, s16, s7
1888 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h17
1889 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
1890 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
1891 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h19
1892 ; CHECK-SD-NOFP16-NEXT: mov h19, v2.h[3]
1893 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1894 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
1895 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
1896 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
1897 ; CHECK-SD-NOFP16-NEXT: fadd s5, s5, s21
1898 ; CHECK-SD-NOFP16-NEXT: mov h21, v0.h[6]
1899 ; CHECK-SD-NOFP16-NEXT: fmul s17, s18, s17
1900 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
1901 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
1902 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
1903 ; CHECK-SD-NOFP16-NEXT: fadd s3, s3, s16
1904 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h22
1905 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1906 ; CHECK-SD-NOFP16-NEXT: fmul s6, s20, s6
1907 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
1908 ; CHECK-SD-NOFP16-NEXT: mov h20, v1.h[7]
1909 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s5
1910 ; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[4]
1911 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1912 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
1913 ; CHECK-SD-NOFP16-NEXT: fadd s4, s4, s16
1914 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
1915 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
1916 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h21
1917 ; CHECK-SD-NOFP16-NEXT: fadd s7, s7, s19
1918 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
1919 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
1920 ; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
1921 ; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[5]
1922 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
1923 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
1924 ; CHECK-SD-NOFP16-NEXT: fmul s17, s18, s17
1925 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h20
1926 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
1927 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
1928 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
1929 ; CHECK-SD-NOFP16-NEXT: fadd s5, s16, s5
1930 ; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v4.h[0]
1931 ; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[6]
1932 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s18
1933 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
1934 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
1935 ; CHECK-SD-NOFP16-NEXT: fadd s3, s6, s3
1936 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
1937 ; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v7.h[0]
1938 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
1939 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1940 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h16
1941 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
1942 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
1943 ; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v5.h[0]
1944 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1945 ; CHECK-SD-NOFP16-NEXT: fadd s4, s6, s4
1946 ; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
1947 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
1948 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
1949 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1950 ; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v3.h[0]
1951 ; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
1952 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
1953 ; CHECK-SD-NOFP16-NEXT: ret
; Default selector with +fullfp16: a single fmla on .8h.
1955 ; CHECK-SD-FP16-LABEL: fmul_v8f16:
1956 ; CHECK-SD-FP16: // %bb.0: // %entry
1957 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
1958 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
1959 ; CHECK-SD-FP16-NEXT: ret
; -global-isel, no +fullfp16: stays vectorised. The low and high halves are
; widened with fcvtl / fcvtl2, multiplied as .4s, narrowed with fcvtn, widened
; again, added, and finally narrowed into v0 with fcvtn / fcvtn2.
1961 ; CHECK-GI-NOFP16-LABEL: fmul_v8f16:
1962 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1963 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
1964 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
1965 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1966 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1967 ; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
1968 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1969 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
1970 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v2.4h
1971 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1972 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1973 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1974 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1975 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s
1976 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s
1977 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
1978 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
1979 ; CHECK-GI-NOFP16-NEXT: ret
; -global-isel with +fullfp16: a single fmla on .8h.
1981 ; CHECK-GI-FP16-LABEL: fmul_v8f16:
1982 ; CHECK-GI-FP16: // %bb.0: // %entry
1983 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v0.8h, v1.8h
1984 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
1985 ; CHECK-GI-FP16-NEXT: ret
1987 %d = fmul fast <8 x half> %a, %b
1988 %e = fadd fast <8 x half> %d, %c
; <16 x half>: `fast` fmul followed by `fast` fadd on a vector that spans two
; 128-bit registers per operand (a in v0/v1, b in v2/v3, c in v4/v5 according
; to the expected fmla operands). With +fullfp16 both selectors are expected to
; emit one fmla per .8h half. Without +fullfp16 neither selector fuses the
; operations: the product is converted back to half and re-extended to single
; before the addition.
;
; Default selector, no +fullfp16: lanes are extracted individually and handled
; with scalar fcvt/fmul/fadd. The expected code saves d8-d11 in a 32-byte
; stack area on entry (with matching CFI directives) and reloads them before
; returning; the d9/d8 reload appears in the middle of the body.
1992 define <16 x half> @fmul_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
1993 ; CHECK-SD-NOFP16-LABEL: fmul_v16f16:
1994 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1995 ; CHECK-SD-NOFP16-NEXT: stp d11, d10, [sp, #-32]! // 16-byte Folded Spill
1996 ; CHECK-SD-NOFP16-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
1997 ; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 32
1998 ; CHECK-SD-NOFP16-NEXT: .cfi_offset b8, -8
1999 ; CHECK-SD-NOFP16-NEXT: .cfi_offset b9, -16
2000 ; CHECK-SD-NOFP16-NEXT: .cfi_offset b10, -24
2001 ; CHECK-SD-NOFP16-NEXT: .cfi_offset b11, -32
2002 ; CHECK-SD-NOFP16-NEXT: mov h6, v3.h[7]
2003 ; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[7]
2004 ; CHECK-SD-NOFP16-NEXT: mov h20, v3.h[6]
2005 ; CHECK-SD-NOFP16-NEXT: mov h22, v1.h[6]
2006 ; CHECK-SD-NOFP16-NEXT: mov h23, v3.h[5]
2007 ; CHECK-SD-NOFP16-NEXT: mov h24, v3.h[2]
2008 ; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[2]
2009 ; CHECK-SD-NOFP16-NEXT: mov h17, v3.h[3]
2010 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
2011 ; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[1]
2012 ; CHECK-SD-NOFP16-NEXT: mov h28, v1.h[1]
2013 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
2014 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
2015 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h16
2016 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h20
2017 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h22
2018 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h23
2019 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h24
2020 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h26
2021 ; CHECK-SD-NOFP16-NEXT: mov h26, v0.h[1]
2022 ; CHECK-SD-NOFP16-NEXT: mov h19, v3.h[4]
2023 ; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[4]
2024 ; CHECK-SD-NOFP16-NEXT: fcvt s29, h3
2025 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
2026 ; CHECK-SD-NOFP16-NEXT: fmul s6, s25, s6
2027 ; CHECK-SD-NOFP16-NEXT: mov h25, v2.h[1]
2028 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h27
2029 ; CHECK-SD-NOFP16-NEXT: fmul s3, s22, s21
2030 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h28
2031 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
2032 ; CHECK-SD-NOFP16-NEXT: fmul s23, s24, s23
2033 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
2034 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h2
2035 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h0
2036 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
2037 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
2038 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h25
2039 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
2040 ; CHECK-SD-NOFP16-NEXT: fmul s26, s1, s29
2041 ; CHECK-SD-NOFP16-NEXT: fmul s27, s28, s27
2042 ; CHECK-SD-NOFP16-NEXT: mov h28, v2.h[7]
2043 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
2044 ; CHECK-SD-NOFP16-NEXT: fmul s1, s18, s17
2045 ; CHECK-SD-NOFP16-NEXT: fcvt h17, s23
2046 ; CHECK-SD-NOFP16-NEXT: mov h29, v4.h[1]
2047 ; CHECK-SD-NOFP16-NEXT: fmul s21, s22, s21
2048 ; CHECK-SD-NOFP16-NEXT: fmul s16, s16, s19
2049 ; CHECK-SD-NOFP16-NEXT: mov h8, v2.h[6]
2050 ; CHECK-SD-NOFP16-NEXT: fmul s23, s25, s24
2051 ; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[2]
2052 ; CHECK-SD-NOFP16-NEXT: mov h25, v0.h[2]
2053 ; CHECK-SD-NOFP16-NEXT: fmul s7, s7, s20
2054 ; CHECK-SD-NOFP16-NEXT: fcvt h18, s26
2055 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s27
2056 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h28
2057 ; CHECK-SD-NOFP16-NEXT: mov h26, v2.h[3]
2058 ; CHECK-SD-NOFP16-NEXT: mov h27, v0.h[3]
2059 ; CHECK-SD-NOFP16-NEXT: fcvt h21, s21
2060 ; CHECK-SD-NOFP16-NEXT: mov h20, v0.h[7]
2061 ; CHECK-SD-NOFP16-NEXT: mov h11, v2.h[4]
2062 ; CHECK-SD-NOFP16-NEXT: fcvt h28, s23
2063 ; CHECK-SD-NOFP16-NEXT: fcvt s30, h24
2064 ; CHECK-SD-NOFP16-NEXT: fcvt s31, h25
2065 ; CHECK-SD-NOFP16-NEXT: mov h24, v0.h[4]
2066 ; CHECK-SD-NOFP16-NEXT: fcvt s29, h29
2067 ; CHECK-SD-NOFP16-NEXT: mov h9, v0.h[6]
2068 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
2069 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
2070 ; CHECK-SD-NOFP16-NEXT: mov h10, v2.h[5]
2071 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
2072 ; CHECK-SD-NOFP16-NEXT: mov h23, v0.h[5]
2073 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h11
2074 ; CHECK-SD-NOFP16-NEXT: fmul s27, s31, s30
2075 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h28
2076 ; CHECK-SD-NOFP16-NEXT: fcvt s30, h21
2077 ; CHECK-SD-NOFP16-NEXT: fcvt s31, h4
2078 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h8
2079 ; CHECK-SD-NOFP16-NEXT: mov h8, v5.h[1]
2080 ; CHECK-SD-NOFP16-NEXT: fmul s25, s26, s25
2081 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h24
2082 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
2083 ; CHECK-SD-NOFP16-NEXT: fmul s2, s20, s22
2084 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h9
2085 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h10
2086 ; CHECK-SD-NOFP16-NEXT: fadd s26, s28, s29
2087 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h23
2088 ; CHECK-SD-NOFP16-NEXT: fcvt h27, s27
2089 ; CHECK-SD-NOFP16-NEXT: fadd s28, s30, s31
2090 ; CHECK-SD-NOFP16-NEXT: mov h29, v4.h[2]
2091 ; CHECK-SD-NOFP16-NEXT: mov h30, v5.h[2]
2092 ; CHECK-SD-NOFP16-NEXT: fmul s24, s24, s0
2093 ; CHECK-SD-NOFP16-NEXT: fcvt s31, h8
2094 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
2095 ; CHECK-SD-NOFP16-NEXT: fmul s21, s22, s21
2096 ; CHECK-SD-NOFP16-NEXT: fcvt s8, h5
2097 ; CHECK-SD-NOFP16-NEXT: fcvt h25, s25
2098 ; CHECK-SD-NOFP16-NEXT: fmul s20, s23, s20
2099 ; CHECK-SD-NOFP16-NEXT: fcvt h26, s26
2100 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h27
2101 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s28
2102 ; CHECK-SD-NOFP16-NEXT: mov h28, v4.h[3]
2103 ; CHECK-SD-NOFP16-NEXT: fcvt s29, h29
2104 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
2105 ; CHECK-SD-NOFP16-NEXT: fcvt s30, h30
2106 ; CHECK-SD-NOFP16-NEXT: fadd s19, s19, s31
2107 ; CHECK-SD-NOFP16-NEXT: fadd s18, s18, s8
2108 ; CHECK-SD-NOFP16-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
2109 ; CHECK-SD-NOFP16-NEXT: fcvt h22, s1
2110 ; CHECK-SD-NOFP16-NEXT: mov h23, v5.h[3]
2111 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h25
2112 ; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v26.h[0]
2113 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h28
2114 ; CHECK-SD-NOFP16-NEXT: fadd s27, s27, s29
2115 ; CHECK-SD-NOFP16-NEXT: fcvt h24, s24
2116 ; CHECK-SD-NOFP16-NEXT: fadd s17, s17, s30
2117 ; CHECK-SD-NOFP16-NEXT: mov h28, v4.h[4]
2118 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
2119 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s18
2120 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
2121 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h22
2122 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h23
2123 ; CHECK-SD-NOFP16-NEXT: fcvt h20, s20
2124 ; CHECK-SD-NOFP16-NEXT: fadd s23, s25, s26
2125 ; CHECK-SD-NOFP16-NEXT: mov h25, v5.h[4]
2126 ; CHECK-SD-NOFP16-NEXT: fcvt h26, s27
2127 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h24
2128 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
2129 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
2130 ; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v19.h[0]
2131 ; CHECK-SD-NOFP16-NEXT: mov h19, v4.h[5]
2132 ; CHECK-SD-NOFP16-NEXT: fcvt h17, s17
2133 ; CHECK-SD-NOFP16-NEXT: fadd s18, s18, s22
2134 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
2135 ; CHECK-SD-NOFP16-NEXT: fcvt h21, s21
2136 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h25
2137 ; CHECK-SD-NOFP16-NEXT: mov h25, v5.h[5]
2138 ; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v26.h[0]
2139 ; CHECK-SD-NOFP16-NEXT: fcvt h23, s23
2140 ; CHECK-SD-NOFP16-NEXT: fadd s24, s24, s27
2141 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
2142 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
2143 ; CHECK-SD-NOFP16-NEXT: mov h26, v4.h[6]
2144 ; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v17.h[0]
2145 ; CHECK-SD-NOFP16-NEXT: fcvt h17, s18
2146 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
2147 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
2148 ; CHECK-SD-NOFP16-NEXT: fadd s16, s16, s22
2149 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h25
2150 ; CHECK-SD-NOFP16-NEXT: mov h22, v5.h[6]
2151 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
2152 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
2153 ; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v23.h[0]
2154 ; CHECK-SD-NOFP16-NEXT: fcvt h23, s24
2155 ; CHECK-SD-NOFP16-NEXT: fadd s19, s20, s19
2156 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h21
2157 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h26
2158 ; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7]
2159 ; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v17.h[0]
2160 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
2161 ; CHECK-SD-NOFP16-NEXT: fadd s7, s7, s18
2162 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
2163 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h22
2164 ; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7]
2165 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
2166 ; CHECK-SD-NOFP16-NEXT: mov v0.h[4], v23.h[0]
2167 ; CHECK-SD-NOFP16-NEXT: fcvt h18, s19
2168 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
2169 ; CHECK-SD-NOFP16-NEXT: fadd s19, s20, s21
2170 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
2171 ; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v16.h[0]
2172 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
2173 ; CHECK-SD-NOFP16-NEXT: fadd s3, s3, s17
2174 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
2175 ; CHECK-SD-NOFP16-NEXT: mov v0.h[5], v18.h[0]
2176 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s19
2177 ; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s4
2178 ; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v7.h[0]
2179 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
2180 ; CHECK-SD-NOFP16-NEXT: fadd s4, s6, s5
2181 ; CHECK-SD-NOFP16-NEXT: mov v0.h[6], v16.h[0]
2182 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
2183 ; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v3.h[0]
2184 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
2185 ; CHECK-SD-NOFP16-NEXT: mov v0.h[7], v2.h[0]
2186 ; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v3.h[0]
2187 ; CHECK-SD-NOFP16-NEXT: ldp d11, d10, [sp], #32 // 16-byte Folded Reload
2188 ; CHECK-SD-NOFP16-NEXT: ret
; Default selector with +fullfp16: one fmla per .8h half.
2190 ; CHECK-SD-FP16-LABEL: fmul_v16f16:
2191 ; CHECK-SD-FP16: // %bb.0: // %entry
2192 ; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
2193 ; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
2194 ; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
2195 ; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
2196 ; CHECK-SD-FP16-NEXT: ret
; -global-isel, no +fullfp16: stays vectorised and uses no stack. Each of the
; four 4-lane quarters is widened with fcvtl / fcvtl2, multiplied as .4s,
; narrowed with fcvtn, widened again, added, and narrowed into v0/v1 with
; fcvtn / fcvtn2.
2198 ; CHECK-GI-NOFP16-LABEL: fmul_v16f16:
2199 ; CHECK-GI-NOFP16: // %bb.0: // %entry
2200 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
2201 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v2.4h
2202 ; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h
2203 ; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
2204 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
2205 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
2206 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
2207 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
2208 ; CHECK-GI-NOFP16-NEXT: fmul v6.4s, v6.4s, v7.4s
2209 ; CHECK-GI-NOFP16-NEXT: fmul v7.4s, v16.4s, v17.4s
2210 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
2211 ; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
2212 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v6.4s
2213 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v4.4h
2214 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
2215 ; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v7.4s
2216 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v5.4h
2217 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
2218 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
2219 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
2220 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
2221 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
2222 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
2223 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
2224 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v6.4s
2225 ; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v3.4s, v7.4s
2226 ; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v0.4s, v4.4s
2227 ; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v1.4s, v5.4s
2228 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
2229 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
2230 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
2231 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
2232 ; CHECK-GI-NOFP16-NEXT: ret
; -global-isel with +fullfp16: one fmla per .8h half.
2234 ; CHECK-GI-FP16-LABEL: fmul_v16f16:
2235 ; CHECK-GI-FP16: // %bb.0: // %entry
2236 ; CHECK-GI-FP16-NEXT: fmla v4.8h, v0.8h, v2.8h
2237 ; CHECK-GI-FP16-NEXT: fmla v5.8h, v1.8h, v3.8h
2238 ; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
2239 ; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
2240 ; CHECK-GI-FP16-NEXT: ret
2242 %d = fmul fast <16 x half> %a, %b
2243 %e = fadd fast <16 x half> %d, %c
; Declarations of the llvm.fma.* and llvm.fmuladd.* intrinsics, one pair per
; scalar and vector floating-point type (half, float, double and their vector
; forms). Intrinsics of this family are called by test functions elsewhere in
; this file (e.g. @llvm.fma.f64 in fma_f64).
2247 declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
2248 declare <16 x half> @llvm.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>)
2249 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
2250 declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
2251 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
2252 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
2253 declare <3 x double> @llvm.fma.v3f64(<3 x double>, <3 x double>, <3 x double>)
2254 declare <3 x double> @llvm.fmuladd.v3f64(<3 x double>, <3 x double>, <3 x double>)
2255 declare <3 x float> @llvm.fma.v3f32(<3 x float>, <3 x float>, <3 x float>)
2256 declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>)
2257 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
2258 declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)
2259 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
2260 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
2261 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
2262 declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>)
2263 declare <7 x half> @llvm.fma.v7f16(<7 x half>, <7 x half>, <7 x half>)
2264 declare <7 x half> @llvm.fmuladd.v7f16(<7 x half>, <7 x half>, <7 x half>)
2265 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
2266 declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)
2267 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
2268 declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
2269 declare double @llvm.fma.f64(double, double, double)
2270 declare double @llvm.fmuladd.f64(double, double, double)
2271 declare float @llvm.fma.f32(float, float, float)
2272 declare float @llvm.fmuladd.f32(float, float, float)
2273 declare half @llvm.fma.f16(half, half, half)
2274 declare half @llvm.fmuladd.f16(half, half, half)