1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4 ; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
; Scalar f64 llvm.fma. All four llc configurations share one expectation:
; a single fmadd on d-registers.
7 define double @fma_f64(double %a, double %b, double %c) {
8 ; CHECK-LABEL: fma_f64:
9 ; CHECK: // %bb.0: // %entry
10 ; CHECK-NEXT: fmadd d0, d0, d1, d2
13 %d = call double @llvm.fma.f64(double %a, double %b, double %c)
; Scalar f32 llvm.fma. All four llc configurations share one expectation:
; a single fmadd on s-registers.
17 define float @fma_f32(float %a, float %b, float %c) {
18 ; CHECK-LABEL: fma_f32:
19 ; CHECK: // %bb.0: // %entry
20 ; CHECK-NEXT: fmadd s0, s0, s1, s2
23 %d = call float @llvm.fma.f32(float %a, float %b, float %c)
; Scalar f16 llvm.fma.
; Without +fullfp16, both instruction selectors expect the three operands to
; be widened with fcvt, one fmadd on s-registers, and an fcvt back to half;
; the two selectors differ only in the order of the widening fcvt instructions.
; With +fullfp16, both selectors expect a single fmadd on h-registers.
27 define half @fma_f16(half %a, half %b, half %c) {
28 ; CHECK-SD-NOFP16-LABEL: fma_f16:
29 ; CHECK-SD-NOFP16: // %bb.0: // %entry
30 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
31 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
32 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
33 ; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
34 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
35 ; CHECK-SD-NOFP16-NEXT: ret
37 ; CHECK-SD-FP16-LABEL: fma_f16:
38 ; CHECK-SD-FP16: // %bb.0: // %entry
39 ; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
40 ; CHECK-SD-FP16-NEXT: ret
42 ; CHECK-GI-NOFP16-LABEL: fma_f16:
43 ; CHECK-GI-NOFP16: // %bb.0: // %entry
44 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
45 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
46 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
47 ; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s1, s2
48 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
49 ; CHECK-GI-NOFP16-NEXT: ret
51 ; CHECK-GI-FP16-LABEL: fma_f16:
52 ; CHECK-GI-FP16: // %bb.0: // %entry
53 ; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
54 ; CHECK-GI-FP16-NEXT: ret
56 %d = call half @llvm.fma.f16(half %a, half %b, half %c)
; <2 x double> llvm.fma. All four llc configurations expect one 2d fmla that
; accumulates into v2, followed by a vector mov of v2 into v0.
60 define <2 x double> @fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
61 ; CHECK-LABEL: fma_v2f64:
62 ; CHECK: // %bb.0: // %entry
63 ; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d
64 ; CHECK-NEXT: mov v0.16b, v2.16b
67 %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; <3 x double> llvm.fma; expectations are split by instruction selector only.
; Without -global-isel: d-registers are paired into 2-lane vectors with
; mov-lane inserts and combined by one 2d fmla; a second 2d fmla uses an
; accumulator loaded from [sp].
; With -global-isel: the same first 2d fmla, then a scalar fmadd on
; d-registers whose addend is loaded from [sp].
71 define <3 x double> @fma_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
72 ; CHECK-SD-LABEL: fma_v3f64:
73 ; CHECK-SD: // %bb.0: // %entry
74 ; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
75 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
76 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
77 ; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
78 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
79 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
80 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
81 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
82 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
83 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
84 ; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
85 ; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
86 ; CHECK-SD-NEXT: ldr d3, [sp]
87 ; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
88 ; CHECK-SD-NEXT: fmov d0, d6
89 ; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
90 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
91 ; CHECK-SD-NEXT: fmov d2, d3
94 ; CHECK-GI-LABEL: fma_v3f64:
95 ; CHECK-GI: // %bb.0: // %entry
96 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
97 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
98 ; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
99 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
100 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
101 ; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
102 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
103 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
104 ; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
105 ; CHECK-GI-NEXT: fmla v6.2d, v3.2d, v0.2d
106 ; CHECK-GI-NEXT: ldr d0, [sp]
107 ; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
108 ; CHECK-GI-NEXT: mov d1, v6.d[1]
109 ; CHECK-GI-NEXT: fmov d0, d6
112 %d = call <3 x double> @llvm.fma.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c)
; <4 x double> llvm.fma. All four llc configurations expect two 2d fmla
; instructions (accumulating into v4 and v5) followed by two vector movs
; into v0 and v1.
116 define <4 x double> @fma_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
117 ; CHECK-LABEL: fma_v4f64:
118 ; CHECK: // %bb.0: // %entry
119 ; CHECK-NEXT: fmla v4.2d, v2.2d, v0.2d
120 ; CHECK-NEXT: fmla v5.2d, v3.2d, v1.2d
121 ; CHECK-NEXT: mov v0.16b, v4.16b
122 ; CHECK-NEXT: mov v1.16b, v5.16b
125 %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
; <2 x float> llvm.fma. All four llc configurations expect one 2s fmla that
; accumulates into v2, followed by an fmov of d2 into d0.
129 define <2 x float> @fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
130 ; CHECK-LABEL: fma_v2f32:
131 ; CHECK: // %bb.0: // %entry
132 ; CHECK-NEXT: fmla v2.2s, v1.2s, v0.2s
133 ; CHECK-NEXT: fmov d0, d2
136 %d = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
; <3 x float> llvm.fma. All four llc configurations expect the 3-element
; vector to be handled by one full 4s fmla into v2, followed by a vector mov
; of v2 into v0.
140 define <3 x float> @fma_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
141 ; CHECK-LABEL: fma_v3f32:
142 ; CHECK: // %bb.0: // %entry
143 ; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
144 ; CHECK-NEXT: mov v0.16b, v2.16b
147 %d = call <3 x float> @llvm.fma.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
; <4 x float> llvm.fma. All four llc configurations expect one 4s fmla that
; accumulates into v2, followed by a vector mov of v2 into v0.
151 define <4 x float> @fma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
152 ; CHECK-LABEL: fma_v4f32:
153 ; CHECK: // %bb.0: // %entry
154 ; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
155 ; CHECK-NEXT: mov v0.16b, v2.16b
158 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; <8 x float> llvm.fma. All four llc configurations expect two 4s fmla
; instructions (accumulating into v4 and v5) followed by two vector movs
; into v0 and v1.
162 define <8 x float> @fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
163 ; CHECK-LABEL: fma_v8f32:
164 ; CHECK: // %bb.0: // %entry
165 ; CHECK-NEXT: fmla v4.4s, v2.4s, v0.4s
166 ; CHECK-NEXT: fmla v5.4s, v3.4s, v1.4s
167 ; CHECK-NEXT: mov v0.16b, v4.16b
168 ; CHECK-NEXT: mov v1.16b, v5.16b
171 %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
; <7 x half> llvm.fma; expectations differ per selector and per +fullfp16.
; Without -global-isel and without +fullfp16: fully scalarized. Each of the
; eight h-lanes (0-7) is extracted, widened with fcvt, combined by a scalar
; fmadd on s-registers, narrowed with fcvt and inserted into v3, which is
; finally moved into v0.
; With -global-isel and without +fullfp16: the low four lanes are widened with
; fcvtl and combined by one 4s fmla; lanes 4-6 are extracted, repacked, widened
; and combined by a second 4s fmla; results are narrowed with fcvtn and
; inserted into lanes 0-6 of v0.
; With +fullfp16 (either selector): a single 8h fmla plus a vector mov.
175 define <7 x half> @fma_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
176 ; CHECK-SD-NOFP16-LABEL: fma_v7f16:
177 ; CHECK-SD-NOFP16: // %bb.0: // %entry
178 ; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
179 ; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
180 ; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
181 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
182 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
183 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
184 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
185 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
186 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
187 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
188 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
189 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
190 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
191 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
192 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
193 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
194 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
195 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3]
196 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3
197 ; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3]
198 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
199 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7
200 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
201 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h18
202 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h19
203 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
204 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
205 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
206 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
207 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
208 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
209 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
210 ; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0]
211 ; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5]
212 ; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5
213 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
214 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5]
215 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
216 ; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0]
217 ; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6]
218 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
219 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
220 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
221 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
222 ; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
223 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
224 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
225 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
226 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
227 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
228 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4
229 ; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0]
230 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
231 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h18
232 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
233 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
234 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
235 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
236 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
237 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
238 ; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5
239 ; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0]
240 ; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
241 ; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
242 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
243 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
244 ; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
245 ; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
246 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
247 ; CHECK-SD-NOFP16-NEXT: ret
249 ; CHECK-SD-FP16-LABEL: fma_v7f16:
250 ; CHECK-SD-FP16: // %bb.0: // %entry
251 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
252 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
253 ; CHECK-SD-FP16-NEXT: ret
255 ; CHECK-GI-NOFP16-LABEL: fma_v7f16:
256 ; CHECK-GI-NOFP16: // %bb.0: // %entry
257 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
258 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
259 ; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v2.4h
260 ; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
261 ; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
262 ; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[4]
263 ; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
264 ; CHECK-GI-NOFP16-NEXT: mov h18, v2.h[4]
265 ; CHECK-GI-NOFP16-NEXT: mov h19, v2.h[5]
266 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
267 ; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[6]
268 ; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v4.4s, v3.4s
269 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
270 ; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v7.h[0]
271 ; CHECK-GI-NOFP16-NEXT: mov v16.h[1], v17.h[0]
272 ; CHECK-GI-NOFP16-NEXT: mov v18.h[1], v19.h[0]
273 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v5.4s
274 ; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v3.h[0]
275 ; CHECK-GI-NOFP16-NEXT: mov v16.h[2], v1.h[0]
276 ; CHECK-GI-NOFP16-NEXT: mov v18.h[2], v2.h[0]
277 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
278 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[2]
279 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v6.4h
280 ; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[3]
281 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v16.4h
282 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v18.4h
283 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
284 ; CHECK-GI-NOFP16-NEXT: fmla v4.4s, v3.4s, v2.4s
285 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v5.h[0]
286 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v4.4s
287 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v6.h[0]
288 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
289 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
290 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
291 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
292 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
293 ; CHECK-GI-NOFP16-NEXT: ret
295 ; CHECK-GI-FP16-LABEL: fma_v7f16:
296 ; CHECK-GI-FP16: // %bb.0: // %entry
297 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
298 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
299 ; CHECK-GI-FP16-NEXT: ret
301 %d = call <7 x half> @llvm.fma.v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c)
; <4 x half> llvm.fma; expectations differ per selector and per +fullfp16.
; Without -global-isel and without +fullfp16: scalarized into four fmadd
; instructions on s-registers, with per-lane mov/fcvt before and fcvt plus
; lane inserts into v0 after.
; With -global-isel and without +fullfp16: three fcvtl widenings, one 4s fmla
; and one fcvtn.
; With +fullfp16 (either selector): a single 4h fmla plus an fmov of d2 to d0.
305 define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
306 ; CHECK-SD-NOFP16-LABEL: fma_v4f16:
307 ; CHECK-SD-NOFP16: // %bb.0: // %entry
308 ; CHECK-SD-NOFP16-NEXT: // kill: def $d2 killed $d2 def $q2
309 ; CHECK-SD-NOFP16-NEXT: // kill: def $d1 killed $d1 def $q1
310 ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
311 ; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
312 ; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
313 ; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
314 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
315 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
316 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
317 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
318 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
319 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
320 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[3]
321 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3]
322 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
323 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
324 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
325 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
326 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[3]
327 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
328 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
329 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
330 ; CHECK-SD-NOFP16-NEXT: fmadd s3, s5, s4, s3
331 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h17
332 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h18
333 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s6
334 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s7, s5, s4
335 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
336 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h16
337 ; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v3.h[0]
338 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
339 ; CHECK-SD-NOFP16-NEXT: fmadd s1, s5, s1, s2
340 ; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v3.h[0]
341 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
342 ; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
343 ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
344 ; CHECK-SD-NOFP16-NEXT: ret
346 ; CHECK-SD-FP16-LABEL: fma_v4f16:
347 ; CHECK-SD-FP16: // %bb.0: // %entry
348 ; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
349 ; CHECK-SD-FP16-NEXT: fmov d0, d2
350 ; CHECK-SD-FP16-NEXT: ret
352 ; CHECK-GI-NOFP16-LABEL: fma_v4f16:
353 ; CHECK-GI-NOFP16: // %bb.0: // %entry
354 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
355 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
356 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
357 ; CHECK-GI-NOFP16-NEXT: fmla v2.4s, v1.4s, v0.4s
358 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
359 ; CHECK-GI-NOFP16-NEXT: ret
361 ; CHECK-GI-FP16-LABEL: fma_v4f16:
362 ; CHECK-GI-FP16: // %bb.0: // %entry
363 ; CHECK-GI-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
364 ; CHECK-GI-FP16-NEXT: fmov d0, d2
365 ; CHECK-GI-FP16-NEXT: ret
367 %d = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
; <8 x half> llvm.fma; expectations differ per selector and per +fullfp16.
; Without -global-isel and without +fullfp16: scalarized into eight fmadd
; instructions on s-registers, with per-lane mov/fcvt before and fcvt plus
; lane inserts into v3 after; v3 is finally moved into v0.
; With -global-isel and without +fullfp16: both halves are widened with
; fcvtl/fcvtl2, combined by two 4s fmla instructions and narrowed with
; fcvtn/fcvtn2.
; With +fullfp16 (either selector): a single 8h fmla plus a vector mov.
371 define <8 x half> @fma_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
372 ; CHECK-SD-NOFP16-LABEL: fma_v8f16:
373 ; CHECK-SD-NOFP16: // %bb.0: // %entry
374 ; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
375 ; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
376 ; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
377 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
378 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
379 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
380 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
381 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
382 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
383 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
384 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
385 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
386 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
387 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
388 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
389 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
390 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
391 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3]
392 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3
393 ; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3]
394 ; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
395 ; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7
396 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
397 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h18
398 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h19
399 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
400 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
401 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
402 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
403 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
404 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
405 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
406 ; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0]
407 ; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5]
408 ; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5
409 ; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
410 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5]
411 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
412 ; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0]
413 ; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6]
414 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
415 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
416 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
417 ; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
418 ; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
419 ; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
420 ; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
421 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
422 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
423 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
424 ; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4
425 ; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0]
426 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
427 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h18
428 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
429 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
430 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
431 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
432 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
433 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
434 ; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5
435 ; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0]
436 ; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
437 ; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
438 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
439 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
440 ; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
441 ; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
442 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
443 ; CHECK-SD-NOFP16-NEXT: ret
445 ; CHECK-SD-FP16-LABEL: fma_v8f16:
446 ; CHECK-SD-FP16: // %bb.0: // %entry
447 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
448 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
449 ; CHECK-SD-FP16-NEXT: ret
451 ; CHECK-GI-NOFP16-LABEL: fma_v8f16:
452 ; CHECK-GI-NOFP16: // %bb.0: // %entry
453 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
454 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
455 ; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v2.4h
456 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
457 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
458 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
459 ; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v4.4s, v3.4s
460 ; CHECK-GI-NOFP16-NEXT: fmla v2.4s, v1.4s, v0.4s
461 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v5.4s
462 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
463 ; CHECK-GI-NOFP16-NEXT: ret
465 ; CHECK-GI-FP16-LABEL: fma_v8f16:
466 ; CHECK-GI-FP16: // %bb.0: // %entry
467 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
468 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
469 ; CHECK-GI-FP16-NEXT: ret
471 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
; <16 x half> llvm.fma; expectations differ per selector and per +fullfp16.
; Without -global-isel and without +fullfp16: scalarized into sixteen fmadd
; instructions on s-registers; results are narrowed with fcvt and inserted
; lane by lane into v6 and v7, which are finally moved into v0 and v1.
; With -global-isel and without +fullfp16: all operands are widened with
; fcvtl/fcvtl2, combined by four 4s fmla instructions and narrowed with
; fcvtn/fcvtn2 into v0 and v1.
; With +fullfp16 (either selector): two 8h fmla instructions plus two vector
; movs.
475 define <16 x half> @fma_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
476 ; CHECK-SD-NOFP16-LABEL: fma_v16f16:
477 ; CHECK-SD-NOFP16: // %bb.0: // %entry
478 ; CHECK-SD-NOFP16-NEXT: mov h6, v4.h[1]
479 ; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[1]
480 ; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[1]
481 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h4
482 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h2
483 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h0
484 ; CHECK-SD-NOFP16-NEXT: mov h20, v4.h[2]
485 ; CHECK-SD-NOFP16-NEXT: mov h21, v2.h[2]
486 ; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[2]
487 ; CHECK-SD-NOFP16-NEXT: mov h23, v4.h[3]
488 ; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[3]
489 ; CHECK-SD-NOFP16-NEXT: mov h25, v0.h[3]
490 ; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
491 ; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
492 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
493 ; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
494 ; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[1]
495 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h5
496 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h20
497 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h21
498 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
499 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h23
500 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h24
501 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h25
502 ; CHECK-SD-NOFP16-NEXT: fmadd s7, s16, s7, s6
503 ; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[1]
504 ; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[1]
505 ; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
506 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h3
507 ; CHECK-SD-NOFP16-NEXT: fcvt s29, h1
508 ; CHECK-SD-NOFP16-NEXT: fmadd s19, s20, s19, s18
509 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h26
510 ; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[4]
511 ; CHECK-SD-NOFP16-NEXT: fmadd s21, s23, s22, s21
512 ; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[2]
513 ; CHECK-SD-NOFP16-NEXT: mov h23, v1.h[2]
514 ; CHECK-SD-NOFP16-NEXT: fcvt h20, s7
515 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h24
516 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h25
517 ; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
518 ; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4]
519 ; CHECK-SD-NOFP16-NEXT: mov h7, v4.h[5]
520 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
521 ; CHECK-SD-NOFP16-NEXT: mov h30, v2.h[5]
522 ; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
523 ; CHECK-SD-NOFP16-NEXT: fcvt h21, s21
524 ; CHECK-SD-NOFP16-NEXT: mov h31, v1.h[4]
525 ; CHECK-SD-NOFP16-NEXT: fmadd s24, s26, s25, s24
526 ; CHECK-SD-NOFP16-NEXT: fmadd s25, s29, s28, s27
527 ; CHECK-SD-NOFP16-NEXT: mov v6.h[1], v20.h[0]
528 ; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[2]
529 ; CHECK-SD-NOFP16-NEXT: mov h26, v5.h[3]
530 ; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[3]
531 ; CHECK-SD-NOFP16-NEXT: mov h28, v1.h[3]
532 ; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
533 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
534 ; CHECK-SD-NOFP16-NEXT: fcvt s29, h7
535 ; CHECK-SD-NOFP16-NEXT: fcvt s30, h30
536 ; CHECK-SD-NOFP16-NEXT: mov v6.h[2], v19.h[0]
537 ; CHECK-SD-NOFP16-NEXT: fcvt h24, s24
538 ; CHECK-SD-NOFP16-NEXT: fcvt h7, s25
539 ; CHECK-SD-NOFP16-NEXT: fcvt s19, h20
540 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
541 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h23
542 ; CHECK-SD-NOFP16-NEXT: fmadd s16, s18, s17, s16
543 ; CHECK-SD-NOFP16-NEXT: mov h23, v0.h[5]
544 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
545 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
546 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
547 ; CHECK-SD-NOFP16-NEXT: mov h18, v4.h[6]
548 ; CHECK-SD-NOFP16-NEXT: mov v6.h[3], v21.h[0]
549 ; CHECK-SD-NOFP16-NEXT: mov v7.h[1], v24.h[0]
550 ; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[5]
551 ; CHECK-SD-NOFP16-NEXT: fmadd s19, s22, s20, s19
552 ; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[4]
553 ; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[4]
554 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h23
555 ; CHECK-SD-NOFP16-NEXT: mov h28, v0.h[6]
556 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
557 ; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
558 ; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7]
559 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
560 ; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
561 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h22
562 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h31
563 ; CHECK-SD-NOFP16-NEXT: fmadd s17, s23, s30, s29
564 ; CHECK-SD-NOFP16-NEXT: fmadd s23, s27, s26, s25
565 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
566 ; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[5]
567 ; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[5]
568 ; CHECK-SD-NOFP16-NEXT: mov h27, v2.h[6]
569 ; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[6]
570 ; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
571 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
572 ; CHECK-SD-NOFP16-NEXT: fmadd s20, s22, s21, s20
573 ; CHECK-SD-NOFP16-NEXT: mov h21, v5.h[6]
574 ; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[6]
575 ; CHECK-SD-NOFP16-NEXT: mov v7.h[2], v19.h[0]
576 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s23
577 ; CHECK-SD-NOFP16-NEXT: fcvt s23, h24
578 ; CHECK-SD-NOFP16-NEXT: fcvt s24, h25
579 ; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
580 ; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
581 ; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
582 ; CHECK-SD-NOFP16-NEXT: fcvt s28, h29
583 ; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7]
584 ; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
585 ; CHECK-SD-NOFP16-NEXT: fcvt s22, h22
586 ; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[7]
587 ; CHECK-SD-NOFP16-NEXT: mov v7.h[3], v19.h[0]
588 ; CHECK-SD-NOFP16-NEXT: fcvt h19, s20
589 ; CHECK-SD-NOFP16-NEXT: mov v6.h[4], v16.h[0]
590 ; CHECK-SD-NOFP16-NEXT: fmadd s20, s25, s24, s23
591 ; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
592 ; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
593 ; CHECK-SD-NOFP16-NEXT: fmadd s18, s27, s26, s18
594 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
595 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
596 ; CHECK-SD-NOFP16-NEXT: fmadd s21, s28, s22, s21
597 ; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
598 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
599 ; CHECK-SD-NOFP16-NEXT: mov v7.h[4], v19.h[0]
600 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
601 ; CHECK-SD-NOFP16-NEXT: fcvt h17, s20
602 ; CHECK-SD-NOFP16-NEXT: mov v6.h[5], v16.h[0]
603 ; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s2, s4
604 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s18
605 ; CHECK-SD-NOFP16-NEXT: fcvt h4, s21
606 ; CHECK-SD-NOFP16-NEXT: fmadd s1, s1, s3, s5
607 ; CHECK-SD-NOFP16-NEXT: mov v7.h[5], v17.h[0]
608 ; CHECK-SD-NOFP16-NEXT: mov v6.h[6], v2.h[0]
609 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
610 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
611 ; CHECK-SD-NOFP16-NEXT: mov v7.h[6], v4.h[0]
612 ; CHECK-SD-NOFP16-NEXT: mov v6.h[7], v0.h[0]
613 ; CHECK-SD-NOFP16-NEXT: mov v7.h[7], v1.h[0]
614 ; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b
615 ; CHECK-SD-NOFP16-NEXT: mov v1.16b, v7.16b
616 ; CHECK-SD-NOFP16-NEXT: ret
618 ; CHECK-SD-FP16-LABEL: fma_v16f16:
619 ; CHECK-SD-FP16: // %bb.0: // %entry
620 ; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
621 ; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
622 ; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
623 ; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
624 ; CHECK-SD-FP16-NEXT: ret
626 ; CHECK-GI-NOFP16-LABEL: fma_v16f16:
627 ; CHECK-GI-NOFP16: // %bb.0: // %entry
628 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
629 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v1.4h
630 ; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v2.4h
631 ; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
632 ; CHECK-GI-NOFP16-NEXT: fcvtl v18.4s, v4.4h
633 ; CHECK-GI-NOFP16-NEXT: fcvtl v19.4s, v5.4h
634 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
635 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
636 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
637 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
638 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
639 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
640 ; CHECK-GI-NOFP16-NEXT: fmla v18.4s, v16.4s, v6.4s
641 ; CHECK-GI-NOFP16-NEXT: fmla v19.4s, v17.4s, v7.4s
642 ; CHECK-GI-NOFP16-NEXT: fmla v4.4s, v2.4s, v0.4s
643 ; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v3.4s, v1.4s
644 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v18.4s
645 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v19.4s
646 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
647 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
648 ; CHECK-GI-NOFP16-NEXT: ret
650 ; CHECK-GI-FP16-LABEL: fma_v16f16:
651 ; CHECK-GI-FP16: // %bb.0: // %entry
652 ; CHECK-GI-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
653 ; CHECK-GI-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
654 ; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
655 ; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
656 ; CHECK-GI-FP16-NEXT: ret
658 %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
; Scalar f64 llvm.fmuladd. All four llc configurations expect the multiply
; and add to be fused into a single fmadd on d-registers.
662 define double @fmuladd_f64(double %a, double %b, double %c) {
663 ; CHECK-LABEL: fmuladd_f64:
664 ; CHECK: // %bb.0: // %entry
665 ; CHECK-NEXT: fmadd d0, d0, d1, d2
668 %d = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
; Scalar f32 llvm.fmuladd. All four llc configurations expect the multiply
; and add to be fused into a single fmadd on s-registers.
672 define float @fmuladd_f32(float %a, float %b, float %c) {
673 ; CHECK-LABEL: fmuladd_f32:
674 ; CHECK: // %bb.0: // %entry
675 ; CHECK-NEXT: fmadd s0, s0, s1, s2
678 %d = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; Scalar f16 llvm.fmuladd.
; Without +fullfp16, both selectors expect an unfused sequence: the multiply
; operands are widened with fcvt, multiplied with fmul on s-registers, the
; product is narrowed to half and widened again, then added with fadd and
; narrowed once more. The two selectors differ only in the order of the first
; two fcvt instructions.
; With +fullfp16, both selectors expect a single fmadd on h-registers.
682 define half @fmuladd_f16(half %a, half %b, half %c) {
683 ; CHECK-SD-NOFP16-LABEL: fmuladd_f16:
684 ; CHECK-SD-NOFP16: // %bb.0: // %entry
685 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
686 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
687 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
688 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h2
689 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
690 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
691 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
692 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
693 ; CHECK-SD-NOFP16-NEXT: ret
695 ; CHECK-SD-FP16-LABEL: fmuladd_f16:
696 ; CHECK-SD-FP16: // %bb.0: // %entry
697 ; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
698 ; CHECK-SD-FP16-NEXT: ret
700 ; CHECK-GI-NOFP16-LABEL: fmuladd_f16:
701 ; CHECK-GI-NOFP16: // %bb.0: // %entry
702 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
703 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
704 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
705 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h2
706 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
707 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
708 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
709 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
710 ; CHECK-GI-NOFP16-NEXT: ret
712 ; CHECK-GI-FP16-LABEL: fmuladd_f16:
713 ; CHECK-GI-FP16: // %bb.0: // %entry
714 ; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
715 ; CHECK-GI-FP16-NEXT: ret
717 %d = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
; <2 x double> llvm.fmuladd. All four llc configurations expect one fused 2d
; fmla that accumulates into v2, followed by a vector mov of v2 into v0.
721 define <2 x double> @fmuladd_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
722 ; CHECK-LABEL: fmuladd_v2f64:
723 ; CHECK: // %bb.0: // %entry
724 ; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d
725 ; CHECK-NEXT: mov v0.16b, v2.16b
728 %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
; <3 x double> llvm.fmuladd; expectations are split by instruction selector
; only.
; Without -global-isel: d-registers are paired into 2-lane vectors with
; mov-lane inserts and combined by one 2d fmla; a second 2d fmla uses an
; accumulator loaded from [sp].
; With -global-isel: the same first 2d fmla, then a scalar fmadd on
; d-registers whose addend is loaded from [sp].
732 define <3 x double> @fmuladd_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
733 ; CHECK-SD-LABEL: fmuladd_v3f64:
734 ; CHECK-SD: // %bb.0: // %entry
735 ; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
736 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
737 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
738 ; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
739 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
740 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
741 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
742 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
743 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
744 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
745 ; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
746 ; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
747 ; CHECK-SD-NEXT: ldr d3, [sp]
748 ; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
749 ; CHECK-SD-NEXT: fmov d0, d6
750 ; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
751 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
752 ; CHECK-SD-NEXT: fmov d2, d3
755 ; CHECK-GI-LABEL: fmuladd_v3f64:
756 ; CHECK-GI: // %bb.0: // %entry
757 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
758 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
759 ; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
760 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
761 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
762 ; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
763 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
764 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
765 ; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
766 ; CHECK-GI-NEXT: fmla v6.2d, v3.2d, v0.2d
767 ; CHECK-GI-NEXT: ldr d0, [sp]
768 ; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
769 ; CHECK-GI-NEXT: mov d1, v6.d[1]
770 ; CHECK-GI-NEXT: fmov d0, d6
773 %d = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c)
; <4 x double> llvm.fmuladd. All four llc configurations expect two fused 2d
; fmla instructions (accumulating into v4 and v5) followed by two vector movs
; into v0 and v1.
777 define <4 x double> @fmuladd_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
778 ; CHECK-LABEL: fmuladd_v4f64:
779 ; CHECK: // %bb.0: // %entry
780 ; CHECK-NEXT: fmla v4.2d, v2.2d, v0.2d
781 ; CHECK-NEXT: fmla v5.2d, v3.2d, v1.2d
782 ; CHECK-NEXT: mov v0.16b, v4.16b
783 ; CHECK-NEXT: mov v1.16b, v5.16b
786 %d = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
; <2 x float> llvm.fmuladd. All four llc configurations expect one fused 2s
; fmla that accumulates into v2, followed by an fmov of d2 into d0.
790 define <2 x float> @fmuladd_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
791 ; CHECK-LABEL: fmuladd_v2f32:
792 ; CHECK: // %bb.0: // %entry
793 ; CHECK-NEXT: fmla v2.2s, v1.2s, v0.2s
794 ; CHECK-NEXT: fmov d0, d2
797 %d = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
; <3 x float> llvm.fmuladd. All four llc configurations expect the 3-element
; vector to be handled by one full 4s fmla into v2, followed by a vector mov
; of v2 into v0.
801 define <3 x float> @fmuladd_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
802 ; CHECK-LABEL: fmuladd_v3f32:
803 ; CHECK: // %bb.0: // %entry
804 ; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
805 ; CHECK-NEXT: mov v0.16b, v2.16b
808 %d = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
; <4 x float> llvm.fmuladd. All four llc configurations expect one fused 4s
; fmla that accumulates into v2, followed by a vector mov of v2 into v0.
812 define <4 x float> @fmuladd_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
813 ; CHECK-LABEL: fmuladd_v4f32:
814 ; CHECK: // %bb.0: // %entry
815 ; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
816 ; CHECK-NEXT: mov v0.16b, v2.16b
819 %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; Test: llvm.fmuladd.v8f32 applied to the <8 x float> arguments %a, %b, %c.
; One expectation is shared by all four run configurations: two `fmla`
; instructions on 4s lanes (destinations v4 and v5), followed by `mov` of
; v4 into v0 and v5 into v1.
823 define <8 x float> @fmuladd_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
824 ; CHECK-LABEL: fmuladd_v8f32:
825 ; CHECK: // %bb.0: // %entry
826 ; CHECK-NEXT: fmla v4.4s, v2.4s, v0.4s
827 ; CHECK-NEXT: fmla v5.4s, v3.4s, v1.4s
828 ; CHECK-NEXT: mov v0.16b, v4.16b
829 ; CHECK-NEXT: mov v1.16b, v5.16b
832 %d = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
; Test: llvm.fmuladd.v7f16 applied to the <7 x half> arguments %a, %b, %c.
; Expected output differs per run configuration:
;  * default selector, no fullfp16: operands are widened with fcvtl/fcvtl2,
;    multiplied with `fmul` on 4s lanes, narrowed with fcvtn/fcvtn2, widened
;    again, added with `fadd` on 4s lanes, and narrowed once more into v0.
;  * -global-isel, no fullfp16: lanes 0-3 follow the widen/fmul/narrow/
;    widen/fadd/narrow pattern directly; lanes 4-6 are first gathered with
;    per-lane `mov` instructions, processed the same way, and the final
;    vector is assembled lane by lane into v0.
;  * +fullfp16 (either selector): a single `fmla` on 8h lanes with
;    destination v2, followed by `mov` of v2 into v0.
836 define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
837 ; CHECK-SD-NOFP16-LABEL: fmuladd_v7f16:
838 ; CHECK-SD-NOFP16: // %bb.0: // %entry
839 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v1.4h
840 ; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v0.4h
841 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
842 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
843 ; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v4.4s, v3.4s
844 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
845 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v3.4s
846 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v2.4h
847 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
848 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v0.4s
849 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v1.4h
850 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
851 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v3.4s
852 ; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
853 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
854 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
855 ; CHECK-SD-NOFP16-NEXT: ret
857 ; CHECK-SD-FP16-LABEL: fmuladd_v7f16:
858 ; CHECK-SD-FP16: // %bb.0: // %entry
859 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
860 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
861 ; CHECK-SD-FP16-NEXT: ret
863 ; CHECK-GI-NOFP16-LABEL: fmuladd_v7f16:
864 ; CHECK-GI-NOFP16: // %bb.0: // %entry
865 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
866 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
867 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
868 ; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[5]
869 ; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[4]
870 ; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[5]
871 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
872 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
873 ; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
874 ; CHECK-GI-NOFP16-NEXT: mov h4, v2.h[5]
875 ; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v6.h[0]
876 ; CHECK-GI-NOFP16-NEXT: mov v7.h[1], v16.h[0]
877 ; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
878 ; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v0.h[0]
879 ; CHECK-GI-NOFP16-NEXT: mov v7.h[2], v1.h[0]
880 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
881 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
882 ; CHECK-GI-NOFP16-NEXT: mov h3, v2.h[4]
883 ; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v5.4h
884 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v7.4h
885 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
886 ; CHECK-GI-NOFP16-NEXT: mov h1, v2.h[6]
887 ; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0]
888 ; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v5.4s, v6.4s
889 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
890 ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[0]
891 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
892 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
893 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
894 ; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
895 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
896 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
897 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
898 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v3.4s
899 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
900 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
901 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
902 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
903 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
904 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
905 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
906 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
907 ; CHECK-GI-NOFP16-NEXT: ret
909 ; CHECK-GI-FP16-LABEL: fmuladd_v7f16:
910 ; CHECK-GI-FP16: // %bb.0: // %entry
911 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
912 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
913 ; CHECK-GI-FP16-NEXT: ret
915 %d = call <7 x half> @llvm.fmuladd.v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c)
; Test: llvm.fmuladd.v4f16 applied to the <4 x half> arguments %a, %b, %c.
; Expected output differs per run configuration:
;  * no fullfp16 (either selector): operands are widened with `fcvtl`,
;    multiplied with `fmul` on 4s lanes, narrowed with `fcvtn`, widened
;    again, added with `fadd`, and narrowed into v0. The two selectors
;    differ only in the order of the first two `fcvtl` instructions.
;  * +fullfp16 (either selector): a single `fmla` on 4h lanes with
;    destination v2, followed by `fmov d0, d2`.
919 define <4 x half> @fmuladd_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
920 ; CHECK-SD-NOFP16-LABEL: fmuladd_v4f16:
921 ; CHECK-SD-NOFP16: // %bb.0: // %entry
922 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
923 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
924 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
925 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
926 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
927 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
928 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
929 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
930 ; CHECK-SD-NOFP16-NEXT: ret
932 ; CHECK-SD-FP16-LABEL: fmuladd_v4f16:
933 ; CHECK-SD-FP16: // %bb.0: // %entry
934 ; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
935 ; CHECK-SD-FP16-NEXT: fmov d0, d2
936 ; CHECK-SD-FP16-NEXT: ret
938 ; CHECK-GI-NOFP16-LABEL: fmuladd_v4f16:
939 ; CHECK-GI-NOFP16: // %bb.0: // %entry
940 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
941 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
942 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
943 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
944 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
945 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
946 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
947 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
948 ; CHECK-GI-NOFP16-NEXT: ret
950 ; CHECK-GI-FP16-LABEL: fmuladd_v4f16:
951 ; CHECK-GI-FP16: // %bb.0: // %entry
952 ; CHECK-GI-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
953 ; CHECK-GI-FP16-NEXT: fmov d0, d2
954 ; CHECK-GI-FP16-NEXT: ret
956 %d = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
; Test: llvm.fmuladd.v8f16 applied to the <8 x half> arguments %a, %b, %c.
; Expected output differs per run configuration:
;  * default selector, no fullfp16: both halves are widened with
;    fcvtl/fcvtl2, multiplied with `fmul` on 4s lanes, narrowed with
;    fcvtn/fcvtn2 into one 8h register, widened again, added with `fadd`,
;    and narrowed into v0.
;  * -global-isel, no fullfp16: same overall pattern, but each product half
;    is narrowed with a separate `fcvtn` and widened with `fcvtl` before the
;    `fadd` instructions.
;  * +fullfp16 (either selector): a single `fmla` on 8h lanes with
;    destination v2, followed by `mov` of v2 into v0.
960 define <8 x half> @fmuladd_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
961 ; CHECK-SD-NOFP16-LABEL: fmuladd_v8f16:
962 ; CHECK-SD-NOFP16: // %bb.0: // %entry
963 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v1.4h
964 ; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v0.4h
965 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
966 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
967 ; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v4.4s, v3.4s
968 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
969 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v3.4s
970 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v2.4h
971 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
972 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v0.4s
973 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v1.4h
974 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
975 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v3.4s
976 ; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
977 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
978 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
979 ; CHECK-SD-NOFP16-NEXT: ret
981 ; CHECK-SD-FP16-LABEL: fmuladd_v8f16:
982 ; CHECK-SD-FP16: // %bb.0: // %entry
983 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
984 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
985 ; CHECK-SD-FP16-NEXT: ret
987 ; CHECK-GI-NOFP16-LABEL: fmuladd_v8f16:
988 ; CHECK-GI-NOFP16: // %bb.0: // %entry
989 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
990 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
991 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
992 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
993 ; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
994 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
995 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
996 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v2.4h
997 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
998 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
999 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1000 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1001 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s
1002 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s
1003 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
1004 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
1005 ; CHECK-GI-NOFP16-NEXT: ret
1007 ; CHECK-GI-FP16-LABEL: fmuladd_v8f16:
1008 ; CHECK-GI-FP16: // %bb.0: // %entry
1009 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
1010 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
1011 ; CHECK-GI-FP16-NEXT: ret
1013 %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
; Test: llvm.fmuladd.v16f16 applied to the <16 x half> arguments %a, %b, %c.
; Expected output differs per run configuration:
;  * no fullfp16 (either selector): every 4-lane quarter of the operands is
;    widened with fcvtl/fcvtl2, multiplied with `fmul` on 4s lanes, narrowed
;    back to half, widened again, added with `fadd`, and narrowed into v0/v1.
;    The two selectors differ in instruction order, register assignment and
;    in whether products are narrowed with fcvtn/fcvtn2 pairs or with
;    separate `fcvtn` instructions.
;  * +fullfp16 (either selector): two `fmla` instructions on 8h lanes
;    (destinations v4 and v5), followed by `mov` of v4 into v0 and v5 into v1.
1017 define <16 x half> @fmuladd_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
1018 ; CHECK-SD-NOFP16-LABEL: fmuladd_v16f16:
1019 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1020 ; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v2.4h
1021 ; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v0.4h
1022 ; CHECK-SD-NOFP16-NEXT: fcvtl v16.4s, v3.4h
1023 ; CHECK-SD-NOFP16-NEXT: fcvtl v17.4s, v1.4h
1024 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1025 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1026 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
1027 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1028 ; CHECK-SD-NOFP16-NEXT: fmul v6.4s, v7.4s, v6.4s
1029 ; CHECK-SD-NOFP16-NEXT: fmul v7.4s, v17.4s, v16.4s
1030 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
1031 ; CHECK-SD-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
1032 ; CHECK-SD-NOFP16-NEXT: fcvtn v2.4h, v6.4s
1033 ; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v5.4h
1034 ; CHECK-SD-NOFP16-NEXT: fcvtn v3.4h, v7.4s
1035 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v2.8h, v0.4s
1036 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v4.4h
1037 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
1038 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v3.8h, v1.4s
1039 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1040 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1041 ; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v3.4h
1042 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
1043 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v3.8h
1044 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v5.8h
1045 ; CHECK-SD-NOFP16-NEXT: fadd v5.4s, v7.4s, v6.4s
1046 ; CHECK-SD-NOFP16-NEXT: fadd v2.4s, v2.4s, v4.4s
1047 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1048 ; CHECK-SD-NOFP16-NEXT: fadd v3.4s, v1.4s, v3.4s
1049 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v5.4s
1050 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
1051 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
1052 ; CHECK-SD-NOFP16-NEXT: ret
1054 ; CHECK-SD-FP16-LABEL: fmuladd_v16f16:
1055 ; CHECK-SD-FP16: // %bb.0: // %entry
1056 ; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
1057 ; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
1058 ; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
1059 ; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
1060 ; CHECK-SD-FP16-NEXT: ret
1062 ; CHECK-GI-NOFP16-LABEL: fmuladd_v16f16:
1063 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1064 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
1065 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v2.4h
1066 ; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h
1067 ; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
1068 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1069 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1070 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1071 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
1072 ; CHECK-GI-NOFP16-NEXT: fmul v6.4s, v6.4s, v7.4s
1073 ; CHECK-GI-NOFP16-NEXT: fmul v7.4s, v16.4s, v17.4s
1074 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
1075 ; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
1076 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v6.4s
1077 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v4.4h
1078 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
1079 ; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v7.4s
1080 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v5.4h
1081 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
1082 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1083 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1084 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
1085 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
1086 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1087 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1088 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v6.4s
1089 ; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v3.4s, v7.4s
1090 ; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v0.4s, v4.4s
1091 ; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v1.4s, v5.4s
1092 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
1093 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
1094 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
1095 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
1096 ; CHECK-GI-NOFP16-NEXT: ret
1098 ; CHECK-GI-FP16-LABEL: fmuladd_v16f16:
1099 ; CHECK-GI-FP16: // %bb.0: // %entry
1100 ; CHECK-GI-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
1101 ; CHECK-GI-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
1102 ; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
1103 ; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
1104 ; CHECK-GI-FP16-NEXT: ret
1106 %d = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on scalar double.
; One expectation is shared by all four run configurations: the pair is
; emitted as a single `fmadd d0, d0, d1, d2`.
1110 define double @fmul_f64(double %a, double %b, double %c) {
1111 ; CHECK-LABEL: fmul_f64:
1112 ; CHECK: // %bb.0: // %entry
1113 ; CHECK-NEXT: fmadd d0, d0, d1, d2
1116 %d = fmul fast double %a, %b
1117 %e = fadd fast double %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on scalar float.
; One expectation is shared by all four run configurations: the pair is
; emitted as a single `fmadd s0, s0, s1, s2`.
1121 define float @fmul_f32(float %a, float %b, float %c) {
1122 ; CHECK-LABEL: fmul_f32:
1123 ; CHECK: // %bb.0: // %entry
1124 ; CHECK-NEXT: fmadd s0, s0, s1, s2
1127 %d = fmul fast float %a, %b
1128 %e = fadd fast float %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on scalar half.
; Expected output differs per run configuration:
;  * no fullfp16 (either selector): operands are converted to single
;    precision with `fcvt`, multiplied with `fmul`, the product is converted
;    to half and back, added with `fadd`, and converted to half again. The
;    two selectors differ only in the order of the first two `fcvt`
;    instructions.
;  * +fullfp16 (either selector): a single `fmadd h0, h0, h1, h2`.
1132 define half @fmul_f16(half %a, half %b, half %c) {
1133 ; CHECK-SD-NOFP16-LABEL: fmul_f16:
1134 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1135 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
1136 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1137 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
1138 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h2
1139 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1140 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
1141 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
1142 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
1143 ; CHECK-SD-NOFP16-NEXT: ret
1145 ; CHECK-SD-FP16-LABEL: fmul_f16:
1146 ; CHECK-SD-FP16: // %bb.0: // %entry
1147 ; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
1148 ; CHECK-SD-FP16-NEXT: ret
1150 ; CHECK-GI-NOFP16-LABEL: fmul_f16:
1151 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1152 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
1153 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
1154 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
1155 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h2
1156 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
1157 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
1158 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
1159 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
1160 ; CHECK-GI-NOFP16-NEXT: ret
1162 ; CHECK-GI-FP16-LABEL: fmul_f16:
1163 ; CHECK-GI-FP16: // %bb.0: // %entry
1164 ; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
1165 ; CHECK-GI-FP16-NEXT: ret
1167 %d = fmul fast half %a, %b
1168 %e = fadd fast half %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <2 x double>.
; Both selectors expect a single `fmla` on 2d lanes with destination v2,
; followed by `mov` of v2 into v0. The expectations are kept separate because
; the two multiplicand operands appear in opposite order (v1, v0 for the
; default selector; v0, v1 with -global-isel).
1172 define <2 x double> @fmul_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
1173 ; CHECK-SD-LABEL: fmul_v2f64:
1174 ; CHECK-SD: // %bb.0: // %entry
1175 ; CHECK-SD-NEXT: fmla v2.2d, v1.2d, v0.2d
1176 ; CHECK-SD-NEXT: mov v0.16b, v2.16b
1177 ; CHECK-SD-NEXT: ret
1179 ; CHECK-GI-LABEL: fmul_v2f64:
1180 ; CHECK-GI: // %bb.0: // %entry
1181 ; CHECK-GI-NEXT: fmla v2.2d, v0.2d, v1.2d
1182 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
1183 ; CHECK-GI-NEXT: ret
1185 %d = fmul fast <2 x double> %a, %b
1186 %e = fadd fast <2 x double> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <3 x double>.
; Expected output differs per selector:
;  * default selector: d-register pairs are combined with `mov vN.d[1], ...`,
;    one `fmla` on 2d lanes writes v6, a value is loaded from [sp] into d3,
;    and a second `fmla` on 2d lanes writes v3; results are then moved into
;    d0, d1 and d2.
;  * -global-isel: the same pairing and first `fmla` (with the multiplicand
;    operands in the opposite order), but the remaining element uses a scalar
;    `fmadd` with the value loaded from [sp]; d1 is extracted with
;    `mov d1, v6.d[1]`.
1190 define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
1191 ; CHECK-SD-LABEL: fmul_v3f64:
1192 ; CHECK-SD: // %bb.0: // %entry
1193 ; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
1194 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
1195 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1196 ; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
1197 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
1198 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
1199 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
1200 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
1201 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
1202 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
1203 ; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
1204 ; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
1205 ; CHECK-SD-NEXT: ldr d3, [sp]
1206 ; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
1207 ; CHECK-SD-NEXT: fmov d0, d6
1208 ; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
1209 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
1210 ; CHECK-SD-NEXT: fmov d2, d3
1211 ; CHECK-SD-NEXT: ret
1213 ; CHECK-GI-LABEL: fmul_v3f64:
1214 ; CHECK-GI: // %bb.0: // %entry
1215 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1216 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
1217 ; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
1218 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
1219 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
1220 ; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
1221 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
1222 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
1223 ; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
1224 ; CHECK-GI-NEXT: fmla v6.2d, v0.2d, v3.2d
1225 ; CHECK-GI-NEXT: ldr d0, [sp]
1226 ; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
1227 ; CHECK-GI-NEXT: mov d1, v6.d[1]
1228 ; CHECK-GI-NEXT: fmov d0, d6
1229 ; CHECK-GI-NEXT: ret
1231 %d = fmul fast <3 x double> %a, %b
1232 %e = fadd fast <3 x double> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <4 x double>.
; Both selectors expect two `fmla` instructions on 2d lanes (destinations v4
; and v5), followed by `mov` of v4 into v0 and v5 into v1. The expectations
; are kept separate because the multiplicand operands of each `fmla` appear
; in opposite order between the default selector and -global-isel.
1236 define <4 x double> @fmul_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
1237 ; CHECK-SD-LABEL: fmul_v4f64:
1238 ; CHECK-SD: // %bb.0: // %entry
1239 ; CHECK-SD-NEXT: fmla v4.2d, v2.2d, v0.2d
1240 ; CHECK-SD-NEXT: fmla v5.2d, v3.2d, v1.2d
1241 ; CHECK-SD-NEXT: mov v0.16b, v4.16b
1242 ; CHECK-SD-NEXT: mov v1.16b, v5.16b
1243 ; CHECK-SD-NEXT: ret
1245 ; CHECK-GI-LABEL: fmul_v4f64:
1246 ; CHECK-GI: // %bb.0: // %entry
1247 ; CHECK-GI-NEXT: fmla v4.2d, v0.2d, v2.2d
1248 ; CHECK-GI-NEXT: fmla v5.2d, v1.2d, v3.2d
1249 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
1250 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
1251 ; CHECK-GI-NEXT: ret
1253 %d = fmul fast <4 x double> %a, %b
1254 %e = fadd fast <4 x double> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <2 x float>.
; Both selectors expect a single `fmla` on 2s lanes with destination v2,
; followed by `fmov d0, d2`. The expectations are kept separate because the
; multiplicand operands appear in opposite order between the default selector
; and -global-isel.
1258 define <2 x float> @fmul_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
1259 ; CHECK-SD-LABEL: fmul_v2f32:
1260 ; CHECK-SD: // %bb.0: // %entry
1261 ; CHECK-SD-NEXT: fmla v2.2s, v1.2s, v0.2s
1262 ; CHECK-SD-NEXT: fmov d0, d2
1263 ; CHECK-SD-NEXT: ret
1265 ; CHECK-GI-LABEL: fmul_v2f32:
1266 ; CHECK-GI: // %bb.0: // %entry
1267 ; CHECK-GI-NEXT: fmla v2.2s, v0.2s, v1.2s
1268 ; CHECK-GI-NEXT: fmov d0, d2
1269 ; CHECK-GI-NEXT: ret
1271 %d = fmul fast <2 x float> %a, %b
1272 %e = fadd fast <2 x float> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <3 x float>.
; Both selectors expect the 3-element operation to be emitted as a single
; `fmla` using the 4s arrangement (destination v2), followed by `mov` of v2
; into v0. The expectations are kept separate because the multiplicand
; operands appear in opposite order between the default selector and
; -global-isel.
1276 define <3 x float> @fmul_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
1277 ; CHECK-SD-LABEL: fmul_v3f32:
1278 ; CHECK-SD: // %bb.0: // %entry
1279 ; CHECK-SD-NEXT: fmla v2.4s, v1.4s, v0.4s
1280 ; CHECK-SD-NEXT: mov v0.16b, v2.16b
1281 ; CHECK-SD-NEXT: ret
1283 ; CHECK-GI-LABEL: fmul_v3f32:
1284 ; CHECK-GI: // %bb.0: // %entry
1285 ; CHECK-GI-NEXT: fmla v2.4s, v0.4s, v1.4s
1286 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
1287 ; CHECK-GI-NEXT: ret
1289 %d = fmul fast <3 x float> %a, %b
1290 %e = fadd fast <3 x float> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <4 x float>.
; Both selectors expect a single `fmla` on 4s lanes with destination v2,
; followed by `mov` of v2 into v0. The expectations are kept separate because
; the multiplicand operands appear in opposite order between the default
; selector and -global-isel.
1294 define <4 x float> @fmul_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
1295 ; CHECK-SD-LABEL: fmul_v4f32:
1296 ; CHECK-SD: // %bb.0: // %entry
1297 ; CHECK-SD-NEXT: fmla v2.4s, v1.4s, v0.4s
1298 ; CHECK-SD-NEXT: mov v0.16b, v2.16b
1299 ; CHECK-SD-NEXT: ret
1301 ; CHECK-GI-LABEL: fmul_v4f32:
1302 ; CHECK-GI: // %bb.0: // %entry
1303 ; CHECK-GI-NEXT: fmla v2.4s, v0.4s, v1.4s
1304 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
1305 ; CHECK-GI-NEXT: ret
1307 %d = fmul fast <4 x float> %a, %b
1308 %e = fadd fast <4 x float> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <8 x float>.
; Both selectors expect two `fmla` instructions on 4s lanes (destinations v4
; and v5), followed by `mov` of v4 into v0 and v5 into v1. The expectations
; are kept separate because the multiplicand operands of each `fmla` appear
; in opposite order between the default selector and -global-isel.
1312 define <8 x float> @fmul_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
1313 ; CHECK-SD-LABEL: fmul_v8f32:
1314 ; CHECK-SD: // %bb.0: // %entry
1315 ; CHECK-SD-NEXT: fmla v4.4s, v2.4s, v0.4s
1316 ; CHECK-SD-NEXT: fmla v5.4s, v3.4s, v1.4s
1317 ; CHECK-SD-NEXT: mov v0.16b, v4.16b
1318 ; CHECK-SD-NEXT: mov v1.16b, v5.16b
1319 ; CHECK-SD-NEXT: ret
1321 ; CHECK-GI-LABEL: fmul_v8f32:
1322 ; CHECK-GI: // %bb.0: // %entry
1323 ; CHECK-GI-NEXT: fmla v4.4s, v0.4s, v2.4s
1324 ; CHECK-GI-NEXT: fmla v5.4s, v1.4s, v3.4s
1325 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
1326 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
1327 ; CHECK-GI-NEXT: ret
1329 %d = fmul fast <8 x float> %a, %b
1330 %e = fadd fast <8 x float> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <7 x half>.
; Expected output differs per run configuration:
;  * default selector, no fullfp16: operands are widened with fcvtl/fcvtl2,
;    multiplied with `fmul` on 4s lanes, narrowed with fcvtn/fcvtn2, widened
;    again, added with `fadd` on 4s lanes, and narrowed once more into v0.
;  * -global-isel, no fullfp16: lanes 0-3 follow the widen/fmul/narrow/
;    widen/fadd/narrow pattern directly; lanes 4-6 are first gathered with
;    per-lane `mov` instructions, processed the same way, and the final
;    vector is assembled lane by lane into v0.
;  * +fullfp16: a single `fmla` on 8h lanes with destination v2, followed by
;    `mov` of v2 into v0; the multiplicand operand order differs between the
;    two selectors.
1334 define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
1335 ; CHECK-SD-NOFP16-LABEL: fmul_v7f16:
1336 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1337 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v1.4h
1338 ; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v0.4h
1339 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1340 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1341 ; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v4.4s, v3.4s
1342 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1343 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v3.4s
1344 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v2.4h
1345 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1346 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v0.4s
1347 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v1.4h
1348 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1349 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v3.4s
1350 ; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
1351 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1352 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
1353 ; CHECK-SD-NOFP16-NEXT: ret
1355 ; CHECK-SD-FP16-LABEL: fmul_v7f16:
1356 ; CHECK-SD-FP16: // %bb.0: // %entry
1357 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
1358 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
1359 ; CHECK-SD-FP16-NEXT: ret
1361 ; CHECK-GI-NOFP16-LABEL: fmul_v7f16:
1362 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1363 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
1364 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
1365 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
1366 ; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[5]
1367 ; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[4]
1368 ; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[5]
1369 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
1370 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
1371 ; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
1372 ; CHECK-GI-NOFP16-NEXT: mov h4, v2.h[5]
1373 ; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v6.h[0]
1374 ; CHECK-GI-NOFP16-NEXT: mov v7.h[1], v16.h[0]
1375 ; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
1376 ; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v0.h[0]
1377 ; CHECK-GI-NOFP16-NEXT: mov v7.h[2], v1.h[0]
1378 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1379 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
1380 ; CHECK-GI-NOFP16-NEXT: mov h3, v2.h[4]
1381 ; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v5.4h
1382 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v7.4h
1383 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1384 ; CHECK-GI-NOFP16-NEXT: mov h1, v2.h[6]
1385 ; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v4.h[0]
1386 ; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v5.4s, v6.4s
1387 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1388 ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[0]
1389 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
1390 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
1391 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
1392 ; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
1393 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
1394 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
1395 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
1396 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v3.4s
1397 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
1398 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1399 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
1400 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
1401 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
1402 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
1403 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
1404 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
1405 ; CHECK-GI-NOFP16-NEXT: ret
1407 ; CHECK-GI-FP16-LABEL: fmul_v7f16:
1408 ; CHECK-GI-FP16: // %bb.0: // %entry
1409 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v0.8h, v1.8h
1410 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
1411 ; CHECK-GI-FP16-NEXT: ret
1413 %d = fmul fast <7 x half> %a, %b
1414 %e = fadd fast <7 x half> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <4 x half>.
; Expected output differs per run configuration:
;  * no fullfp16 (either selector): operands are widened with `fcvtl`,
;    multiplied with `fmul` on 4s lanes, narrowed with `fcvtn`, widened
;    again, added with `fadd`, and narrowed into v0. The two selectors
;    differ only in the order of the first two `fcvtl` instructions.
;  * +fullfp16: a single `fmla` on 4h lanes with destination v2, followed by
;    `fmov d0, d2`; the multiplicand operand order differs between the two
;    selectors.
1418 define <4 x half> @fmul_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
1419 ; CHECK-SD-NOFP16-LABEL: fmul_v4f16:
1420 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1421 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1422 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1423 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1424 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1425 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1426 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1427 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1428 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1429 ; CHECK-SD-NOFP16-NEXT: ret
1431 ; CHECK-SD-FP16-LABEL: fmul_v4f16:
1432 ; CHECK-SD-FP16: // %bb.0: // %entry
1433 ; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
1434 ; CHECK-SD-FP16-NEXT: fmov d0, d2
1435 ; CHECK-SD-FP16-NEXT: ret
1437 ; CHECK-GI-NOFP16-LABEL: fmul_v4f16:
1438 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1439 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1440 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1441 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1442 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1443 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1444 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1445 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1446 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1447 ; CHECK-GI-NOFP16-NEXT: ret
1449 ; CHECK-GI-FP16-LABEL: fmul_v4f16:
1450 ; CHECK-GI-FP16: // %bb.0: // %entry
1451 ; CHECK-GI-FP16-NEXT: fmla v2.4h, v0.4h, v1.4h
1452 ; CHECK-GI-FP16-NEXT: fmov d0, d2
1453 ; CHECK-GI-FP16-NEXT: ret
1455 %d = fmul fast <4 x half> %a, %b
1456 %e = fadd fast <4 x half> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <8 x half>.
; Expected output differs per run configuration:
;  * default selector, no fullfp16: both halves are widened with
;    fcvtl/fcvtl2, multiplied with `fmul` on 4s lanes, narrowed with
;    fcvtn/fcvtn2 into one 8h register, widened again, added with `fadd`,
;    and narrowed into v0.
;  * -global-isel, no fullfp16: same overall pattern, but each product half
;    is narrowed with a separate `fcvtn` and widened with `fcvtl` before the
;    `fadd` instructions.
;  * +fullfp16: a single `fmla` on 8h lanes with destination v2, followed by
;    `mov` of v2 into v0; the multiplicand operand order differs between the
;    two selectors.
1460 define <8 x half> @fmul_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
1461 ; CHECK-SD-NOFP16-LABEL: fmul_v8f16:
1462 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1463 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v1.4h
1464 ; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v0.4h
1465 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1466 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1467 ; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v4.4s, v3.4s
1468 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1469 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v3.4s
1470 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v2.4h
1471 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1472 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v0.4s
1473 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v1.4h
1474 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1475 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v3.4s
1476 ; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
1477 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1478 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
1479 ; CHECK-SD-NOFP16-NEXT: ret
1481 ; CHECK-SD-FP16-LABEL: fmul_v8f16:
1482 ; CHECK-SD-FP16: // %bb.0: // %entry
1483 ; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
1484 ; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
1485 ; CHECK-SD-FP16-NEXT: ret
1487 ; CHECK-GI-NOFP16-LABEL: fmul_v8f16:
1488 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1489 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
1490 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
1491 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1492 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1493 ; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
1494 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
1495 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
1496 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v2.4h
1497 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1498 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1499 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1500 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1501 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s
1502 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s
1503 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
1504 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
1505 ; CHECK-GI-NOFP16-NEXT: ret
1507 ; CHECK-GI-FP16-LABEL: fmul_v8f16:
1508 ; CHECK-GI-FP16: // %bb.0: // %entry
1509 ; CHECK-GI-FP16-NEXT: fmla v2.8h, v0.8h, v1.8h
1510 ; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
1511 ; CHECK-GI-FP16-NEXT: ret
1513 %d = fmul fast <8 x half> %a, %b
1514 %e = fadd fast <8 x half> %d, %c
; Test: an explicit `fmul fast` of %a and %b followed by a `fadd fast` of the
; product and %c, on <16 x half>.
; Expected output differs per run configuration:
;  * no fullfp16 (either selector): every 4-lane quarter of the operands is
;    widened with fcvtl/fcvtl2, multiplied with `fmul` on 4s lanes, narrowed
;    back to half, widened again, added with `fadd`, and narrowed into v0/v1.
;    The two selectors differ in instruction order, register assignment and
;    in whether products are narrowed with fcvtn/fcvtn2 pairs or with
;    separate `fcvtn` instructions.
;  * +fullfp16: two `fmla` instructions on 8h lanes (destinations v4 and v5),
;    followed by `mov` of v4 into v0 and v5 into v1; the multiplicand operand
;    order differs between the two selectors.
1518 define <16 x half> @fmul_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
1519 ; CHECK-SD-NOFP16-LABEL: fmul_v16f16:
1520 ; CHECK-SD-NOFP16: // %bb.0: // %entry
1521 ; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v2.4h
1522 ; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v0.4h
1523 ; CHECK-SD-NOFP16-NEXT: fcvtl v16.4s, v3.4h
1524 ; CHECK-SD-NOFP16-NEXT: fcvtl v17.4s, v1.4h
1525 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1526 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1527 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
1528 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1529 ; CHECK-SD-NOFP16-NEXT: fmul v6.4s, v7.4s, v6.4s
1530 ; CHECK-SD-NOFP16-NEXT: fmul v7.4s, v17.4s, v16.4s
1531 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
1532 ; CHECK-SD-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
1533 ; CHECK-SD-NOFP16-NEXT: fcvtn v2.4h, v6.4s
1534 ; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v5.4h
1535 ; CHECK-SD-NOFP16-NEXT: fcvtn v3.4h, v7.4s
1536 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v2.8h, v0.4s
1537 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v4.4h
1538 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
1539 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v3.8h, v1.4s
1540 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
1541 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1542 ; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v3.4h
1543 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
1544 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v3.8h
1545 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v5.8h
1546 ; CHECK-SD-NOFP16-NEXT: fadd v5.4s, v7.4s, v6.4s
1547 ; CHECK-SD-NOFP16-NEXT: fadd v2.4s, v2.4s, v4.4s
1548 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1549 ; CHECK-SD-NOFP16-NEXT: fadd v3.4s, v1.4s, v3.4s
1550 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v5.4s
1551 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
1552 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
1553 ; CHECK-SD-NOFP16-NEXT: ret
1555 ; CHECK-SD-FP16-LABEL: fmul_v16f16:
1556 ; CHECK-SD-FP16: // %bb.0: // %entry
1557 ; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
1558 ; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
1559 ; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
1560 ; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
1561 ; CHECK-SD-FP16-NEXT: ret
1563 ; CHECK-GI-NOFP16-LABEL: fmul_v16f16:
1564 ; CHECK-GI-NOFP16: // %bb.0: // %entry
1565 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
1566 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v2.4h
1567 ; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h
1568 ; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
1569 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1570 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1571 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1572 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
1573 ; CHECK-GI-NOFP16-NEXT: fmul v6.4s, v6.4s, v7.4s
1574 ; CHECK-GI-NOFP16-NEXT: fmul v7.4s, v16.4s, v17.4s
1575 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
1576 ; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
1577 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v6.4s
1578 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v4.4h
1579 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
1580 ; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v7.4s
1581 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v5.4h
1582 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
1583 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
1584 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1585 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
1586 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
1587 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1588 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1589 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v6.4s
1590 ; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v3.4s, v7.4s
1591 ; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v0.4s, v4.4s
1592 ; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v1.4s, v5.4s
1593 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
1594 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
1595 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
1596 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
1597 ; CHECK-GI-NOFP16-NEXT: ret
1599 ; CHECK-GI-FP16-LABEL: fmul_v16f16:
1600 ; CHECK-GI-FP16: // %bb.0: // %entry
1601 ; CHECK-GI-FP16-NEXT: fmla v4.8h, v0.8h, v2.8h
1602 ; CHECK-GI-FP16-NEXT: fmla v5.8h, v1.8h, v3.8h
1603 ; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
1604 ; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
1605 ; CHECK-GI-FP16-NEXT: ret
1607 %d = fmul fast <16 x half> %a, %b
1608 %e = fadd fast <16 x half> %d, %c
; Declarations of the llvm.fma.* and llvm.fmuladd.* intrinsics, one pair per
; operand type: scalar half/float/double and the 2-, 3-, 4-, 7-, 8- and
; 16-element vector types listed below. Each takes three operands of the same
; type and returns that type.
1612 declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
1613 declare <16 x half> @llvm.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>)
1614 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
1615 declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
1616 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
1617 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
1618 declare <3 x double> @llvm.fma.v3f64(<3 x double>, <3 x double>, <3 x double>)
1619 declare <3 x double> @llvm.fmuladd.v3f64(<3 x double>, <3 x double>, <3 x double>)
1620 declare <3 x float> @llvm.fma.v3f32(<3 x float>, <3 x float>, <3 x float>)
1621 declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>)
1622 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
1623 declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)
1624 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
1625 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
1626 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
1627 declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>)
1628 declare <7 x half> @llvm.fma.v7f16(<7 x half>, <7 x half>, <7 x half>)
1629 declare <7 x half> @llvm.fmuladd.v7f16(<7 x half>, <7 x half>, <7 x half>)
1630 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
1631 declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)
1632 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
1633 declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
1634 declare double @llvm.fma.f64(double, double, double)
1635 declare double @llvm.fmuladd.f64(double, double, double)
1636 declare float @llvm.fma.f32(float, float, float)
1637 declare float @llvm.fmuladd.f32(float, float, float)
1638 declare half @llvm.fma.f16(half, half, half)
1639 declare half @llvm.fmuladd.f16(half, half, half)