1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s
3 ; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
5 ; Check that fp16 fused multiply-accumulate (MAC) and multiply-subtract (MLS) instructions are generated.
; fusedMACTest2: separate fmul + fadd computing (*a1 * *a2) + *a3, with the
; result stored back to *a1.
; The first llc invocation expects the pair to be contracted into a single
; vfma.f16; the +slowfpvfmx invocation expects it to stay as vmul.f16 followed
; by vadd.f16.
7 define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) {
8 ; CHECK-LABEL: fusedMACTest2:
10 ; CHECK-NEXT: vldr.16 s0, [r1]
11 ; CHECK-NEXT: vldr.16 s2, [r0]
12 ; CHECK-NEXT: vldr.16 s4, [r2]
13 ; CHECK-NEXT: vfma.f16 s4, s2, s0
14 ; CHECK-NEXT: vstr.16 s4, [r0]
17 ; DONT-FUSE-LABEL: fusedMACTest2:
19 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
20 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
21 ; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
22 ; DONT-FUSE-NEXT: vldr.16 s2, [r2]
23 ; DONT-FUSE-NEXT: vadd.f16 s0, s0, s2
24 ; DONT-FUSE-NEXT: vstr.16 s0, [r0]
25 ; DONT-FUSE-NEXT: bx lr
27 %f1 = load half, half *%a1, align 2
28 %f2 = load half, half *%a2, align 2
29 %f3 = load half, half *%a3, align 2
; Product first, then the add: the shape that contraction turns into a MAC.
30 %1 = fmul half %f1, %f2
31 %2 = fadd half %1, %f3
32 store half %2, half *%a1, align 2
; fusedMACTest4: separate fmul + fsub computing *a1 - (*a2 * *a3), with the
; result stored back to *a1.
; The first llc invocation expects a single vfms.f16; the +slowfpvfmx
; invocation expects vmul.f16 followed by vsub.f16.
36 define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) {
37 ; CHECK-LABEL: fusedMACTest4:
39 ; CHECK-NEXT: vldr.16 s0, [r2]
40 ; CHECK-NEXT: vldr.16 s2, [r1]
41 ; CHECK-NEXT: vldr.16 s4, [r0]
42 ; CHECK-NEXT: vfms.f16 s4, s2, s0
43 ; CHECK-NEXT: vstr.16 s4, [r0]
46 ; DONT-FUSE-LABEL: fusedMACTest4:
48 ; DONT-FUSE-NEXT: vldr.16 s0, [r2]
49 ; DONT-FUSE-NEXT: vldr.16 s2, [r1]
50 ; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
51 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
52 ; DONT-FUSE-NEXT: vsub.f16 s0, s2, s0
53 ; DONT-FUSE-NEXT: vstr.16 s0, [r0]
54 ; DONT-FUSE-NEXT: bx lr
56 %f1 = load half, half *%a1, align 2
57 %f2 = load half, half *%a2, align 2
58 %f3 = load half, half *%a3, align 2
; The product is the subtrahend here (accumulator minus product).
59 %1 = fmul half %f2, %f3
60 %2 = fsub half %f1, %1
61 store half %2, half *%a1, align 2
; fusedMACTest6: computes (-(*a1 * *a2)) - *a3 using fmul, a negation written
; as "fsub -0.0, x", and a final fsub; the result is stored back to *a1.
; The first llc invocation expects a single vfnma.f16; the +slowfpvfmx
; invocation expects vnmul.f16 followed by vsub.f16.
65 define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) {
66 ; CHECK-LABEL: fusedMACTest6:
68 ; CHECK-NEXT: vldr.16 s0, [r1]
69 ; CHECK-NEXT: vldr.16 s2, [r0]
70 ; CHECK-NEXT: vldr.16 s4, [r2]
71 ; CHECK-NEXT: vfnma.f16 s4, s2, s0
72 ; CHECK-NEXT: vstr.16 s4, [r0]
75 ; DONT-FUSE-LABEL: fusedMACTest6:
77 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
78 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
79 ; DONT-FUSE-NEXT: vnmul.f16 s0, s2, s0
80 ; DONT-FUSE-NEXT: vldr.16 s2, [r2]
81 ; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
82 ; DONT-FUSE-NEXT: vstr.16 s0, [r0]
83 ; DONT-FUSE-NEXT: bx lr
85 %f1 = load half, half *%a1, align 2
86 %f2 = load half, half *%a2, align 2
87 %f3 = load half, half *%a3, align 2
88 %1 = fmul half %f1, %f2
; Negate the product, then subtract the third operand.
89 %2 = fsub half -0.0, %1
90 %3 = fsub half %2, %f3
91 store half %3, half *%a1, align 2
; fusedMACTest8: separate fmul + fsub computing (*a1 * *a2) - *a3, with the
; result stored back to *a1.
; The first llc invocation expects a single vfnms.f16; the +slowfpvfmx
; invocation expects vmul.f16 followed by vsub.f16.
95 define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) {
96 ; CHECK-LABEL: fusedMACTest8:
98 ; CHECK-NEXT: vldr.16 s0, [r1]
99 ; CHECK-NEXT: vldr.16 s2, [r0]
100 ; CHECK-NEXT: vldr.16 s4, [r2]
101 ; CHECK-NEXT: vfnms.f16 s4, s2, s0
102 ; CHECK-NEXT: vstr.16 s4, [r0]
105 ; DONT-FUSE-LABEL: fusedMACTest8:
106 ; DONT-FUSE: @ %bb.0:
107 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
108 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
109 ; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
110 ; DONT-FUSE-NEXT: vldr.16 s2, [r2]
111 ; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
112 ; DONT-FUSE-NEXT: vstr.16 s0, [r0]
113 ; DONT-FUSE-NEXT: bx lr
115 %f1 = load half, half *%a1, align 2
116 %f2 = load half, half *%a2, align 2
117 %f3 = load half, half *%a3, align 2
; The product is the minuend here (product minus accumulator).
118 %1 = fmul half %f1, %f2
119 %2 = fsub half %1, %f3
120 store half %2, half *%a1, align 2
; test_fma_f16: explicit llvm.fma.f16 call computing fma(*aa, *bb, *cc), with
; the result stored back to *aa.
; Both llc invocations expect vfma.f16, i.e. the explicit intrinsic is emitted
; as a fused instruction even with +slowfpvfmx.
124 define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
125 ; CHECK-LABEL: test_fma_f16:
126 ; CHECK: @ %bb.0: @ %entry
127 ; CHECK-NEXT: vldr.16 s0, [r1]
128 ; CHECK-NEXT: vldr.16 s2, [r0]
129 ; CHECK-NEXT: vldr.16 s4, [r2]
130 ; CHECK-NEXT: vfma.f16 s4, s2, s0
131 ; CHECK-NEXT: vstr.16 s4, [r0]
134 ; DONT-FUSE-LABEL: test_fma_f16:
135 ; DONT-FUSE: @ %bb.0: @ %entry
136 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
137 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
138 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
139 ; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
140 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
141 ; DONT-FUSE-NEXT: bx lr
143 %a = load half, half *%aa, align 2
144 %b = load half, half *%bb, align 2
145 %c = load half, half *%cc, align 2
146 %tmp1 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone
147 store half %tmp1, half *%aa, align 2
; test_fnms_f16: explicit llvm.fma.f16 call with a negated addend, computing
; (*aa * *bb) - *cc, with the result stored back to *aa.
; Both llc invocations are expected to select vfnms.f16.
; Fix: the negated value %tmp2 used to be computed but never used (the
; intrinsic received %c directly), so this test only ever exercised vfma.f16
; despite its name.
; NOTE(review): the expected assembly below was updated by hand to mirror
; fnms1, which has the same IR shape; regenerate the assertions with
; utils/update_llc_test_checks.py to confirm.
151 define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
152 ; CHECK-LABEL: test_fnms_f16:
154 ; CHECK-NEXT: vldr.16 s0, [r1]
155 ; CHECK-NEXT: vldr.16 s2, [r0]
156 ; CHECK-NEXT: vldr.16 s4, [r2]
157 ; CHECK-NEXT: vfnms.f16 s4, s2, s0
158 ; CHECK-NEXT: vstr.16 s4, [r0]
161 ; DONT-FUSE-LABEL: test_fnms_f16:
162 ; DONT-FUSE: @ %bb.0:
163 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
164 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
165 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
166 ; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
167 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
168 ; DONT-FUSE-NEXT: bx lr
170 %a = load half, half *%aa, align 2
171 %b = load half, half *%bb, align 2
172 %c = load half, half *%cc, align 2
; Negate the addend and actually feed it to the intrinsic.
173 %tmp2 = fsub half -0.0, %c
174 %tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %tmp2) nounwind readnone
175 store half %tmp3, half *%aa, align 2
; test_fma_const_fold: llvm.fma.f16 with a constant 1.0 multiplicand, i.e.
; fma(*aa, 1.0, *bb), with the result stored back to *aa.
; Both llc invocations expect the multiply to disappear, leaving a plain
; vadd.f16 of the two loaded values.
179 define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind {
180 ; CHECK-LABEL: test_fma_const_fold:
182 ; CHECK-NEXT: vldr.16 s0, [r1]
183 ; CHECK-NEXT: vldr.16 s2, [r0]
184 ; CHECK-NEXT: vadd.f16 s0, s2, s0
185 ; CHECK-NEXT: vstr.16 s0, [r0]
188 ; DONT-FUSE-LABEL: test_fma_const_fold:
189 ; DONT-FUSE: @ %bb.0:
190 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
191 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
192 ; DONT-FUSE-NEXT: vadd.f16 s0, s2, s0
193 ; DONT-FUSE-NEXT: vstr.16 s0, [r0]
194 ; DONT-FUSE-NEXT: bx lr
196 %a = load half, half *%aa, align 2
197 %b = load half, half *%bb, align 2
198 %ret = call half @llvm.fma.f16(half %a, half 1.0, half %b)
199 store half %ret, half *%aa, align 2
; test_fma_canonicalize: llvm.fma.f16 with the constant 2.0 as the FIRST
; multiplicand, i.e. fma(2.0, *aa, *bb), with the result stored back to *aa.
; Both llc invocations expect the constant to be materialized with vmov.f16
; and to appear as the LAST operand of vfma.f16 (after the loaded value).
203 define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind {
204 ; CHECK-LABEL: test_fma_canonicalize:
206 ; CHECK-NEXT: vldr.16 s0, [r0]
207 ; CHECK-NEXT: vldr.16 s2, [r1]
208 ; CHECK-NEXT: vmov.f16 s4, #2.000000e+00
209 ; CHECK-NEXT: vfma.f16 s2, s0, s4
210 ; CHECK-NEXT: vstr.16 s2, [r0]
213 ; DONT-FUSE-LABEL: test_fma_canonicalize:
214 ; DONT-FUSE: @ %bb.0:
215 ; DONT-FUSE-NEXT: vldr.16 s0, [r0]
216 ; DONT-FUSE-NEXT: vldr.16 s2, [r1]
217 ; DONT-FUSE-NEXT: vmov.f16 s4, #2.000000e+00
218 ; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
219 ; DONT-FUSE-NEXT: vstr.16 s2, [r0]
220 ; DONT-FUSE-NEXT: bx lr
222 %a = load half, half *%aa, align 2
223 %b = load half, half *%bb, align 2
224 %ret = call half @llvm.fma.f16(half 2.0, half %a, half %b)
225 store half %ret, half *%aa, align 2
; fms1: llvm.fma.f16 with the first multiplicand negated, i.e.
; fma(-*a1, *a2, *a3), with the result stored back to *a1.
; Both llc invocations expect a single vfms.f16.
; NOTE(review): unlike the other functions here, no function-label assertion
; for the first llc invocation is visible for this function in this view;
; confirm it is present in the file.
229 define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
232 ; CHECK-NEXT: vldr.16 s0, [r1]
233 ; CHECK-NEXT: vldr.16 s2, [r0]
234 ; CHECK-NEXT: vldr.16 s4, [r2]
235 ; CHECK-NEXT: vfms.f16 s4, s2, s0
236 ; CHECK-NEXT: vstr.16 s4, [r0]
239 ; DONT-FUSE-LABEL: fms1:
240 ; DONT-FUSE: @ %bb.0:
241 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
242 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
243 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
244 ; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
245 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
246 ; DONT-FUSE-NEXT: bx lr
248 %f1 = load half, half *%a1, align 2
249 %f2 = load half, half *%a2, align 2
250 %f3 = load half, half *%a3, align 2
; Negation written as "fsub -0.0, x"; it feeds the first fma operand.
251 %s = fsub half -0.0, %f1
252 %ret = call half @llvm.fma.f16(half %s, half %f2, half %f3)
253 store half %ret, half *%a1, align 2
; fms2: llvm.fma.f16 with the second multiplicand negated, i.e.
; fma(*a2, -*a1, *a3), with the result stored back to *a1.
; Both llc invocations expect a single vfms.f16, the same instruction sequence
; that fms1 expects.
; NOTE(review): unlike the other functions here, no function-label assertion
; for the first llc invocation is visible for this function in this view;
; confirm it is present in the file.
257 define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
260 ; CHECK-NEXT: vldr.16 s0, [r1]
261 ; CHECK-NEXT: vldr.16 s2, [r0]
262 ; CHECK-NEXT: vldr.16 s4, [r2]
263 ; CHECK-NEXT: vfms.f16 s4, s2, s0
264 ; CHECK-NEXT: vstr.16 s4, [r0]
267 ; DONT-FUSE-LABEL: fms2:
268 ; DONT-FUSE: @ %bb.0:
269 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
270 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
271 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
272 ; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
273 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
274 ; DONT-FUSE-NEXT: bx lr
276 %f1 = load half, half *%a1, align 2
277 %f2 = load half, half *%a2, align 2
278 %f3 = load half, half *%a3, align 2
; Same negated value as in fms1, but passed as the second fma operand.
279 %s = fsub half -0.0, %f1
280 %ret = call half @llvm.fma.f16(half %f2, half %s, half %f3)
281 store half %ret, half *%a1, align 2
; fnma1: negation of a whole llvm.fma.f16 result, i.e.
; -(fma(*a1, *a2, *a3)), stored back to *a1.
; Both llc invocations expect a single vfnma.f16.
285 define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
286 ; CHECK-LABEL: fnma1:
288 ; CHECK-NEXT: vldr.16 s0, [r1]
289 ; CHECK-NEXT: vldr.16 s2, [r0]
290 ; CHECK-NEXT: vldr.16 s4, [r2]
291 ; CHECK-NEXT: vfnma.f16 s4, s2, s0
292 ; CHECK-NEXT: vstr.16 s4, [r0]
295 ; DONT-FUSE-LABEL: fnma1:
296 ; DONT-FUSE: @ %bb.0:
297 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
298 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
299 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
300 ; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
301 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
302 ; DONT-FUSE-NEXT: bx lr
304 %f1 = load half, half *%a1, align 2
305 %f2 = load half, half *%a2, align 2
306 %f3 = load half, half *%a3, align 2
307 %fma = call half @llvm.fma.f16(half %f1, half %f2, half %f3)
; Negate the fused result (written as "fsub -0.0, x").
308 %n1 = fsub half -0.0, %fma
309 store half %n1, half *%a1, align 2
; fnma2: llvm.fma.f16 with the first multiplicand and the addend both negated,
; i.e. fma(-*a1, *a2, -*a3), stored back to *a1.
; Both llc invocations expect a single vfnma.f16, the same instruction
; sequence that fnma1 expects.
313 define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
314 ; CHECK-LABEL: fnma2:
316 ; CHECK-NEXT: vldr.16 s0, [r1]
317 ; CHECK-NEXT: vldr.16 s2, [r0]
318 ; CHECK-NEXT: vldr.16 s4, [r2]
319 ; CHECK-NEXT: vfnma.f16 s4, s2, s0
320 ; CHECK-NEXT: vstr.16 s4, [r0]
323 ; DONT-FUSE-LABEL: fnma2:
324 ; DONT-FUSE: @ %bb.0:
325 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
326 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
327 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
328 ; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
329 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
330 ; DONT-FUSE-NEXT: bx lr
332 %f1 = load half, half *%a1, align 2
333 %f2 = load half, half *%a2, align 2
334 %f3 = load half, half *%a3, align 2
; Negate one multiplicand and the addend before the fma call.
335 %n1 = fsub half -0.0, %f1
336 %n3 = fsub half -0.0, %f3
337 %ret = call half @llvm.fma.f16(half %n1, half %f2, half %n3)
338 store half %ret, half *%a1, align 2
; fnms1: llvm.fma.f16 with only the addend negated, i.e.
; fma(*a1, *a2, -*a3), stored back to *a1.
; Both llc invocations expect a single vfnms.f16.
342 define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
343 ; CHECK-LABEL: fnms1:
345 ; CHECK-NEXT: vldr.16 s0, [r1]
346 ; CHECK-NEXT: vldr.16 s2, [r0]
347 ; CHECK-NEXT: vldr.16 s4, [r2]
348 ; CHECK-NEXT: vfnms.f16 s4, s2, s0
349 ; CHECK-NEXT: vstr.16 s4, [r0]
352 ; DONT-FUSE-LABEL: fnms1:
353 ; DONT-FUSE: @ %bb.0:
354 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
355 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
356 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
357 ; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
358 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
359 ; DONT-FUSE-NEXT: bx lr
361 %f1 = load half, half *%a1, align 2
362 %f2 = load half, half *%a2, align 2
363 %f3 = load half, half *%a3, align 2
; Negated addend (written as "fsub -0.0, x") feeds the third fma operand.
364 %n3 = fsub half -0.0, %f3
365 %ret = call half @llvm.fma.f16(half %f1, half %f2, half %n3)
366 store half %ret, half *%a1, align 2
; fnms2: negation of an llvm.fma.f16 whose first multiplicand is negated, i.e.
; -(fma(-*a1, *a2, *a3)), stored back to *a1.
; Both llc invocations expect a single vfnms.f16, the same instruction
; sequence that fnms1 expects.
370 define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
371 ; CHECK-LABEL: fnms2:
373 ; CHECK-NEXT: vldr.16 s0, [r1]
374 ; CHECK-NEXT: vldr.16 s2, [r0]
375 ; CHECK-NEXT: vldr.16 s4, [r2]
376 ; CHECK-NEXT: vfnms.f16 s4, s2, s0
377 ; CHECK-NEXT: vstr.16 s4, [r0]
380 ; DONT-FUSE-LABEL: fnms2:
381 ; DONT-FUSE: @ %bb.0:
382 ; DONT-FUSE-NEXT: vldr.16 s0, [r1]
383 ; DONT-FUSE-NEXT: vldr.16 s2, [r0]
384 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
385 ; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
386 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
387 ; DONT-FUSE-NEXT: bx lr
389 %f1 = load half, half *%a1, align 2
390 %f2 = load half, half *%a2, align 2
391 %f3 = load half, half *%a3, align 2
; Negate the first multiplicand, fuse, then negate the fused result.
392 %n1 = fsub half -0.0, %f1
393 %fma = call half @llvm.fma.f16(half %n1, half %f2, half %f3)
394 %n = fsub half -0.0, %fma
395 store half %n, half *%a1, align 2
; fnms3: negation of an llvm.fma.f16 whose second multiplicand is negated,
; i.e. -(fma(*a1, -*a2, *a3)), stored back to *a1.
; Both llc invocations expect a single vfnms.f16. The expected load order
; ([r0] first, then [r1]) differs from fnms1 and fnms2.
399 define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
400 ; CHECK-LABEL: fnms3:
402 ; CHECK-NEXT: vldr.16 s0, [r0]
403 ; CHECK-NEXT: vldr.16 s2, [r1]
404 ; CHECK-NEXT: vldr.16 s4, [r2]
405 ; CHECK-NEXT: vfnms.f16 s4, s2, s0
406 ; CHECK-NEXT: vstr.16 s4, [r0]
409 ; DONT-FUSE-LABEL: fnms3:
410 ; DONT-FUSE: @ %bb.0:
411 ; DONT-FUSE-NEXT: vldr.16 s0, [r0]
412 ; DONT-FUSE-NEXT: vldr.16 s2, [r1]
413 ; DONT-FUSE-NEXT: vldr.16 s4, [r2]
414 ; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
415 ; DONT-FUSE-NEXT: vstr.16 s4, [r0]
416 ; DONT-FUSE-NEXT: bx lr
418 %f1 = load half, half *%a1, align 2
419 %f2 = load half, half *%a2, align 2
420 %f3 = load half, half *%a3, align 2
; Negate the second multiplicand, fuse, then negate the fused result.
421 %n2 = fsub half -0.0, %f2
422 %fma = call half @llvm.fma.f16(half %f1, half %n2, half %f3)
423 %n1 = fsub half -0.0, %fma
424 store half %n1, half *%a1, align 2
; Declaration of the half-precision fused multiply-add intrinsic called by the
; test functions in this file: takes three half operands and returns a half.
429 declare half @llvm.fma.f16(half, half, half) nounwind readnone