1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
4 attributes #0 = { strictfp }
6 declare float @llvm.fma.f32(float, float, float)
7 declare double @llvm.fma.f64(double, double, double)
8 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
9 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
; fma(%b, %v[0], %a) with %v a <4 x float>: lane 0 is the second multiplicand.
; The expected instruction is the scalar fmadd s0, s1, s2, s0.
11 define float @test_fmla_ss4S_0(float %a, float %b, <4 x float> %v) {
12 ; CHECK-LABEL: test_fmla_ss4S_0:
14 ; CHECK-NEXT: fmadd s0, s1, s2, s0
16 %tmp1 = extractelement <4 x float> %v, i32 0
17 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; fma(%v[0], %b, %a) with %v a <4 x float>: lane 0 is the first multiplicand.
; The expected fmadd lists the multiplicands in that swapped order (s2, s1).
21 define float @test_fmla_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
22 ; CHECK-LABEL: test_fmla_ss4S_0_swap:
24 ; CHECK-NEXT: fmadd s0, s2, s1, s0
26 %tmp1 = extractelement <4 x float> %v, i32 0
27 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
; fma(%b, %v[3], %a) with %v a <4 x float>: a non-zero lane is expected to
; select the by-element form fmla s0, s1, v2.s[3].
31 define float @test_fmla_ss4S_3(float %a, float %b, <4 x float> %v) {
32 ; CHECK-LABEL: test_fmla_ss4S_3:
34 ; CHECK-NEXT: fmla s0, s1, v2.s[3]
36 %tmp1 = extractelement <4 x float> %v, i32 3
37 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; fma(%v[3], %a, %a): lane 3 is the first multiplicand and %a is used as both
; the other multiplicand and the addend (%b is not referenced by the visible
; instructions). Expected: fmla s0, s0, v2.s[3].
41 define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
42 ; CHECK-LABEL: test_fmla_ss4S_3_swap:
44 ; CHECK-NEXT: fmla s0, s0, v2.s[3]
46 %tmp1 = extractelement <4 x float> %v, i32 3
47 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
; fma(%b, %v[0], %a) with %v a <2 x float>. The expected output has a register
; kill annotation relating $d2 to $q2, followed by the scalar fmadd.
51 define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
52 ; CHECK-LABEL: test_fmla_ss2S_0:
54 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
55 ; CHECK-NEXT: fmadd s0, s1, s2, s0
57 %tmp1 = extractelement <2 x float> %v, i32 0
58 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; fma(%v[0], %b, %a) with %v a <2 x float>: same as test_fmla_ss2S_0 but with
; the extracted lane as the first multiplicand; fmadd operands are s2, s1.
62 define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
63 ; CHECK-LABEL: test_fmla_ss2S_0_swap:
65 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
66 ; CHECK-NEXT: fmadd s0, s2, s1, s0
68 %tmp1 = extractelement <2 x float> %v, i32 0
69 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
; fma(%b, %v[1], %a) with %v a <2 x float>: lane 1 is expected to select the
; by-element form fmla s0, s1, v2.s[1] after the $d2/$q2 kill annotation.
73 define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
74 ; CHECK-LABEL: test_fmla_ss2S_1:
76 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
77 ; CHECK-NEXT: fmla s0, s1, v2.s[1]
79 %tmp1 = extractelement <2 x float> %v, i32 1
80 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; fma(%v[0], %v[3], %a): both multiplicands are lanes of the same <4 x float>.
; Expected: lane 0 is read as s1 and lane 3 as v1.s[3] in a single fmla.
84 define float @test_fmla_ss4S_3_ext0(float %a, <4 x float> %v) {
85 ; CHECK-LABEL: test_fmla_ss4S_3_ext0:
87 ; CHECK-NEXT: fmla s0, s1, v1.s[3]
89 %tmp0 = extractelement <4 x float> %v, i32 0
90 %tmp1 = extractelement <4 x float> %v, i32 3
91 %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
; fma(%v[3], %v[0], %a): as test_fmla_ss4S_3_ext0 with the multiplicands
; swapped in the IR; the expected fmla is identical.
95 define float @test_fmla_ss4S_3_ext0_swp(float %a, <4 x float> %v) {
96 ; CHECK-LABEL: test_fmla_ss4S_3_ext0_swp:
98 ; CHECK-NEXT: fmla s0, s1, v1.s[3]
100 %tmp0 = extractelement <4 x float> %v, i32 0
101 %tmp1 = extractelement <4 x float> %v, i32 3
102 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %tmp0, float %a)
; fma(%v[0], %w[0], %a): lane 0 of two different <4 x float> vectors.
; Expected: a plain scalar fmadd s0, s1, s2, s0.
106 define float @test_fmla_ss4S_0_ext0(float %a, <4 x float> %v, <4 x float> %w) {
107 ; CHECK-LABEL: test_fmla_ss4S_0_ext0:
109 ; CHECK-NEXT: fmadd s0, s1, s2, s0
111 %tmp0 = extractelement <4 x float> %v, i32 0
112 %tmp1 = extractelement <4 x float> %w, i32 0
113 %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
; fma(%v[0], %v[1], %a): both multiplicands are lanes of the same <2 x float>
; (the extracted lane is 1, despite the "_3" in the name).
; Expected: fmla s0, s1, v1.s[1] after the $d1/$q1 kill annotation.
117 define float @test_fmla_ss2S_3_ext0(float %a, <2 x float> %v) {
118 ; CHECK-LABEL: test_fmla_ss2S_3_ext0:
120 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
121 ; CHECK-NEXT: fmla s0, s1, v1.s[1]
123 %tmp0 = extractelement <2 x float> %v, i32 0
124 %tmp1 = extractelement <2 x float> %v, i32 1
125 %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
; fma(%v[1], %v[0], %a): as test_fmla_ss2S_3_ext0 with the multiplicands
; swapped in the IR; the expected instruction sequence is identical.
129 define float @test_fmla_ss2S_3_ext0_swp(float %a, <2 x float> %v) {
130 ; CHECK-LABEL: test_fmla_ss2S_3_ext0_swp:
132 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
133 ; CHECK-NEXT: fmla s0, s1, v1.s[1]
135 %tmp0 = extractelement <2 x float> %v, i32 0
136 %tmp1 = extractelement <2 x float> %v, i32 1
137 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %tmp0, float %a)
; fma(%v[0], %w[0], %a): lane 0 of two different <2 x float> vectors.
; Expected: kill annotations for both $d2 and $d1, then a scalar fmadd.
141 define float @test_fmla_ss2S_0_ext0(float %a, <2 x float> %v, <2 x float> %w) {
142 ; CHECK-LABEL: test_fmla_ss2S_0_ext0:
144 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
145 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
146 ; CHECK-NEXT: fmadd s0, s1, s2, s0
148 %tmp0 = extractelement <2 x float> %v, i32 0
149 %tmp1 = extractelement <2 x float> %w, i32 0
150 %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
; fma(%b, %v[0], %a) with %v a <1 x double>: the only lane is the second
; multiplicand. Expected: fmadd d0, d1, d2, d0.
154 define double @test_fmla_ddD_0(double %a, double %b, <1 x double> %v) {
155 ; CHECK-LABEL: test_fmla_ddD_0:
157 ; CHECK-NEXT: fmadd d0, d1, d2, d0
159 %tmp1 = extractelement <1 x double> %v, i32 0
160 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; fma(%v[0], %b, %a) with %v a <1 x double>: extracted value is the first
; multiplicand. Expected: fmadd d0, d2, d1, d0.
164 define double @test_fmla_ddD_0_swap(double %a, double %b, <1 x double> %v) {
165 ; CHECK-LABEL: test_fmla_ddD_0_swap:
167 ; CHECK-NEXT: fmadd d0, d2, d1, d0
169 %tmp1 = extractelement <1 x double> %v, i32 0
170 %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
; fma(%b, %v[0], %a) with %v a <2 x double>: lane 0 is expected to select the
; scalar fmadd d0, d1, d2, d0.
174 define double @test_fmla_dd2D_0(double %a, double %b, <2 x double> %v) {
175 ; CHECK-LABEL: test_fmla_dd2D_0:
177 ; CHECK-NEXT: fmadd d0, d1, d2, d0
179 %tmp1 = extractelement <2 x double> %v, i32 0
180 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; fma(%v[0], %b, %a) with %v a <2 x double>: lane 0 as the first multiplicand.
; Expected: fmadd d0, d2, d1, d0.
184 define double @test_fmla_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
185 ; CHECK-LABEL: test_fmla_dd2D_0_swap:
187 ; CHECK-NEXT: fmadd d0, d2, d1, d0
189 %tmp1 = extractelement <2 x double> %v, i32 0
190 %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
; fma(%b, %v[1], %a) with %v a <2 x double>: lane 1 is expected to select the
; by-element form fmla d0, d1, v2.d[1].
194 define double @test_fmla_dd2D_1(double %a, double %b, <2 x double> %v) {
195 ; CHECK-LABEL: test_fmla_dd2D_1:
197 ; CHECK-NEXT: fmla d0, d1, v2.d[1]
199 %tmp1 = extractelement <2 x double> %v, i32 1
200 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; fma(%v[1], %b, %a) with %v a <2 x double>: lane 1 as the first multiplicand.
; The expected fmla is the same as for the unswapped test_fmla_dd2D_1.
204 define double @test_fmla_dd2D_1_swap(double %a, double %b, <2 x double> %v) {
205 ; CHECK-LABEL: test_fmla_dd2D_1_swap:
207 ; CHECK-NEXT: fmla d0, d1, v2.d[1]
209 %tmp1 = extractelement <2 x double> %v, i32 1
210 %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
; fma(%v[0], %v[1], %a): both multiplicands are lanes of the same <2 x double>.
; Expected: lane 0 is read as d1 and lane 1 as v1.d[1] in a single fmla.
214 define double @test_fmla_ss2D_1_ext0(double %a, <2 x double> %v) {
215 ; CHECK-LABEL: test_fmla_ss2D_1_ext0:
217 ; CHECK-NEXT: fmla d0, d1, v1.d[1]
219 %tmp0 = extractelement <2 x double> %v, i32 0
220 %tmp1 = extractelement <2 x double> %v, i32 1
221 %tmp2 = call double @llvm.fma.f64(double %tmp0, double %tmp1, double %a)
; fma(%v[1], %v[0], %a): as test_fmla_ss2D_1_ext0 with the multiplicands
; swapped in the IR; the expected fmla is identical.
225 define double @test_fmla_ss2D_1_ext0_swp(double %a, <2 x double> %v) {
226 ; CHECK-LABEL: test_fmla_ss2D_1_ext0_swp:
228 ; CHECK-NEXT: fmla d0, d1, v1.d[1]
230 %tmp0 = extractelement <2 x double> %v, i32 0
231 %tmp1 = extractelement <2 x double> %v, i32 1
232 %tmp2 = call double @llvm.fma.f64(double %tmp1, double %tmp0, double %a)
; fma(%v[0], %w[0], %a): lane 0 of two different <2 x double> vectors.
; Expected: a plain scalar fmadd d0, d1, d2, d0.
236 define double @test_fmla_ss2D_0_ext0(double %a, <2 x double> %v, <2 x double> %w) {
237 ; CHECK-LABEL: test_fmla_ss2D_0_ext0:
239 ; CHECK-NEXT: fmadd d0, d1, d2, d0
241 %tmp0 = extractelement <2 x double> %v, i32 0
242 %tmp1 = extractelement <2 x double> %w, i32 0
243 %tmp2 = call double @llvm.fma.f64(double %tmp0, double %tmp1, double %a)
; fma(-%b, %v[0], %a) with %v a <4 x float>: the negated scalar is the first
; multiplicand and lane 0 the second. Expected: fmsub s0, s2, s1, s0.
247 define float @test_fmls_ss4S_0(float %a, float %b, <4 x float> %v) {
248 ; CHECK-LABEL: test_fmls_ss4S_0:
249 ; CHECK: // %bb.0: // %entry
250 ; CHECK-NEXT: fmsub s0, s2, s1, s0
253 %fneg = fneg float %b
254 %extract = extractelement <4 x float> %v, i64 0
255 %0 = tail call float @llvm.fma.f32(float %fneg, float %extract, float %a)
; fma(%v[0], -%b, %a) with %v a <4 x float>: as test_fmls_ss4S_0 with the
; multiplicands swapped in the IR; the expected fmsub is identical.
259 define float @test_fmls_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
260 ; CHECK-LABEL: test_fmls_ss4S_0_swap:
261 ; CHECK: // %bb.0: // %entry
262 ; CHECK-NEXT: fmsub s0, s2, s1, s0
265 %fneg = fneg float %b
266 %extract = extractelement <4 x float> %v, i64 0
267 %0 = tail call float @llvm.fma.f32(float %extract, float %fneg, float %a)
; fma(-%v[3], %v[3], %a): lane 3 multiplied by its own negation, with the
; negation spelled as "fsub -0.0, x". %b is not referenced by the visible
; instructions. Expected: lane 3 is first moved to s1, then fmls by element.
271 define float @test_fmls_ss4S_3(float %a, float %b, <4 x float> %v) {
272 ; CHECK-LABEL: test_fmls_ss4S_3:
274 ; CHECK-NEXT: mov s1, v2.s[3]
275 ; CHECK-NEXT: fmls s0, s1, v2.s[3]
277 %tmp1 = extractelement <4 x float> %v, i32 3
278 %tmp2 = fsub float -0.0, %tmp1
279 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
; fma(%v[3], -%v[3], %a): as test_fmls_ss4S_3 with the negated value as the
; second multiplicand; the expected mov + fmls sequence is identical.
283 define float @test_fmls_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
284 ; CHECK-LABEL: test_fmls_ss4S_3_swap:
286 ; CHECK-NEXT: mov s1, v2.s[3]
287 ; CHECK-NEXT: fmls s0, s1, v2.s[3]
289 %tmp1 = extractelement <4 x float> %v, i32 3
290 %tmp2 = fsub float -0.0, %tmp1
291 %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a)
; fma(-%b, %v[0], %a) with %v a <2 x float>. Expected: the $d2/$q2 kill
; annotation followed by fmsub s0, s2, s1, s0.
295 define float @test_fmls_ss2S_0(float %a, float %b, <2 x float> %v) {
296 ; CHECK-LABEL: test_fmls_ss2S_0:
297 ; CHECK: // %bb.0: // %entry
298 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
299 ; CHECK-NEXT: fmsub s0, s2, s1, s0
302 %fneg = fneg float %b
303 %extract = extractelement <2 x float> %v, i64 0
304 %0 = tail call float @llvm.fma.f32(float %fneg, float %extract, float %a)
; fma(%v[0], -%b, %a) with %v a <2 x float>: as test_fmls_ss2S_0 with the
; multiplicands swapped in the IR; the expected output is identical.
308 define float @test_fmls_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
309 ; CHECK-LABEL: test_fmls_ss2S_0_swap:
310 ; CHECK: // %bb.0: // %entry
311 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
312 ; CHECK-NEXT: fmsub s0, s2, s1, s0
315 %fneg = fneg float %b
316 %extract = extractelement <2 x float> %v, i64 0
317 %0 = tail call float @llvm.fma.f32(float %extract, float %fneg, float %a)
; fma(-%v[1], %v[1], %a) with %v a <2 x float>; negation is "fsub -0.0, x".
; Expected: kill annotation, mov of lane 1 into s1, then fmls by element.
321 define float @test_fmls_ss2S_1(float %a, float %b, <2 x float> %v) {
322 ; CHECK-LABEL: test_fmls_ss2S_1:
324 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
325 ; CHECK-NEXT: mov s1, v2.s[1]
326 ; CHECK-NEXT: fmls s0, s1, v2.s[1]
328 %tmp1 = extractelement <2 x float> %v, i32 1
329 %tmp2 = fsub float -0.0, %tmp1
330 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
; fma(%v[0], -%v[3], %a): lane 0 times the negation of lane 3 of the same
; <4 x float>. Expected: a single fmls s0, s1, v1.s[3] with no extra mov.
334 define float @test_fmls_ss4S_3_ext0(float %a, <4 x float> %v) {
335 ; CHECK-LABEL: test_fmls_ss4S_3_ext0:
337 ; CHECK-NEXT: fmls s0, s1, v1.s[3]
339 %tmp0 = extractelement <4 x float> %v, i32 0
340 %tmp1 = extractelement <4 x float> %v, i32 3
341 %tmp2 = fsub float -0.0, %tmp1
342 %tmp3 = call float @llvm.fma.f32(float %tmp0, float %tmp2, float %a)
; fma(%v[0], -%w[0], %a): lane 0 of %v times the negated lane 0 of %w.
; Expected: a scalar fmsub s0, s1, s2, s0.
346 define float @test_fmls_ss4S_0_ext0(float %a, <4 x float> %v, <4 x float> %w) {
347 ; CHECK-LABEL: test_fmls_ss4S_0_ext0:
349 ; CHECK-NEXT: fmsub s0, s1, s2, s0
351 %tmp0 = extractelement <4 x float> %v, i32 0
352 %tmp1 = extractelement <4 x float> %w, i32 0
353 %tmp2 = fsub float -0.0, %tmp1
354 %tmp3 = call float @llvm.fma.f32(float %tmp0, float %tmp2, float %a)
; fma(-%b, %v[0], %a) with %v a <1 x double>. Expected: fmsub d0, d1, d2, d0.
; NOTE(review): the strict twin test_fmls_ddD_0_strict has the same operand
; order in the IR but expects the multiplicands as d2, d1. Confirm both by
; regenerating the assertions with update_llc_test_checks.py; do not hand-edit.
358 define double @test_fmls_ddD_0(double %a, double %b, <1 x double> %v) {
359 ; CHECK-LABEL: test_fmls_ddD_0:
360 ; CHECK: // %bb.0: // %entry
361 ; CHECK-NEXT: fmsub d0, d1, d2, d0
364 %fneg = fneg double %b
365 %extract = extractelement <1 x double> %v, i64 0
366 %0 = tail call double @llvm.fma.f64(double %fneg, double %extract, double %a)
; fma(%v[0], -%b, %a) with %v a <1 x double>: the negated scalar is the second
; multiplicand. Expected: fmsub d0, d2, d1, d0.
370 define double @test_fmls_ddD_0_swap(double %a, double %b, <1 x double> %v) {
371 ; CHECK-LABEL: test_fmls_ddD_0_swap:
372 ; CHECK: // %bb.0: // %entry
373 ; CHECK-NEXT: fmsub d0, d2, d1, d0
376 %fneg = fneg double %b
377 %extract = extractelement <1 x double> %v, i64 0
378 %0 = tail call double @llvm.fma.f64(double %extract, double %fneg, double %a)
; fma(-%b, %v[0], %a) with %v a <2 x double>: lane 0 is expected to select the
; scalar fmsub d0, d2, d1, d0.
382 define double @test_fmls_dd2D_0(double %a, double %b, <2 x double> %v) {
383 ; CHECK-LABEL: test_fmls_dd2D_0:
384 ; CHECK: // %bb.0: // %entry
385 ; CHECK-NEXT: fmsub d0, d2, d1, d0
388 %fneg = fneg double %b
389 %extract = extractelement <2 x double> %v, i64 0
390 %0 = tail call double @llvm.fma.f64(double %fneg, double %extract, double %a)
; fma(%v[0], -%b, %a) with %v a <2 x double>: as test_fmls_dd2D_0 with the
; multiplicands swapped in the IR; the expected fmsub is identical.
394 define double @test_fmls_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
395 ; CHECK-LABEL: test_fmls_dd2D_0_swap:
396 ; CHECK: // %bb.0: // %entry
397 ; CHECK-NEXT: fmsub d0, d2, d1, d0
400 %fneg = fneg double %b
401 %extract = extractelement <2 x double> %v, i64 0
402 %0 = tail call double @llvm.fma.f64(double %extract, double %fneg, double %a)
; fma(-%v[1], %v[1], %a) with %v a <2 x double>; negation is "fsub -0.0, x".
; Expected: lane 1 is first moved to d1, then fmls d0, d1, v2.d[1].
406 define double @test_fmls_dd2D_1(double %a, double %b, <2 x double> %v) {
407 ; CHECK-LABEL: test_fmls_dd2D_1:
409 ; CHECK-NEXT: mov d1, v2.d[1]
410 ; CHECK-NEXT: fmls d0, d1, v2.d[1]
412 %tmp1 = extractelement <2 x double> %v, i32 1
413 %tmp2 = fsub double -0.0, %tmp1
414 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
; fma(%v[1], -%v[1], %a): as test_fmls_dd2D_1 with the negated value as the
; second multiplicand; the expected mov + fmls sequence is identical.
418 define double @test_fmls_dd2D_1_swap(double %a, double %b, <2 x double> %v) {
419 ; CHECK-LABEL: test_fmls_dd2D_1_swap:
421 ; CHECK-NEXT: mov d1, v2.d[1]
422 ; CHECK-NEXT: fmls d0, d1, v2.d[1]
424 %tmp1 = extractelement <2 x double> %v, i32 1
425 %tmp2 = fsub double -0.0, %tmp1
426 %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a)
; fma(-%v[1], %v[0], %a): negated lane 1 times lane 0 of the same
; <2 x double>. Expected: a single fmls d0, d1, v1.d[1] with no extra mov.
430 define double @test_fmls_dd2D_1_ext0(double %a, <2 x double> %v) {
431 ; CHECK-LABEL: test_fmls_dd2D_1_ext0:
433 ; CHECK-NEXT: fmls d0, d1, v1.d[1]
435 %tmp0 = extractelement <2 x double> %v, i32 0
436 %tmp1 = extractelement <2 x double> %v, i32 1
437 %tmp2 = fsub double -0.0, %tmp1
438 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp0, double %a)
; fma(-%w[0], %v[0], %a): negated lane 0 of %w times lane 0 of %v.
; Expected: a scalar fmsub d0, d1, d2, d0.
442 define double @test_fmls_dd2D_0_ext0(double %a, <2 x double> %v, <2 x double> %w) {
443 ; CHECK-LABEL: test_fmls_dd2D_0_ext0:
445 ; CHECK-NEXT: fmsub d0, d1, d2, d0
447 %tmp0 = extractelement <2 x double> %v, i32 0
448 %tmp1 = extractelement <2 x double> %w, i32 0
449 %tmp2 = fsub double -0.0, %tmp1
450 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp0, double %a)
; Strict-FP form of test_fmla_ss4S_0: the function carries attribute #0
; (strictfp) and uses the constrained fma intrinsic with round.tonearest and
; fpexcept.strict. The expected fmadd is the same as in the non-strict test.
454 define float @test_fmla_ss4S_0_strict(float %a, float %b, <4 x float> %v) #0 {
455 ; CHECK-LABEL: test_fmla_ss4S_0_strict:
457 ; CHECK-NEXT: fmadd s0, s1, s2, s0
459 %tmp1 = extractelement <4 x float> %v, i32 0
460 %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_ss4S_0_swap: constrained fma(%v[0], %b, %a).
; Expected: fmadd s0, s2, s1, s0.
464 define float @test_fmla_ss4S_0_swap_strict(float %a, float %b, <4 x float> %v) #0 {
465 ; CHECK-LABEL: test_fmla_ss4S_0_swap_strict:
467 ; CHECK-NEXT: fmadd s0, s2, s1, s0
469 %tmp1 = extractelement <4 x float> %v, i32 0
470 %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %b, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_ss4S_3: constrained fma(%b, %v[3], %a).
; Expected: the by-element fmla s0, s1, v2.s[3].
474 define float @test_fmla_ss4S_3_strict(float %a, float %b, <4 x float> %v) #0 {
475 ; CHECK-LABEL: test_fmla_ss4S_3_strict:
477 ; CHECK-NEXT: fmla s0, s1, v2.s[3]
479 %tmp1 = extractelement <4 x float> %v, i32 3
480 %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_ss4S_3_swap: constrained fma(%v[3], %a, %a),
; with %a as both multiplicand and addend. Expected: fmla s0, s0, v2.s[3].
484 define float @test_fmla_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #0 {
485 ; CHECK-LABEL: test_fmla_ss4S_3_swap_strict:
487 ; CHECK-NEXT: fmla s0, s0, v2.s[3]
489 %tmp1 = extractelement <4 x float> %v, i32 3
490 %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_ss2S_0: constrained fma(%b, %v[0], %a) with %v a
; <2 x float>. Expected: the $d2/$q2 kill annotation, then a scalar fmadd.
494 define float @test_fmla_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
495 ; CHECK-LABEL: test_fmla_ss2S_0_strict:
497 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
498 ; CHECK-NEXT: fmadd s0, s1, s2, s0
500 %tmp1 = extractelement <2 x float> %v, i32 0
501 %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_ss2S_0_swap: constrained fma(%v[0], %b, %a).
; Expected: the $d2/$q2 kill annotation, then fmadd s0, s2, s1, s0.
505 define float @test_fmla_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
506 ; CHECK-LABEL: test_fmla_ss2S_0_swap_strict:
508 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
509 ; CHECK-NEXT: fmadd s0, s2, s1, s0
511 %tmp1 = extractelement <2 x float> %v, i32 0
512 %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %b, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_ss2S_1: constrained fma(%b, %v[1], %a) with %v a
; <2 x float>. Expected: kill annotation, then fmla s0, s1, v2.s[1].
516 define float @test_fmla_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
517 ; CHECK-LABEL: test_fmla_ss2S_1_strict:
519 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
520 ; CHECK-NEXT: fmla s0, s1, v2.s[1]
522 %tmp1 = extractelement <2 x float> %v, i32 1
523 %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_ddD_0: constrained fma(%b, %v[0], %a) with %v a
; <1 x double>. Expected: fmadd d0, d1, d2, d0.
527 define double @test_fmla_ddD_0_strict(double %a, double %b, <1 x double> %v) #0 {
528 ; CHECK-LABEL: test_fmla_ddD_0_strict:
530 ; CHECK-NEXT: fmadd d0, d1, d2, d0
532 %tmp1 = extractelement <1 x double> %v, i32 0
533 %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_ddD_0_swap: constrained fma(%v[0], %b, %a) with
; %v a <1 x double>. Expected: fmadd d0, d2, d1, d0.
537 define double @test_fmla_ddD_0_swap_strict(double %a, double %b, <1 x double> %v) #0 {
538 ; CHECK-LABEL: test_fmla_ddD_0_swap_strict:
540 ; CHECK-NEXT: fmadd d0, d2, d1, d0
542 %tmp1 = extractelement <1 x double> %v, i32 0
543 %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_dd2D_0: constrained fma(%b, %v[0], %a) with %v a
; <2 x double>. Expected: fmadd d0, d1, d2, d0.
547 define double @test_fmla_dd2D_0_strict(double %a, double %b, <2 x double> %v) #0 {
548 ; CHECK-LABEL: test_fmla_dd2D_0_strict:
550 ; CHECK-NEXT: fmadd d0, d1, d2, d0
552 %tmp1 = extractelement <2 x double> %v, i32 0
553 %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_dd2D_0_swap: constrained fma(%v[0], %b, %a) with
; %v a <2 x double>. Expected: fmadd d0, d2, d1, d0.
557 define double @test_fmla_dd2D_0_swap_strict(double %a, double %b, <2 x double> %v) #0 {
558 ; CHECK-LABEL: test_fmla_dd2D_0_swap_strict:
560 ; CHECK-NEXT: fmadd d0, d2, d1, d0
562 %tmp1 = extractelement <2 x double> %v, i32 0
563 %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_dd2D_1: constrained fma(%b, %v[1], %a) with %v a
; <2 x double>. Expected: the by-element fmla d0, d1, v2.d[1].
567 define double @test_fmla_dd2D_1_strict(double %a, double %b, <2 x double> %v) #0 {
568 ; CHECK-LABEL: test_fmla_dd2D_1_strict:
570 ; CHECK-NEXT: fmla d0, d1, v2.d[1]
572 %tmp1 = extractelement <2 x double> %v, i32 1
573 %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmla_dd2D_1_swap: constrained fma(%v[1], %b, %a).
; The expected fmla is the same as for the unswapped strict test.
577 define double @test_fmla_dd2D_1_swap_strict(double %a, double %b, <2 x double> %v) #0 {
578 ; CHECK-LABEL: test_fmla_dd2D_1_swap_strict:
580 ; CHECK-NEXT: fmla d0, d1, v2.d[1]
582 %tmp1 = extractelement <2 x double> %v, i32 1
583 %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ss4S_0: constrained fma(-%b, %v[0], %a) with a
; plain (unconstrained) fneg. Expected: fmsub s0, s2, s1, s0.
587 define float @test_fmls_ss4S_0_strict(float %a, float %b, <4 x float> %v) #0 {
588 ; CHECK-LABEL: test_fmls_ss4S_0_strict:
589 ; CHECK: // %bb.0: // %entry
590 ; CHECK-NEXT: fmsub s0, s2, s1, s0
593 %fneg = fneg float %b
594 %extract = extractelement <4 x float> %v, i64 0
595 %0 = tail call float @llvm.experimental.constrained.fma.f32(float %fneg, float %extract, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ss4S_0_swap: constrained fma(%v[0], -%b, %a).
; The expected fmsub is the same as for the unswapped strict test.
599 define float @test_fmls_ss4S_0_swap_strict(float %a, float %b, <4 x float> %v) #0 {
600 ; CHECK-LABEL: test_fmls_ss4S_0_swap_strict:
601 ; CHECK: // %bb.0: // %entry
602 ; CHECK-NEXT: fmsub s0, s2, s1, s0
605 %fneg = fneg float %b
606 %extract = extractelement <4 x float> %v, i64 0
607 %0 = tail call float @llvm.experimental.constrained.fma.f32(float %extract, float %fneg, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ss4S_3: constrained fma(-%v[3], %v[3], %a), with
; the negation written as fneg here. Expected: mov of lane 3 into s1, then
; fmls s0, s1, v2.s[3].
611 define float @test_fmls_ss4S_3_strict(float %a, float %b, <4 x float> %v) #0 {
612 ; CHECK-LABEL: test_fmls_ss4S_3_strict:
614 ; CHECK-NEXT: mov s1, v2.s[3]
615 ; CHECK-NEXT: fmls s0, s1, v2.s[3]
617 %tmp1 = extractelement <4 x float> %v, i32 3
618 %tmp2 = fneg float %tmp1
619 %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ss4S_3_swap: constrained fma(%v[3], -%v[3], %a).
; The expected mov + fmls sequence matches the unswapped strict test.
623 define float @test_fmls_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #0 {
624 ; CHECK-LABEL: test_fmls_ss4S_3_swap_strict:
626 ; CHECK-NEXT: mov s1, v2.s[3]
627 ; CHECK-NEXT: fmls s0, s1, v2.s[3]
629 %tmp1 = extractelement <4 x float> %v, i32 3
630 %tmp2 = fneg float %tmp1
631 %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %tmp2, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ss2S_0: constrained fma(-%b, %v[0], %a) with %v a
; <2 x float>. Expected: kill annotation, then fmsub s0, s2, s1, s0.
635 define float @test_fmls_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
636 ; CHECK-LABEL: test_fmls_ss2S_0_strict:
637 ; CHECK: // %bb.0: // %entry
638 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
639 ; CHECK-NEXT: fmsub s0, s2, s1, s0
642 %fneg = fneg float %b
643 %extract = extractelement <2 x float> %v, i64 0
644 %0 = tail call float @llvm.experimental.constrained.fma.f32(float %fneg, float %extract, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ss2S_0_swap: constrained fma(%v[0], -%b, %a).
; The expected output is the same as for the unswapped strict test.
648 define float @test_fmls_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
649 ; CHECK-LABEL: test_fmls_ss2S_0_swap_strict:
650 ; CHECK: // %bb.0: // %entry
651 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
652 ; CHECK-NEXT: fmsub s0, s2, s1, s0
655 %fneg = fneg float %b
656 %extract = extractelement <2 x float> %v, i64 0
657 %0 = tail call float @llvm.experimental.constrained.fma.f32(float %extract, float %fneg, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ss2S_1: constrained fma(-%v[1], %v[1], %a) with
; the negation written as fneg. Expected: kill annotation, mov of lane 1 into
; s1, then fmls s0, s1, v2.s[1].
661 define float @test_fmls_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
662 ; CHECK-LABEL: test_fmls_ss2S_1_strict:
664 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
665 ; CHECK-NEXT: mov s1, v2.s[1]
666 ; CHECK-NEXT: fmls s0, s1, v2.s[1]
668 %tmp1 = extractelement <2 x float> %v, i32 1
669 %tmp2 = fneg float %tmp1
670 %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ddD_0: constrained fma(-%b, %v[0], %a) with %v a
; <1 x double>. Expected: fmsub d0, d2, d1, d0.
; NOTE(review): the non-strict test_fmls_ddD_0 has the same operand order in
; the IR but expects the multiplicands as d1, d2. Confirm both by regenerating
; the assertions with update_llc_test_checks.py; do not hand-edit.
674 define double @test_fmls_ddD_0_strict(double %a, double %b, <1 x double> %v) #0 {
675 ; CHECK-LABEL: test_fmls_ddD_0_strict:
676 ; CHECK: // %bb.0: // %entry
677 ; CHECK-NEXT: fmsub d0, d2, d1, d0
680 %fneg = fneg double %b
681 %extract = extractelement <1 x double> %v, i64 0
682 %0 = tail call double @llvm.experimental.constrained.fma.f64(double %fneg, double %extract, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_ddD_0_swap: constrained fma(%v[0], -%b, %a) with
; %v a <1 x double>. Expected: fmsub d0, d2, d1, d0.
686 define double @test_fmls_ddD_0_swap_strict(double %a, double %b, <1 x double> %v) #0 {
687 ; CHECK-LABEL: test_fmls_ddD_0_swap_strict:
688 ; CHECK: // %bb.0: // %entry
689 ; CHECK-NEXT: fmsub d0, d2, d1, d0
692 %fneg = fneg double %b
693 %extract = extractelement <1 x double> %v, i64 0
694 %0 = tail call double @llvm.experimental.constrained.fma.f64(double %extract, double %fneg, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_dd2D_0: constrained fma(-%b, %v[0], %a) with %v a
; <2 x double>. Expected: fmsub d0, d2, d1, d0.
698 define double @test_fmls_dd2D_0_strict(double %a, double %b, <2 x double> %v) #0 {
699 ; CHECK-LABEL: test_fmls_dd2D_0_strict:
700 ; CHECK: // %bb.0: // %entry
701 ; CHECK-NEXT: fmsub d0, d2, d1, d0
704 %fneg = fneg double %b
705 %extract = extractelement <2 x double> %v, i64 0
706 %0 = tail call double @llvm.experimental.constrained.fma.f64(double %fneg, double %extract, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_dd2D_0_swap: constrained fma(%v[0], -%b, %a).
; The expected fmsub is the same as for the unswapped strict test.
710 define double @test_fmls_dd2D_0_swap_strict(double %a, double %b, <2 x double> %v) #0 {
711 ; CHECK-LABEL: test_fmls_dd2D_0_swap_strict:
712 ; CHECK: // %bb.0: // %entry
713 ; CHECK-NEXT: fmsub d0, d2, d1, d0
716 %fneg = fneg double %b
717 %extract = extractelement <2 x double> %v, i64 0
718 %0 = tail call double @llvm.experimental.constrained.fma.f64(double %extract, double %fneg, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_dd2D_1: constrained fma(-%v[1], %v[1], %a) with
; the negation written as fneg. Expected: mov of lane 1 into d1, then
; fmls d0, d1, v2.d[1].
722 define double @test_fmls_dd2D_1_strict(double %a, double %b, <2 x double> %v) #0 {
723 ; CHECK-LABEL: test_fmls_dd2D_1_strict:
725 ; CHECK-NEXT: mov d1, v2.d[1]
726 ; CHECK-NEXT: fmls d0, d1, v2.d[1]
728 %tmp1 = extractelement <2 x double> %v, i32 1
729 %tmp2 = fneg double %tmp1
730 %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp2, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict-FP form of test_fmls_dd2D_1_swap: constrained fma(%v[1], -%v[1], %a).
; The expected mov + fmls sequence matches the unswapped strict test.
734 define double @test_fmls_dd2D_1_swap_strict(double %a, double %b, <2 x double> %v) #0 {
735 ; CHECK-LABEL: test_fmls_dd2D_1_swap_strict:
737 ; CHECK-NEXT: mov d1, v2.d[1]
738 ; CHECK-NEXT: fmls d0, d1, v2.d[1]
740 %tmp1 = extractelement <2 x double> %v, i32 1
741 %tmp2 = fneg double %tmp1
742 %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %tmp2, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")