; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; Declarations of the MVE complex multiply-accumulate (vcmlaq) and complex
; multiply (vcmulq) intrinsics exercised by the tests below. The leading i32
; operand selects the rotation (0 -> #0, 1 -> #90, 2 -> #180, 3 -> #270).
declare <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32, <8 x half>, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>)
declare <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)
; A vcmla with a zero accumulator followed by a fast fadd of %a should fold
; the addend into the vcmla accumulator operand (rotation #0).
define arm_aapcs_vfpcc <4 x float> @reassoc_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}
; Same fold with the fadd operands commuted (%a + %d) and rotation #90.
define arm_aapcs_vfpcc <4 x float> @reassoc_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_c_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}
; f16 variant of the accumulator fold, rotation #180.
define arm_aapcs_vfpcc <8 x half> @reassoc_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: reassoc_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 2, <8 x half> zeroinitializer, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}
; f16 variant with commuted fadd operands, rotation #270.
define arm_aapcs_vfpcc <8 x half> @reassoc_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: reassoc_c_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 3, <8 x half> zeroinitializer, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %a, %d
  ret <8 x half> %res
}
; Without the fast flag the fadd must NOT be folded into the vcmla
; accumulator: a separate zeroed accumulator and vadd are emitted.
define arm_aapcs_vfpcc <4 x float> @reassoc_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_nonfast_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q3, #0x0
; CHECK-NEXT:    vcmla.f32 q3, q1, q2, #0
; CHECK-NEXT:    vadd.f32 q0, q3, q0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}
; A vcmul followed by a fast fadd of %a should combine into a single vcmla
; with %a as the accumulator (rotation #0).
define arm_aapcs_vfpcc <4 x float> @muladd_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}
; vcmul + fadd combine with commuted fadd operands, rotation #90.
define arm_aapcs_vfpcc <4 x float> @muladd_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_c_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}
; f16 variant of the vcmul + fadd combine, rotation #180.
define arm_aapcs_vfpcc <8 x half> @muladd_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}
; f16 variant with commuted fadd operands, rotation #270.
define arm_aapcs_vfpcc <8 x half> @muladd_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_c_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %a, %d
  ret <8 x half> %res
}
; Without the fast flag the vcmul and fadd must stay separate instructions.
define arm_aapcs_vfpcc <4 x float> @muladd_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_nonfast_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q3, q1, q2, #0
; CHECK-NEXT:    vadd.f32 q0, q3, q0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}