1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve,-vfp2 -o - %s | FileCheck %s --check-prefix=CHECK-NOFP
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -o - %s | FileCheck --check-prefix=CHECK-FP %s
5 ; This file tests that we expand floating-point operations correctly,
6 ; even if we do not have an FPU.
; vector_add_f16: a single fadd of two <8 x half> vectors.
;
; Expected code for the run with -mattr=+mve,-vfp2 (no FP support): the add is
; expanded one lane at a time. For each of the 8 lanes, the lane is read from
; each operand with vmov.u16, both values are widened with __aeabi_h2f, summed
; with __aeabi_fadd, narrowed again with __aeabi_f2h, and the result is
; inserted into q6 with vmov.16. q6 is then copied to q0 before returning.
;
; Expected code for the run with -mattr=+mve.fp: one vadd.f16 q0, q0, q1.
;
; The assertions below are autogenerated (see the note at the top of the
; file); regenerate them with the update script rather than editing by hand.
8 define arm_aapcs_vfpcc <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
9 ; CHECK-NOFP-LABEL: vector_add_f16:
10 ; CHECK-NOFP: @ %bb.0: @ %entry
11 ; CHECK-NOFP-NEXT: .save {r4, lr}
12 ; CHECK-NOFP-NEXT: push {r4, lr}
13 ; CHECK-NOFP-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
14 ; CHECK-NOFP-NEXT: vpush {d8, d9, d10, d11, d12, d13}
15 ; CHECK-NOFP-NEXT: vmov.u16 r0, q1[0]
16 ; CHECK-NOFP-NEXT: vmov q5, q1
17 ; CHECK-NOFP-NEXT: vmov q4, q0
18 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
19 ; CHECK-NOFP-NEXT: mov r4, r0
20 ; CHECK-NOFP-NEXT: vmov.u16 r0, q4[0]
21 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
22 ; CHECK-NOFP-NEXT: mov r1, r4
23 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
24 ; CHECK-NOFP-NEXT: bl __aeabi_f2h
25 ; CHECK-NOFP-NEXT: vmov.16 q6[0], r0
26 ; CHECK-NOFP-NEXT: vmov.u16 r0, q5[1]
27 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
28 ; CHECK-NOFP-NEXT: mov r4, r0
29 ; CHECK-NOFP-NEXT: vmov.u16 r0, q4[1]
30 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
31 ; CHECK-NOFP-NEXT: mov r1, r4
32 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
33 ; CHECK-NOFP-NEXT: bl __aeabi_f2h
34 ; CHECK-NOFP-NEXT: vmov.16 q6[1], r0
35 ; CHECK-NOFP-NEXT: vmov.u16 r0, q5[2]
36 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
37 ; CHECK-NOFP-NEXT: mov r4, r0
38 ; CHECK-NOFP-NEXT: vmov.u16 r0, q4[2]
39 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
40 ; CHECK-NOFP-NEXT: mov r1, r4
41 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
42 ; CHECK-NOFP-NEXT: bl __aeabi_f2h
43 ; CHECK-NOFP-NEXT: vmov.16 q6[2], r0
44 ; CHECK-NOFP-NEXT: vmov.u16 r0, q5[3]
45 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
46 ; CHECK-NOFP-NEXT: mov r4, r0
47 ; CHECK-NOFP-NEXT: vmov.u16 r0, q4[3]
48 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
49 ; CHECK-NOFP-NEXT: mov r1, r4
50 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
51 ; CHECK-NOFP-NEXT: bl __aeabi_f2h
52 ; CHECK-NOFP-NEXT: vmov.16 q6[3], r0
53 ; CHECK-NOFP-NEXT: vmov.u16 r0, q5[4]
54 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
55 ; CHECK-NOFP-NEXT: mov r4, r0
56 ; CHECK-NOFP-NEXT: vmov.u16 r0, q4[4]
57 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
58 ; CHECK-NOFP-NEXT: mov r1, r4
59 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
60 ; CHECK-NOFP-NEXT: bl __aeabi_f2h
61 ; CHECK-NOFP-NEXT: vmov.16 q6[4], r0
62 ; CHECK-NOFP-NEXT: vmov.u16 r0, q5[5]
63 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
64 ; CHECK-NOFP-NEXT: mov r4, r0
65 ; CHECK-NOFP-NEXT: vmov.u16 r0, q4[5]
66 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
67 ; CHECK-NOFP-NEXT: mov r1, r4
68 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
69 ; CHECK-NOFP-NEXT: bl __aeabi_f2h
70 ; CHECK-NOFP-NEXT: vmov.16 q6[5], r0
71 ; CHECK-NOFP-NEXT: vmov.u16 r0, q5[6]
72 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
73 ; CHECK-NOFP-NEXT: mov r4, r0
74 ; CHECK-NOFP-NEXT: vmov.u16 r0, q4[6]
75 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
76 ; CHECK-NOFP-NEXT: mov r1, r4
77 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
78 ; CHECK-NOFP-NEXT: bl __aeabi_f2h
79 ; CHECK-NOFP-NEXT: vmov.16 q6[6], r0
80 ; CHECK-NOFP-NEXT: vmov.u16 r0, q5[7]
81 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
82 ; CHECK-NOFP-NEXT: mov r4, r0
83 ; CHECK-NOFP-NEXT: vmov.u16 r0, q4[7]
84 ; CHECK-NOFP-NEXT: bl __aeabi_h2f
85 ; CHECK-NOFP-NEXT: mov r1, r4
86 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
87 ; CHECK-NOFP-NEXT: bl __aeabi_f2h
88 ; CHECK-NOFP-NEXT: vmov.16 q6[7], r0
89 ; CHECK-NOFP-NEXT: vmov q0, q6
90 ; CHECK-NOFP-NEXT: vpop {d8, d9, d10, d11, d12, d13}
91 ; CHECK-NOFP-NEXT: pop {r4, pc}
93 ; CHECK-FP-LABEL: vector_add_f16:
94 ; CHECK-FP: @ %bb.0: @ %entry
95 ; CHECK-FP-NEXT: vadd.f16 q0, q0, q1
96 ; CHECK-FP-NEXT: bx lr
98 %sum = fadd <8 x half> %lhs, %rhs
; vector_add_f32: a single fadd of two <4 x float> vectors.
;
; Expected code for the run with -mattr=+mve,-vfp2 (no FP support): the add is
; expanded into four __aeabi_fadd calls. Operand lanes are moved from the
; saved copies of q0/q1 (q5/q4) into core registers two at a time, and the
; four results are written to s19, s18, s17 and s16 in that order. q4 is then
; copied to q0 before returning.
;
; Expected code for the run with -mattr=+mve.fp: one vadd.f32 q0, q0, q1.
;
; The assertions below are autogenerated (see the note at the top of the
; file); regenerate them with the update script rather than editing by hand.
102 define arm_aapcs_vfpcc <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
103 ; CHECK-NOFP-LABEL: vector_add_f32:
104 ; CHECK-NOFP: @ %bb.0: @ %entry
105 ; CHECK-NOFP-NEXT: .save {r4, r5, r7, lr}
106 ; CHECK-NOFP-NEXT: push {r4, r5, r7, lr}
107 ; CHECK-NOFP-NEXT: .vsave {d8, d9, d10, d11}
108 ; CHECK-NOFP-NEXT: vpush {d8, d9, d10, d11}
109 ; CHECK-NOFP-NEXT: vmov q4, q1
110 ; CHECK-NOFP-NEXT: vmov q5, q0
111 ; CHECK-NOFP-NEXT: vmov r4, r0, d11
112 ; CHECK-NOFP-NEXT: vmov r5, r1, d9
113 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
114 ; CHECK-NOFP-NEXT: vmov s19, r0
115 ; CHECK-NOFP-NEXT: mov r0, r4
116 ; CHECK-NOFP-NEXT: mov r1, r5
117 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
118 ; CHECK-NOFP-NEXT: vmov s18, r0
119 ; CHECK-NOFP-NEXT: vmov r4, r0, d10
120 ; CHECK-NOFP-NEXT: vmov r5, r1, d8
121 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
122 ; CHECK-NOFP-NEXT: vmov s17, r0
123 ; CHECK-NOFP-NEXT: mov r0, r4
124 ; CHECK-NOFP-NEXT: mov r1, r5
125 ; CHECK-NOFP-NEXT: bl __aeabi_fadd
126 ; CHECK-NOFP-NEXT: vmov s16, r0
127 ; CHECK-NOFP-NEXT: vmov q0, q4
128 ; CHECK-NOFP-NEXT: vpop {d8, d9, d10, d11}
129 ; CHECK-NOFP-NEXT: pop {r4, r5, r7, pc}
131 ; CHECK-FP-LABEL: vector_add_f32:
132 ; CHECK-FP: @ %bb.0: @ %entry
133 ; CHECK-FP-NEXT: vadd.f32 q0, q0, q1
134 ; CHECK-FP-NEXT: bx lr
136 %sum = fadd <4 x float> %lhs, %rhs