; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=hard < %s | FileCheck %s --check-prefix=CHECKHARD
; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=soft < %s | FileCheck %s --check-prefix=CHECKSOFT

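; Extracting an f16 lane and extending it to f32 should fold to a single
; half-to-single conversion of the containing s-register: odd lanes sit in the
; top half and use vcvtt.f32.f16, even lanes in the bottom half and use
; vcvtb.f32.f16. The soft-float variants only add the GPR moves required by
; the calling convention.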
define float @test_vget_lane_f16_1(<4 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_lane_f16_1:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f32.f16 s0, s0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_lane_f16_1:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f32.f16 s0, s0
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <4 x half> %a, i32 1
  %conv = fpext half %elt to float
  ret float %conv
}

define float @test_vget_lane_f16_2(<4 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_lane_f16_2:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtb.f32.f16 s0, s1
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_lane_f16_2:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtb.f32.f16 s0, s1
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <4 x half> %a, i32 2
  %conv = fpext half %elt to float
  ret float %conv
}

define float @test_vget_laneq_f16_6(<8 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_laneq_f16_6:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtb.f32.f16 s0, s3
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_laneq_f16_6:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vcvtb.f32.f16 s0, s3
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <8 x half> %a, i32 6
  %conv = fpext half %elt to float
  ret float %conv
}

define float @test_vget_laneq_f16_7(<8 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_laneq_f16_7:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f32.f16 s0, s3
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_laneq_f16_7:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vcvtt.f32.f16 s0, s3
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <8 x half> %a, i32 7
  %conv = fpext half %elt to float
  ret float %conv
}

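; Inserting a scalar f16 into lane 0 of an undef vector should be free under
; the hard-float ABI (the value is already in s0, only a register-class copy
; is needed); the soft-float ABI just moves the value in from and back out to
; GPRs.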
define <4 x half> @insert_v4f16(half %a) {
; CHECKHARD-LABEL: insert_v4f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    @ kill: def $s0 killed $s0 def $d0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: insert_v4f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov.f16 s0, r0
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %res = insertelement <4 x half> undef, half %a, i32 0
  ret <4 x half> %res
}

define <8 x half> @insert_v8f16(half %a) {
; CHECKHARD-LABEL: insert_v8f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    @ kill: def $s0 killed $s0 def $q0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: insert_v8f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov.f16 s0, r0
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %res = insertelement <8 x half> undef, half %a, i32 0
  ret <8 x half> %res
}

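; fptrunc of an f32 followed by an insert into an odd f16 lane should select a
; single vcvtt.f16.f32 writing the top half of the destination s-register,
; with no additional lane shuffling in the hard-float case.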
define <4 x half> @test_vset_lane_f16(<4 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_lane_f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s1, s2
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_lane_f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vmov s2, r2
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s1, s2
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %b = fptrunc float %fb to half
  %x = insertelement <4 x half> %a, half %b, i32 3
  ret <4 x half> %x
}

define <8 x half> @test_vset_laneq_f16_1(<8 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_laneq_f16_1:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s0, s4
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_laneq_f16_1:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vldr s4, [sp]
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s0, s4
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %b = fptrunc float %fb to half
  %x = insertelement <8 x half> %a, half %b, i32 1
  ret <8 x half> %x
}

define <8 x half> @test_vset_laneq_f16_7(<8 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_laneq_f16_7:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s3, s4
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_laneq_f16_7:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vldr s4, [sp]
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s3, s4
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    bx lr
entry:
  %b = fptrunc float %fb to half
  %x = insertelement <8 x half> %a, half %b, i32 7
  ret <8 x half> %x
}

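; Stride-3 de-interleave of the first 24 lanes of a <32 x half> source,
; followed by two vector adds. The expected expansion is long: it mixes lane
; moves through GPRs with vext/vrev32/vdup shuffles, but the arithmetic stays
; as vadd.f16 on q-registers.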
define arm_aapcs_vfpcc <8 x half> @shuffle3step_f16(<32 x half> %src) {
; CHECKHARD-LABEL: shuffle3step_f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vmov r1, s0
; CHECKHARD-NEXT:    vmovx.f16 s12, s1
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vext.16 d16, d4, d5, #2
; CHECKHARD-NEXT:    vmovx.f16 s12, s4
; CHECKHARD-NEXT:    vdup.16 q11, d3[1]
; CHECKHARD-NEXT:    vrev32.16 d17, d16
; CHECKHARD-NEXT:    vext.16 d16, d16, d17, #3
; CHECKHARD-NEXT:    vrev32.16 d17, d3
; CHECKHARD-NEXT:    vext.16 d17, d17, d3, #1
; CHECKHARD-NEXT:    vext.16 d16, d16, d17, #2
; CHECKHARD-NEXT:    vext.16 d17, d16, d16, #2
; CHECKHARD-NEXT:    vmov.16 d16[0], r1
; CHECKHARD-NEXT:    vmov.16 d16[1], r0
; CHECKHARD-NEXT:    vmov r0, s3
; CHECKHARD-NEXT:    vmov.16 d16[2], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s0
; CHECKHARD-NEXT:    vmov r1, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s3
; CHECKHARD-NEXT:    vmov.16 d16[3], r0
; CHECKHARD-NEXT:    vmov r0, s2
; CHECKHARD-NEXT:    vmov.16 d18[0], r1
; CHECKHARD-NEXT:    vmov r1, s8
; CHECKHARD-NEXT:    vmov.16 d18[1], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s9
; CHECKHARD-NEXT:    vmov.16 d20[1], r1
; CHECKHARD-NEXT:    vmov.16 d18[2], r0
; CHECKHARD-NEXT:    vmov r0, s5
; CHECKHARD-NEXT:    vmov.16 d18[3], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vmov.16 d20[2], r0
; CHECKHARD-NEXT:    vmov r0, s11
; CHECKHARD-NEXT:    vmov.16 d20[3], r0
; CHECKHARD-NEXT:    vmov r0, s10
; CHECKHARD-NEXT:    vext.16 d20, d20, d22, #1
; CHECKHARD-NEXT:    vdup.16 q11, d3[2]
; CHECKHARD-NEXT:    vext.16 d19, d20, d20, #3
; CHECKHARD-NEXT:    vadd.f16 q8, q8, q9
; CHECKHARD-NEXT:    vext.16 d18, d0, d1, #2
; CHECKHARD-NEXT:    vmovx.f16 s0, s8
; CHECKHARD-NEXT:    vmov r1, s0
; CHECKHARD-NEXT:    vmovx.f16 s0, s11
; CHECKHARD-NEXT:    vext.16 d19, d18, d2, #3
; CHECKHARD-NEXT:    vext.16 d18, d2, d18, #1
; CHECKHARD-NEXT:    vext.16 d18, d18, d19, #2
; CHECKHARD-NEXT:    vext.16 d18, d18, d18, #1
; CHECKHARD-NEXT:    vmov.16 d20[1], r1
; CHECKHARD-NEXT:    vmov.16 d20[2], r0
; CHECKHARD-NEXT:    vmov r0, s0
; CHECKHARD-NEXT:    vmov.16 d20[3], r0
; CHECKHARD-NEXT:    vext.16 d20, d20, d22, #1
; CHECKHARD-NEXT:    vext.16 d19, d20, d20, #3
; CHECKHARD-NEXT:    vadd.f16 q0, q8, q9
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: shuffle3step_f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov r1, s0
; CHECKSOFT-NEXT:    vmovx.f16 s12, s1
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vext.16 d16, d4, d5, #2
; CHECKSOFT-NEXT:    vmovx.f16 s12, s4
; CHECKSOFT-NEXT:    vdup.16 q11, d3[1]
; CHECKSOFT-NEXT:    vrev32.16 d17, d16
; CHECKSOFT-NEXT:    vext.16 d16, d16, d17, #3
; CHECKSOFT-NEXT:    vrev32.16 d17, d3
; CHECKSOFT-NEXT:    vext.16 d17, d17, d3, #1
; CHECKSOFT-NEXT:    vext.16 d16, d16, d17, #2
; CHECKSOFT-NEXT:    vext.16 d17, d16, d16, #2
; CHECKSOFT-NEXT:    vmov.16 d16[0], r1
; CHECKSOFT-NEXT:    vmov.16 d16[1], r0
; CHECKSOFT-NEXT:    vmov r0, s3
; CHECKSOFT-NEXT:    vmov.16 d16[2], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s0
; CHECKSOFT-NEXT:    vmov r1, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s3
; CHECKSOFT-NEXT:    vmov.16 d16[3], r0
; CHECKSOFT-NEXT:    vmov r0, s2
; CHECKSOFT-NEXT:    vmov.16 d18[0], r1
; CHECKSOFT-NEXT:    vmov r1, s8
; CHECKSOFT-NEXT:    vmov.16 d18[1], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s9
; CHECKSOFT-NEXT:    vmov.16 d20[1], r1
; CHECKSOFT-NEXT:    vmov.16 d18[2], r0
; CHECKSOFT-NEXT:    vmov r0, s5
; CHECKSOFT-NEXT:    vmov.16 d18[3], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vmov.16 d20[2], r0
; CHECKSOFT-NEXT:    vmov r0, s11
; CHECKSOFT-NEXT:    vmov.16 d20[3], r0
; CHECKSOFT-NEXT:    vmov r0, s10
; CHECKSOFT-NEXT:    vext.16 d20, d20, d22, #1
; CHECKSOFT-NEXT:    vdup.16 q11, d3[2]
; CHECKSOFT-NEXT:    vext.16 d19, d20, d20, #3
; CHECKSOFT-NEXT:    vadd.f16 q8, q8, q9
; CHECKSOFT-NEXT:    vext.16 d18, d0, d1, #2
; CHECKSOFT-NEXT:    vmovx.f16 s0, s8
; CHECKSOFT-NEXT:    vmov r1, s0
; CHECKSOFT-NEXT:    vmovx.f16 s0, s11
; CHECKSOFT-NEXT:    vext.16 d19, d18, d2, #3
; CHECKSOFT-NEXT:    vext.16 d18, d2, d18, #1
; CHECKSOFT-NEXT:    vext.16 d18, d18, d19, #2
; CHECKSOFT-NEXT:    vext.16 d18, d18, d18, #1
; CHECKSOFT-NEXT:    vmov.16 d20[1], r1
; CHECKSOFT-NEXT:    vmov.16 d20[2], r0
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    vmov.16 d20[3], r0
; CHECKSOFT-NEXT:    vext.16 d20, d20, d22, #1
; CHECKSOFT-NEXT:    vext.16 d19, d20, d20, #3
; CHECKSOFT-NEXT:    vadd.f16 q0, q8, q9
; CHECKSOFT-NEXT:    bx lr
entry:
  %s1 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %s2 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
  %s3 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
  %a = fadd <8 x half> %s1, %s2
  %r = fadd <8 x half> %a, %s3
  ret <8 x half> %r
}