1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=hard < %s | FileCheck %s --check-prefix=CHECKHARD
3 ; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=soft < %s | FileCheck %s --check-prefix=CHECKSOFT
; Lane read from a 64-bit vector, odd lane: extracts element 1 of <4 x half> %a
; and widens it to float with fpext.
; The hard-float run expects a single vcvtt.f32.f16 that reads and writes s0
; (vcvtt converts the top half of an S register). The soft-float run expects the
; same conversion, preceded by a move of r0/r1 into d0 and followed by a move of
; s0 back into r0.
5 define float @test_vget_lane_f16_1(<4 x half> %a) nounwind {
6 ; CHECKHARD-LABEL: test_vget_lane_f16_1:
7 ; CHECKHARD: @ %bb.0: @ %entry
8 ; CHECKHARD-NEXT: vcvtt.f32.f16 s0, s0
9 ; CHECKHARD-NEXT: bx lr
11 ; CHECKSOFT-LABEL: test_vget_lane_f16_1:
12 ; CHECKSOFT: @ %bb.0: @ %entry
13 ; CHECKSOFT-NEXT: vmov d0, r0, r1
14 ; CHECKSOFT-NEXT: vcvtt.f32.f16 s0, s0
15 ; CHECKSOFT-NEXT: vmov r0, s0
16 ; CHECKSOFT-NEXT: bx lr
18 %elt = extractelement <4 x half> %a, i32 1
19 %conv = fpext half %elt to float
; Lane read from a 64-bit vector, even lane: extracts element 2 of <4 x half> %a
; and widens it to float with fpext.
; Both runs expect vcvtb.f32.f16 s0, s1 (vcvtb converts the bottom half of an
; S register). The soft-float run additionally expects r0/r1 to be moved into d0
; beforehand and s0 to be moved into r0 afterwards.
23 define float @test_vget_lane_f16_2(<4 x half> %a) nounwind {
24 ; CHECKHARD-LABEL: test_vget_lane_f16_2:
25 ; CHECKHARD: @ %bb.0: @ %entry
26 ; CHECKHARD-NEXT: vcvtb.f32.f16 s0, s1
27 ; CHECKHARD-NEXT: bx lr
29 ; CHECKSOFT-LABEL: test_vget_lane_f16_2:
30 ; CHECKSOFT: @ %bb.0: @ %entry
31 ; CHECKSOFT-NEXT: vmov d0, r0, r1
32 ; CHECKSOFT-NEXT: vcvtb.f32.f16 s0, s1
33 ; CHECKSOFT-NEXT: vmov r0, s0
34 ; CHECKSOFT-NEXT: bx lr
36 %elt = extractelement <4 x half> %a, i32 2
37 %conv = fpext half %elt to float
; Lane read from a 128-bit vector, even lane: extracts element 6 of <8 x half> %a
; and widens it to float with fpext.
; Both runs expect vcvtb.f32.f16 s0, s3. In the soft-float run only r2/r3 are
; expected to be moved into a vector register (d1) before the conversion, and
; the result is moved from s0 into r0.
41 define float @test_vget_laneq_f16_6(<8 x half> %a) nounwind {
42 ; CHECKHARD-LABEL: test_vget_laneq_f16_6:
43 ; CHECKHARD: @ %bb.0: @ %entry
44 ; CHECKHARD-NEXT: vcvtb.f32.f16 s0, s3
45 ; CHECKHARD-NEXT: bx lr
47 ; CHECKSOFT-LABEL: test_vget_laneq_f16_6:
48 ; CHECKSOFT: @ %bb.0: @ %entry
49 ; CHECKSOFT-NEXT: vmov d1, r2, r3
50 ; CHECKSOFT-NEXT: vcvtb.f32.f16 s0, s3
51 ; CHECKSOFT-NEXT: vmov r0, s0
52 ; CHECKSOFT-NEXT: bx lr
54 %elt = extractelement <8 x half> %a, i32 6
55 %conv = fpext half %elt to float
; Lane read from a 128-bit vector, odd lane: extracts element 7 of <8 x half> %a
; and widens it to float with fpext.
; Both runs expect vcvtt.f32.f16 s0, s3. In the soft-float run only r2/r3 are
; expected to be moved into a vector register (d1) before the conversion, and
; the result is moved from s0 into r0.
59 define float @test_vget_laneq_f16_7(<8 x half> %a) nounwind {
60 ; CHECKHARD-LABEL: test_vget_laneq_f16_7:
61 ; CHECKHARD: @ %bb.0: @ %entry
62 ; CHECKHARD-NEXT: vcvtt.f32.f16 s0, s3
63 ; CHECKHARD-NEXT: bx lr
65 ; CHECKSOFT-LABEL: test_vget_laneq_f16_7:
66 ; CHECKSOFT: @ %bb.0: @ %entry
67 ; CHECKSOFT-NEXT: vmov d1, r2, r3
68 ; CHECKSOFT-NEXT: vcvtt.f32.f16 s0, s3
69 ; CHECKSOFT-NEXT: vmov r0, s0
70 ; CHECKSOFT-NEXT: bx lr
72 %elt = extractelement <8 x half> %a, i32 7
73 %conv = fpext half %elt to float
; Inserts the scalar half %a into lane 0 of an undef <4 x half>.
; The hard-float run expects no real instruction before the return: only a
; register-kill annotation marking s0 as part of d0. The soft-float run expects
; the half to be moved from r0 into s0 with vmov.f16 and d0 to be moved out to
; r0/r1.
77 define <4 x half> @insert_v4f16(half %a) {
78 ; CHECKHARD-LABEL: insert_v4f16:
79 ; CHECKHARD: @ %bb.0: @ %entry
80 ; CHECKHARD-NEXT: @ kill: def $s0 killed $s0 def $d0
81 ; CHECKHARD-NEXT: bx lr
83 ; CHECKSOFT-LABEL: insert_v4f16:
84 ; CHECKSOFT: @ %bb.0: @ %entry
85 ; CHECKSOFT-NEXT: vmov.f16 s0, r0
86 ; CHECKSOFT-NEXT: vmov r0, r1, d0
87 ; CHECKSOFT-NEXT: bx lr
89 %res = insertelement <4 x half> undef, half %a, i32 0
; Inserts the scalar half %a into lane 0 of an undef <8 x half>.
; The hard-float run expects no real instruction before the return: only a
; register-kill annotation marking s0 as part of q0. The soft-float run expects
; the half to be moved from r0 into s0 with vmov.f16, then d1 and d0 to be moved
; out to r2/r3 and r0/r1 respectively.
93 define <8 x half> @insert_v8f16(half %a) {
94 ; CHECKHARD-LABEL: insert_v8f16:
95 ; CHECKHARD: @ %bb.0: @ %entry
96 ; CHECKHARD-NEXT: @ kill: def $s0 killed $s0 def $q0
97 ; CHECKHARD-NEXT: bx lr
99 ; CHECKSOFT-LABEL: insert_v8f16:
100 ; CHECKSOFT: @ %bb.0: @ %entry
101 ; CHECKSOFT-NEXT: vmov.f16 s0, r0
102 ; CHECKSOFT-NEXT: vmov r2, r3, d1
103 ; CHECKSOFT-NEXT: vmov r0, r1, d0
104 ; CHECKSOFT-NEXT: bx lr
106 %res = insertelement <8 x half> undef, half %a, i32 0
; Lane write into a 64-bit vector: truncates float %fb to half and inserts it
; into lane 3 of <4 x half> %a.
; The hard-float run expects a single vcvtt.f16.f32 s1, s2, i.e. the narrowing
; conversion writes the lane directly with no separate insert. The soft-float
; run expects the same conversion bracketed by moves: r0/r1 into d0 and r2 into
; s2 beforehand, d0 back out to r0/r1 afterwards.
110 define <4 x half> @test_vset_lane_f16(<4 x half> %a, float %fb) nounwind {
111 ; CHECKHARD-LABEL: test_vset_lane_f16:
112 ; CHECKHARD: @ %bb.0: @ %entry
113 ; CHECKHARD-NEXT: vcvtt.f16.f32 s1, s2
114 ; CHECKHARD-NEXT: bx lr
116 ; CHECKSOFT-LABEL: test_vset_lane_f16:
117 ; CHECKSOFT: @ %bb.0: @ %entry
118 ; CHECKSOFT-NEXT: vmov d0, r0, r1
119 ; CHECKSOFT-NEXT: vmov s2, r2
120 ; CHECKSOFT-NEXT: vcvtt.f16.f32 s1, s2
121 ; CHECKSOFT-NEXT: vmov r0, r1, d0
122 ; CHECKSOFT-NEXT: bx lr
124 %b = fptrunc float %fb to half
125 %x = insertelement <4 x half> %a, half %b, i32 3
; Lane write into a 128-bit vector, low half: truncates float %fb to half and
; inserts it into lane 1 of <8 x half> %a.
; The hard-float run expects a single vcvtt.f16.f32 s0, s4. The soft-float run
; expects the vector to be rebuilt in d0/d1 from r0-r3, s4 to be loaded from
; [sp] (presumably %fb, passed on the stack because r0-r3 hold the vector), the
; same conversion, and then d1/d0 moved back out to r2/r3 and r0/r1.
129 define <8 x half> @test_vset_laneq_f16_1(<8 x half> %a, float %fb) nounwind {
130 ; CHECKHARD-LABEL: test_vset_laneq_f16_1:
131 ; CHECKHARD: @ %bb.0: @ %entry
132 ; CHECKHARD-NEXT: vcvtt.f16.f32 s0, s4
133 ; CHECKHARD-NEXT: bx lr
135 ; CHECKSOFT-LABEL: test_vset_laneq_f16_1:
136 ; CHECKSOFT: @ %bb.0: @ %entry
137 ; CHECKSOFT-NEXT: vmov d1, r2, r3
138 ; CHECKSOFT-NEXT: vldr s4, [sp]
139 ; CHECKSOFT-NEXT: vmov d0, r0, r1
140 ; CHECKSOFT-NEXT: vcvtt.f16.f32 s0, s4
141 ; CHECKSOFT-NEXT: vmov r2, r3, d1
142 ; CHECKSOFT-NEXT: vmov r0, r1, d0
143 ; CHECKSOFT-NEXT: bx lr
145 %b = fptrunc float %fb to half
146 %x = insertelement <8 x half> %a, half %b, i32 1
; Lane write into a 128-bit vector, high half: truncates float %fb to half and
; inserts it into lane 7 of <8 x half> %a.
; The hard-float run expects a single vcvtt.f16.f32 s3, s4. The soft-float run
; expects the vector to be rebuilt in d0/d1 from r0-r3, s4 to be loaded from
; [sp] (presumably %fb, passed on the stack because r0-r3 hold the vector), the
; same conversion, and then d0/d1 moved back out to r0/r1 and r2/r3.
150 define <8 x half> @test_vset_laneq_f16_7(<8 x half> %a, float %fb) nounwind {
151 ; CHECKHARD-LABEL: test_vset_laneq_f16_7:
152 ; CHECKHARD: @ %bb.0: @ %entry
153 ; CHECKHARD-NEXT: vcvtt.f16.f32 s3, s4
154 ; CHECKHARD-NEXT: bx lr
156 ; CHECKSOFT-LABEL: test_vset_laneq_f16_7:
157 ; CHECKSOFT: @ %bb.0: @ %entry
158 ; CHECKSOFT-NEXT: vmov d1, r2, r3
159 ; CHECKSOFT-NEXT: vldr s4, [sp]
160 ; CHECKSOFT-NEXT: vmov d0, r0, r1
161 ; CHECKSOFT-NEXT: vcvtt.f16.f32 s3, s4
162 ; CHECKSOFT-NEXT: vmov r0, r1, d0
163 ; CHECKSOFT-NEXT: vmov r2, r3, d1
164 ; CHECKSOFT-NEXT: bx lr
166 %b = fptrunc float %fb to half
167 %x = insertelement <8 x half> %a, half %b, i32 7
; Stride-3 de-interleave of a <32 x half> followed by a reduction: three
; shufflevectors pick elements {0,3,...,21}, {1,4,...,22} and {2,5,...,23} of
; %src into <8 x half> values, which are then summed with two fadd operations.
; The function is declared arm_aapcs_vfpcc, and the expected instruction
; sequence is identical for the hard-float and soft-float runs (presumably
; because the explicit calling convention takes precedence over the float-abi
; option; confirm against the ARM backend if this matters).
; The expected code builds the shuffled vectors with a mix of vext.16,
; vrev32.16, vdup.16, vmovx.f16 and per-lane vmov.16 inserts via r0/r1, and
; ends with two vadd.f16 q-register additions.
171 define arm_aapcs_vfpcc <8 x half> @shuffle3step_f16(<32 x half> %src) {
172 ; CHECKHARD-LABEL: shuffle3step_f16:
173 ; CHECKHARD: @ %bb.0: @ %entry
174 ; CHECKHARD-NEXT: vmov r1, s0
175 ; CHECKHARD-NEXT: vmovx.f16 s12, s1
176 ; CHECKHARD-NEXT: vmov r0, s12
177 ; CHECKHARD-NEXT: vrev32.16 d16, d3
178 ; CHECKHARD-NEXT: vext.16 d17, d4, d5, #2
179 ; CHECKHARD-NEXT: vmovx.f16 s12, s4
180 ; CHECKHARD-NEXT: vext.16 d16, d16, d3, #1
181 ; CHECKHARD-NEXT: vext.16 d16, d17, d16, #2
182 ; CHECKHARD-NEXT: vext.16 d16, d16, d17, #1
183 ; CHECKHARD-NEXT: vext.16 d17, d16, d16, #1
184 ; CHECKHARD-NEXT: vmov.16 d16[0], r1
185 ; CHECKHARD-NEXT: vmov.16 d16[1], r0
186 ; CHECKHARD-NEXT: vmov r0, s3
187 ; CHECKHARD-NEXT: vmov.16 d16[2], r0
188 ; CHECKHARD-NEXT: vmov r0, s12
189 ; CHECKHARD-NEXT: vmovx.f16 s12, s0
190 ; CHECKHARD-NEXT: vmov r1, s12
191 ; CHECKHARD-NEXT: vmovx.f16 s12, s3
192 ; CHECKHARD-NEXT: vmov.16 d16[3], r0
193 ; CHECKHARD-NEXT: vmov r0, s2
194 ; CHECKHARD-NEXT: vmov.16 d18[0], r1
195 ; CHECKHARD-NEXT: vmov.16 d18[1], r0
196 ; CHECKHARD-NEXT: vmov r0, s12
197 ; CHECKHARD-NEXT: vdup.16 q3, d3[1]
198 ; CHECKHARD-NEXT: vmov r1, s12
199 ; CHECKHARD-NEXT: vmovx.f16 s12, s9
200 ; CHECKHARD-NEXT: vmov.16 d18[2], r0
201 ; CHECKHARD-NEXT: vmov r0, s5
202 ; CHECKHARD-NEXT: vmov.16 d18[3], r0
203 ; CHECKHARD-NEXT: vmov r0, s8
204 ; CHECKHARD-NEXT: vmov.16 d19[0], r1
205 ; CHECKHARD-NEXT: vmov.16 d19[1], r0
206 ; CHECKHARD-NEXT: vmov r0, s12
207 ; CHECKHARD-NEXT: vmov.16 d19[2], r0
208 ; CHECKHARD-NEXT: vmov r0, s11
209 ; CHECKHARD-NEXT: vmov.16 d19[3], r0
210 ; CHECKHARD-NEXT: vadd.f16 q8, q8, q9
211 ; CHECKHARD-NEXT: vext.16 d18, d0, d1, #2
212 ; CHECKHARD-NEXT: vmovx.f16 s0, s8
213 ; CHECKHARD-NEXT: vmov r0, s0
214 ; CHECKHARD-NEXT: vdup.16 q0, d3[2]
215 ; CHECKHARD-NEXT: vext.16 d19, d18, d2, #3
216 ; CHECKHARD-NEXT: vmov r1, s0
217 ; CHECKHARD-NEXT: vext.16 d18, d2, d18, #1
218 ; CHECKHARD-NEXT: vmovx.f16 s0, s11
219 ; CHECKHARD-NEXT: vext.16 d18, d18, d19, #2
220 ; CHECKHARD-NEXT: vext.16 d18, d18, d18, #1
221 ; CHECKHARD-NEXT: vmov.16 d19[0], r1
222 ; CHECKHARD-NEXT: vmov.16 d19[1], r0
223 ; CHECKHARD-NEXT: vmov r0, s10
224 ; CHECKHARD-NEXT: vmov.16 d19[2], r0
225 ; CHECKHARD-NEXT: vmov r0, s0
226 ; CHECKHARD-NEXT: vmov.16 d19[3], r0
227 ; CHECKHARD-NEXT: vadd.f16 q0, q8, q9
228 ; CHECKHARD-NEXT: bx lr
230 ; CHECKSOFT-LABEL: shuffle3step_f16:
231 ; CHECKSOFT: @ %bb.0: @ %entry
232 ; CHECKSOFT-NEXT: vmov r1, s0
233 ; CHECKSOFT-NEXT: vmovx.f16 s12, s1
234 ; CHECKSOFT-NEXT: vmov r0, s12
235 ; CHECKSOFT-NEXT: vrev32.16 d16, d3
236 ; CHECKSOFT-NEXT: vext.16 d17, d4, d5, #2
237 ; CHECKSOFT-NEXT: vmovx.f16 s12, s4
238 ; CHECKSOFT-NEXT: vext.16 d16, d16, d3, #1
239 ; CHECKSOFT-NEXT: vext.16 d16, d17, d16, #2
240 ; CHECKSOFT-NEXT: vext.16 d16, d16, d17, #1
241 ; CHECKSOFT-NEXT: vext.16 d17, d16, d16, #1
242 ; CHECKSOFT-NEXT: vmov.16 d16[0], r1
243 ; CHECKSOFT-NEXT: vmov.16 d16[1], r0
244 ; CHECKSOFT-NEXT: vmov r0, s3
245 ; CHECKSOFT-NEXT: vmov.16 d16[2], r0
246 ; CHECKSOFT-NEXT: vmov r0, s12
247 ; CHECKSOFT-NEXT: vmovx.f16 s12, s0
248 ; CHECKSOFT-NEXT: vmov r1, s12
249 ; CHECKSOFT-NEXT: vmovx.f16 s12, s3
250 ; CHECKSOFT-NEXT: vmov.16 d16[3], r0
251 ; CHECKSOFT-NEXT: vmov r0, s2
252 ; CHECKSOFT-NEXT: vmov.16 d18[0], r1
253 ; CHECKSOFT-NEXT: vmov.16 d18[1], r0
254 ; CHECKSOFT-NEXT: vmov r0, s12
255 ; CHECKSOFT-NEXT: vdup.16 q3, d3[1]
256 ; CHECKSOFT-NEXT: vmov r1, s12
257 ; CHECKSOFT-NEXT: vmovx.f16 s12, s9
258 ; CHECKSOFT-NEXT: vmov.16 d18[2], r0
259 ; CHECKSOFT-NEXT: vmov r0, s5
260 ; CHECKSOFT-NEXT: vmov.16 d18[3], r0
261 ; CHECKSOFT-NEXT: vmov r0, s8
262 ; CHECKSOFT-NEXT: vmov.16 d19[0], r1
263 ; CHECKSOFT-NEXT: vmov.16 d19[1], r0
264 ; CHECKSOFT-NEXT: vmov r0, s12
265 ; CHECKSOFT-NEXT: vmov.16 d19[2], r0
266 ; CHECKSOFT-NEXT: vmov r0, s11
267 ; CHECKSOFT-NEXT: vmov.16 d19[3], r0
268 ; CHECKSOFT-NEXT: vadd.f16 q8, q8, q9
269 ; CHECKSOFT-NEXT: vext.16 d18, d0, d1, #2
270 ; CHECKSOFT-NEXT: vmovx.f16 s0, s8
271 ; CHECKSOFT-NEXT: vmov r0, s0
272 ; CHECKSOFT-NEXT: vdup.16 q0, d3[2]
273 ; CHECKSOFT-NEXT: vext.16 d19, d18, d2, #3
274 ; CHECKSOFT-NEXT: vmov r1, s0
275 ; CHECKSOFT-NEXT: vext.16 d18, d2, d18, #1
276 ; CHECKSOFT-NEXT: vmovx.f16 s0, s11
277 ; CHECKSOFT-NEXT: vext.16 d18, d18, d19, #2
278 ; CHECKSOFT-NEXT: vext.16 d18, d18, d18, #1
279 ; CHECKSOFT-NEXT: vmov.16 d19[0], r1
280 ; CHECKSOFT-NEXT: vmov.16 d19[1], r0
281 ; CHECKSOFT-NEXT: vmov r0, s10
282 ; CHECKSOFT-NEXT: vmov.16 d19[2], r0
283 ; CHECKSOFT-NEXT: vmov r0, s0
284 ; CHECKSOFT-NEXT: vmov.16 d19[3], r0
285 ; CHECKSOFT-NEXT: vadd.f16 q0, q8, q9
286 ; CHECKSOFT-NEXT: bx lr
288 %s1 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
289 %s2 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
290 %s3 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
291 %a = fadd <8 x half> %s1, %s2
292 %r = fadd <8 x half> %a, %s3
; Extracts lane 0 of <4 x half> %a and reinterprets its bits as i16 (bitcast,
; no numeric conversion).
; The hard-float run expects a single vmov.u16 r0, d0[0]. The soft-float run
; expects r0/r1 to be moved into d16 first and the lane to be read from there.
296 define i16 @extract_v4i16(<4 x half> %a) {
297 ; CHECKHARD-LABEL: extract_v4i16:
298 ; CHECKHARD: @ %bb.0: @ %entry
299 ; CHECKHARD-NEXT: vmov.u16 r0, d0[0]
300 ; CHECKHARD-NEXT: bx lr
302 ; CHECKSOFT-LABEL: extract_v4i16:
303 ; CHECKSOFT: @ %bb.0: @ %entry
304 ; CHECKSOFT-NEXT: vmov d16, r0, r1
305 ; CHECKSOFT-NEXT: vmov.u16 r0, d16[0]
306 ; CHECKSOFT-NEXT: bx lr
308 %elt = extractelement <4 x half> %a, i32 0
309 %t = bitcast half %elt to i16
; Extracts lane 0 of <8 x half> %a and reinterprets its bits as i16 (bitcast,
; no numeric conversion).
; The hard-float run expects a single vmov.u16 r0, d0[0]. The soft-float run
; expects only r0/r1 to be moved into a vector register (d16) before the lane
; is read.
313 define i16 @extract_v8i16(<8 x half> %a) {
314 ; CHECKHARD-LABEL: extract_v8i16:
315 ; CHECKHARD: @ %bb.0: @ %entry
316 ; CHECKHARD-NEXT: vmov.u16 r0, d0[0]
317 ; CHECKHARD-NEXT: bx lr
319 ; CHECKSOFT-LABEL: extract_v8i16:
320 ; CHECKSOFT: @ %bb.0: @ %entry
321 ; CHECKSOFT-NEXT: vmov d16, r0, r1
322 ; CHECKSOFT-NEXT: vmov.u16 r0, d16[0]
323 ; CHECKSOFT-NEXT: bx lr
325 %elt = extractelement <8 x half> %a, i32 0
326 %t = bitcast half %elt to i16
; Extracts lane 0 of <4 x half> %a, reinterprets its bits as i16 and
; sign-extends that to i32.
; Both runs expect the lane to be read with vmov.u16 and then sign-extended by a
; separate sxth. The soft-float run additionally expects r0/r1 to be moved into
; d16 first.
330 define i32 @extract_v4s32(<4 x half> %a) {
331 ; CHECKHARD-LABEL: extract_v4s32:
332 ; CHECKHARD: @ %bb.0: @ %entry
333 ; CHECKHARD-NEXT: vmov.u16 r0, d0[0]
334 ; CHECKHARD-NEXT: sxth r0, r0
335 ; CHECKHARD-NEXT: bx lr
337 ; CHECKSOFT-LABEL: extract_v4s32:
338 ; CHECKSOFT: @ %bb.0: @ %entry
339 ; CHECKSOFT-NEXT: vmov d16, r0, r1
340 ; CHECKSOFT-NEXT: vmov.u16 r0, d16[0]
341 ; CHECKSOFT-NEXT: sxth r0, r0
342 ; CHECKSOFT-NEXT: bx lr
344 %elt = extractelement <4 x half> %a, i32 0
345 %t = bitcast half %elt to i16
346 %s = sext i16 %t to i32
; Extracts lane 0 of <8 x half> %a, reinterprets its bits as i16 and
; sign-extends that to i32.
; Both runs expect the lane to be read with vmov.u16 and then sign-extended by a
; separate sxth. The soft-float run additionally expects only r0/r1 to be moved
; into a vector register (d16) first.
350 define i32 @extract_v8s32(<8 x half> %a) {
351 ; CHECKHARD-LABEL: extract_v8s32:
352 ; CHECKHARD: @ %bb.0: @ %entry
353 ; CHECKHARD-NEXT: vmov.u16 r0, d0[0]
354 ; CHECKHARD-NEXT: sxth r0, r0
355 ; CHECKHARD-NEXT: bx lr
357 ; CHECKSOFT-LABEL: extract_v8s32:
358 ; CHECKSOFT: @ %bb.0: @ %entry
359 ; CHECKSOFT-NEXT: vmov d16, r0, r1
360 ; CHECKSOFT-NEXT: vmov.u16 r0, d16[0]
361 ; CHECKSOFT-NEXT: sxth r0, r0
362 ; CHECKSOFT-NEXT: bx lr
364 %elt = extractelement <8 x half> %a, i32 0
365 %t = bitcast half %elt to i16
366 %s = sext i16 %t to i32