1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s
4 %struct.float16x4x2_t = type { [2 x <4 x half>] }
5 %struct.float16x8x2_t = type { [2 x <8 x half>] }
; llvm.fabs on <4 x half>; expects vabs.f16 d0, d0.
7 define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
9 ; CHECK-LABEL: test_vabs_f16:
10 ; CHECK: @ %bb.0: @ %entry
11 ; CHECK-NEXT: vabs.f16 d0, d0
14 %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
15 ret <4 x half> %vabs1.i
; llvm.fabs on <8 x half>; expects vabs.f16 q0, q0.
18 define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
20 ; CHECK-LABEL: test_vabsq_f16:
21 ; CHECK: @ %bb.0: @ %entry
22 ; CHECK-NEXT: vabs.f16 q0, q0
25 %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
26 ret <8 x half> %vabs1.i
; sext of fcmp oeq <4 x half> against zero; expects vceq.f16 d0, d0, #0.
29 define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
31 ; CHECK-LABEL: test_vceqz_f16:
32 ; CHECK: @ %bb.0: @ %entry
33 ; CHECK-NEXT: vceq.f16 d0, d0, #0
36 %0 = fcmp oeq <4 x half> %a, zeroinitializer
37 %vceqz.i = sext <4 x i1> %0 to <4 x i16>
38 ret <4 x i16> %vceqz.i
; sext of fcmp oeq <8 x half> against zero; expects vceq.f16 q0, q0, #0.
41 define dso_local <8 x i16> @test_vceqzq_f16(<8 x half> %a) {
43 ; CHECK-LABEL: test_vceqzq_f16:
44 ; CHECK: @ %bb.0: @ %entry
45 ; CHECK-NEXT: vceq.f16 q0, q0, #0
48 %0 = fcmp oeq <8 x half> %a, zeroinitializer
49 %vceqz.i = sext <8 x i1> %0 to <8 x i16>
50 ret <8 x i16> %vceqz.i
; sext of fcmp oge <4 x half> against zero; expects vcge.f16 d0, d0, #0.
53 define dso_local <4 x i16> @test_vcgez_f16(<4 x half> %a) {
55 ; CHECK-LABEL: test_vcgez_f16:
56 ; CHECK: @ %bb.0: @ %entry
57 ; CHECK-NEXT: vcge.f16 d0, d0, #0
60 %0 = fcmp oge <4 x half> %a, zeroinitializer
61 %vcgez.i = sext <4 x i1> %0 to <4 x i16>
62 ret <4 x i16> %vcgez.i
; sext of fcmp oge <8 x half> against zero; expects vcge.f16 q0, q0, #0.
65 define dso_local <8 x i16> @test_vcgezq_f16(<8 x half> %a) {
67 ; CHECK-LABEL: test_vcgezq_f16:
68 ; CHECK: @ %bb.0: @ %entry
69 ; CHECK-NEXT: vcge.f16 q0, q0, #0
72 %0 = fcmp oge <8 x half> %a, zeroinitializer
73 %vcgez.i = sext <8 x i1> %0 to <8 x i16>
74 ret <8 x i16> %vcgez.i
; sext of fcmp ogt <4 x half> against zero; expects vcgt.f16 d0, d0, #0.
77 define dso_local <4 x i16> @test_vcgtz_f16(<4 x half> %a) {
79 ; CHECK-LABEL: test_vcgtz_f16:
80 ; CHECK: @ %bb.0: @ %entry
81 ; CHECK-NEXT: vcgt.f16 d0, d0, #0
84 %0 = fcmp ogt <4 x half> %a, zeroinitializer
85 %vcgtz.i = sext <4 x i1> %0 to <4 x i16>
86 ret <4 x i16> %vcgtz.i
; sext of fcmp ogt <8 x half> against zero; expects vcgt.f16 q0, q0, #0.
89 define dso_local <8 x i16> @test_vcgtzq_f16(<8 x half> %a) {
91 ; CHECK-LABEL: test_vcgtzq_f16:
92 ; CHECK: @ %bb.0: @ %entry
93 ; CHECK-NEXT: vcgt.f16 q0, q0, #0
96 %0 = fcmp ogt <8 x half> %a, zeroinitializer
97 %vcgtz.i = sext <8 x i1> %0 to <8 x i16>
98 ret <8 x i16> %vcgtz.i
; sext of fcmp ole <4 x half> against zero; expects vcle.f16 d0, d0, #0.
101 define dso_local <4 x i16> @test_vclez_f16(<4 x half> %a) {
103 ; CHECK-LABEL: test_vclez_f16:
104 ; CHECK: @ %bb.0: @ %entry
105 ; CHECK-NEXT: vcle.f16 d0, d0, #0
108 %0 = fcmp ole <4 x half> %a, zeroinitializer
109 %vclez.i = sext <4 x i1> %0 to <4 x i16>
110 ret <4 x i16> %vclez.i
; sext of fcmp ole <8 x half> against zero; expects vcle.f16 q0, q0, #0.
113 define dso_local <8 x i16> @test_vclezq_f16(<8 x half> %a) {
115 ; CHECK-LABEL: test_vclezq_f16:
116 ; CHECK: @ %bb.0: @ %entry
117 ; CHECK-NEXT: vcle.f16 q0, q0, #0
120 %0 = fcmp ole <8 x half> %a, zeroinitializer
121 %vclez.i = sext <8 x i1> %0 to <8 x i16>
122 ret <8 x i16> %vclez.i
; sext of fcmp olt <4 x half> against zero; expects vclt.f16 d0, d0, #0.
125 define dso_local <4 x i16> @test_vcltz_f16(<4 x half> %a) {
127 ; CHECK-LABEL: test_vcltz_f16:
128 ; CHECK: @ %bb.0: @ %entry
129 ; CHECK-NEXT: vclt.f16 d0, d0, #0
132 %0 = fcmp olt <4 x half> %a, zeroinitializer
133 %vcltz.i = sext <4 x i1> %0 to <4 x i16>
134 ret <4 x i16> %vcltz.i
; sext of fcmp olt <8 x half> against zero; expects vclt.f16 q0, q0, #0.
137 define dso_local <8 x i16> @test_vcltzq_f16(<8 x half> %a) {
139 ; CHECK-LABEL: test_vcltzq_f16:
140 ; CHECK: @ %bb.0: @ %entry
141 ; CHECK-NEXT: vclt.f16 q0, q0, #0
144 %0 = fcmp olt <8 x half> %a, zeroinitializer
145 %vcltz.i = sext <8 x i1> %0 to <8 x i16>
146 ret <8 x i16> %vcltz.i
; sitofp <4 x i16> to <4 x half>; expects vcvt.f16.s16 d0, d0.
149 define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) {
150 ; CHECK-LABEL: test_vcvt_f16_s16:
151 ; CHECK: @ %bb.0: @ %entry
152 ; CHECK-NEXT: vcvt.f16.s16 d0, d0
155 %vcvt.i = sitofp <4 x i16> %a to <4 x half>
156 ret <4 x half> %vcvt.i
; sitofp <8 x i16> to <8 x half>; expects vcvt.f16.s16 q0, q0.
159 define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) {
160 ; CHECK-LABEL: test_vcvtq_f16_s16:
161 ; CHECK: @ %bb.0: @ %entry
162 ; CHECK-NEXT: vcvt.f16.s16 q0, q0
165 %vcvt.i = sitofp <8 x i16> %a to <8 x half>
166 ret <8 x half> %vcvt.i
; uitofp <4 x i16> to <4 x half>; expects vcvt.f16.u16 d0, d0.
169 define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) {
170 ; CHECK-LABEL: test_vcvt_f16_u16:
171 ; CHECK: @ %bb.0: @ %entry
172 ; CHECK-NEXT: vcvt.f16.u16 d0, d0
175 %vcvt.i = uitofp <4 x i16> %a to <4 x half>
176 ret <4 x half> %vcvt.i
; uitofp <8 x i16> to <8 x half>; expects vcvt.f16.u16 q0, q0.
179 define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) {
180 ; CHECK-LABEL: test_vcvtq_f16_u16:
181 ; CHECK: @ %bb.0: @ %entry
182 ; CHECK-NEXT: vcvt.f16.u16 q0, q0
185 %vcvt.i = uitofp <8 x i16> %a to <8 x half>
186 ret <8 x half> %vcvt.i
; fptosi <4 x half> to <4 x i16>; expects vcvt.s16.f16 d0, d0.
189 define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) {
190 ; CHECK-LABEL: test_vcvt_s16_f16:
191 ; CHECK: @ %bb.0: @ %entry
192 ; CHECK-NEXT: vcvt.s16.f16 d0, d0
195 %vcvt.i = fptosi <4 x half> %a to <4 x i16>
196 ret <4 x i16> %vcvt.i
; fptosi <8 x half> to <8 x i16>; expects vcvt.s16.f16 q0, q0.
199 define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) {
200 ; CHECK-LABEL: test_vcvtq_s16_f16:
201 ; CHECK: @ %bb.0: @ %entry
202 ; CHECK-NEXT: vcvt.s16.f16 q0, q0
205 %vcvt.i = fptosi <8 x half> %a to <8 x i16>
206 ret <8 x i16> %vcvt.i
; fptoui <4 x half> to <4 x i16>; expects vcvt.u16.f16 d0, d0.
209 define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) {
210 ; CHECK-LABEL: test_vcvt_u16_f16:
211 ; CHECK: @ %bb.0: @ %entry
212 ; CHECK-NEXT: vcvt.u16.f16 d0, d0
215 %vcvt.i = fptoui <4 x half> %a to <4 x i16>
216 ret <4 x i16> %vcvt.i
; fptoui <8 x half> to <8 x i16>; expects vcvt.u16.f16 q0, q0.
219 define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) {
220 ; CHECK-LABEL: test_vcvtq_u16_f16:
221 ; CHECK: @ %bb.0: @ %entry
222 ; CHECK-NEXT: vcvt.u16.f16 q0, q0
225 %vcvt.i = fptoui <8 x half> %a to <8 x i16>
226 ret <8 x i16> %vcvt.i
; llvm.arm.neon.vcvtas on <4 x half>; expects vcvta.s16.f16 d0, d0.
229 define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) {
230 ; CHECK-LABEL: test_vcvta_s16_f16:
231 ; CHECK: @ %bb.0: @ %entry
232 ; CHECK-NEXT: vcvta.s16.f16 d0, d0
235 %vcvta_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
236 ret <4 x i16> %vcvta_s16_v1.i
; llvm.arm.neon.vcvtau on <4 x half>; expects vcvta.u16.f16 d0, d0.
239 define dso_local <4 x i16> @test_vcvta_u16_f16(<4 x half> %a) {
240 ; CHECK-LABEL: test_vcvta_u16_f16:
241 ; CHECK: @ %bb.0: @ %entry
242 ; CHECK-NEXT: vcvta.u16.f16 d0, d0
245 %vcvta_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
246 ret <4 x i16> %vcvta_u16_v1.i
; llvm.arm.neon.vcvtas on <8 x half>; expects vcvta.s16.f16 q0, q0.
249 define dso_local <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) {
250 ; CHECK-LABEL: test_vcvtaq_s16_f16:
251 ; CHECK: @ %bb.0: @ %entry
252 ; CHECK-NEXT: vcvta.s16.f16 q0, q0
255 %vcvtaq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
256 ret <8 x i16> %vcvtaq_s16_v1.i
; llvm.arm.neon.vcvtms on <4 x half>; expects vcvtm.s16.f16 d0, d0.
259 define dso_local <4 x i16> @test_vcvtm_s16_f16(<4 x half> %a) {
260 ; CHECK-LABEL: test_vcvtm_s16_f16:
261 ; CHECK: @ %bb.0: @ %entry
262 ; CHECK-NEXT: vcvtm.s16.f16 d0, d0
265 %vcvtm_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
266 ret <4 x i16> %vcvtm_s16_v1.i
; llvm.arm.neon.vcvtms on <8 x half>; expects vcvtm.s16.f16 q0, q0.
269 define dso_local <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) {
270 ; CHECK-LABEL: test_vcvtmq_s16_f16:
271 ; CHECK: @ %bb.0: @ %entry
272 ; CHECK-NEXT: vcvtm.s16.f16 q0, q0
275 %vcvtmq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
276 ret <8 x i16> %vcvtmq_s16_v1.i
; llvm.arm.neon.vcvtmu on <4 x half>; expects vcvtm.u16.f16 d0, d0.
279 define dso_local <4 x i16> @test_vcvtm_u16_f16(<4 x half> %a) {
280 ; CHECK-LABEL: test_vcvtm_u16_f16:
281 ; CHECK: @ %bb.0: @ %entry
282 ; CHECK-NEXT: vcvtm.u16.f16 d0, d0
285 %vcvtm_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
286 ret <4 x i16> %vcvtm_u16_v1.i
; llvm.arm.neon.vcvtmu on <8 x half>; expects vcvtm.u16.f16 q0, q0.
289 define dso_local <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) {
290 ; CHECK-LABEL: test_vcvtmq_u16_f16:
291 ; CHECK: @ %bb.0: @ %entry
292 ; CHECK-NEXT: vcvtm.u16.f16 q0, q0
295 %vcvtmq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
296 ret <8 x i16> %vcvtmq_u16_v1.i
; llvm.arm.neon.vcvtns on <4 x half>; expects vcvtn.s16.f16 d0, d0.
299 define dso_local <4 x i16> @test_vcvtn_s16_f16(<4 x half> %a) {
300 ; CHECK-LABEL: test_vcvtn_s16_f16:
301 ; CHECK: @ %bb.0: @ %entry
302 ; CHECK-NEXT: vcvtn.s16.f16 d0, d0
305 %vcvtn_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
306 ret <4 x i16> %vcvtn_s16_v1.i
; llvm.arm.neon.vcvtns on <8 x half>; expects vcvtn.s16.f16 q0, q0.
309 define dso_local <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) {
310 ; CHECK-LABEL: test_vcvtnq_s16_f16:
311 ; CHECK: @ %bb.0: @ %entry
312 ; CHECK-NEXT: vcvtn.s16.f16 q0, q0
315 %vcvtnq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
316 ret <8 x i16> %vcvtnq_s16_v1.i
; llvm.arm.neon.vcvtnu on <4 x half>; expects vcvtn.u16.f16 d0, d0.
319 define dso_local <4 x i16> @test_vcvtn_u16_f16(<4 x half> %a) {
320 ; CHECK-LABEL: test_vcvtn_u16_f16:
321 ; CHECK: @ %bb.0: @ %entry
322 ; CHECK-NEXT: vcvtn.u16.f16 d0, d0
325 %vcvtn_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
326 ret <4 x i16> %vcvtn_u16_v1.i
; llvm.arm.neon.vcvtnu on <8 x half>; expects vcvtn.u16.f16 q0, q0.
329 define dso_local <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) {
330 ; CHECK-LABEL: test_vcvtnq_u16_f16:
331 ; CHECK: @ %bb.0: @ %entry
332 ; CHECK-NEXT: vcvtn.u16.f16 q0, q0
335 %vcvtnq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
336 ret <8 x i16> %vcvtnq_u16_v1.i
; llvm.arm.neon.vcvtps on <4 x half>; expects vcvtp.s16.f16 d0, d0.
339 define dso_local <4 x i16> @test_vcvtp_s16_f16(<4 x half> %a) {
340 ; CHECK-LABEL: test_vcvtp_s16_f16:
341 ; CHECK: @ %bb.0: @ %entry
342 ; CHECK-NEXT: vcvtp.s16.f16 d0, d0
345 %vcvtp_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
346 ret <4 x i16> %vcvtp_s16_v1.i
; llvm.arm.neon.vcvtps on <8 x half>; expects vcvtp.s16.f16 q0, q0.
349 define dso_local <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) {
350 ; CHECK-LABEL: test_vcvtpq_s16_f16:
351 ; CHECK: @ %bb.0: @ %entry
352 ; CHECK-NEXT: vcvtp.s16.f16 q0, q0
355 %vcvtpq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
356 ret <8 x i16> %vcvtpq_s16_v1.i
; llvm.arm.neon.vcvtpu on <4 x half>; expects vcvtp.u16.f16 d0, d0.
359 define dso_local <4 x i16> @test_vcvtp_u16_f16(<4 x half> %a) {
360 ; CHECK-LABEL: test_vcvtp_u16_f16:
361 ; CHECK: @ %bb.0: @ %entry
362 ; CHECK-NEXT: vcvtp.u16.f16 d0, d0
365 %vcvtp_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
366 ret <4 x i16> %vcvtp_u16_v1.i
; llvm.arm.neon.vcvtpu on <8 x half>; expects vcvtp.u16.f16 q0, q0.
369 define dso_local <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) {
370 ; CHECK-LABEL: test_vcvtpq_u16_f16:
371 ; CHECK: @ %bb.0: @ %entry
372 ; CHECK-NEXT: vcvtp.u16.f16 q0, q0
375 %vcvtpq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
376 ret <8 x i16> %vcvtpq_u16_v1.i
; fsub of %a from a splat of 0xH8000 on <4 x half>; expects vneg.f16 d0, d0.
379 define dso_local <4 x half> @test_vneg_f16(<4 x half> %a) {
381 ; CHECK-LABEL: test_vneg_f16:
382 ; CHECK: @ %bb.0: @ %entry
383 ; CHECK-NEXT: vneg.f16 d0, d0
386 %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
387 ret <4 x half> %sub.i
; fsub of %a from a splat of 0xH8000 on <8 x half>; expects vneg.f16 q0, q0.
390 define dso_local <8 x half> @test_vnegq_f16(<8 x half> %a) {
392 ; CHECK-LABEL: test_vnegq_f16:
393 ; CHECK: @ %bb.0: @ %entry
394 ; CHECK-NEXT: vneg.f16 q0, q0
397 %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
398 ret <8 x half> %sub.i
; llvm.arm.neon.vrecpe on <4 x half>; expects vrecpe.f16 d0, d0.
401 define dso_local <4 x half> @test_vrecpe_f16(<4 x half> %a) {
403 ; CHECK-LABEL: test_vrecpe_f16:
404 ; CHECK: @ %bb.0: @ %entry
405 ; CHECK-NEXT: vrecpe.f16 d0, d0
408 %vrecpe_v1.i = tail call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
409 ret <4 x half> %vrecpe_v1.i
; llvm.arm.neon.vrecpe on <8 x half>; expects vrecpe.f16 q0, q0.
412 define dso_local <8 x half> @test_vrecpeq_f16(<8 x half> %a) {
414 ; CHECK-LABEL: test_vrecpeq_f16:
415 ; CHECK: @ %bb.0: @ %entry
416 ; CHECK-NEXT: vrecpe.f16 q0, q0
419 %vrecpeq_v1.i = tail call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
420 ret <8 x half> %vrecpeq_v1.i
; llvm.arm.neon.vrintz on <4 x half>; expects vrintz.f16 d0, d0.
423 define dso_local <4 x half> @test_vrnd_f16(<4 x half> %a) {
425 ; CHECK-LABEL: test_vrnd_f16:
426 ; CHECK: @ %bb.0: @ %entry
427 ; CHECK-NEXT: vrintz.f16 d0, d0
430 %vrnd_v1.i = tail call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
431 ret <4 x half> %vrnd_v1.i
; llvm.arm.neon.vrintz on <8 x half>; expects vrintz.f16 q0, q0.
434 define dso_local <8 x half> @test_vrndq_f16(<8 x half> %a) {
436 ; CHECK-LABEL: test_vrndq_f16:
437 ; CHECK: @ %bb.0: @ %entry
438 ; CHECK-NEXT: vrintz.f16 q0, q0
441 %vrndq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
442 ret <8 x half> %vrndq_v1.i
; llvm.arm.neon.vrinta on <4 x half>; expects vrinta.f16 d0, d0.
445 define dso_local <4 x half> @test_vrnda_f16(<4 x half> %a) {
447 ; CHECK-LABEL: test_vrnda_f16:
448 ; CHECK: @ %bb.0: @ %entry
449 ; CHECK-NEXT: vrinta.f16 d0, d0
452 %vrnda_v1.i = tail call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
453 ret <4 x half> %vrnda_v1.i
; llvm.arm.neon.vrinta on <8 x half>; expects vrinta.f16 q0, q0.
456 define dso_local <8 x half> @test_vrndaq_f16(<8 x half> %a) {
458 ; CHECK-LABEL: test_vrndaq_f16:
459 ; CHECK: @ %bb.0: @ %entry
460 ; CHECK-NEXT: vrinta.f16 q0, q0
463 %vrndaq_v1.i = tail call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
464 ret <8 x half> %vrndaq_v1.i
; llvm.arm.neon.vrintm on <4 x half>; expects vrintm.f16 d0, d0.
467 define dso_local <4 x half> @test_vrndm_f16(<4 x half> %a) {
469 ; CHECK-LABEL: test_vrndm_f16:
470 ; CHECK: @ %bb.0: @ %entry
471 ; CHECK-NEXT: vrintm.f16 d0, d0
474 %vrndm_v1.i = tail call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
475 ret <4 x half> %vrndm_v1.i
; llvm.arm.neon.vrintm on <8 x half>; expects vrintm.f16 q0, q0.
478 define dso_local <8 x half> @test_vrndmq_f16(<8 x half> %a) {
480 ; CHECK-LABEL: test_vrndmq_f16:
481 ; CHECK: @ %bb.0: @ %entry
482 ; CHECK-NEXT: vrintm.f16 q0, q0
485 %vrndmq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
486 ret <8 x half> %vrndmq_v1.i
; llvm.arm.neon.vrintn on <4 x half>; expects vrintn.f16 d0, d0.
489 define dso_local <4 x half> @test_vrndn_f16(<4 x half> %a) {
491 ; CHECK-LABEL: test_vrndn_f16:
492 ; CHECK: @ %bb.0: @ %entry
493 ; CHECK-NEXT: vrintn.f16 d0, d0
496 %vrndn_v1.i = tail call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
497 ret <4 x half> %vrndn_v1.i
; llvm.arm.neon.vrintn on <8 x half>; expects vrintn.f16 q0, q0.
500 define dso_local <8 x half> @test_vrndnq_f16(<8 x half> %a) {
502 ; CHECK-LABEL: test_vrndnq_f16:
503 ; CHECK: @ %bb.0: @ %entry
504 ; CHECK-NEXT: vrintn.f16 q0, q0
507 %vrndnq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
508 ret <8 x half> %vrndnq_v1.i
; llvm.arm.neon.vrintp on <4 x half>; expects vrintp.f16 d0, d0.
511 define dso_local <4 x half> @test_vrndp_f16(<4 x half> %a) {
513 ; CHECK-LABEL: test_vrndp_f16:
514 ; CHECK: @ %bb.0: @ %entry
515 ; CHECK-NEXT: vrintp.f16 d0, d0
518 %vrndp_v1.i = tail call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
519 ret <4 x half> %vrndp_v1.i
; llvm.arm.neon.vrintp on <8 x half>; expects vrintp.f16 q0, q0.
522 define dso_local <8 x half> @test_vrndpq_f16(<8 x half> %a) {
524 ; CHECK-LABEL: test_vrndpq_f16:
525 ; CHECK: @ %bb.0: @ %entry
526 ; CHECK-NEXT: vrintp.f16 q0, q0
529 %vrndpq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
530 ret <8 x half> %vrndpq_v1.i
; llvm.arm.neon.vrintx on <4 x half>; expects vrintx.f16 d0, d0.
533 define dso_local <4 x half> @test_vrndx_f16(<4 x half> %a) {
535 ; CHECK-LABEL: test_vrndx_f16:
536 ; CHECK: @ %bb.0: @ %entry
537 ; CHECK-NEXT: vrintx.f16 d0, d0
540 %vrndx_v1.i = tail call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
541 ret <4 x half> %vrndx_v1.i
; llvm.arm.neon.vrintx on <8 x half>; expects vrintx.f16 q0, q0.
544 define dso_local <8 x half> @test_vrndxq_f16(<8 x half> %a) {
546 ; CHECK-LABEL: test_vrndxq_f16:
547 ; CHECK: @ %bb.0: @ %entry
548 ; CHECK-NEXT: vrintx.f16 q0, q0
551 %vrndxq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
552 ret <8 x half> %vrndxq_v1.i
; llvm.arm.neon.vrsqrte on <4 x half>; expects vrsqrte.f16 d0, d0.
555 define dso_local <4 x half> @test_vrsqrte_f16(<4 x half> %a) {
557 ; CHECK-LABEL: test_vrsqrte_f16:
558 ; CHECK: @ %bb.0: @ %entry
559 ; CHECK-NEXT: vrsqrte.f16 d0, d0
562 %vrsqrte_v1.i = tail call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
563 ret <4 x half> %vrsqrte_v1.i
; llvm.arm.neon.vrsqrte on <8 x half>; expects vrsqrte.f16 q0, q0.
566 define dso_local <8 x half> @test_vrsqrteq_f16(<8 x half> %a) {
568 ; CHECK-LABEL: test_vrsqrteq_f16:
569 ; CHECK: @ %bb.0: @ %entry
570 ; CHECK-NEXT: vrsqrte.f16 q0, q0
573 %vrsqrteq_v1.i = tail call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
574 ret <8 x half> %vrsqrteq_v1.i
; fadd on <4 x half>; expects vadd.f16 d0, d0, d1.
577 define dso_local <4 x half> @test_vadd_f16(<4 x half> %a, <4 x half> %b) {
579 ; CHECK-LABEL: test_vadd_f16:
580 ; CHECK: @ %bb.0: @ %entry
581 ; CHECK-NEXT: vadd.f16 d0, d0, d1
584 %add.i = fadd <4 x half> %a, %b
585 ret <4 x half> %add.i
; fadd on <8 x half>; expects vadd.f16 q0, q0, q1.
588 define dso_local <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
590 ; CHECK-LABEL: test_vaddq_f16:
591 ; CHECK: @ %bb.0: @ %entry
592 ; CHECK-NEXT: vadd.f16 q0, q0, q1
595 %add.i = fadd <8 x half> %a, %b
596 ret <8 x half> %add.i
; llvm.arm.neon.vabds on <4 x half>; expects vabd.f16 d0, d0, d1.
599 define dso_local <4 x half> @test_vabd_f16(<4 x half> %a, <4 x half> %b) {
601 ; CHECK-LABEL: test_vabd_f16:
602 ; CHECK: @ %bb.0: @ %entry
603 ; CHECK-NEXT: vabd.f16 d0, d0, d1
606 %vabd_v2.i = tail call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
607 ret <4 x half> %vabd_v2.i
; llvm.arm.neon.vabds on <8 x half>; expects vabd.f16 q0, q0, q1.
610 define dso_local <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) {
612 ; CHECK-LABEL: test_vabdq_f16:
613 ; CHECK: @ %bb.0: @ %entry
614 ; CHECK-NEXT: vabd.f16 q0, q0, q1
617 %vabdq_v2.i = tail call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
618 ret <8 x half> %vabdq_v2.i
; llvm.arm.neon.vacge on <4 x half> (%a, %b); expects vacge.f16 d0, d0, d1.
621 define dso_local <4 x i16> @test_vcage_f16(<4 x half> %a, <4 x half> %b) {
623 ; CHECK-LABEL: test_vcage_f16:
624 ; CHECK: @ %bb.0: @ %entry
625 ; CHECK-NEXT: vacge.f16 d0, d0, d1
628 %vcage_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
629 ret <4 x i16> %vcage_v2.i
; llvm.arm.neon.vacge on <8 x half> (%a, %b); expects vacge.f16 q0, q0, q1.
632 define dso_local <8 x i16> @test_vcageq_f16(<8 x half> %a, <8 x half> %b) {
634 ; CHECK-LABEL: test_vcageq_f16:
635 ; CHECK: @ %bb.0: @ %entry
636 ; CHECK-NEXT: vacge.f16 q0, q0, q1
639 %vcageq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
640 ret <8 x i16> %vcageq_v2.i
; llvm.arm.neon.vacgt on <4 x half> (%a, %b); expects vacgt.f16 d0, d0, d1.
643 define dso_local <4 x i16> @test_vcagt_f16(<4 x half> %a, <4 x half> %b) {
644 ; CHECK-LABEL: test_vcagt_f16:
645 ; CHECK: @ %bb.0: @ %entry
646 ; CHECK-NEXT: vacgt.f16 d0, d0, d1
649 %vcagt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
650 ret <4 x i16> %vcagt_v2.i
; llvm.arm.neon.vacgt on <8 x half> (%a, %b); expects vacgt.f16 q0, q0, q1.
653 define dso_local <8 x i16> @test_vcagtq_f16(<8 x half> %a, <8 x half> %b) {
654 ; CHECK-LABEL: test_vcagtq_f16:
655 ; CHECK: @ %bb.0: @ %entry
656 ; CHECK-NEXT: vacgt.f16 q0, q0, q1
659 %vcagtq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
660 ret <8 x i16> %vcagtq_v2.i
; llvm.arm.neon.vacge called with swapped arguments (%b, %a) on <4 x half>;
; expects vacge.f16 d0, d1, d0.
663 define dso_local <4 x i16> @test_vcale_f16(<4 x half> %a, <4 x half> %b) {
665 ; CHECK-LABEL: test_vcale_f16:
666 ; CHECK: @ %bb.0: @ %entry
667 ; CHECK-NEXT: vacge.f16 d0, d1, d0
670 %vcale_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
671 ret <4 x i16> %vcale_v2.i
; llvm.arm.neon.vacge called with swapped arguments (%b, %a) on <8 x half>;
; expects vacge.f16 q0, q1, q0.
674 define dso_local <8 x i16> @test_vcaleq_f16(<8 x half> %a, <8 x half> %b) {
676 ; CHECK-LABEL: test_vcaleq_f16:
677 ; CHECK: @ %bb.0: @ %entry
678 ; CHECK-NEXT: vacge.f16 q0, q1, q0
681 %vcaleq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
682 ret <8 x i16> %vcaleq_v2.i
; sext of fcmp oeq on two <4 x half> operands; expects vceq.f16 d0, d0, d1.
685 define dso_local <4 x i16> @test_vceq_f16(<4 x half> %a, <4 x half> %b) {
687 ; CHECK-LABEL: test_vceq_f16:
688 ; CHECK: @ %bb.0: @ %entry
689 ; CHECK-NEXT: vceq.f16 d0, d0, d1
692 %cmp.i = fcmp oeq <4 x half> %a, %b
693 %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
694 ret <4 x i16> %sext.i
; sext of fcmp oeq on two <8 x half> operands; expects vceq.f16 q0, q0, q1.
697 define dso_local <8 x i16> @test_vceqq_f16(<8 x half> %a, <8 x half> %b) {
699 ; CHECK-LABEL: test_vceqq_f16:
700 ; CHECK: @ %bb.0: @ %entry
701 ; CHECK-NEXT: vceq.f16 q0, q0, q1
704 %cmp.i = fcmp oeq <8 x half> %a, %b
705 %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
706 ret <8 x i16> %sext.i
; sext of fcmp oge on two <4 x half> operands; expects vcge.f16 d0, d0, d1.
709 define dso_local <4 x i16> @test_vcge_f16(<4 x half> %a, <4 x half> %b) {
711 ; CHECK-LABEL: test_vcge_f16:
712 ; CHECK: @ %bb.0: @ %entry
713 ; CHECK-NEXT: vcge.f16 d0, d0, d1
716 %cmp.i = fcmp oge <4 x half> %a, %b
717 %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
718 ret <4 x i16> %sext.i
; sext of fcmp oge on two <8 x half> operands; expects vcge.f16 q0, q0, q1.
721 define dso_local <8 x i16> @test_vcgeq_f16(<8 x half> %a, <8 x half> %b) {
723 ; CHECK-LABEL: test_vcgeq_f16:
724 ; CHECK: @ %bb.0: @ %entry
725 ; CHECK-NEXT: vcge.f16 q0, q0, q1
728 %cmp.i = fcmp oge <8 x half> %a, %b
729 %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
730 ret <8 x i16> %sext.i
; sext of fcmp ogt on two <4 x half> operands; expects vcgt.f16 d0, d0, d1.
733 define dso_local <4 x i16> @test_vcgt_f16(<4 x half> %a, <4 x half> %b) {
735 ; CHECK-LABEL: test_vcgt_f16:
736 ; CHECK: @ %bb.0: @ %entry
737 ; CHECK-NEXT: vcgt.f16 d0, d0, d1
740 %cmp.i = fcmp ogt <4 x half> %a, %b
741 %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
742 ret <4 x i16> %sext.i
; sext of fcmp ogt on two <8 x half> operands; expects vcgt.f16 q0, q0, q1.
745 define dso_local <8 x i16> @test_vcgtq_f16(<8 x half> %a, <8 x half> %b) {
747 ; CHECK-LABEL: test_vcgtq_f16:
748 ; CHECK: @ %bb.0: @ %entry
749 ; CHECK-NEXT: vcgt.f16 q0, q0, q1
752 %cmp.i = fcmp ogt <8 x half> %a, %b
753 %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
754 ret <8 x i16> %sext.i
; sext of fcmp ole on two <4 x half> operands; the expected output is vcge.f16
; with the source registers swapped (d0, d1, d0).
757 define dso_local <4 x i16> @test_vcle_f16(<4 x half> %a, <4 x half> %b) {
759 ; CHECK-LABEL: test_vcle_f16:
760 ; CHECK: @ %bb.0: @ %entry
761 ; CHECK-NEXT: vcge.f16 d0, d1, d0
764 %cmp.i = fcmp ole <4 x half> %a, %b
765 %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
766 ret <4 x i16> %sext.i
; sext of fcmp ole on two <8 x half> operands; the expected output is vcge.f16
; with the source registers swapped (q0, q1, q0).
769 define dso_local <8 x i16> @test_vcleq_f16(<8 x half> %a, <8 x half> %b) {
771 ; CHECK-LABEL: test_vcleq_f16:
772 ; CHECK: @ %bb.0: @ %entry
773 ; CHECK-NEXT: vcge.f16 q0, q1, q0
776 %cmp.i = fcmp ole <8 x half> %a, %b
777 %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
778 ret <8 x i16> %sext.i
; sext of fcmp olt on two <4 x half> operands; the expected output is vcgt.f16
; with the source registers swapped (d0, d1, d0).
781 define dso_local <4 x i16> @test_vclt_f16(<4 x half> %a, <4 x half> %b) {
783 ; CHECK-LABEL: test_vclt_f16:
784 ; CHECK: @ %bb.0: @ %entry
785 ; CHECK-NEXT: vcgt.f16 d0, d1, d0
788 %cmp.i = fcmp olt <4 x half> %a, %b
789 %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
790 ret <4 x i16> %sext.i
; sext of fcmp olt on two <8 x half> operands; the expected output is vcgt.f16
; with the source registers swapped (q0, q1, q0).
793 define dso_local <8 x i16> @test_vcltq_f16(<8 x half> %a, <8 x half> %b) {
795 ; CHECK-LABEL: test_vcltq_f16:
796 ; CHECK: @ %bb.0: @ %entry
797 ; CHECK-NEXT: vcgt.f16 q0, q1, q0
800 %cmp.i = fcmp olt <8 x half> %a, %b
801 %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
802 ret <8 x i16> %sext.i
; llvm.arm.neon.vcvtfxs2fp on <4 x i16> with immediate 2; expects
; vcvt.f16.s16 d0, d0, #2.
805 define dso_local <4 x half> @test_vcvt_n_f16_s16(<4 x i16> %a) {
807 ; CHECK-LABEL: test_vcvt_n_f16_s16:
808 ; CHECK: @ %bb.0: @ %entry
809 ; CHECK-NEXT: vcvt.f16.s16 d0, d0, #2
812 %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
813 ret <4 x half> %vcvt_n1
816 declare <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16>, i32) #2
; llvm.arm.neon.vcvtfxs2fp on <8 x i16> with immediate 2; expects
; vcvt.f16.s16 q0, q0, #2.
818 define dso_local <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
820 ; CHECK-LABEL: test_vcvtq_n_f16_s16:
821 ; CHECK: @ %bb.0: @ %entry
822 ; CHECK-NEXT: vcvt.f16.s16 q0, q0, #2
825 %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
826 ret <8 x half> %vcvt_n1
829 declare <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16>, i32) #2
; llvm.arm.neon.vcvtfxu2fp on <4 x i16> with immediate 2; expects
; vcvt.f16.u16 d0, d0, #2.
831 define dso_local <4 x half> @test_vcvt_n_f16_u16(<4 x i16> %a) {
833 ; CHECK-LABEL: test_vcvt_n_f16_u16:
834 ; CHECK: @ %bb.0: @ %entry
835 ; CHECK-NEXT: vcvt.f16.u16 d0, d0, #2
838 %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
839 ret <4 x half> %vcvt_n1
842 declare <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16>, i32) #2
; llvm.arm.neon.vcvtfxu2fp on <8 x i16> with immediate 2; expects
; vcvt.f16.u16 q0, q0, #2.
844 define dso_local <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
846 ; CHECK-LABEL: test_vcvtq_n_f16_u16:
847 ; CHECK: @ %bb.0: @ %entry
848 ; CHECK-NEXT: vcvt.f16.u16 q0, q0, #2
851 %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
852 ret <8 x half> %vcvt_n1
855 declare <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16>, i32) #2
; llvm.arm.neon.vcvtfp2fxs on <4 x half> with immediate 2; expects
; vcvt.s16.f16 d0, d0, #2.
857 define dso_local <4 x i16> @test_vcvt_n_s16_f16(<4 x half> %a) {
859 ; CHECK-LABEL: test_vcvt_n_s16_f16:
860 ; CHECK: @ %bb.0: @ %entry
861 ; CHECK-NEXT: vcvt.s16.f16 d0, d0, #2
864 %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %a, i32 2)
865 ret <4 x i16> %vcvt_n1
868 declare <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half>, i32) #2
; llvm.arm.neon.vcvtfp2fxs on <8 x half> with immediate 2; expects
; vcvt.s16.f16 q0, q0, #2.
870 define dso_local <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
872 ; CHECK-LABEL: test_vcvtq_n_s16_f16:
873 ; CHECK: @ %bb.0: @ %entry
874 ; CHECK-NEXT: vcvt.s16.f16 q0, q0, #2
877 %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %a, i32 2)
878 ret <8 x i16> %vcvt_n1
881 declare <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half>, i32) #2
; llvm.arm.neon.vcvtfp2fxu on <4 x half> with immediate 2; expects
; vcvt.u16.f16 d0, d0, #2.
883 define dso_local <4 x i16> @test_vcvt_n_u16_f16(<4 x half> %a) {
885 ; CHECK-LABEL: test_vcvt_n_u16_f16:
886 ; CHECK: @ %bb.0: @ %entry
887 ; CHECK-NEXT: vcvt.u16.f16 d0, d0, #2
890 %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %a, i32 2)
891 ret <4 x i16> %vcvt_n1
894 declare <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half>, i32) #2
; llvm.arm.neon.vcvtfp2fxu on <8 x half> with immediate 2; expects
; vcvt.u16.f16 q0, q0, #2.
896 define dso_local <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
898 ; CHECK-LABEL: test_vcvtq_n_u16_f16:
899 ; CHECK: @ %bb.0: @ %entry
900 ; CHECK-NEXT: vcvt.u16.f16 q0, q0, #2
903 %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %a, i32 2)
904 ret <8 x i16> %vcvt_n1
907 declare <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half>, i32) #2
; llvm.arm.neon.vmaxs on <4 x half>; expects vmax.f16 d0, d0, d1.
909 define dso_local <4 x half> @test_vmax_f16(<4 x half> %a, <4 x half> %b) {
911 ; CHECK-LABEL: test_vmax_f16:
912 ; CHECK: @ %bb.0: @ %entry
913 ; CHECK-NEXT: vmax.f16 d0, d0, d1
916 %vmax_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
917 ret <4 x half> %vmax_v2.i
; llvm.arm.neon.vmaxs on <8 x half>; expects vmax.f16 q0, q0, q1.
920 define dso_local <8 x half> @test_vmaxq_f16(<8 x half> %a, <8 x half> %b) {
922 ; CHECK-LABEL: test_vmaxq_f16:
923 ; CHECK: @ %bb.0: @ %entry
924 ; CHECK-NEXT: vmax.f16 q0, q0, q1
927 %vmaxq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
928 ret <8 x half> %vmaxq_v2.i
; llvm.arm.neon.vmaxnm on <4 x half>; expects vmaxnm.f16 d0, d0, d1.
931 define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) {
932 ; CHECK-LABEL: test_vmaxnm_f16:
933 ; CHECK: @ %bb.0: @ %entry
934 ; CHECK-NEXT: vmaxnm.f16 d0, d0, d1
937 %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
938 ret <4 x half> %vmaxnm_v2.i
; llvm.arm.neon.vmaxnm on <8 x half>; expects vmaxnm.f16 q0, q0, q1.
941 define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) {
942 ; CHECK-LABEL: test_vmaxnmq_f16:
943 ; CHECK: @ %bb.0: @ %entry
944 ; CHECK-NEXT: vmaxnm.f16 q0, q0, q1
947 %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
948 ret <8 x half> %vmaxnmq_v2.i
; llvm.arm.neon.vmins on <4 x half>; expects vmin.f16 d0, d0, d1.
951 define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) {
952 ; CHECK-LABEL: test_vmin_f16:
953 ; CHECK: @ %bb.0: @ %entry
954 ; CHECK-NEXT: vmin.f16 d0, d0, d1
957 %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
958 ret <4 x half> %vmin_v2.i
; llvm.arm.neon.vmins on <8 x half>; expects vmin.f16 q0, q0, q1.
961 define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) {
962 ; CHECK-LABEL: test_vminq_f16:
963 ; CHECK: @ %bb.0: @ %entry
964 ; CHECK-NEXT: vmin.f16 q0, q0, q1
967 %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
968 ret <8 x half> %vminq_v2.i
; llvm.arm.neon.vminnm on <4 x half>; expects vminnm.f16 d0, d0, d1.
971 define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) {
972 ; CHECK-LABEL: test_vminnm_f16:
973 ; CHECK: @ %bb.0: @ %entry
974 ; CHECK-NEXT: vminnm.f16 d0, d0, d1
977 %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
978 ret <4 x half> %vminnm_v2.i
; llvm.arm.neon.vminnm on <8 x half>; expects vminnm.f16 q0, q0, q1.
981 define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) {
982 ; CHECK-LABEL: test_vminnmq_f16:
983 ; CHECK: @ %bb.0: @ %entry
984 ; CHECK-NEXT: vminnm.f16 q0, q0, q1
987 %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
988 ret <8 x half> %vminnmq_v2.i
; fmul on <4 x half>; expects vmul.f16 d0, d0, d1.
991 define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) {
993 ; CHECK-LABEL: test_vmul_f16:
994 ; CHECK: @ %bb.0: @ %entry
995 ; CHECK-NEXT: vmul.f16 d0, d0, d1
998 %mul.i = fmul <4 x half> %a, %b
999 ret <4 x half> %mul.i
; fmul on <8 x half>; expects vmul.f16 q0, q0, q1.
1002 define dso_local <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
1004 ; CHECK-LABEL: test_vmulq_f16:
1005 ; CHECK: @ %bb.0: @ %entry
1006 ; CHECK-NEXT: vmul.f16 q0, q0, q1
1009 %mul.i = fmul <8 x half> %a, %b
1010 ret <8 x half> %mul.i
; llvm.arm.neon.vpadd on <4 x half>; expects vpadd.f16 d0, d0, d1.
1013 define dso_local <4 x half> @test_vpadd_f16(<4 x half> %a, <4 x half> %b) {
1015 ; CHECK-LABEL: test_vpadd_f16:
1016 ; CHECK: @ %bb.0: @ %entry
1017 ; CHECK-NEXT: vpadd.f16 d0, d0, d1
1020 %vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
1021 ret <4 x half> %vpadd_v2.i
; llvm.arm.neon.vpmaxs on <4 x half>; expects vpmax.f16 d0, d0, d1.
1024 define dso_local <4 x half> @test_vpmax_f16(<4 x half> %a, <4 x half> %b) {
1026 ; CHECK-LABEL: test_vpmax_f16:
1027 ; CHECK: @ %bb.0: @ %entry
1028 ; CHECK-NEXT: vpmax.f16 d0, d0, d1
1031 %vpmax_v2.i = tail call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
1032 ret <4 x half> %vpmax_v2.i
; llvm.arm.neon.vpmins on <4 x half>; expects vpmin.f16 d0, d0, d1.
1035 define dso_local <4 x half> @test_vpmin_f16(<4 x half> %a, <4 x half> %b) {
1037 ; CHECK-LABEL: test_vpmin_f16:
1038 ; CHECK: @ %bb.0: @ %entry
1039 ; CHECK-NEXT: vpmin.f16 d0, d0, d1
1042 %vpmin_v2.i = tail call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
1043 ret <4 x half> %vpmin_v2.i
; llvm.arm.neon.vrecps on <4 x half>; expects vrecps.f16 d0, d0, d1.
1046 define dso_local <4 x half> @test_vrecps_f16(<4 x half> %a, <4 x half> %b) {
1048 ; CHECK-LABEL: test_vrecps_f16:
1049 ; CHECK: @ %bb.0: @ %entry
1050 ; CHECK-NEXT: vrecps.f16 d0, d0, d1
1053 %vrecps_v2.i = tail call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
1054 ret <4 x half> %vrecps_v2.i
; llvm.arm.neon.vrecps on <8 x half>; expects vrecps.f16 q0, q0, q1.
1057 define dso_local <8 x half> @test_vrecpsq_f16(<8 x half> %a, <8 x half> %b) {
1059 ; CHECK-LABEL: test_vrecpsq_f16:
1060 ; CHECK: @ %bb.0: @ %entry
1061 ; CHECK-NEXT: vrecps.f16 q0, q0, q1
1064 %vrecpsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
1065 ret <8 x half> %vrecpsq_v2.i
; llvm.arm.neon.vrsqrts on <4 x half>; expects vrsqrts.f16 d0, d0, d1.
1068 define dso_local <4 x half> @test_vrsqrts_f16(<4 x half> %a, <4 x half> %b) {
1070 ; CHECK-LABEL: test_vrsqrts_f16:
1071 ; CHECK: @ %bb.0: @ %entry
1072 ; CHECK-NEXT: vrsqrts.f16 d0, d0, d1
1075 %vrsqrts_v2.i = tail call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
1076 ret <4 x half> %vrsqrts_v2.i
; llvm.arm.neon.vrsqrts on <8 x half>; expects vrsqrts.f16 q0, q0, q1.
1079 define dso_local <8 x half> @test_vrsqrtsq_f16(<8 x half> %a, <8 x half> %b) {
1081 ; CHECK-LABEL: test_vrsqrtsq_f16:
1082 ; CHECK: @ %bb.0: @ %entry
1083 ; CHECK-NEXT: vrsqrts.f16 q0, q0, q1
1086 %vrsqrtsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
1087 ret <8 x half> %vrsqrtsq_v2.i
1090 define dso_local <4 x half> @test_vsub_f16(<4 x half> %a, <4 x half> %b) {
1091 ; A plain IR fsub on <4 x half> is expected to be selected as vsub.f16 on D registers (d0, d0, d1).
1092 ; CHECK-LABEL: test_vsub_f16:
1093 ; CHECK: @ %bb.0: @ %entry
1094 ; CHECK-NEXT: vsub.f16 d0, d0, d1
1097 %sub.i = fsub <4 x half> %a, %b
1098 ret <4 x half> %sub.i
1101 define dso_local <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
1102 ; 128-bit variant: a plain IR fsub on <8 x half> is expected to be selected as vsub.f16 on Q registers (q0, q0, q1).
1103 ; CHECK-LABEL: test_vsubq_f16:
1104 ; CHECK: @ %bb.0: @ %entry
1105 ; CHECK-NEXT: vsub.f16 q0, q0, q1
1108 %sub.i = fsub <8 x half> %a, %b
1109 ret <8 x half> %sub.i
; Fused multiply-add on <4 x half>: llvm.fma is called with %b and %c as the
; multiplicands and %a as the addend. The expected output accumulates into d0
; (where %a arrives under the hard-float ABI of the RUN line): vfma.f16 d0, d1, d2.
1112 define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
1113 ; CHECK-LABEL: test_vfma_f16:
1114 ; CHECK: @ %bb.0: @ %entry
1115 ; CHECK-NEXT: vfma.f16 d0, d1, d2
1118 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
; 128-bit fused multiply-add: llvm.fma on <8 x half> with %b, %c as the
; multiplicands and %a as the addend; expected as vfma.f16 q0, q1, q2.
1122 define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
1123 ; CHECK-LABEL: test_vfmaq_f16:
1124 ; CHECK: @ %bb.0: @ %entry
1125 ; CHECK-NEXT: vfma.f16 q0, q1, q2
1128 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
; Multiply-subtract pattern on <4 x half>: %b is negated by subtracting it from
; a splat of 0xH8000 (the half encoding of -0.0) and the result is fed to
; llvm.fma together with %c and the addend %a.
; The recorded expectation is an explicit vneg.f16 into d16 followed by
; vfma.f16, i.e. two instructions.
; NOTE(review): a single fused vfms.f16 may have been the intent of this test;
; the generated assertions only pin the current two-instruction output.
1132 define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
1133 ; CHECK-LABEL: test_vfms_f16:
1134 ; CHECK: @ %bb.0: @ %entry
1135 ; CHECK-NEXT: vneg.f16 d16, d1
1136 ; CHECK-NEXT: vfma.f16 d0, d16, d2
1139 %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
1140 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
; 128-bit multiply-subtract pattern: %b is negated via fsub from a splat of
; 0xH8000 (half -0.0) and passed to llvm.fma with %c and the addend %a.
; The recorded expectation is vneg.f16 into q8 followed by vfma.f16.
; NOTE(review): as with the 64-bit variant, a single vfms.f16 may have been the
; intent; the generated assertions only pin the current output.
1144 define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
1145 ; CHECK-LABEL: test_vfmsq_f16:
1146 ; CHECK: @ %bb.0: @ %entry
1147 ; CHECK-NEXT: vneg.f16 q8, q1
1148 ; CHECK-NEXT: vfma.f16 q0, q8, q2
1151 %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
1152 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
; Multiply by lane: lane 3 of %b is splatted with a shufflevector and multiplied
; with %a. The splat is expected to fold into the by-lane form
; vmul.f16 d0, d0, d1[3] instead of a separate duplicate instruction.
1156 define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
1157 ; CHECK-LABEL: test_vmul_lane_f16:
1158 ; CHECK: @ %bb.0: @ %entry
1159 ; CHECK-NEXT: vmul.f16 d0, d0, d1[3]
1162 %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1163 %mul = fmul <4 x half> %shuffle, %a
; 128-bit multiply by lane: lane 3 of the 64-bit vector %b is splatted to eight
; lanes and multiplied with %a; expected as vmul.f16 q0, q0, d2[3]. The "kill"
; line in the expected output is a register-liveness annotation, not an
; instruction.
1167 define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
1168 ; CHECK-LABEL: test_vmulq_lane_f16:
1169 ; CHECK: @ %bb.0: @ %entry
1170 ; CHECK-NEXT: @ kill: def $d2 killed $d2 def $q1
1171 ; CHECK-NEXT: vmul.f16 q0, q0, d2[3]
1174 %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1175 %mul = fmul <8 x half> %shuffle, %a
; Multiply by scalar: the half operand is passed as `float %b.coerce`; its low
; 16 bits are recovered via bitcast -> trunc -> bitcast, inserted into lane 0
; and splatted with a zero shuffle mask before the fmul with %a.
; Expected as the by-lane multiply vmul.f16 d0, d0, d1[0].
1179 define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
1180 ; CHECK-LABEL: test_vmul_n_f16:
1181 ; CHECK: @ %bb.0: @ %entry
1182 ; CHECK-NEXT: @ kill: def $s2 killed $s2 def $d1
1183 ; CHECK-NEXT: vmul.f16 d0, d0, d1[0]
1186 %0 = bitcast float %b.coerce to i32
1187 %tmp.0.extract.trunc = trunc i32 %0 to i16
1188 %1 = bitcast i16 %tmp.0.extract.trunc to half
1189 %vecinit = insertelement <4 x half> undef, half %1, i32 0
1190 %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
1191 %mul = fmul <4 x half> %vecinit4, %a
; 128-bit multiply by scalar: the half operand arrives as `float %b.coerce`, is
; reduced to its low 16 bits (bitcast -> trunc -> bitcast), splatted to eight
; lanes and multiplied with %a. Expected as vmul.f16 q0, q0, d2[0].
1195 define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
1196 ; CHECK-LABEL: test_vmulq_n_f16:
1197 ; CHECK: @ %bb.0: @ %entry
1198 ; CHECK-NEXT: @ kill: def $s4 killed $s4 def $d2
1199 ; CHECK-NEXT: vmul.f16 q0, q0, d2[0]
1202 %0 = bitcast float %b.coerce to i32
1203 %tmp.0.extract.trunc = trunc i32 %0 to i16
1204 %1 = bitcast i16 %tmp.0.extract.trunc to half
1205 %vecinit = insertelement <8 x half> undef, half %1, i32 0
1206 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1207 %mul = fmul <8 x half> %vecinit8, %a
1211 define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
1212 ; All three operands are bitcast to <8 x i8>, passed to llvm.arm.neon.vbsl.v8i8, and the result is bitcast back to <4 x half>; the casts are expected to cost nothing, leaving vbsl d0, d1, d2.
1213 ; CHECK-LABEL: test_vbsl_f16:
1214 ; CHECK: @ %bb.0: @ %entry
1215 ; CHECK-NEXT: vbsl d0, d1, d2
1218 %0 = bitcast <4 x i16> %a to <8 x i8>
1219 %1 = bitcast <4 x half> %b to <8 x i8>
1220 %2 = bitcast <4 x half> %c to <8 x i8>
1221 %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2)
1222 %3 = bitcast <8 x i8> %vbsl_v.i to <4 x half>
1226 define dso_local <8 x half> @test_vbslq_f16(<8 x i16> %a, <8 x half> %b, <8 x half> %c) {
1227 ; 128-bit variant: operands are bitcast to <16 x i8>, passed to llvm.arm.neon.vbsl.v16i8, and the result is bitcast back to <8 x half>; expected as vbsl q0, q1, q2.
1228 ; CHECK-LABEL: test_vbslq_f16:
1229 ; CHECK: @ %bb.0: @ %entry
1230 ; CHECK-NEXT: vbsl q0, q1, q2
1233 %0 = bitcast <8 x i16> %a to <16 x i8>
1234 %1 = bitcast <8 x half> %b to <16 x i8>
1235 %2 = bitcast <8 x half> %c to <16 x i8>
1236 %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
1237 %3 = bitcast <16 x i8> %vbslq_v.i to <8 x half>
; Zip (interleave) of two <4 x half> vectors. The first shuffle interleaves the
; low halves of %a and %b (mask 0,4,1,5), the second the high halves (mask
; 2,6,3,7); both results are returned in a %struct.float16x4x2_t.
; The pair of shuffles is expected to be matched as one vzip.16 d0, d1.
1241 define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) {
1242 ; CHECK-LABEL: test_vzip_f16:
1243 ; CHECK: @ %bb.0: @ %entry
1244 ; CHECK-NEXT: vzip.16 d0, d1
1247 %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1248 %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1249 %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0
1250 %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1
1251 ret %struct.float16x4x2_t %.fca.0.1.insert
; 128-bit zip: the first shuffle interleaves elements 0-3 of %a and %b, the
; second interleaves elements 4-7; both results are returned in a
; %struct.float16x8x2_t. Expected as one vzip.16 q0, q1.
1254 define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) {
1255 ; CHECK-LABEL: test_vzipq_f16:
1256 ; CHECK: @ %bb.0: @ %entry
1257 ; CHECK-NEXT: vzip.16 q0, q1
1260 %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1261 %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1262 %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0
1263 %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1
1264 ret %struct.float16x8x2_t %.fca.0.1.insert
; Unzip (de-interleave) of two <4 x half> vectors. The first shuffle gathers the
; even-indexed elements of the concatenation of %a and %b (mask 0,2,4,6), the
; second the odd-indexed ones (mask 1,3,5,7); both results are returned in a
; %struct.float16x4x2_t. Expected as one vuzp.16 d0, d1.
1267 define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) {
1268 ; CHECK-LABEL: test_vuzp_f16:
1269 ; CHECK: @ %bb.0: @ %entry
1270 ; CHECK-NEXT: vuzp.16 d0, d1
1273 %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1274 %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1275 %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0
1276 %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1
1277 ret %struct.float16x4x2_t %.fca.0.1.insert
; 128-bit unzip: the first shuffle gathers the even-indexed elements of the
; concatenation of %a and %b, the second the odd-indexed ones; both results are
; returned in a %struct.float16x8x2_t. Expected as one vuzp.16 q0, q1.
1280 define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) {
1281 ; CHECK-LABEL: test_vuzpq_f16:
1282 ; CHECK: @ %bb.0: @ %entry
1283 ; CHECK-NEXT: vuzp.16 q0, q1
1286 %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1287 %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1288 %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0
1289 %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1
1290 ret %struct.float16x8x2_t %.fca.0.1.insert
; Transpose of two <4 x half> vectors. The first shuffle pairs the even-indexed
; elements of %a with those of %b (mask 0,4,2,6), the second pairs the
; odd-indexed ones (mask 1,5,3,7); both results are returned in a
; %struct.float16x4x2_t. Expected as one vtrn.16 d0, d1.
1293 define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
1294 ; CHECK-LABEL: test_vtrn_f16:
1295 ; CHECK: @ %bb.0: @ %entry
1296 ; CHECK-NEXT: vtrn.16 d0, d1
1299 %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1300 %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1301 %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0
1302 %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1
1303 ret %struct.float16x4x2_t %.fca.0.1.insert
; 128-bit transpose: the first shuffle pairs the even-indexed elements of %a and
; %b, the second the odd-indexed ones; both results are returned in a
; %struct.float16x8x2_t. Expected as one vtrn.16 q0, q1.
1306 define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) {
1307 ; CHECK-LABEL: test_vtrnq_f16:
1308 ; CHECK: @ %bb.0: @ %entry
1309 ; CHECK-NEXT: vtrn.16 q0, q1
1312 %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1313 %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1314 %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0
1315 %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
1316 ret %struct.float16x8x2_t %.fca.0.1.insert
; Broadcast of a scalar half to <4 x half>. The scalar is passed as
; `float %a.coerce`; its low 16 bits are recovered via bitcast -> trunc ->
; bitcast, placed in lane 0 and splatted with a zero shuffle mask.
; Expected as vdup.16 d0, d0[0] (duplicate from lane 0 of the incoming register).
1319 define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
1320 ; CHECK-LABEL: test_vmov_n_f16:
1321 ; CHECK: @ %bb.0: @ %entry
1322 ; CHECK-NEXT: @ kill: def $s0 killed $s0 def $d0
1323 ; CHECK-NEXT: vdup.16 d0, d0[0]
1326 %0 = bitcast float %a.coerce to i32
1327 %tmp.0.extract.trunc = trunc i32 %0 to i16
1328 %1 = bitcast i16 %tmp.0.extract.trunc to half
1329 %vecinit = insertelement <4 x half> undef, half %1, i32 0
1330 %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
1331 ret <4 x half> %vecinit4
; Broadcast of a scalar half to <8 x half>. The scalar is passed as
; `float %a.coerce`, reduced to its low 16 bits, placed in lane 0 and splatted
; to all eight lanes. Expected as vdup.16 q0, d0[0].
1334 define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
1335 ; CHECK-LABEL: test_vmovq_n_f16:
1336 ; CHECK: @ %bb.0: @ %entry
1337 ; CHECK-NEXT: @ kill: def $s0 killed $s0 def $d0
1338 ; CHECK-NEXT: vdup.16 q0, d0[0]
1341 %0 = bitcast float %a.coerce to i32
1342 %tmp.0.extract.trunc = trunc i32 %0 to i16
1343 %1 = bitcast i16 %tmp.0.extract.trunc to half
1344 %vecinit = insertelement <8 x half> undef, half %1, i32 0
1345 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1346 ret <8 x half> %vecinit8
; Same IR body as test_vmov_n_f16 under a different function name: a scalar
; half passed as `float %a.coerce` is splatted to <4 x half>.
; Expected as vdup.16 d0, d0[0].
1349 define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
1350 ; CHECK-LABEL: test_vdup_n_f16:
1351 ; CHECK: @ %bb.0: @ %entry
1352 ; CHECK-NEXT: @ kill: def $s0 killed $s0 def $d0
1353 ; CHECK-NEXT: vdup.16 d0, d0[0]
1356 %0 = bitcast float %a.coerce to i32
1357 %tmp.0.extract.trunc = trunc i32 %0 to i16
1358 %1 = bitcast i16 %tmp.0.extract.trunc to half
1359 %vecinit = insertelement <4 x half> undef, half %1, i32 0
1360 %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
1361 ret <4 x half> %vecinit4
; Same IR body as test_vmovq_n_f16 under a different function name: a scalar
; half passed as `float %a.coerce` is splatted to <8 x half>.
; Expected as vdup.16 q0, d0[0].
1364 define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
1365 ; CHECK-LABEL: test_vdupq_n_f16:
1366 ; CHECK: @ %bb.0: @ %entry
1367 ; CHECK-NEXT: @ kill: def $s0 killed $s0 def $d0
1368 ; CHECK-NEXT: vdup.16 q0, d0[0]
1371 %0 = bitcast float %a.coerce to i32
1372 %tmp.0.extract.trunc = trunc i32 %0 to i16
1373 %1 = bitcast i16 %tmp.0.extract.trunc to half
1374 %vecinit = insertelement <8 x half> undef, half %1, i32 0
1375 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1376 ret <8 x half> %vecinit8
; Lane broadcast: a shufflevector with an all-3 mask replicates lane 3 of %a
; across a <4 x half>. Expected as vdup.16 d0, d0[3].
1379 define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
1380 ; CHECK-LABEL: test_vdup_lane_f16:
1381 ; CHECK: @ %bb.0: @ %entry
1382 ; CHECK-NEXT: vdup.16 d0, d0[3]
1385 %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1386 ret <4 x half> %shuffle
; Widening lane broadcast: lane 3 of the 64-bit vector %a is replicated across
; an <8 x half> result. Expected as vdup.16 q0, d0[3].
1389 define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
1390 ; CHECK-LABEL: test_vdupq_lane_f16:
1391 ; CHECK: @ %bb.0: @ %entry
1392 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
1393 ; CHECK-NEXT: vdup.16 q0, d0[3]
1396 %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1397 ret <8 x half> %shuffle
; Vector extract: the shuffle mask 2,3,4,5 selects a contiguous window starting
; at element 2 of the concatenation of %a and %b.
; Expected as vext.16 d0, d0, d1, #2 (the immediate equals the window start).
1400 define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
1401 ; CHECK-LABEL: test_vext_f16:
1402 ; CHECK: @ %bb.0: @ %entry
1403 ; CHECK-NEXT: vext.16 d0, d0, d1, #2
1406 %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1407 ret <4 x half> %vext
; 128-bit vector extract: the shuffle mask 5..12 selects a contiguous window
; starting at element 5 of the concatenation of %a and %b.
; Expected as vext.16 q0, q0, q1, #5.
1410 define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
1411 ; CHECK-LABEL: test_vextq_f16:
1412 ; CHECK: @ %bb.0: @ %entry
1413 ; CHECK-NEXT: vext.16 q0, q0, q1, #5
1416 %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1417 ret <8 x half> %vext
; Element reversal: the mask 3,2,1,0 reverses the four 16-bit elements of %a
; (one 64-bit group). Expected as vrev64.16 d0, d0.
1420 define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
1421 ; CHECK-LABEL: test_vrev64_f16:
1422 ; CHECK: @ %bb.0: @ %entry
1423 ; CHECK-NEXT: vrev64.16 d0, d0
1426 %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1427 ret <4 x half> %shuffle.i
; 128-bit element reversal: the mask 3,2,1,0,7,6,5,4 reverses the 16-bit
; elements inside each group of four (each 64-bit half) without swapping the
; halves themselves. Expected as vrev64.16 q0, q0.
1430 define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
1431 ; CHECK-LABEL: test_vrev64q_f16:
1432 ; CHECK: @ %bb.0: @ %entry
1433 ; CHECK-NEXT: vrev64.16 q0, q0
1436 %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1437 ret <8 x half> %shuffle.i
; Load-and-duplicate: one half is loaded from %b (align 2) and written to all
; four lanes through a chain of insertelement instructions.
; The load and the splat are expected to merge into the all-lanes load
; vld1.16 {d0[]}, [r0:16].
1440 define <4 x half> @test_vld_dup1_4xhalf(ptr %b) {
1441 ; CHECK-LABEL: test_vld_dup1_4xhalf:
1442 ; CHECK: @ %bb.0: @ %entry
1443 ; CHECK-NEXT: vld1.16 {d0[]}, [r0:16]
1447 %b1 = load half, ptr %b, align 2
1448 %vecinit = insertelement <4 x half> undef, half %b1, i32 0
1449 %vecinit2 = insertelement <4 x half> %vecinit, half %b1, i32 1
1450 %vecinit3 = insertelement <4 x half> %vecinit2, half %b1, i32 2
1451 %vecinit4 = insertelement <4 x half> %vecinit3, half %b1, i32 3
1452 ret <4 x half> %vecinit4
; 128-bit load-and-duplicate: one half is loaded from %b (align 2), inserted in
; lane 0 and splatted with a zero shuffle mask (unlike the 4-lane test, which
; uses an insertelement chain).
; Expected as the all-lanes load vld1.16 {d0[], d1[]}, [r0:16].
1455 define <8 x half> @test_vld_dup1_8xhalf(ptr %b) local_unnamed_addr {
1456 ; CHECK-LABEL: test_vld_dup1_8xhalf:
1457 ; CHECK: @ %bb.0: @ %entry
1458 ; CHECK-NEXT: vld1.16 {d0[], d1[]}, [r0:16]
1462 %b1 = load half, ptr %b, align 2
1463 %vecinit = insertelement <8 x half> undef, half %b1, i32 0
1464 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1465 ret <8 x half> %vecinit8
; Concatenation shuffle: the identity mask 0..7 over (%a, %a) yields an
; <8 x half> whose low and high halves are both %a.
; Expected as a plain 64-bit register copy of d0 into d1 (vmov.f64 d1, d0).
1468 define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) {
1469 ; CHECK-LABEL: test_shufflevector8xhalf:
1470 ; CHECK: @ %bb.0: @ %entry
1471 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
1472 ; CHECK-NEXT: vmov.f64 d1, d0
1476 %r = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Declarations of the intrinsics referenced by the tests in this file, grouped
; roughly as: generic fabs, conversions (vcvt*), estimates and rounding
; (vrecpe, vrint*, vrsqrte), two-operand arithmetic and comparisons, pairwise
; operations, reciprocal/rsqrt steps, fma and bitwise select.
; NOTE(review): several of these (e.g. the vcvt* and vrint* families) have no
; call site in the nearby tests; presumably they are used by earlier functions.
1480 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
1481 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
1482 declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)
1483 declare <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half>)
1484 declare <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half>)
1485 declare <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half>)
1486 declare <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half>)
1487 declare <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half>)
1488 declare <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half>)
1489 declare <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half>)
1490 declare <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half>)
1491 declare <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half>)
1492 declare <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half>)
1493 declare <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half>)
1494 declare <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half>)
1495 declare <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half>)
1496 declare <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half>)
1497 declare <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half>)
1498 declare <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half>)
1499 declare <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half>)
1500 declare <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half>)
1501 declare <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half>)
1502 declare <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half>)
1503 declare <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half>)
1504 declare <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half>)
1505 declare <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half>)
1506 declare <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half>)
1507 declare <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half>)
1508 declare <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half>)
1509 declare <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half>)
1510 declare <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half>)
1511 declare <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half>)
1512 declare <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half>)
1513 declare <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half>, <4 x half>)
1514 declare <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half>, <8 x half>)
1515 declare <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half>, <4 x half>)
1516 declare <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half>, <8 x half>)
1517 declare <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half>, <4 x half>)
1518 declare <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half>, <8 x half>)
1519 declare <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half>, <4 x half>)
1520 declare <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half>, <8 x half>)
1521 declare <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half>, <4 x half>)
1522 declare <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half>, <8 x half>)
1523 declare <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half>, <4 x half>)
1524 declare <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half>, <8 x half>)
1525 declare <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half>, <4 x half>)
1526 declare <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half>, <8 x half>)
1527 declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>)
1528 declare <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half>, <4 x half>)
1529 declare <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half>, <4 x half>)
1530 declare <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half>, <4 x half>)
1531 declare <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half>, <8 x half>)
1532 declare <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half>, <4 x half>)
1533 declare <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half>, <8 x half>)
1534 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
1535 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
1536 declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
1537 declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
; Lane-wise structured load/store intrinsics (2, 3 and 4 vectors, 64-bit and
; 128-bit). Each takes the pointer, the vectors, and two trailing i32 operands;
; they are called by the test_vld*lane* / test_vst*lane* functions that follow.
1538 declare { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0(ptr, <8 x half>, <8 x half>, i32, i32)
1539 declare { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0(ptr, <4 x half>, <4 x half>, i32, i32)
1540 declare { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0(ptr, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1541 declare { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0(ptr, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1542 declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1543 declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1544 declare void @llvm.arm.neon.vst2lane.p0.v8f16(ptr, <8 x half>, <8 x half>, i32, i32)
1545 declare void @llvm.arm.neon.vst2lane.p0.v4f16(ptr, <4 x half>, <4 x half>, i32, i32)
1546 declare void @llvm.arm.neon.vst3lane.p0.v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1547 declare void @llvm.arm.neon.vst3lane.p0.v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1548 declare void @llvm.arm.neon.vst4lane.p0.v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1549 declare void @llvm.arm.neon.vst4lane.p0.v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32)
; Two-vector lane load into Q registers: calls vld2lane.v8f16 with the first
; trailing i32 set to 7 and the second to 2. Element 7 of a Q register is
; element 3 of its upper D register, so the expected instruction addresses
; d1[3] and d3[3]: vld2.16 {d1[3], d3[3]}, [r0].
; (The second trailing i32 is presumably the alignment operand - confirm.)
1551 define { <8 x half>, <8 x half> } @test_vld2q_lane_f16(ptr, <8 x half>, <8 x half>) {
1552 ; CHECK-LABEL: test_vld2q_lane_f16:
1553 ; CHECK: @ %bb.0: @ %entry
1554 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1555 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
1556 ; CHECK-NEXT: vld2.16 {d1[3], d3[3]}, [r0]
1559 %3 = tail call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, i32 7, i32 2)
1560 ret { <8 x half>, <8 x half> } %3
; Two-vector lane load into D registers: calls vld2lane.v4f16 with trailing
; operands 3 and 2; expected as vld2.16 {d0[3], d1[3]}, [r0].
; (The second trailing i32 is presumably the alignment operand - confirm.)
1563 define { <4 x half>, <4 x half> } @test_vld2_lane_f16(ptr, <4 x half>, <4 x half>) {
1564 ; CHECK-LABEL: test_vld2_lane_f16:
1565 ; CHECK: @ %bb.0: @ %entry
1566 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
1567 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
1568 ; CHECK-NEXT: vld2.16 {d0[3], d1[3]}, [r0]
1571 %3 = tail call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, i32 3, i32 2)
1572 ret { <4 x half>, <4 x half> } %3
; Three-vector lane load into Q registers: calls vld3lane.v8f16 with trailing
; operands 7 and 2. Element 7 of each Q register is element 3 of its upper D
; register, hence the expected vld3.16 {d1[3], d3[3], d5[3]}, [r0].
; The "kill" lines are liveness annotations for the q0_q1_q2_q3 register tuple.
1575 define { <8 x half>, <8 x half>, <8 x half> } @test_vld3q_lane_f16(ptr, <8 x half>, <8 x half>, <8 x half>) {
1576 ; CHECK-LABEL: test_vld3q_lane_f16:
1577 ; CHECK: @ %bb.0: @ %entry
1578 ; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1579 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1580 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1581 ; CHECK-NEXT: vld3.16 {d1[3], d3[3], d5[3]}, [r0]
1584 %4 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 7, i32 2)
1585 ret { <8 x half>, <8 x half>, <8 x half> } %4
; Three-vector lane load into D registers: calls vld3lane.v4f16 with trailing
; operands 3 and 2; expected as vld3.16 {d0[3], d1[3], d2[3]}, [r0].
1588 define { <4 x half>, <4 x half>, <4 x half> } @test_vld3_lane_f16(ptr, <4 x half>, <4 x half>, <4 x half>) {
1589 ; CHECK-LABEL: test_vld3_lane_f16:
1590 ; CHECK: @ %bb.0: @ %entry
1591 ; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
1592 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
1593 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
1594 ; CHECK-NEXT: vld3.16 {d0[3], d1[3], d2[3]}, [r0]
1597 %4 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 3, i32 2)
1598 ret { <4 x half>, <4 x half>, <4 x half> } %4
; Four-vector lane load into Q registers: calls vld4lane.v8f16 with trailing
; operands 7 and 2. Element 7 of each Q register is element 3 of its upper D
; register, hence the expected vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0].
1600 define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @test_vld4lane_v8f16_p0i8(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
1601 ; CHECK-LABEL: test_vld4lane_v8f16_p0i8:
1602 ; CHECK: @ %bb.0: @ %entry
1603 ; CHECK-NEXT: @ kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1604 ; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1605 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1606 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1607 ; CHECK-NEXT: vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0]
1610 %5 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 7, i32 2)
1611 ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %5
; Four-vector lane load into D registers: calls vld4lane.v4f16 with trailing
; operands 3 and 2; expected as vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0].
1613 define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @test_vld4lane_v4f16_p0i8(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
1614 ; CHECK-LABEL: test_vld4lane_v4f16_p0i8:
1615 ; CHECK: @ %bb.0: @ %entry
1616 ; CHECK-NEXT: @ kill: def $d3 killed $d3 killed $q0_q1 def $q0_q1
1617 ; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
1618 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
1619 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
1620 ; CHECK-NEXT: vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0]
1623 %5 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 3, i32 2)
1624 ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %5
; Two-vector lane store from Q registers: calls vst2lane.v8f16 with trailing
; operands 0 and 1. Element 0 of each Q register is element 0 of its lower D
; register, hence the expected vst2.16 {d0[0], d2[0]}, [r0] (no alignment
; qualifier on the address).
; (The second trailing i32 is presumably the alignment operand - confirm.)
1626 define void @test_vst2lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>) {
1627 ; CHECK-LABEL: test_vst2lane_p0i8_v8f16:
1628 ; CHECK: @ %bb.0: @ %entry
1629 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1630 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
1631 ; CHECK-NEXT: vst2.16 {d0[0], d2[0]}, [r0]
1634 tail call void @llvm.arm.neon.vst2lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, i32 0, i32 1)
; Two-vector lane store from D registers: calls vst2lane.v4f16 with both
; trailing operands 0; expected as vst2.16 {d0[0], d1[0]}, [r0:32].
; NOTE(review): here a trailing 0 yields a :32 address qualifier, whereas the
; Q-register variant (trailing 1) yields none - presumably 0 lets the backend
; assume full alignment; confirm against the intrinsic's definition.
1637 define void @test_vst2lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>) {
1638 ; CHECK-LABEL: test_vst2lane_p0i8_v4f16:
1639 ; CHECK: @ %bb.0: @ %entry
1640 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
1641 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
1642 ; CHECK-NEXT: vst2.16 {d0[0], d1[0]}, [r0:32]
1645 tail call void @llvm.arm.neon.vst2lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, i32 0, i32 0)
; Three-vector lane store from Q registers: calls vst3lane.v8f16 with both
; trailing operands 0; expected as vst3.16 {d0[0], d2[0], d4[0]}, [r0] (no
; alignment qualifier, unlike the 2- and 4-vector stores that also pass 0).
1648 define void @test_vst3lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>, <8 x half>) {
1649 ; CHECK-LABEL: test_vst3lane_p0i8_v8f16:
1650 ; CHECK: @ %bb.0: @ %entry
1651 ; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1652 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1653 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1654 ; CHECK-NEXT: vst3.16 {d0[0], d2[0], d4[0]}, [r0]
1657 tail call void @llvm.arm.neon.vst3lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 0, i32 0)
; Three-vector lane store from D registers: calls vst3lane.v4f16 with both
; trailing operands 0; expected as vst3.16 {d0[0], d1[0], d2[0]}, [r0] (no
; alignment qualifier).
1660 define void @test_vst3lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>, <4 x half>) {
1661 ; CHECK-LABEL: test_vst3lane_p0i8_v4f16:
1662 ; CHECK: @ %bb.0: @ %entry
1663 ; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
1664 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
1665 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
1666 ; CHECK-NEXT: vst3.16 {d0[0], d1[0], d2[0]}, [r0]
1669 tail call void @llvm.arm.neon.vst3lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 0, i32 0)
; Four-vector lane store from Q registers: calls vst4lane.v8f16 with both
; trailing operands 0; expected as
; vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0:64].
; NOTE(review): the :64 qualifier appears with a trailing operand of 0 -
; presumably 0 lets the backend assume full alignment; confirm.
1672 define void @test_vst4lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
1673 ; CHECK-LABEL: test_vst4lane_p0i8_v8f16:
1674 ; CHECK: @ %bb.0: @ %entry
1675 ; CHECK-NEXT: @ kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1676 ; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1677 ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1678 ; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
1679 ; CHECK-NEXT: vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0:64]
1682 tail call void @llvm.arm.neon.vst4lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 0, i32 0)
; Four-vector lane store from D registers: calls vst4lane.v4f16 with both
; trailing operands 0; expected as
; vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0:64].
; NOTE(review): the :64 qualifier appears with a trailing operand of 0 -
; presumably 0 lets the backend assume full alignment; confirm.
1685 define void @test_vst4lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
1686 ; CHECK-LABEL: test_vst4lane_p0i8_v4f16:
1687 ; CHECK: @ %bb.0: @ %entry
1688 ; CHECK-NEXT: @ kill: def $d3 killed $d3 killed $q0_q1 def $q0_q1
1689 ; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
1690 ; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
1691 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
1692 ; CHECK-NEXT: vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0:64]
1695 tail call void @llvm.arm.neon.vst4lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 0, i32 0)