2 ; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
3 ; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
6 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
7 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
8 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
10 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
11 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
12 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
15 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
16 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
19 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
20 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
21 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
23 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
24 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
25 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
28 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
29 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
31 ; TODO: we can't pass half-precision arguments as "half" types yet. We do
32 ; that for the time being by passing "float %f.coerce" and the necessary
33 ; bitconverts/truncates. But when we can pass half types, we do want to use
; Regression test: returning an undef half must not crash LowerReturn when
; FullFP16 is enabled (function body is truncated in this chunk).
36 define float @RetValBug(float %A.coerce) {
39 ; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
40 ; any operands) when FullFP16 is enabled.
42 ; CHECK-LABEL: RetValBug:
43 ; CHECK-HARDFP-FULLFP16: {{.*}} lr
; fadd on half. The half value is passed bit-packed inside a float
; ("%a.coerce"); the bitcast/trunc pairs unpack it and the zext/bitcast pair
; repacks the result (see the TODO at the top of the file). The check groups
; below cover each configuration, from full libcall lowering (soft float,
; via __aeabi_h2f / __aeabi_fadd / __aeabi_f2h) through f32 widening with
; vcvtb (the FP16-conversion-only configs) to a native vadd.f16 (fullfp16).
49 define float @Add(float %a.coerce, float %b.coerce) {
51 %0 = bitcast float %a.coerce to i32
52 %tmp.0.extract.trunc = trunc i32 %0 to i16
53 %1 = bitcast i16 %tmp.0.extract.trunc to half
54 %2 = bitcast float %b.coerce to i32
55 %tmp1.0.extract.trunc = trunc i32 %2 to i16
56 %3 = bitcast i16 %tmp1.0.extract.trunc to half
57 %add = fadd half %1, %3
58 %4 = bitcast half %add to i16
59 %tmp4.0.insert.ext = zext i16 %4 to i32
60 %5 = bitcast i32 %tmp4.0.insert.ext to float
65 ; CHECK-SOFT: bl __aeabi_h2f
66 ; CHECK-SOFT: bl __aeabi_h2f
67 ; CHECK-SOFT: bl __aeabi_fadd
68 ; CHECK-SOFT: bl __aeabi_f2h
70 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
71 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
72 ; CHECK-SOFTFP-VFP3: vadd.f32
73 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
75 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
76 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
77 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
78 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
79 ; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
80 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
81 ; CHECK-SOFTFP-FP16: vmov r0, s0
83 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
84 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
85 ; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
86 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
88 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
89 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
90 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
91 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
92 ; CHECK-HARDFP-VFP3: vadd.f32
93 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
94 ; CHECK-HARDFP-VFP3: vmov s0, r0
96 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
97 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
98 ; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
99 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
101 ; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
; fcmp lowering on half: VCMP (quiet compare) for une/oeq-style predicates,
; VCMPE (raising compare) for ordered-inequality predicates, and BR_CC.
; Fullfp16 configs must compare in f16 directly, without widening.

; une compare of two half arguments.
105 define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
107 %0 = bitcast float %F.coerce to i32
108 %tmp.0.extract.trunc = trunc i32 %0 to i16
109 %1 = bitcast i16 %tmp.0.extract.trunc to half
110 %2 = bitcast float %G.coerce to i32
111 %tmp1.0.extract.trunc = trunc i32 %2 to i16
112 %3 = bitcast i16 %tmp1.0.extract.trunc to half
113 %cmp = fcmp une half %1, %3
116 ; CHECK-LABEL: VCMP1:
118 ; CHECK-SOFT: bl __aeabi_fcmpeq
120 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
121 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
122 ; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}
124 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
125 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
126 ; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}
128 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
129 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
130 ; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]
132 ; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
133 ; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
134 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
; une compare against the +0.0 immediate form of vcmp.
138 define zeroext i1 @VCMP2(float %F.coerce) {
140 %0 = bitcast float %F.coerce to i32
141 %tmp.0.extract.trunc = trunc i32 %0 to i16
142 %1 = bitcast i16 %tmp.0.extract.trunc to half
143 %cmp = fcmp une half %1, 0.000000e+00
146 ; CHECK-LABEL: VCMP2:
148 ; CHECK-SOFT: bl __aeabi_fcmpeq
149 ; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
150 ; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
151 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
; olt against zero must use the raising variant (vcmpe), immediate form.
155 define i32 @VCMPE1(float %F.coerce) {
157 %0 = bitcast float %F.coerce to i32
158 %tmp.0.extract.trunc = trunc i32 %0 to i16
159 %1 = bitcast i16 %tmp.0.extract.trunc to half
160 %tmp = fcmp olt half %1, 0.000000e+00
161 %tmp1 = zext i1 %tmp to i32
164 ; CHECK-LABEL: VCMPE1:
166 ; CHECK-SOFT: bl __aeabi_fcmplt
167 ; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
168 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
169 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
; olt of two half arguments, register form of vcmpe.
172 define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
174 %0 = bitcast float %F.coerce to i32
175 %tmp.0.extract.trunc = trunc i32 %0 to i16
176 %1 = bitcast i16 %tmp.0.extract.trunc to half
177 %2 = bitcast float %G.coerce to i32
178 %tmp.1.extract.trunc = trunc i32 %2 to i16
179 %3 = bitcast i16 %tmp.1.extract.trunc to half
180 %tmp = fcmp olt half %1, %3
181 %tmp1 = zext i1 %tmp to i32
184 ; CHECK-LABEL: VCMPE2:
186 ; CHECK-SOFT: bl __aeabi_fcmplt
187 ; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
188 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
189 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
192 ; Test lowering of BR_CC
; Compares a loaded half against the constant 0xH6800 and branches on the
; result; the loop structure is truncated in this chunk.
193 define hidden i32 @VCMPBRCC() {
195 %f = alloca half, align 2
199 %0 = load half, half* %f, align 2
200 %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
201 br i1 %cmp, label %for.body, label %for.end
209 ; CHECK-LABEL: VCMPBRCC:
211 ; CHECK-SOFT: bl __aeabi_fcmpgt
212 ; CHECK-SOFT: cmp r0, #0
214 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
215 ; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
216 ; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr
218 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
219 ; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
222 ; 5. VCVT (between floating-point and fixed-point)
223 ; Only assembly/disassembly support
225 ; 6. VCVT (between floating-point and integer, both directions)
; half -> signed i32: vcvt.s32.f16 with fullfp16.
226 define i32 @fptosi(i32 %A.coerce) {
228 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
229 %0 = bitcast i16 %tmp.0.extract.trunc to half
230 %conv = fptosi half %0 to i32
233 ; CHECK-LABEL: fptosi:
235 ; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
236 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
237 ; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
; half -> unsigned i32: vcvt.u32.f16 with fullfp16.
240 define i32 @fptoui(i32 %A.coerce) {
242 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
243 %0 = bitcast i16 %tmp.0.extract.trunc to half
244 %conv = fptoui half %0 to i32
247 ; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
248 ; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
; unsigned i32 -> half: vcvt.f16.u32 with fullfp16.
251 define float @UintToH(i32 %a, i32 %b) {
253 %0 = uitofp i32 %a to half
254 %1 = bitcast half %0 to i16
255 %tmp0.insert.ext = zext i16 %1 to i32
256 %2 = bitcast i32 %tmp0.insert.ext to float
259 ; CHECK-LABEL: UintToH:
261 ; CHECK-HARDFP-FULLFP16: vmov s0, r0
262 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
; signed i32 -> half: vcvt.f16.s32 with fullfp16.
265 define float @SintToH(i32 %a, i32 %b) {
267 %0 = sitofp i32 %a to half
268 %1 = bitcast half %0 to i16
269 %tmp0.insert.ext = zext i16 %1 to i32
270 %2 = bitcast i32 %tmp0.insert.ext to float
273 ; CHECK-LABEL: SintToH:
275 ; CHECK-HARDFP-FULLFP16: vmov s0, r0
276 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
; fptrunc f32 -> f16 maps to vcvtb.f16.f32.
279 define i32 @f2h(float %f) {
281 %conv = fptrunc float %f to half
282 %0 = bitcast half %conv to i16
283 %tmp.0.insert.ext = zext i16 %0 to i32
284 ret i32 %tmp.0.insert.ext
287 ; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
; fpext f16 -> f32 maps to vcvtb.f32.f16.
290 define float @h2f(i32 %h.coerce) {
292 %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
293 %0 = bitcast i16 %tmp.0.extract.trunc to half
294 %conv = fpext half %0 to float
298 ; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
; fpext f16 -> f64 maps to vcvtb.f64.f16.
302 define double @h2d(i32 %h.coerce) {
304 %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
305 %0 = bitcast i16 %tmp.0.extract.trunc to half
306 %conv = fpext half %0 to double
310 ; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
; fptrunc f64 -> f16 maps to vcvtb.f16.f64.
313 define i32 @d2h(double %d) {
315 %conv = fptrunc double %d to half
316 %0 = bitcast half %conv to i16
317 %tmp.0.insert.ext = zext i16 %0 to i32
318 ret i32 %tmp.0.insert.ext
321 ; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
; fdiv on half: libcalls when soft, widen-to-f32 vdiv with FP16 conversions,
; and a native vdiv.f16 with fullfp16. Note the result name "%add" is just a
; leftover from copy-pasting the Add test; the operation is fdiv.
332 define float @Div(float %a.coerce, float %b.coerce) {
334 %0 = bitcast float %a.coerce to i32
335 %tmp.0.extract.trunc = trunc i32 %0 to i16
336 %1 = bitcast i16 %tmp.0.extract.trunc to half
337 %2 = bitcast float %b.coerce to i32
338 %tmp1.0.extract.trunc = trunc i32 %2 to i16
339 %3 = bitcast i16 %tmp1.0.extract.trunc to half
340 %add = fdiv half %1, %3
341 %4 = bitcast half %add to i16
342 %tmp4.0.insert.ext = zext i16 %4 to i32
343 %5 = bitcast i32 %tmp4.0.insert.ext to float
348 ; CHECK-SOFT: bl __aeabi_h2f
349 ; CHECK-SOFT: bl __aeabi_h2f
350 ; CHECK-SOFT: bl __aeabi_fdiv
351 ; CHECK-SOFT: bl __aeabi_f2h
353 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
354 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
355 ; CHECK-SOFTFP-VFP3: vdiv.f32
356 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
358 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
359 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
360 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
361 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
362 ; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
363 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
364 ; CHECK-SOFTFP-FP16: vmov r0, s0
366 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
367 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
368 ; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
369 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
371 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
372 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
373 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
374 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
375 ; CHECK-HARDFP-VFP3: vdiv.f32
376 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
377 ; CHECK-HARDFP-VFP3: vmov s0, r0
379 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
380 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
381 ; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
382 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
384 ; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
; Fused multiply-accumulate family. These only fuse under -fp-contract=fast,
; hence the FULLFP16-FAST prefix (see the RUN lines at the top).

; a*b + c  ->  vfma.f16
388 define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
390 %0 = bitcast float %a.coerce to i32
391 %tmp.0.extract.trunc = trunc i32 %0 to i16
392 %1 = bitcast i16 %tmp.0.extract.trunc to half
393 %2 = bitcast float %b.coerce to i32
394 %tmp1.0.extract.trunc = trunc i32 %2 to i16
395 %3 = bitcast i16 %tmp1.0.extract.trunc to half
396 %4 = bitcast float %c.coerce to i32
397 %tmp2.0.extract.trunc = trunc i32 %4 to i16
398 %5 = bitcast i16 %tmp2.0.extract.trunc to half
399 %mul = fmul half %1, %3
400 %add = fadd half %mul, %5
401 %6 = bitcast half %add to i16
402 %tmp4.0.insert.ext = zext i16 %6 to i32
403 %7 = bitcast i32 %tmp4.0.insert.ext to float
407 ; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
408 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
; c - a*b  ->  vfms.f16
412 define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
414 %0 = bitcast float %a.coerce to i32
415 %tmp.0.extract.trunc = trunc i32 %0 to i16
416 %1 = bitcast i16 %tmp.0.extract.trunc to half
417 %2 = bitcast float %b.coerce to i32
418 %tmp1.0.extract.trunc = trunc i32 %2 to i16
419 %3 = bitcast i16 %tmp1.0.extract.trunc to half
420 %4 = bitcast float %c.coerce to i32
421 %tmp2.0.extract.trunc = trunc i32 %4 to i16
422 %5 = bitcast i16 %tmp2.0.extract.trunc to half
423 %mul = fmul half %1, %3
424 %sub = fsub half %5, %mul
425 %6 = bitcast half %sub to i16
426 %tmp4.0.insert.ext = zext i16 %6 to i32
427 %7 = bitcast i32 %tmp4.0.insert.ext to float
431 ; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
432 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
; -(a*b) - c  ->  vfnma.f16
436 define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
438 %0 = bitcast float %a.coerce to i32
439 %tmp.0.extract.trunc = trunc i32 %0 to i16
440 %1 = bitcast i16 %tmp.0.extract.trunc to half
441 %2 = bitcast float %b.coerce to i32
442 %tmp1.0.extract.trunc = trunc i32 %2 to i16
443 %3 = bitcast i16 %tmp1.0.extract.trunc to half
444 %4 = bitcast float %c.coerce to i32
445 %tmp2.0.extract.trunc = trunc i32 %4 to i16
446 %5 = bitcast i16 %tmp2.0.extract.trunc to half
447 %mul = fmul half %1, %3
448 %sub = fsub half -0.0, %mul
449 %sub2 = fsub half %sub, %5
450 %6 = bitcast half %sub2 to i16
451 %tmp4.0.insert.ext = zext i16 %6 to i32
452 %7 = bitcast i32 %tmp4.0.insert.ext to float
455 ; CHECK-LABEL: VFNMA:
456 ; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
457 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
; a*b - c  ->  vfnms.f16
461 define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
463 %0 = bitcast float %a.coerce to i32
464 %tmp.0.extract.trunc = trunc i32 %0 to i16
465 %1 = bitcast i16 %tmp.0.extract.trunc to half
466 %2 = bitcast float %b.coerce to i32
467 %tmp1.0.extract.trunc = trunc i32 %2 to i16
468 %3 = bitcast i16 %tmp1.0.extract.trunc to half
469 %4 = bitcast float %c.coerce to i32
470 %tmp2.0.extract.trunc = trunc i32 %4 to i16
471 %5 = bitcast i16 %tmp2.0.extract.trunc to half
472 %mul = fmul half %1, %3
473 %sub2 = fsub half %mul, %5
474 %6 = bitcast half %sub2 to i16
475 %tmp4.0.insert.ext = zext i16 %6 to i32
476 %7 = bitcast i32 %tmp4.0.insert.ext to float
479 ; CHECK-LABEL: VFNMS:
480 ; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
481 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
486 ; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll

; Non-fused multiply-accumulate: c + a*b  ->  vmla.f16
489 define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
491 %0 = bitcast float %a.coerce to i32
492 %tmp.0.extract.trunc = trunc i32 %0 to i16
493 %1 = bitcast i16 %tmp.0.extract.trunc to half
494 %2 = bitcast float %b.coerce to i32
495 %tmp1.0.extract.trunc = trunc i32 %2 to i16
496 %3 = bitcast i16 %tmp1.0.extract.trunc to half
497 %4 = bitcast float %c.coerce to i32
498 %tmp2.0.extract.trunc = trunc i32 %4 to i16
499 %5 = bitcast i16 %tmp2.0.extract.trunc to half
500 %mul = fmul half %1, %3
501 %add = fadd half %5, %mul
502 %6 = bitcast half %add to i16
503 %tmp4.0.insert.ext = zext i16 %6 to i32
504 %7 = bitcast i32 %tmp4.0.insert.ext to float
508 ; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
509 ; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
; Multiply-subtract: c - a*b  ->  vmls.f16 (the "%add" name is a leftover
; from the VMLA pattern above; the operation is fsub).
513 define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
515 %0 = bitcast float %a.coerce to i32
516 %tmp.0.extract.trunc = trunc i32 %0 to i16
517 %1 = bitcast i16 %tmp.0.extract.trunc to half
518 %2 = bitcast float %b.coerce to i32
519 %tmp1.0.extract.trunc = trunc i32 %2 to i16
520 %3 = bitcast i16 %tmp1.0.extract.trunc to half
521 %4 = bitcast float %c.coerce to i32
522 %tmp2.0.extract.trunc = trunc i32 %4 to i16
523 %5 = bitcast i16 %tmp2.0.extract.trunc to half
524 %mul = fmul half %1, %3
525 %add = fsub half %5, %mul
526 %6 = bitcast half %add to i16
527 %tmp4.0.insert.ext = zext i16 %6 to i32
528 %7 = bitcast i32 %tmp4.0.insert.ext to float
532 ; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
533 ; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
536 ; TODO: fix immediates.
537 ; 21. VMOV (between general-purpose register and half-precision register)
539 ; 22. VMOV (immediate)
; fadd with constant 0xHC000 (= -2.0 in IEEE half): the constant must be
; materialised with a VMOV immediate rather than a constant-pool load.
540 define i32 @movi(i32 %a.coerce) {
542 %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
543 %0 = bitcast i16 %tmp.0.extract.trunc to half
544 %add = fadd half %0, 0xHC000
545 %1 = bitcast half %add to i16
546 %tmp2.0.insert.ext = zext i16 %1 to i32
547 ret i32 %tmp2.0.insert.ext
550 ; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
; fmul on half, same configuration matrix as the Add test above:
; libcalls when soft, f32 widening with vcvtb otherwise, native vmul.f16
; with fullfp16.
554 define float @Mul(float %a.coerce, float %b.coerce) {
556 %0 = bitcast float %a.coerce to i32
557 %tmp.0.extract.trunc = trunc i32 %0 to i16
558 %1 = bitcast i16 %tmp.0.extract.trunc to half
559 %2 = bitcast float %b.coerce to i32
560 %tmp1.0.extract.trunc = trunc i32 %2 to i16
561 %3 = bitcast i16 %tmp1.0.extract.trunc to half
562 %add = fmul half %1, %3
563 %4 = bitcast half %add to i16
564 %tmp4.0.insert.ext = zext i16 %4 to i32
565 %5 = bitcast i32 %tmp4.0.insert.ext to float
570 ; CHECK-SOFT: bl __aeabi_h2f
571 ; CHECK-SOFT: bl __aeabi_h2f
572 ; CHECK-SOFT: bl __aeabi_fmul
573 ; CHECK-SOFT: bl __aeabi_f2h
575 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
576 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
577 ; CHECK-SOFTFP-VFP3: vmul.f32
578 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
580 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
581 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
582 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
583 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
584 ; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
585 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
586 ; CHECK-SOFTFP-FP16: vmov r0, s0
588 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
589 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
590 ; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
591 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
593 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
594 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
595 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
596 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
597 ; CHECK-HARDFP-VFP3: vmul.f32
598 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
599 ; CHECK-HARDFP-VFP3: vmov s0, r0
601 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
602 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
603 ; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
604 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
606 ; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
; Negation (-0.0 - x) must select vneg.f16 with fullfp16.
610 define float @Neg(float %a.coerce) {
612 %0 = bitcast float %a.coerce to i32
613 %tmp.0.extract.trunc = trunc i32 %0 to i16
614 %1 = bitcast i16 %tmp.0.extract.trunc to half
615 %2 = fsub half -0.000000e+00, %1
616 %3 = bitcast half %2 to i16
617 %tmp4.0.insert.ext = zext i16 %3 to i32
618 %4 = bitcast i32 %tmp4.0.insert.ext to float
622 ; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
; Negated multiply-accumulate family (non-fused).

; -(a*b) - c  ->  vnmla.f16
626 define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
628 %0 = bitcast float %a.coerce to i32
629 %tmp.0.extract.trunc = trunc i32 %0 to i16
630 %1 = bitcast i16 %tmp.0.extract.trunc to half
631 %2 = bitcast float %b.coerce to i32
632 %tmp1.0.extract.trunc = trunc i32 %2 to i16
633 %3 = bitcast i16 %tmp1.0.extract.trunc to half
634 %4 = bitcast float %c.coerce to i32
635 %tmp2.0.extract.trunc = trunc i32 %4 to i16
636 %5 = bitcast i16 %tmp2.0.extract.trunc to half
637 %add = fmul half %1, %3
638 %add2 = fsub half -0.000000e+00, %add
639 %add3 = fsub half %add2, %5
640 %6 = bitcast half %add3 to i16
641 %tmp4.0.insert.ext = zext i16 %6 to i32
642 %7 = bitcast i32 %tmp4.0.insert.ext to float
645 ; CHECK-LABEL: VNMLA:
646 ; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
647 ; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
; a*b - c  ->  vnmls.f16
651 define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
653 %0 = bitcast float %a.coerce to i32
654 %tmp.0.extract.trunc = trunc i32 %0 to i16
655 %1 = bitcast i16 %tmp.0.extract.trunc to half
656 %2 = bitcast float %b.coerce to i32
657 %tmp1.0.extract.trunc = trunc i32 %2 to i16
658 %3 = bitcast i16 %tmp1.0.extract.trunc to half
659 %4 = bitcast float %c.coerce to i32
660 %tmp2.0.extract.trunc = trunc i32 %4 to i16
661 %5 = bitcast i16 %tmp2.0.extract.trunc to half
662 %add = fmul half %1, %3
663 %add2 = fsub half %add, %5
664 %6 = bitcast half %add2 to i16
665 %tmp4.0.insert.ext = zext i16 %6 to i32
666 %7 = bitcast i32 %tmp4.0.insert.ext to float
669 ; CHECK-LABEL: VNMLS:
670 ; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
671 ; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
; -(a*b)  ->  vnmul.f16
675 define float @NMul(float %a.coerce, float %b.coerce) {
677 %0 = bitcast float %a.coerce to i32
678 %tmp.0.extract.trunc = trunc i32 %0 to i16
679 %1 = bitcast i16 %tmp.0.extract.trunc to half
680 %2 = bitcast float %b.coerce to i32
681 %tmp1.0.extract.trunc = trunc i32 %2 to i16
682 %3 = bitcast i16 %tmp1.0.extract.trunc to half
683 %add = fmul half %1, %3
684 %add2 = fsub half -0.0, %add
685 %4 = bitcast half %add2 to i16
686 %tmp4.0.insert.ext = zext i16 %4 to i32
687 %5 = bitcast i32 %tmp4.0.insert.ext to float
691 ; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
; select-on-fcmp lowering: fullfp16 uses the VSEL family directly on f16;
; the FP16-conversion-only configs widen to f32 and use predicated VMOV
; (A32 predication vs a T32 IT block).

; oeq  ->  vseleq
704 define half @select_cc1(half* %a0) {
705 %1 = load half, half* %a0
706 %2 = fcmp nsz oeq half %1, 0xH0001
707 %3 = select i1 %2, half 0xHC000, half 0xH0002
710 ; CHECK-LABEL: select_cc1:
712 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
713 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
714 ; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}
716 ; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
717 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
718 ; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}
720 ; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
721 ; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
722 ; CHECK-SOFTFP-FP16-T32: it eq
723 ; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
726 ; FIXME: more tests need to be added for VSELGE and VSELGT.
727 ; That is, more combinations of immediate operands that can or can't
728 ; be encoded as an FP16 immediate need to be added here.
; oge  ->  vselge (raising compare, hence vcmpe)
731 define half @select_cc_ge1(half* %a0) {
732 %1 = load half, half* %a0
733 %2 = fcmp nsz oge half %1, 0xH0001
734 %3 = select i1 %2, half 0xHC000, half 0xH0002
737 ; CHECK-LABEL: select_cc_ge1:
739 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
740 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
741 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
743 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
744 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
745 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}
747 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
748 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
749 ; CHECK-SOFTFP-FP16-T32-NEXT: it ge
750 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
; ole  ->  vselge with swapped compare operands
753 define half @select_cc_ge2(half* %a0) {
754 %1 = load half, half* %a0
755 %2 = fcmp nsz ole half %1, 0xH0001
756 %3 = select i1 %2, half 0xHC000, half 0xH0002
759 ; CHECK-LABEL: select_cc_ge2:
761 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
762 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
763 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
765 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
766 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
767 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}
769 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
770 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
771 ; CHECK-SOFTFP-FP16-T32-NEXT: it ls
772 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
; ugt  ->  vselge, swapped operands (unordered handled via condition choice)
775 define half @select_cc_ge3(half* %a0) {
776 %1 = load half, half* %a0
777 %2 = fcmp nsz ugt half %1, 0xH0001
778 %3 = select i1 %2, half 0xHC000, half 0xH0002
781 ; CHECK-LABEL: select_cc_ge3:
783 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
784 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
785 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
787 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
788 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
789 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
791 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
792 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
793 ; CHECK-SOFTFP-FP16-T32-NEXT: it hi
794 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
; ult  ->  vselge
797 define half @select_cc_ge4(half* %a0) {
798 %1 = load half, half* %a0
799 %2 = fcmp nsz ult half %1, 0xH0001
800 %3 = select i1 %2, half 0xHC000, half 0xH0002
803 ; CHECK-LABEL: select_cc_ge4:
805 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
806 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
807 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
809 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
810 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
811 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
813 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
814 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
815 ; CHECK-SOFTFP-FP16-T32-NEXT: it lt
816 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
; VSELGT coverage: each of ogt/uge/ule/olt maps to vselgt.f16, with the
; vcmpe operand order chosen to express the predicate.

; ogt  ->  vselgt
820 define half @select_cc_gt1(half* %a0) {
821 %1 = load half, half* %a0
822 %2 = fcmp nsz ogt half %1, 0xH0001
823 %3 = select i1 %2, half 0xHC000, half 0xH0002
826 ; CHECK-LABEL: select_cc_gt1:
828 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
829 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
830 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
832 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
833 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
834 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
836 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
837 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
838 ; CHECK-SOFTFP-FP16-T32-NEXT: it gt
839 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
; uge  ->  vselgt, swapped compare operands
842 define half @select_cc_gt2(half* %a0) {
843 %1 = load half, half* %a0
844 %2 = fcmp nsz uge half %1, 0xH0001
845 %3 = select i1 %2, half 0xHC000, half 0xH0002
848 ; CHECK-LABEL: select_cc_gt2:
850 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
851 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
852 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
854 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
855 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
856 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
858 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
859 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
860 ; CHECK-SOFTFP-FP16-T32-NEXT: it pl
861 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
; ule  ->  vselgt
864 define half @select_cc_gt3(half* %a0) {
865 %1 = load half, half* %a0
866 %2 = fcmp nsz ule half %1, 0xH0001
867 %3 = select i1 %2, half 0xHC000, half 0xH0002
870 ; CHECK-LABEL: select_cc_gt3:
872 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
873 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
874 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
876 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
877 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
878 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}}
880 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
881 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
882 ; CHECK-SOFTFP-FP16-T32-NEXT: it le
883 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}}
; olt  ->  vselgt, swapped compare operands
886 define half @select_cc_gt4(half* %a0) {
887 %1 = load half, half* %a0
888 %2 = fcmp nsz olt half %1, 0xH0001
889 %3 = select i1 %2, half 0xHC000, half 0xH0002
892 ; CHECK-LABEL: select_cc_gt4:
894 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
895 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
896 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
898 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
899 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
900 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
902 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
903 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
904 ; CHECK-SOFTFP-FP16-T32-NEXT: it mi
905 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
; ueq select: unordered-or-equal needs two conditional selects (eq then vs),
; i.e. a vseleq followed by a vselvs with fullfp16, or two predicated vmovs
; in the widened configs. Also exercises vldr.16 constant-pool loads and the
; f16 VMOV immediate for -2.0.
909 define float @select_cc4(float %a.coerce) {
911 %0 = bitcast float %a.coerce to i32
912 %tmp.0.extract.trunc = trunc i32 %0 to i16
913 %1 = bitcast i16 %tmp.0.extract.trunc to half
915 %2 = fcmp nsz ueq half %1, 0xH0001
916 %3 = select i1 %2, half 0xHC000, half 0xH0002
918 %4 = bitcast half %3 to i16
919 %tmp4.0.insert.ext = zext i16 %4 to i32
920 %5 = bitcast i32 %tmp4.0.insert.ext to float
923 ; CHECK-LABEL: select_cc4:
925 ; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
926 ; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
927 ; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
928 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
929 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
930 ; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
931 ; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]
933 ; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
934 ; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
935 ; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
936 ; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
937 ; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
938 ; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
939 ; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
940 ; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
941 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
942 ; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]
944 ; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
945 ; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
946 ; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
947 ; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
948 ; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
949 ; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
950 ; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
951 ; CHECK-SOFTFP-FP16-T32: it eq
952 ; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
953 ; CHECK-SOFTFP-FP16-T32: it vs
954 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
955 ; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
; fsub on half, same configuration matrix as Add/Mul: libcalls when soft,
; f32 widening with vcvtb otherwise, native vsub.f16 with fullfp16.
961 define float @Sub(float %a.coerce, float %b.coerce) {
963 %0 = bitcast float %a.coerce to i32
964 %tmp.0.extract.trunc = trunc i32 %0 to i16
965 %1 = bitcast i16 %tmp.0.extract.trunc to half
966 %2 = bitcast float %b.coerce to i32
967 %tmp1.0.extract.trunc = trunc i32 %2 to i16
968 %3 = bitcast i16 %tmp1.0.extract.trunc to half
969 %add = fsub half %1, %3
970 %4 = bitcast half %add to i16
971 %tmp4.0.insert.ext = zext i16 %4 to i32
972 %5 = bitcast i32 %tmp4.0.insert.ext to float
977 ; CHECK-SOFT: bl __aeabi_h2f
978 ; CHECK-SOFT: bl __aeabi_h2f
979 ; CHECK-SOFT: bl __aeabi_fsub
980 ; CHECK-SOFT: bl __aeabi_f2h
982 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
983 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
984 ; CHECK-SOFTFP-VFP3: vsub.f32
985 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
987 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
988 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
989 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
990 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
991 ; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
992 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
993 ; CHECK-SOFTFP-FP16: vmov r0, s0
995 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
996 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
997 ; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
998 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
1000 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
1001 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
1002 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
1003 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
1004 ; CHECK-HARDFP-VFP3: vsub.f32
1005 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
1006 ; CHECK-HARDFP-VFP3: vmov s0, r0
1008 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
1009 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
1010 ; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
1011 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
1013 ; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
1016 ; Check for VSTRH with a FCONSTH, this checks that addressing mode
1017 ; AddrMode5FP16 is supported.
; Stores/reloads an f16 constant (0xH3C00 = 1.0) through a stack slot using
; volatile accesses, so the stores cannot be optimised away and vstr.16 /
; vldr.16 with an [sp, #imm] operand must be emitted.
1018 define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
1020 %S = alloca half, align 2
1021 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
1022 %0 = bitcast i16 %tmp.0.extract.trunc to half
1023 %S.0.S.0..sroa_cast = bitcast half* %S to i8*
1024 store volatile half 0xH3C00, half* %S, align 2
1025 %S.0.S.0. = load volatile half, half* %S, align 2
1026 %add = fadd half %S.0.S.0., %0
1027 %1 = bitcast half %add to i16
1028 %tmp2.0.insert.ext = zext i16 %1 to i32
1029 ret i32 %tmp2.0.insert.ext
1031 ; CHECK-LABEL: ThumbAddrMode5FP16
1033 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
1034 ; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
1035 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
1036 ; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
1037 ; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
1040 ; Test function calls to check store/load reg to/from stack
; NOTE(review): the "define" line for this function (presumably @fn1, per
; the SPILL-RELOAD label below) is missing from this chunk — the body
; fragment below calls external @fn2/@fn3 and the checks verify an f16
; value is spilled (vstr.16) before the call and reloaded (vldr.16) after,
; under -O0 (see the SPILL-RELOAD RUN lines at the top of the file).
1043 %coerce = alloca half, align 2
1044 %tmp2 = alloca i32, align 4
1045 store half 0xH7C00, half* %coerce, align 2
1046 %0 = load i32, i32* %tmp2, align 4
1047 %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
1048 store half 0xH7C00, half* %coerce, align 2
1049 %1 = load i32, i32* %tmp2, align 4
1050 %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
1053 ; CHECK-SPILL-RELOAD-LABEL: fn1:
1054 ; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
1055 ; CHECK-SPILL-RELOAD: bl fn2
1056 ; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
; External variadic callees used by the spill/reload test above.
1059 declare dso_local i32 @fn2(...)
1060 declare dso_local i32 @fn3(...)