2 ; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
3 ; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
6 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
7 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
8 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
10 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
11 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
12 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
15 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
16 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
19 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
20 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
21 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
23 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
24 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
25 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
28 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
29 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
31 ; TODO: we can't pass half-precision arguments as "half" types yet. We do
32 ; that for the time being by passing "float %f.coerce" and the necessary
33 ; bitconverts/truncates. But when we can pass half types, we do want to use
; RetValBug: float-in/float-out function whose body is not visible in this
; excerpt; only its label and one FullFP16 hard-float expectation (some line
; ending in " lr") are shown.
36 define float @RetValBug(float %A.coerce) {
39 ; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
40 ; any operands) when FullFP16 is enabled.
42 ; CHECK-LABEL: RetValBug:
43 ; CHECK-HARDFP-FULLFP16: {{.*}} lr
; Add: half-precision addition on float-coerced arguments. The low 16 bits of
; each argument are reinterpreted as half (bitcast -> trunc -> bitcast), the
; halves are added with fadd, and the 16 result bits are zero-extended and
; bitcast back to float.
; Expected lowering, per run configuration:
;   soft float / VFP3 : __aeabi_h2f x2, f32 add (libcall or vadd.f32), __aeabi_f2h
;   vfp4 (FP16)       : vcvtb.f32.f16 x2, vadd.f32, vcvtb.f16.f32
;   fullfp16          : a single vadd.f16 (plus vmov.f16 moves for the soft-float ABI)
49 define float @Add(float %a.coerce, float %b.coerce) {
51 %0 = bitcast float %a.coerce to i32
52 %tmp.0.extract.trunc = trunc i32 %0 to i16
53 %1 = bitcast i16 %tmp.0.extract.trunc to half
54 %2 = bitcast float %b.coerce to i32
55 %tmp1.0.extract.trunc = trunc i32 %2 to i16
56 %3 = bitcast i16 %tmp1.0.extract.trunc to half
57 %add = fadd half %1, %3
58 %4 = bitcast half %add to i16
59 %tmp4.0.insert.ext = zext i16 %4 to i32
60 %5 = bitcast i32 %tmp4.0.insert.ext to float
65 ; CHECK-SOFT: bl __aeabi_h2f
66 ; CHECK-SOFT: bl __aeabi_h2f
67 ; CHECK-SOFT: bl __aeabi_fadd
68 ; CHECK-SOFT: bl __aeabi_f2h
70 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
71 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
72 ; CHECK-SOFTFP-VFP3: vadd.f32
73 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
75 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
76 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
77 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
78 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
79 ; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
80 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
81 ; CHECK-SOFTFP-FP16: vmov r0, s0
83 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
84 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
85 ; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
86 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
88 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
89 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
90 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
91 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
92 ; CHECK-HARDFP-VFP3: vadd.f32
93 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
94 ; CHECK-HARDFP-VFP3: vmov s0, r0
96 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
97 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
98 ; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
99 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
101 ; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
; VCMP1: unordered-or-not-equal compare (fcmp une) of two halves extracted
; from float-coerced arguments. Soft float is expected to call
; __aeabi_fcmpeq; VFP3 and vfp4 compare in f32 after widening; fullfp16
; compares directly with vcmp.f16. For the hard-float fullfp16 runs the
; negative patterns require that no vmov.f16 from r0/r1 appears before the
; compare.
105 define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
107 %0 = bitcast float %F.coerce to i32
108 %tmp.0.extract.trunc = trunc i32 %0 to i16
109 %1 = bitcast i16 %tmp.0.extract.trunc to half
110 %2 = bitcast float %G.coerce to i32
111 %tmp1.0.extract.trunc = trunc i32 %2 to i16
112 %3 = bitcast i16 %tmp1.0.extract.trunc to half
113 %cmp = fcmp une half %1, %3
116 ; CHECK-LABEL: VCMP1:
118 ; CHECK-SOFT: bl __aeabi_fcmpeq
120 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
121 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
122 ; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}
124 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
125 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
126 ; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}
128 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
129 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
130 ; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]
132 ; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
133 ; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
134 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
; VCMP2: fcmp une of a half (extracted from a float-coerced argument) against
; the constant 0.0. The VFP-based expectations use the compare-with-zero
; form ("#0"); soft float is expected to call __aeabi_fcmpeq.
138 define zeroext i1 @VCMP2(float %F.coerce) {
140 %0 = bitcast float %F.coerce to i32
141 %tmp.0.extract.trunc = trunc i32 %0 to i16
142 %1 = bitcast i16 %tmp.0.extract.trunc to half
143 %cmp = fcmp une half %1, 0.000000e+00
146 ; CHECK-LABEL: VCMP2:
148 ; CHECK-SOFT: bl __aeabi_fcmpeq
149 ; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
150 ; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
151 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
; VCMPE1: ordered less-than compare (fcmp olt) of a half against 0.0, with
; the i1 result zero-extended to i32. The VFP-based expectations use vcmpe
; with the "#0" operand; soft float is expected to call __aeabi_fcmplt.
155 define i32 @VCMPE1(float %F.coerce) {
157 %0 = bitcast float %F.coerce to i32
158 %tmp.0.extract.trunc = trunc i32 %0 to i16
159 %1 = bitcast i16 %tmp.0.extract.trunc to half
160 %tmp = fcmp olt half %1, 0.000000e+00
161 %tmp1 = zext i1 %tmp to i32
164 ; CHECK-LABEL: VCMPE1:
166 ; CHECK-SOFT: bl __aeabi_fcmplt
167 ; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
168 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
169 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
; VCMPE2: ordered less-than compare (fcmp olt) between two halves extracted
; from float-coerced arguments, result zero-extended to i32. Register-register
; vcmpe is expected (f32 for vfp4, f16 for fullfp16); soft float is expected
; to call __aeabi_fcmplt.
172 define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
174 %0 = bitcast float %F.coerce to i32
175 %tmp.0.extract.trunc = trunc i32 %0 to i16
176 %1 = bitcast i16 %tmp.0.extract.trunc to half
177 %2 = bitcast float %G.coerce to i32
178 %tmp.1.extract.trunc = trunc i32 %2 to i16
179 %3 = bitcast i16 %tmp.1.extract.trunc to half
180 %tmp = fcmp olt half %1, %3
181 %tmp1 = zext i1 %tmp to i32
184 ; CHECK-LABEL: VCMPE2:
186 ; CHECK-SOFT: bl __aeabi_fcmplt
187 ; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
188 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
189 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
192 ; Test lowering of BR_CC
; VCMPBRCC: loads a half from a stack slot, compares it (fcmp ole with
; nnan/ninf/nsz flags) against 0xH6800 (2048.0 in half precision) and branches
; on the result. Any initialization of %f and the branch targets are not
; visible in this excerpt.
193 define hidden i32 @VCMPBRCC() {
195 %f = alloca half, align 2
199 %0 = load half, half* %f, align 2
200 %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
201 br i1 %cmp, label %for.body, label %for.end
209 ; CHECK-LABEL: VCMPBRCC:
211 ; CHECK-SOFT: bl __aeabi_fcmpgt
212 ; CHECK-SOFT: cmp r0, #0
214 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
215 ; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
216 ; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr
218 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
219 ; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
222 ; 5. VCVT (between floating-point and fixed-point)
223 ; Only assembly/disassembly support
225 ; 6. VCVT (between floating-point and integer, both directions)
; fptosi: converts a half (low 16 bits of the i32 argument) to a signed i32.
; Hard-float fullfp16 runs expect three consecutive instructions: move the
; bits into s0, vcvt.s32.f16 in place, move the result back to r0.
226 define i32 @fptosi(i32 %A.coerce) {
228 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
229 %0 = bitcast i16 %tmp.0.extract.trunc to half
230 %conv = fptosi half %0 to i32
233 ; CHECK-LABEL: fptosi:
235 ; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
236 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
237 ; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
; fptoui: converts a half (low 16 bits of the i32 argument) to an unsigned
; i32. Hard-float fullfp16 runs expect vcvt.u32.f16 immediately followed by
; the move of the result to r0.
240 define i32 @fptoui(i32 %A.coerce) {
242 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
243 %0 = bitcast i16 %tmp.0.extract.trunc to half
244 %conv = fptoui half %0 to i32
247 ; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
248 ; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
; UintToH: converts the unsigned i32 %a to half (uitofp) and packs the 16
; result bits, zero-extended, into a float-typed value. %b is not used by
; any visible instruction. Hard-float fullfp16 runs expect a move into s0
; followed directly by vcvt.f16.u32.
251 define float @UintToH(i32 %a, i32 %b) {
253 %0 = uitofp i32 %a to half
254 %1 = bitcast half %0 to i16
255 %tmp0.insert.ext = zext i16 %1 to i32
256 %2 = bitcast i32 %tmp0.insert.ext to float
259 ; CHECK-LABEL: UintToH:
261 ; CHECK-HARDFP-FULLFP16: vmov s0, r0
262 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
; SintToH: signed counterpart of UintToH. Converts %a to half with sitofp and
; packs the 16 result bits, zero-extended, into a float-typed value. %b is
; not used by any visible instruction. Hard-float fullfp16 runs expect a move
; into s0 followed directly by vcvt.f16.s32.
265 define float @SintToH(i32 %a, i32 %b) {
267 %0 = sitofp i32 %a to half
268 %1 = bitcast half %0 to i16
269 %tmp0.insert.ext = zext i16 %1 to i32
270 %2 = bitcast i32 %tmp0.insert.ext to float
273 ; CHECK-LABEL: SintToH:
275 ; CHECK-HARDFP-FULLFP16: vmov s0, r0
276 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
; f2h: truncates a float to half (fptrunc) and returns the 16 result bits
; zero-extended to i32. Hard-float fullfp16 runs expect vcvtb.f16.f32.
279 define i32 @f2h(float %f) {
281 %conv = fptrunc float %f to half
282 %0 = bitcast half %conv to i16
283 %tmp.0.insert.ext = zext i16 %0 to i32
284 ret i32 %tmp.0.insert.ext
287 ; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
; h2f: extends a half (low 16 bits of the i32 argument) to float (fpext).
; Hard-float fullfp16 runs expect vcvtb.f32.f16.
290 define float @h2f(i32 %h.coerce) {
292 %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
293 %0 = bitcast i16 %tmp.0.extract.trunc to half
294 %conv = fpext half %0 to float
298 ; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
; h2d: extends a half (low 16 bits of the i32 argument) to double (fpext).
; Hard-float fullfp16 runs expect vcvtb.f64.f16 from an s-register into a
; d-register.
302 define double @h2d(i32 %h.coerce) {
304 %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
305 %0 = bitcast i16 %tmp.0.extract.trunc to half
306 %conv = fpext half %0 to double
310 ; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
; d2h: truncates a double to half (fptrunc) and returns the 16 result bits
; zero-extended to i32. Hard-float fullfp16 runs expect vcvtb.f16.f64 from a
; d-register into s0.
313 define i32 @d2h(double %d) {
315 %conv = fptrunc double %d to half
316 %0 = bitcast half %conv to i16
317 %tmp.0.insert.ext = zext i16 %0 to i32
318 ret i32 %tmp.0.insert.ext
321 ; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
; Div: half-precision division on float-coerced arguments. Same unpack /
; operate / repack shape as the other binary-operator tests in this file,
; with fdiv as the operation (the result value is named %add in the IR).
; Expected lowering, per run configuration:
;   soft float / VFP3 : __aeabi_h2f x2, f32 divide (libcall or vdiv.f32), __aeabi_f2h
;   vfp4 (FP16)       : vcvtb.f32.f16 x2, vdiv.f32, vcvtb.f16.f32
;   fullfp16          : a single vdiv.f16 (plus vmov.f16 moves for the soft-float ABI)
332 define float @Div(float %a.coerce, float %b.coerce) {
334 %0 = bitcast float %a.coerce to i32
335 %tmp.0.extract.trunc = trunc i32 %0 to i16
336 %1 = bitcast i16 %tmp.0.extract.trunc to half
337 %2 = bitcast float %b.coerce to i32
338 %tmp1.0.extract.trunc = trunc i32 %2 to i16
339 %3 = bitcast i16 %tmp1.0.extract.trunc to half
340 %add = fdiv half %1, %3
341 %4 = bitcast half %add to i16
342 %tmp4.0.insert.ext = zext i16 %4 to i32
343 %5 = bitcast i32 %tmp4.0.insert.ext to float
348 ; CHECK-SOFT: bl __aeabi_h2f
349 ; CHECK-SOFT: bl __aeabi_h2f
350 ; CHECK-SOFT: bl __aeabi_fdiv
351 ; CHECK-SOFT: bl __aeabi_f2h
353 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
354 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
355 ; CHECK-SOFTFP-VFP3: vdiv.f32
356 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
358 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
359 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
360 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
361 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
362 ; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
363 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
364 ; CHECK-SOFTFP-FP16: vmov r0, s0
366 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
367 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
368 ; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
369 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
371 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
372 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
373 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
374 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
375 ; CHECK-HARDFP-VFP3: vdiv.f32
376 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
377 ; CHECK-HARDFP-VFP3: vmov s0, r0
379 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
380 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
381 ; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
382 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
384 ; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
; VFMA: computes (a * b) + c in half precision as separate fmul and fadd
; instructions. Only the hard-float fullfp16 runs with -fp-contract=fast
; carry expectations here: the pair is expected to fuse into vfma.f16
; accumulating in s2, followed directly by a move of s2 to s0.
388 define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
390 %0 = bitcast float %a.coerce to i32
391 %tmp.0.extract.trunc = trunc i32 %0 to i16
392 %1 = bitcast i16 %tmp.0.extract.trunc to half
393 %2 = bitcast float %b.coerce to i32
394 %tmp1.0.extract.trunc = trunc i32 %2 to i16
395 %3 = bitcast i16 %tmp1.0.extract.trunc to half
396 %4 = bitcast float %c.coerce to i32
397 %tmp2.0.extract.trunc = trunc i32 %4 to i16
398 %5 = bitcast i16 %tmp2.0.extract.trunc to half
399 %mul = fmul half %1, %3
400 %add = fadd half %mul, %5
401 %6 = bitcast half %add to i16
402 %tmp4.0.insert.ext = zext i16 %6 to i32
403 %7 = bitcast i32 %tmp4.0.insert.ext to float
407 ; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
408 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
; VFMS: computes c - (a * b) in half precision as separate fmul and fsub
; instructions. With -fp-contract=fast on hard-float fullfp16 the pair is
; expected to fuse into vfms.f16 accumulating in s2, followed directly by a
; move of s2 to s0.
412 define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
414 %0 = bitcast float %a.coerce to i32
415 %tmp.0.extract.trunc = trunc i32 %0 to i16
416 %1 = bitcast i16 %tmp.0.extract.trunc to half
417 %2 = bitcast float %b.coerce to i32
418 %tmp1.0.extract.trunc = trunc i32 %2 to i16
419 %3 = bitcast i16 %tmp1.0.extract.trunc to half
420 %4 = bitcast float %c.coerce to i32
421 %tmp2.0.extract.trunc = trunc i32 %4 to i16
422 %5 = bitcast i16 %tmp2.0.extract.trunc to half
423 %mul = fmul half %1, %3
424 %sub = fsub half %5, %mul
425 %6 = bitcast half %sub to i16
426 %tmp4.0.insert.ext = zext i16 %6 to i32
427 %7 = bitcast i32 %tmp4.0.insert.ext to float
431 ; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
432 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
; VFNMA: computes (-(a * b)) - c in half precision; the negation is written
; as "fsub -0.0, %mul". With -fp-contract=fast on hard-float fullfp16 the
; sequence is expected to fuse into vfnma.f16 accumulating in s2, followed
; directly by a move of s2 to s0.
436 define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
438 %0 = bitcast float %a.coerce to i32
439 %tmp.0.extract.trunc = trunc i32 %0 to i16
440 %1 = bitcast i16 %tmp.0.extract.trunc to half
441 %2 = bitcast float %b.coerce to i32
442 %tmp1.0.extract.trunc = trunc i32 %2 to i16
443 %3 = bitcast i16 %tmp1.0.extract.trunc to half
444 %4 = bitcast float %c.coerce to i32
445 %tmp2.0.extract.trunc = trunc i32 %4 to i16
446 %5 = bitcast i16 %tmp2.0.extract.trunc to half
447 %mul = fmul half %1, %3
448 %sub = fsub half -0.0, %mul
449 %sub2 = fsub half %sub, %5
450 %6 = bitcast half %sub2 to i16
451 %tmp4.0.insert.ext = zext i16 %6 to i32
452 %7 = bitcast i32 %tmp4.0.insert.ext to float
455 ; CHECK-LABEL: VFNMA:
456 ; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
457 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
; VFNMS: computes (a * b) - c in half precision as separate fmul and fsub
; instructions. With -fp-contract=fast on hard-float fullfp16 the pair is
; expected to fuse into vfnms.f16 accumulating in s2, followed directly by a
; move of s2 to s0.
461 define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
463 %0 = bitcast float %a.coerce to i32
464 %tmp.0.extract.trunc = trunc i32 %0 to i16
465 %1 = bitcast i16 %tmp.0.extract.trunc to half
466 %2 = bitcast float %b.coerce to i32
467 %tmp1.0.extract.trunc = trunc i32 %2 to i16
468 %3 = bitcast i16 %tmp1.0.extract.trunc to half
469 %4 = bitcast float %c.coerce to i32
470 %tmp2.0.extract.trunc = trunc i32 %4 to i16
471 %5 = bitcast i16 %tmp2.0.extract.trunc to half
472 %mul = fmul half %1, %3
473 %sub2 = fsub half %mul, %5
474 %6 = bitcast half %sub2 to i16
475 %tmp4.0.insert.ext = zext i16 %6 to i32
476 %7 = bitcast i32 %tmp4.0.insert.ext to float
479 ; CHECK-LABEL: VFNMS:
480 ; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
481 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
486 ; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll
; VMLA: computes c + (a * b) in half precision. Same arithmetic as VFMA with
; the fadd operands swapped, but the expectations here belong to the
; hard-float fullfp16 runs without -fp-contract=fast: vmla.f16 accumulating
; in s2, followed directly by a move of s2 to s0.
489 define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
491 %0 = bitcast float %a.coerce to i32
492 %tmp.0.extract.trunc = trunc i32 %0 to i16
493 %1 = bitcast i16 %tmp.0.extract.trunc to half
494 %2 = bitcast float %b.coerce to i32
495 %tmp1.0.extract.trunc = trunc i32 %2 to i16
496 %3 = bitcast i16 %tmp1.0.extract.trunc to half
497 %4 = bitcast float %c.coerce to i32
498 %tmp2.0.extract.trunc = trunc i32 %4 to i16
499 %5 = bitcast i16 %tmp2.0.extract.trunc to half
500 %mul = fmul half %1, %3
501 %add = fadd half %5, %mul
502 %6 = bitcast half %add to i16
503 %tmp4.0.insert.ext = zext i16 %6 to i32
504 %7 = bitcast i32 %tmp4.0.insert.ext to float
508 ; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
509 ; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
; VMLS: computes c - (a * b) in half precision (the fsub result is named
; %add in the IR). Hard-float fullfp16 runs without -fp-contract=fast expect
; vmls.f16 accumulating in s2, followed directly by a move of s2 to s0.
513 define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
515 %0 = bitcast float %a.coerce to i32
516 %tmp.0.extract.trunc = trunc i32 %0 to i16
517 %1 = bitcast i16 %tmp.0.extract.trunc to half
518 %2 = bitcast float %b.coerce to i32
519 %tmp1.0.extract.trunc = trunc i32 %2 to i16
520 %3 = bitcast i16 %tmp1.0.extract.trunc to half
521 %4 = bitcast float %c.coerce to i32
522 %tmp2.0.extract.trunc = trunc i32 %4 to i16
523 %5 = bitcast i16 %tmp2.0.extract.trunc to half
524 %mul = fmul half %1, %3
525 %add = fsub half %5, %mul
526 %6 = bitcast half %add to i16
527 %tmp4.0.insert.ext = zext i16 %6 to i32
528 %7 = bitcast i32 %tmp4.0.insert.ext to float
532 ; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
533 ; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
536 ; TODO: fix immediates.
537 ; 21. VMOV (between general-purpose register and half-precision register)
539 ; 22. VMOV (immediate)
; movi: adds the half constant 0xHC000 (-2.0) to a half taken from the low
; 16 bits of the argument and returns the result bits zero-extended to i32.
; Hard-float fullfp16 runs expect the constant to be materialized with an
; immediate vmov.f16.
540 define i32 @movi(i32 %a.coerce) {
542 %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
543 %0 = bitcast i16 %tmp.0.extract.trunc to half
544 %add = fadd half %0, 0xHC000
545 %1 = bitcast half %add to i16
546 %tmp2.0.insert.ext = zext i16 %1 to i32
547 ret i32 %tmp2.0.insert.ext
550 ; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
; Mul: half-precision multiplication on float-coerced arguments. Same unpack /
; operate / repack shape as the other binary-operator tests in this file,
; with fmul as the operation (the result value is named %add in the IR).
; Expected lowering, per run configuration:
;   soft float / VFP3 : __aeabi_h2f x2, f32 multiply (libcall or vmul.f32), __aeabi_f2h
;   vfp4 (FP16)       : vcvtb.f32.f16 x2, vmul.f32, vcvtb.f16.f32
;   fullfp16          : a single vmul.f16 (plus vmov.f16 moves for the soft-float ABI)
554 define float @Mul(float %a.coerce, float %b.coerce) {
556 %0 = bitcast float %a.coerce to i32
557 %tmp.0.extract.trunc = trunc i32 %0 to i16
558 %1 = bitcast i16 %tmp.0.extract.trunc to half
559 %2 = bitcast float %b.coerce to i32
560 %tmp1.0.extract.trunc = trunc i32 %2 to i16
561 %3 = bitcast i16 %tmp1.0.extract.trunc to half
562 %add = fmul half %1, %3
563 %4 = bitcast half %add to i16
564 %tmp4.0.insert.ext = zext i16 %4 to i32
565 %5 = bitcast i32 %tmp4.0.insert.ext to float
570 ; CHECK-SOFT: bl __aeabi_h2f
571 ; CHECK-SOFT: bl __aeabi_h2f
572 ; CHECK-SOFT: bl __aeabi_fmul
573 ; CHECK-SOFT: bl __aeabi_f2h
575 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
576 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
577 ; CHECK-SOFTFP-VFP3: vmul.f32
578 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
580 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
581 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
582 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
583 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
584 ; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
585 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
586 ; CHECK-SOFTFP-FP16: vmov r0, s0
588 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
589 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
590 ; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
591 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
593 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
594 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
595 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
596 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
597 ; CHECK-HARDFP-VFP3: vmul.f32
598 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
599 ; CHECK-HARDFP-VFP3: vmov s0, r0
601 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
602 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
603 ; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
604 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
606 ; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
; Neg: negates a half, written as "fsub -0.0, x", and repacks the result bits
; into a float-typed value. Hard-float fullfp16 runs expect a single
; vneg.f16.
610 define float @Neg(float %a.coerce) {
612 %0 = bitcast float %a.coerce to i32
613 %tmp.0.extract.trunc = trunc i32 %0 to i16
614 %1 = bitcast i16 %tmp.0.extract.trunc to half
615 %2 = fsub half -0.000000e+00, %1
616 %3 = bitcast half %2 to i16
617 %tmp4.0.insert.ext = zext i16 %3 to i32
618 %4 = bitcast i32 %tmp4.0.insert.ext to float
622 ; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
; VNMLA: computes (-(a * b)) - c in half precision (intermediate values are
; named %add* in the IR although they are a multiply and two subtractions).
; Hard-float fullfp16 runs without -fp-contract=fast expect vnmla.f16
; accumulating in s2, then a move of s2 to s0.
626 define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
628 %0 = bitcast float %a.coerce to i32
629 %tmp.0.extract.trunc = trunc i32 %0 to i16
630 %1 = bitcast i16 %tmp.0.extract.trunc to half
631 %2 = bitcast float %b.coerce to i32
632 %tmp1.0.extract.trunc = trunc i32 %2 to i16
633 %3 = bitcast i16 %tmp1.0.extract.trunc to half
634 %4 = bitcast float %c.coerce to i32
635 %tmp2.0.extract.trunc = trunc i32 %4 to i16
636 %5 = bitcast i16 %tmp2.0.extract.trunc to half
637 %add = fmul half %1, %3
638 %add2 = fsub half -0.000000e+00, %add
639 %add3 = fsub half %add2, %5
640 %6 = bitcast half %add3 to i16
641 %tmp4.0.insert.ext = zext i16 %6 to i32
642 %7 = bitcast i32 %tmp4.0.insert.ext to float
645 ; CHECK-LABEL: VNMLA:
646 ; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
647 ; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
; VNMLS: computes (a * b) - c in half precision (values are named %add* in
; the IR). Hard-float fullfp16 runs without -fp-contract=fast expect
; vnmls.f16 accumulating in s2, then a move of s2 to s0.
651 define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
653 %0 = bitcast float %a.coerce to i32
654 %tmp.0.extract.trunc = trunc i32 %0 to i16
655 %1 = bitcast i16 %tmp.0.extract.trunc to half
656 %2 = bitcast float %b.coerce to i32
657 %tmp1.0.extract.trunc = trunc i32 %2 to i16
658 %3 = bitcast i16 %tmp1.0.extract.trunc to half
659 %4 = bitcast float %c.coerce to i32
660 %tmp2.0.extract.trunc = trunc i32 %4 to i16
661 %5 = bitcast i16 %tmp2.0.extract.trunc to half
662 %add = fmul half %1, %3
663 %add2 = fsub half %add, %5
664 %6 = bitcast half %add2 to i16
665 %tmp4.0.insert.ext = zext i16 %6 to i32
666 %7 = bitcast i32 %tmp4.0.insert.ext to float
669 ; CHECK-LABEL: VNMLS:
670 ; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
671 ; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
; NMul: computes -(a * b) in half precision; the negation is written as
; "fsub -0.0, %add", where %add is the product. Hard-float fullfp16 runs
; expect the multiply and negate to combine into a single vnmul.f16.
675 define float @NMul(float %a.coerce, float %b.coerce) {
677 %0 = bitcast float %a.coerce to i32
678 %tmp.0.extract.trunc = trunc i32 %0 to i16
679 %1 = bitcast i16 %tmp.0.extract.trunc to half
680 %2 = bitcast float %b.coerce to i32
681 %tmp1.0.extract.trunc = trunc i32 %2 to i16
682 %3 = bitcast i16 %tmp1.0.extract.trunc to half
683 %add = fmul half %1, %3
684 %add2 = fsub half -0.0, %add
685 %4 = bitcast half %add2 to i16
686 %tmp4.0.insert.ext = zext i16 %4 to i32
687 %5 = bitcast i32 %tmp4.0.insert.ext to float
691 ; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
; select_cc1: selects between the half constants 0xHC000 and 0xH0002 on
; "fcmp nsz oeq half undef, 0xH0001". Hard-float fullfp16 runs expect
; vcmp.f16 + vmrs + vseleq.f16. The vfp4 soft-float-ABI runs expect an f32
; compare followed by a predicated vmoveq.f32, which on Thumb (T32) is
; preceded by an "it eq" instruction.
704 define half @select_cc1() {
705 %1 = fcmp nsz oeq half undef, 0xH0001
706 %2 = select i1 %1, half 0xHC000, half 0xH0002
709 ; CHECK-LABEL: select_cc1:
711 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s0
712 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
713 ; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}
715 ; CHECK-SOFTFP-FP16-A32: vcmp.f32 s0, s0
716 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
717 ; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}
719 ; CHECK-SOFTFP-FP16-T32: vcmp.f32 s0, s0
720 ; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
721 ; CHECK-SOFTFP-FP16-T32: it eq
722 ; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
725 ; FIXME: more tests need to be added for VSELGE and VSELGT.
726 ; That is, more combinations of immediate operands that can or can't
727 ; be encoded as an FP16 immediate need to be added here.
; select_cc_ge1: select on "fcmp nsz oge" between 0xHC000 and 0xH0002.
; Hard-float fullfp16 runs expect vcmpe.f16, vmrs, vselge.f16 back to back.
; The vfp4 soft-float-ABI runs expect vcmpe.f32, vmrs and a predicated
; vmovge.f32 (with "it ge" in front of it on Thumb).
730 define half @select_cc_ge1() {
731 %1 = fcmp nsz oge half undef, 0xH0001
732 %2 = select i1 %1, half 0xHC000, half 0xH0002
735 ; CHECK-LABEL: select_cc_ge1:
737 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
738 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
739 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
741 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
742 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
743 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}
745 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
746 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
747 ; CHECK-SOFTFP-FP16-T32-NEXT: it ge
748 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
; select_cc_ge2: select on "fcmp nsz ole" between 0xHC000 and 0xH0002.
; Hard-float fullfp16 runs still expect vselge.f16 (the patterns do not pin
; the operand order). The vfp4 soft-float-ABI runs expect a predicated
; vmovls.f32 (with "it ls" in front of it on Thumb).
751 define half @select_cc_ge2() {
752 %1 = fcmp nsz ole half undef, 0xH0001
753 %2 = select i1 %1, half 0xHC000, half 0xH0002
756 ; CHECK-LABEL: select_cc_ge2:
758 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
759 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
760 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
762 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
763 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
764 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}
766 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
767 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
768 ; CHECK-SOFTFP-FP16-T32-NEXT: it ls
769 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
; select_cc_ge3: select on "fcmp nsz ugt" between 0xHC000 and 0xH0002.
; Hard-float fullfp16 runs expect vselge.f16 (operand order is not pinned by
; the patterns). The vfp4 soft-float-ABI runs expect a predicated
; vmovhi.f32 (with "it hi" in front of it on Thumb).
772 define half @select_cc_ge3() {
773 %1 = fcmp nsz ugt half undef, 0xH0001
774 %2 = select i1 %1, half 0xHC000, half 0xH0002
777 ; CHECK-LABEL: select_cc_ge3:
779 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
780 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
781 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
783 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
784 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
785 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
787 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
788 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
789 ; CHECK-SOFTFP-FP16-T32-NEXT: it hi
790 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
; select_cc_ge4: select on "fcmp nsz ult" between 0xHC000 and 0xH0002.
; Hard-float fullfp16 runs expect vselge.f16 (operand order is not pinned by
; the patterns). The vfp4 soft-float-ABI runs expect a predicated
; vmovlt.f32 (with "it lt" in front of it on Thumb).
793 define half @select_cc_ge4() {
794 %1 = fcmp nsz ult half undef, 0xH0001
795 %2 = select i1 %1, half 0xHC000, half 0xH0002
798 ; CHECK-LABEL: select_cc_ge4:
800 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
801 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
802 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
804 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
805 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
806 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
808 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
809 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
810 ; CHECK-SOFTFP-FP16-T32-NEXT: it lt
811 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
; select_cc_gt1: select on "fcmp nsz ogt" between 0xHC000 and 0xH0002.
; Hard-float fullfp16 runs expect vcmpe.f16, vmrs, vselgt.f16 back to back.
; The vfp4 soft-float-ABI runs expect a predicated vmovgt.f32 (with "it gt"
; in front of it on Thumb).
815 define half @select_cc_gt1() {
816 %1 = fcmp nsz ogt half undef, 0xH0001
817 %2 = select i1 %1, half 0xHC000, half 0xH0002
820 ; CHECK-LABEL: select_cc_gt1:
822 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
823 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
824 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
826 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
827 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
828 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
830 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
831 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
832 ; CHECK-SOFTFP-FP16-T32-NEXT: it gt
833 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
; select_cc_gt2: select on "fcmp nsz uge" between 0xHC000 and 0xH0002.
; Hard-float fullfp16 runs expect vselgt.f16 (operand order is not pinned by
; the patterns). The vfp4 soft-float-ABI runs expect a predicated
; vmovpl.f32 (with "it pl" in front of it on Thumb).
836 define half @select_cc_gt2() {
837 %1 = fcmp nsz uge half undef, 0xH0001
838 %2 = select i1 %1, half 0xHC000, half 0xH0002
841 ; CHECK-LABEL: select_cc_gt2:
843 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
844 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
845 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
847 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
848 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
849 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
851 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
852 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
853 ; CHECK-SOFTFP-FP16-T32-NEXT: it pl
854 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
; select_cc_gt3: select on "fcmp nsz ule" between 0xHC000 and 0xH0002.
; Hard-float fullfp16 runs expect vselgt.f16 (operand order is not pinned by
; the patterns). The vfp4 soft-float-ABI runs expect a predicated
; vmovle.f32 (with "it le" in front of it on Thumb).
857 define half @select_cc_gt3() {
858 %1 = fcmp nsz ule half undef, 0xH0001
859 %2 = select i1 %1, half 0xHC000, half 0xH0002
862 ; CHECK-LABEL: select_cc_gt3:
864 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
865 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
866 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
868 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
869 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
870 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}}
872 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
873 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
874 ; CHECK-SOFTFP-FP16-T32-NEXT: it le
875 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}}
; select_cc_gt4: select on "fcmp nsz olt" between 0xHC000 and 0xH0002.
; Hard-float fullfp16 runs expect vselgt.f16 (operand order is not pinned by
; the patterns). The vfp4 soft-float-ABI runs expect a predicated
; vmovmi.f32 (with "it mi" in front of it on Thumb).
878 define half @select_cc_gt4() {
879 %1 = fcmp nsz olt half undef, 0xH0001
880 %2 = select i1 %1, half 0xHC000, half 0xH0002
883 ; CHECK-LABEL: select_cc_gt4:
885 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
886 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
887 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
889 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
890 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
891 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
893 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
894 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
895 ; CHECK-SOFTFP-FP16-T32-NEXT: it mi
896 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
; select_cc4: select on "fcmp nsz ueq" of a real argument (not undef) against
; 0xH0001, choosing between 0xHC000 (-2.0) and 0xH0002, then repacking the
; half result into a float-typed value.
; The expectations show ueq lowered as two conditional steps (eq, then vs):
;   hard-float fullfp16 : two constant-pool loads plus an immediate
;                         vmov.f16 #-2.0, vcmp.f16, vmrs, vseleq.f16, vselvs.f16
;   vfp4, ARM (A32)     : f32 compare, then vmoveq.f32 + vmovvs.f32
;   vfp4, Thumb (T32)   : same, with "it eq" / "it vs" before each move
900 define float @select_cc4(float %a.coerce) {
902 %0 = bitcast float %a.coerce to i32
903 %tmp.0.extract.trunc = trunc i32 %0 to i16
904 %1 = bitcast i16 %tmp.0.extract.trunc to half
906 %2 = fcmp nsz ueq half %1, 0xH0001
907 %3 = select i1 %2, half 0xHC000, half 0xH0002
909 %4 = bitcast half %3 to i16
910 %tmp4.0.insert.ext = zext i16 %4 to i32
911 %5 = bitcast i32 %tmp4.0.insert.ext to float
914 ; CHECK-LABEL: select_cc4:
916 ; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
917 ; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
918 ; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
919 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
920 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
921 ; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
922 ; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]
924 ; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
925 ; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
926 ; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
927 ; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
928 ; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
929 ; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
930 ; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
931 ; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
932 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
933 ; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]
935 ; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
936 ; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
937 ; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
938 ; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
939 ; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
940 ; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
941 ; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
942 ; CHECK-SOFTFP-FP16-T32: it eq
943 ; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
944 ; CHECK-SOFTFP-FP16-T32: it vs
945 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
946 ; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
; Sub: half-precision subtraction on float-coerced arguments. Same unpack /
; operate / repack shape as the other binary-operator tests in this file,
; with fsub as the operation (the result value is named %add in the IR).
; Expected lowering, per run configuration:
;   soft float / VFP3 : __aeabi_h2f x2, f32 subtract (libcall or vsub.f32), __aeabi_f2h
;   vfp4 (FP16)       : vcvtb.f32.f16 x2, vsub.f32, vcvtb.f16.f32
;   fullfp16          : a single vsub.f16 (plus vmov.f16 moves for the soft-float ABI)
952 define float @Sub(float %a.coerce, float %b.coerce) {
954 %0 = bitcast float %a.coerce to i32
955 %tmp.0.extract.trunc = trunc i32 %0 to i16
956 %1 = bitcast i16 %tmp.0.extract.trunc to half
957 %2 = bitcast float %b.coerce to i32
958 %tmp1.0.extract.trunc = trunc i32 %2 to i16
959 %3 = bitcast i16 %tmp1.0.extract.trunc to half
960 %add = fsub half %1, %3
961 %4 = bitcast half %add to i16
962 %tmp4.0.insert.ext = zext i16 %4 to i32
963 %5 = bitcast i32 %tmp4.0.insert.ext to float
968 ; CHECK-SOFT: bl __aeabi_h2f
969 ; CHECK-SOFT: bl __aeabi_h2f
970 ; CHECK-SOFT: bl __aeabi_fsub
971 ; CHECK-SOFT: bl __aeabi_f2h
973 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
974 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
975 ; CHECK-SOFTFP-VFP3: vsub.f32
976 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
978 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
979 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
980 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
981 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
982 ; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
983 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
984 ; CHECK-SOFTFP-FP16: vmov r0, s0
986 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
987 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
988 ; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
989 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
991 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
992 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
993 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
994 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
995 ; CHECK-HARDFP-VFP3: vsub.f32
996 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
997 ; CHECK-HARDFP-VFP3: vmov s0, r0
999 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
1000 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
1001 ; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
1002 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
1004 ; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
1007 ; Check for VSTRH with a FCONSTH; this checks that addressing mode
1008 ; AddrMode5FP16 is supported.
; ThumbAddrMode5FP16: volatile-stores the half constant 0xH3C00 (1.0) to a
; stack slot, volatile-loads it back, adds the half taken from the low 16
; bits of the argument, and returns the result bits zero-extended to i32.
; The soft-float-ABI fullfp16 runs expect the constant to be materialized
; with an immediate vmov.f16 and the stack accesses to use vstr.16 / vldr.16
; with an sp-relative offset. The %S.0.S.0..sroa_cast value is not used by
; any visible instruction.
1009 define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
1011 %S = alloca half, align 2
1012 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
1013 %0 = bitcast i16 %tmp.0.extract.trunc to half
1014 %S.0.S.0..sroa_cast = bitcast half* %S to i8*
1015 store volatile half 0xH3C00, half* %S, align 2
1016 %S.0.S.0. = load volatile half, half* %S, align 2
1017 %add = fadd half %S.0.S.0., %0
1018 %1 = bitcast half %add to i16
1019 %tmp2.0.insert.ext = zext i16 %1 to i32
1020 ret i32 %tmp2.0.insert.ext
1022 ; CHECK-LABEL: ThumbAddrMode5FP16:
1024 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
1025 ; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
1026 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
1027 ; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
1028 ; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
1031 ; Test function calls to check store/load reg to/from stack
; Body of fn1 (its define line is not visible in this excerpt; the name comes
; from the label pattern below). It stores the half constant 0xH7C00
; (+infinity) to %coerce before each call, loads an i32 from %tmp2, which no
; visible instruction writes, and passes it to the variadic functions fn2
; and fn3 through a bitcast function pointer.
; The -O0 fullfp16 runs (SPILL-RELOAD prefix) expect s0 to be spilled with
; vstr.16 before the call to fn2 and reloaded with vldr.16 right after it.
1034 %coerce = alloca half, align 2
1035 %tmp2 = alloca i32, align 4
1036 store half 0xH7C00, half* %coerce, align 2
1037 %0 = load i32, i32* %tmp2, align 4
1038 %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
1039 store half 0xH7C00, half* %coerce, align 2
1040 %1 = load i32, i32* %tmp2, align 4
1041 %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
1044 ; CHECK-SPILL-RELOAD-LABEL: fn1:
1045 ; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
1046 ; CHECK-SPILL-RELOAD: bl fn2
1047 ; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
; External variadic callees used by the spill/reload test; they are called
; through a bitcast to a one-argument i32 signature.
1050 declare dso_local i32 @fn2(...)
1051 declare dso_local i32 @fn3(...)