2 ; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
3 ; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
4 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
5 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -float-abi=soft -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
8 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
9 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
10 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
12 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
13 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
14 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
17 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
18 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
21 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
22 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
23 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
25 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
26 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
27 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
30 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
31 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
33 ; TODO: we can't pass half-precision arguments as "half" types yet. We do
34 ; that for the time being by passing "float %f.coerce" and the necessary
35 ; bitconverts/truncates. But when we can pass half types, we do want to use
38 define float @RetValBug(float %A.coerce) {
41 ; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
42 ; any operands) when FullFP16 is enabled.
44 ; CHECK-LABEL: RetValBug:
45 ; CHECK-HARDFP-FULLFP16: {{.*}} lr
49 define float @Add(float %a.coerce, float %b.coerce) {
51 %0 = bitcast float %a.coerce to i32
52 %tmp.0.extract.trunc = trunc i32 %0 to i16
53 %1 = bitcast i16 %tmp.0.extract.trunc to half
54 %2 = bitcast float %b.coerce to i32
55 %tmp1.0.extract.trunc = trunc i32 %2 to i16
56 %3 = bitcast i16 %tmp1.0.extract.trunc to half
57 %add = fadd half %1, %3
58 %4 = bitcast half %add to i16
59 %tmp4.0.insert.ext = zext i16 %4 to i32
60 %5 = bitcast i32 %tmp4.0.insert.ext to float
65 ; CHECK-SOFT: bl __aeabi_h2f
66 ; CHECK-SOFT: bl __aeabi_h2f
67 ; CHECK-SOFT: bl __aeabi_fadd
68 ; CHECK-SOFT: bl __aeabi_f2h
70 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
71 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
72 ; CHECK-SOFTFP-VFP3: vadd.f32
73 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
75 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
76 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
77 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
78 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
79 ; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
80 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
81 ; CHECK-SOFTFP-FP16: vmov r0, s0
83 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
84 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
85 ; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
86 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
88 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
89 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
90 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
91 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
92 ; CHECK-HARDFP-VFP3: vadd.f32
93 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
94 ; CHECK-HARDFP-VFP3: vmov s0, r0
96 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
97 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
98 ; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
99 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
101 ; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
105 define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
107 %0 = bitcast float %F.coerce to i32
108 %tmp.0.extract.trunc = trunc i32 %0 to i16
109 %1 = bitcast i16 %tmp.0.extract.trunc to half
110 %2 = bitcast float %G.coerce to i32
111 %tmp1.0.extract.trunc = trunc i32 %2 to i16
112 %3 = bitcast i16 %tmp1.0.extract.trunc to half
113 %cmp = fcmp une half %1, %3
116 ; CHECK-LABEL: VCMP1:
118 ; CHECK-SOFT: bl __aeabi_fcmpeq
120 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
121 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
122 ; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}
124 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
125 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
126 ; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}
128 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
129 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
130 ; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]
132 ; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
133 ; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
134 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
138 define zeroext i1 @VCMP2(float %F.coerce) {
140 %0 = bitcast float %F.coerce to i32
141 %tmp.0.extract.trunc = trunc i32 %0 to i16
142 %1 = bitcast i16 %tmp.0.extract.trunc to half
143 %cmp = fcmp une half %1, 0.000000e+00
146 ; CHECK-LABEL: VCMP2:
148 ; CHECK-SOFT: bl __aeabi_fcmpeq
149 ; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
150 ; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
151 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
155 define i32 @VCMPE1(float %F.coerce) {
157 %0 = bitcast float %F.coerce to i32
158 %tmp.0.extract.trunc = trunc i32 %0 to i16
159 %1 = bitcast i16 %tmp.0.extract.trunc to half
160 %tmp = fcmp olt half %1, 0.000000e+00
161 %tmp1 = zext i1 %tmp to i32
164 ; CHECK-LABEL: VCMPE1:
166 ; CHECK-SOFT: bl __aeabi_fcmplt
167 ; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
168 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
169 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
172 define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
174 %0 = bitcast float %F.coerce to i32
175 %tmp.0.extract.trunc = trunc i32 %0 to i16
176 %1 = bitcast i16 %tmp.0.extract.trunc to half
177 %2 = bitcast float %G.coerce to i32
178 %tmp.1.extract.trunc = trunc i32 %2 to i16
179 %3 = bitcast i16 %tmp.1.extract.trunc to half
180 %tmp = fcmp olt half %1, %3
181 %tmp1 = zext i1 %tmp to i32
184 ; CHECK-LABEL: VCMPE2:
186 ; CHECK-SOFT: bl __aeabi_fcmplt
187 ; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
188 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
189 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
192 ; Test lowering of BR_CC
193 define hidden i32 @VCMPBRCC() {
195 %f = alloca half, align 2
199 %0 = load half, half* %f, align 2
200 %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
201 br i1 %cmp, label %for.body, label %for.end
209 ; CHECK-LABEL: VCMPBRCC:
211 ; CHECK-SOFT: bl __aeabi_fcmp{{gt|le}}
212 ; CHECK-SOFT: cmp r0, #{{0|1}}
214 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
215 ; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
216 ; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr
218 ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
219 ; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
222 ; 5. VCVT (between floating-point and fixed-point)
223 ; Only assembly/disassembly support
225 ; 6. VCVT (between floating-point and integer, both directions)
226 define i32 @fptosi(i32 %A.coerce) {
228 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
229 %0 = bitcast i16 %tmp.0.extract.trunc to half
230 %conv = fptosi half %0 to i32
233 ; CHECK-LABEL: fptosi:
235 ; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
236 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
237 ; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
240 define i32 @fptoui(i32 %A.coerce) {
242 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
243 %0 = bitcast i16 %tmp.0.extract.trunc to half
244 %conv = fptoui half %0 to i32
247 ; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
248 ; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
251 define float @UintToH(i32 %a, i32 %b) {
253 %0 = uitofp i32 %a to half
254 %1 = bitcast half %0 to i16
255 %tmp0.insert.ext = zext i16 %1 to i32
256 %2 = bitcast i32 %tmp0.insert.ext to float
259 ; CHECK-LABEL: UintToH:
261 ; CHECK-HARDFP-FULLFP16: vmov s0, r0
262 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
265 define float @SintToH(i32 %a, i32 %b) {
267 %0 = sitofp i32 %a to half
268 %1 = bitcast half %0 to i16
269 %tmp0.insert.ext = zext i16 %1 to i32
270 %2 = bitcast i32 %tmp0.insert.ext to float
273 ; CHECK-LABEL: SintToH:
275 ; CHECK-HARDFP-FULLFP16: vmov s0, r0
276 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
279 define i32 @f2h(float %f) {
281 %conv = fptrunc float %f to half
282 %0 = bitcast half %conv to i16
283 %tmp.0.insert.ext = zext i16 %0 to i32
284 ret i32 %tmp.0.insert.ext
287 ; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
290 define float @h2f(i32 %h.coerce) {
292 %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
293 %0 = bitcast i16 %tmp.0.extract.trunc to half
294 %conv = fpext half %0 to float
298 ; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
302 define double @h2d(i32 %h.coerce) {
304 %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
305 %0 = bitcast i16 %tmp.0.extract.trunc to half
306 %conv = fpext half %0 to double
310 ; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
313 define i32 @d2h(double %d) {
315 %conv = fptrunc double %d to half
316 %0 = bitcast half %conv to i16
317 %tmp.0.insert.ext = zext i16 %0 to i32
318 ret i32 %tmp.0.insert.ext
321 ; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
332 define float @Div(float %a.coerce, float %b.coerce) {
334 %0 = bitcast float %a.coerce to i32
335 %tmp.0.extract.trunc = trunc i32 %0 to i16
336 %1 = bitcast i16 %tmp.0.extract.trunc to half
337 %2 = bitcast float %b.coerce to i32
338 %tmp1.0.extract.trunc = trunc i32 %2 to i16
339 %3 = bitcast i16 %tmp1.0.extract.trunc to half
340 %add = fdiv half %1, %3
341 %4 = bitcast half %add to i16
342 %tmp4.0.insert.ext = zext i16 %4 to i32
343 %5 = bitcast i32 %tmp4.0.insert.ext to float
348 ; CHECK-SOFT: bl __aeabi_h2f
349 ; CHECK-SOFT: bl __aeabi_h2f
350 ; CHECK-SOFT: bl __aeabi_fdiv
351 ; CHECK-SOFT: bl __aeabi_f2h
353 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
354 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
355 ; CHECK-SOFTFP-VFP3: vdiv.f32
356 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
358 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
359 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
360 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
361 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
362 ; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
363 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
364 ; CHECK-SOFTFP-FP16: vmov r0, s0
366 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
367 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
368 ; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
369 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
371 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
372 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
373 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
374 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
375 ; CHECK-HARDFP-VFP3: vdiv.f32
376 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
377 ; CHECK-HARDFP-VFP3: vmov s0, r0
379 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
380 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
381 ; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
382 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
384 ; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
388 define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
390 %0 = bitcast float %a.coerce to i32
391 %tmp.0.extract.trunc = trunc i32 %0 to i16
392 %1 = bitcast i16 %tmp.0.extract.trunc to half
393 %2 = bitcast float %b.coerce to i32
394 %tmp1.0.extract.trunc = trunc i32 %2 to i16
395 %3 = bitcast i16 %tmp1.0.extract.trunc to half
396 %4 = bitcast float %c.coerce to i32
397 %tmp2.0.extract.trunc = trunc i32 %4 to i16
398 %5 = bitcast i16 %tmp2.0.extract.trunc to half
399 %mul = fmul half %1, %3
400 %add = fadd half %mul, %5
401 %6 = bitcast half %add to i16
402 %tmp4.0.insert.ext = zext i16 %6 to i32
403 %7 = bitcast i32 %tmp4.0.insert.ext to float
407 ; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
408 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
412 define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
414 %0 = bitcast float %a.coerce to i32
415 %tmp.0.extract.trunc = trunc i32 %0 to i16
416 %1 = bitcast i16 %tmp.0.extract.trunc to half
417 %2 = bitcast float %b.coerce to i32
418 %tmp1.0.extract.trunc = trunc i32 %2 to i16
419 %3 = bitcast i16 %tmp1.0.extract.trunc to half
420 %4 = bitcast float %c.coerce to i32
421 %tmp2.0.extract.trunc = trunc i32 %4 to i16
422 %5 = bitcast i16 %tmp2.0.extract.trunc to half
423 %mul = fmul half %1, %3
424 %sub = fsub half %5, %mul
425 %6 = bitcast half %sub to i16
426 %tmp4.0.insert.ext = zext i16 %6 to i32
427 %7 = bitcast i32 %tmp4.0.insert.ext to float
431 ; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
432 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
436 define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
438 %0 = bitcast float %a.coerce to i32
439 %tmp.0.extract.trunc = trunc i32 %0 to i16
440 %1 = bitcast i16 %tmp.0.extract.trunc to half
441 %2 = bitcast float %b.coerce to i32
442 %tmp1.0.extract.trunc = trunc i32 %2 to i16
443 %3 = bitcast i16 %tmp1.0.extract.trunc to half
444 %4 = bitcast float %c.coerce to i32
445 %tmp2.0.extract.trunc = trunc i32 %4 to i16
446 %5 = bitcast i16 %tmp2.0.extract.trunc to half
447 %mul = fmul half %1, %3
448 %sub = fsub half -0.0, %mul
449 %sub2 = fsub half %sub, %5
450 %6 = bitcast half %sub2 to i16
451 %tmp4.0.insert.ext = zext i16 %6 to i32
452 %7 = bitcast i32 %tmp4.0.insert.ext to float
455 ; CHECK-LABEL: VFNMA:
456 ; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
457 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
461 define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
463 %0 = bitcast float %a.coerce to i32
464 %tmp.0.extract.trunc = trunc i32 %0 to i16
465 %1 = bitcast i16 %tmp.0.extract.trunc to half
466 %2 = bitcast float %b.coerce to i32
467 %tmp1.0.extract.trunc = trunc i32 %2 to i16
468 %3 = bitcast i16 %tmp1.0.extract.trunc to half
469 %4 = bitcast float %c.coerce to i32
470 %tmp2.0.extract.trunc = trunc i32 %4 to i16
471 %5 = bitcast i16 %tmp2.0.extract.trunc to half
472 %mul = fmul half %1, %3
473 %sub2 = fsub half %mul, %5
474 %6 = bitcast half %sub2 to i16
475 %tmp4.0.insert.ext = zext i16 %6 to i32
476 %7 = bitcast i32 %tmp4.0.insert.ext to float
479 ; CHECK-LABEL: VFNMS:
480 ; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
481 ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
486 ; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll
489 define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
491 %0 = bitcast float %a.coerce to i32
492 %tmp.0.extract.trunc = trunc i32 %0 to i16
493 %1 = bitcast i16 %tmp.0.extract.trunc to half
494 %2 = bitcast float %b.coerce to i32
495 %tmp1.0.extract.trunc = trunc i32 %2 to i16
496 %3 = bitcast i16 %tmp1.0.extract.trunc to half
497 %4 = bitcast float %c.coerce to i32
498 %tmp2.0.extract.trunc = trunc i32 %4 to i16
499 %5 = bitcast i16 %tmp2.0.extract.trunc to half
500 %mul = fmul half %1, %3
501 %add = fadd half %5, %mul
502 %6 = bitcast half %add to i16
503 %tmp4.0.insert.ext = zext i16 %6 to i32
504 %7 = bitcast i32 %tmp4.0.insert.ext to float
508 ; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
509 ; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
513 define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
515 %0 = bitcast float %a.coerce to i32
516 %tmp.0.extract.trunc = trunc i32 %0 to i16
517 %1 = bitcast i16 %tmp.0.extract.trunc to half
518 %2 = bitcast float %b.coerce to i32
519 %tmp1.0.extract.trunc = trunc i32 %2 to i16
520 %3 = bitcast i16 %tmp1.0.extract.trunc to half
521 %4 = bitcast float %c.coerce to i32
522 %tmp2.0.extract.trunc = trunc i32 %4 to i16
523 %5 = bitcast i16 %tmp2.0.extract.trunc to half
524 %mul = fmul half %1, %3
525 %add = fsub half %5, %mul
526 %6 = bitcast half %add to i16
527 %tmp4.0.insert.ext = zext i16 %6 to i32
528 %7 = bitcast i32 %tmp4.0.insert.ext to float
532 ; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
533 ; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
536 ; TODO: fix immediates.
537 ; 21. VMOV (between general-purpose register and half-precision register)
539 ; 22. VMOV (immediate)
540 define i32 @movi(i32 %a.coerce) {
542 %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
543 %0 = bitcast i16 %tmp.0.extract.trunc to half
544 %add = fadd half %0, 0xHC000
545 %1 = bitcast half %add to i16
546 %tmp2.0.insert.ext = zext i16 %1 to i32
547 ret i32 %tmp2.0.insert.ext
550 ; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
554 define float @Mul(float %a.coerce, float %b.coerce) {
556 %0 = bitcast float %a.coerce to i32
557 %tmp.0.extract.trunc = trunc i32 %0 to i16
558 %1 = bitcast i16 %tmp.0.extract.trunc to half
559 %2 = bitcast float %b.coerce to i32
560 %tmp1.0.extract.trunc = trunc i32 %2 to i16
561 %3 = bitcast i16 %tmp1.0.extract.trunc to half
562 %add = fmul half %1, %3
563 %4 = bitcast half %add to i16
564 %tmp4.0.insert.ext = zext i16 %4 to i32
565 %5 = bitcast i32 %tmp4.0.insert.ext to float
570 ; CHECK-SOFT: bl __aeabi_h2f
571 ; CHECK-SOFT: bl __aeabi_h2f
572 ; CHECK-SOFT: bl __aeabi_fmul
573 ; CHECK-SOFT: bl __aeabi_f2h
575 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
576 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
577 ; CHECK-SOFTFP-VFP3: vmul.f32
578 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
580 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
581 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
582 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
583 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
584 ; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
585 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
586 ; CHECK-SOFTFP-FP16: vmov r0, s0
588 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
589 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
590 ; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
591 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
593 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
594 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
595 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
596 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
597 ; CHECK-HARDFP-VFP3: vmul.f32
598 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
599 ; CHECK-HARDFP-VFP3: vmov s0, r0
601 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
602 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
603 ; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
604 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
606 ; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
610 define float @Neg(float %a.coerce) {
612 %0 = bitcast float %a.coerce to i32
613 %tmp.0.extract.trunc = trunc i32 %0 to i16
614 %1 = bitcast i16 %tmp.0.extract.trunc to half
615 %2 = fsub half -0.000000e+00, %1
616 %3 = bitcast half %2 to i16
617 %tmp4.0.insert.ext = zext i16 %3 to i32
618 %4 = bitcast i32 %tmp4.0.insert.ext to float
622 ; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
626 define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
628 %0 = bitcast float %a.coerce to i32
629 %tmp.0.extract.trunc = trunc i32 %0 to i16
630 %1 = bitcast i16 %tmp.0.extract.trunc to half
631 %2 = bitcast float %b.coerce to i32
632 %tmp1.0.extract.trunc = trunc i32 %2 to i16
633 %3 = bitcast i16 %tmp1.0.extract.trunc to half
634 %4 = bitcast float %c.coerce to i32
635 %tmp2.0.extract.trunc = trunc i32 %4 to i16
636 %5 = bitcast i16 %tmp2.0.extract.trunc to half
637 %add = fmul half %1, %3
638 %add2 = fsub half -0.000000e+00, %add
639 %add3 = fsub half %add2, %5
640 %6 = bitcast half %add3 to i16
641 %tmp4.0.insert.ext = zext i16 %6 to i32
642 %7 = bitcast i32 %tmp4.0.insert.ext to float
645 ; CHECK-LABEL: VNMLA:
646 ; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
647 ; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
651 define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
653 %0 = bitcast float %a.coerce to i32
654 %tmp.0.extract.trunc = trunc i32 %0 to i16
655 %1 = bitcast i16 %tmp.0.extract.trunc to half
656 %2 = bitcast float %b.coerce to i32
657 %tmp1.0.extract.trunc = trunc i32 %2 to i16
658 %3 = bitcast i16 %tmp1.0.extract.trunc to half
659 %4 = bitcast float %c.coerce to i32
660 %tmp2.0.extract.trunc = trunc i32 %4 to i16
661 %5 = bitcast i16 %tmp2.0.extract.trunc to half
662 %add = fmul half %1, %3
663 %add2 = fsub half %add, %5
664 %6 = bitcast half %add2 to i16
665 %tmp4.0.insert.ext = zext i16 %6 to i32
666 %7 = bitcast i32 %tmp4.0.insert.ext to float
669 ; CHECK-LABEL: VNMLS:
670 ; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
671 ; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
675 define float @NMul(float %a.coerce, float %b.coerce) {
677 %0 = bitcast float %a.coerce to i32
678 %tmp.0.extract.trunc = trunc i32 %0 to i16
679 %1 = bitcast i16 %tmp.0.extract.trunc to half
680 %2 = bitcast float %b.coerce to i32
681 %tmp1.0.extract.trunc = trunc i32 %2 to i16
682 %3 = bitcast i16 %tmp1.0.extract.trunc to half
683 %add = fmul half %1, %3
684 %add2 = fsub half -0.0, %add
685 %4 = bitcast half %add2 to i16
686 %tmp4.0.insert.ext = zext i16 %4 to i32
687 %5 = bitcast i32 %tmp4.0.insert.ext to float
691 ; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
695 define half @select_cc1(half* %a0) {
696 %1 = load half, half* %a0
697 %2 = fcmp nsz oeq half %1, 0xH0001
698 %3 = select i1 %2, half 0xHC000, half 0xH0002
701 ; CHECK-LABEL: select_cc1:
703 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
704 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
705 ; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}
707 ; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
708 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
709 ; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}
711 ; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
712 ; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
713 ; CHECK-SOFTFP-FP16-T32: it eq
714 ; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
717 ; FIXME: more tests need to be added for VSELGE and VSELGT.
718 ; That is, more combinations of immediate operands that can or can't
719 ; be encoded as an FP16 immediate need to be added here.
722 define half @select_cc_ge1(half* %a0) {
723 %1 = load half, half* %a0
724 %2 = fcmp nsz oge half %1, 0xH0001
725 %3 = select i1 %2, half 0xHC000, half 0xH0002
728 ; CHECK-LABEL: select_cc_ge1:
730 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
731 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
732 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
734 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
735 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
736 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}
738 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
739 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
740 ; CHECK-SOFTFP-FP16-T32-NEXT: it ge
741 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
744 define half @select_cc_ge2(half* %a0) {
745 %1 = load half, half* %a0
746 %2 = fcmp nsz ole half %1, 0xH0001
747 %3 = select i1 %2, half 0xHC000, half 0xH0002
750 ; CHECK-LABEL: select_cc_ge2:
752 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
753 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
754 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
756 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
757 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
758 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}
760 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
761 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
762 ; CHECK-SOFTFP-FP16-T32-NEXT: it ls
763 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
766 define half @select_cc_ge3(half* %a0) {
767 %1 = load half, half* %a0
768 %2 = fcmp nsz ugt half %1, 0xH0001
769 %3 = select i1 %2, half 0xHC000, half 0xH0002
772 ; CHECK-LABEL: select_cc_ge3:
774 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
775 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
776 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
778 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
779 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
780 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
782 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
783 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
784 ; CHECK-SOFTFP-FP16-T32-NEXT: it hi
785 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
788 define half @select_cc_ge4(half* %a0) {
789 %1 = load half, half* %a0
790 %2 = fcmp nsz ult half %1, 0xH0001
791 %3 = select i1 %2, half 0xHC000, half 0xH0002
794 ; CHECK-LABEL: select_cc_ge4:
796 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
797 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
798 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
800 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
801 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
802 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
804 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
805 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
806 ; CHECK-SOFTFP-FP16-T32-NEXT: it lt
807 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
811 define half @select_cc_gt1(half* %a0) {
812 %1 = load half, half* %a0
813 %2 = fcmp nsz ogt half %1, 0xH0001
814 %3 = select i1 %2, half 0xHC000, half 0xH0002
817 ; CHECK-LABEL: select_cc_gt1:
819 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
820 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
821 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
823 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
824 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
825 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
827 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
828 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
829 ; CHECK-SOFTFP-FP16-T32-NEXT: it gt
830 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
833 define half @select_cc_gt2(half* %a0) {
834 %1 = load half, half* %a0
835 %2 = fcmp nsz uge half %1, 0xH0001
836 %3 = select i1 %2, half 0xHC000, half 0xH0002
839 ; CHECK-LABEL: select_cc_gt2:
841 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
842 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
843 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
845 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
846 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
847 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
849 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
850 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
851 ; CHECK-SOFTFP-FP16-T32-NEXT: it pl
852 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
855 define half @select_cc_gt3(half* %a0) {
856 %1 = load half, half* %a0
857 %2 = fcmp nsz ule half %1, 0xH0001
858 %3 = select i1 %2, half 0xHC000, half 0xH0002
861 ; CHECK-LABEL: select_cc_gt3:
863 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
864 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
865 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
867 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
868 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
869 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}}
871 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
872 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
873 ; CHECK-SOFTFP-FP16-T32-NEXT: it le
874 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}}
877 define half @select_cc_gt4(half* %a0) {
878 %1 = load half, half* %a0
879 %2 = fcmp nsz olt half %1, 0xH0001
880 %3 = select i1 %2, half 0xHC000, half 0xH0002
883 ; CHECK-LABEL: select_cc_gt4:
885 ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
886 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
887 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
889 ; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
890 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
891 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
893 ; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
894 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
895 ; CHECK-SOFTFP-FP16-T32-NEXT: it mi
896 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
900 define float @select_cc4(float %a.coerce) {
902 %0 = bitcast float %a.coerce to i32
903 %tmp.0.extract.trunc = trunc i32 %0 to i16
904 %1 = bitcast i16 %tmp.0.extract.trunc to half
906 %2 = fcmp nsz ueq half %1, 0xH0001
907 %3 = select i1 %2, half 0xHC000, half 0xH0002
909 %4 = bitcast half %3 to i16
910 %tmp4.0.insert.ext = zext i16 %4 to i32
911 %5 = bitcast i32 %tmp4.0.insert.ext to float
914 ; CHECK-LABEL: select_cc4:
916 ; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
917 ; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
918 ; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
919 ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
920 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
921 ; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
922 ; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]
924 ; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
925 ; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
926 ; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
927 ; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
928 ; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
929 ; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
930 ; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
931 ; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
932 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
933 ; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]
935 ; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
936 ; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
937 ; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
938 ; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
939 ; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
940 ; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
941 ; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
942 ; CHECK-SOFTFP-FP16-T32: it eq
943 ; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
944 ; CHECK-SOFTFP-FP16-T32: it vs
945 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
946 ; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
950 define float @Sub(float %a.coerce, float %b.coerce) {
952 %0 = bitcast float %a.coerce to i32
953 %tmp.0.extract.trunc = trunc i32 %0 to i16
954 %1 = bitcast i16 %tmp.0.extract.trunc to half
955 %2 = bitcast float %b.coerce to i32
956 %tmp1.0.extract.trunc = trunc i32 %2 to i16
957 %3 = bitcast i16 %tmp1.0.extract.trunc to half
958 %add = fsub half %1, %3
959 %4 = bitcast half %add to i16
960 %tmp4.0.insert.ext = zext i16 %4 to i32
961 %5 = bitcast i32 %tmp4.0.insert.ext to float
966 ; CHECK-SOFT: bl __aeabi_h2f
967 ; CHECK-SOFT: bl __aeabi_h2f
968 ; CHECK-SOFT: bl __aeabi_fsub
969 ; CHECK-SOFT: bl __aeabi_f2h
971 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
972 ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
973 ; CHECK-SOFTFP-VFP3: vsub.f32
974 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
976 ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
977 ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
978 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
979 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
980 ; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
981 ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
982 ; CHECK-SOFTFP-FP16: vmov r0, s0
984 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
985 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
986 ; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
987 ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
989 ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
990 ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
991 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
992 ; CHECK-HARDFP-VFP3: bl __aeabi_h2f
993 ; CHECK-HARDFP-VFP3: vsub.f32
994 ; CHECK-HARDFP-VFP3: bl __aeabi_f2h
995 ; CHECK-HARDFP-VFP3: vmov s0, r0
997 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
998 ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
999 ; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
1000 ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
1002 ; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
1005 ; Check for VSTRH with a FCONSTH, this checks that addressing mode
1006 ; AddrMode5FP16 is supported.
1007 define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
1009 %S = alloca half, align 2
1010 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
1011 %0 = bitcast i16 %tmp.0.extract.trunc to half
1012 %S.0.S.0..sroa_cast = bitcast half* %S to i8*
1013 store volatile half 0xH3C00, half* %S, align 2
1014 %S.0.S.0. = load volatile half, half* %S, align 2
1015 %add = fadd half %S.0.S.0., %0
1016 %1 = bitcast half %add to i16
1017 %tmp2.0.insert.ext = zext i16 %1 to i32
1018 ret i32 %tmp2.0.insert.ext
1020 ; CHECK-LABEL: ThumbAddrMode5FP16
1022 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
1023 ; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
1024 ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
1025 ; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
1026 ; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
1029 ; Test function calls to check store/load reg to/from stack
1032 %coerce = alloca half, align 2
1033 %tmp2 = alloca i32, align 4
1034 store half 0xH7C00, half* %coerce, align 2
1035 %0 = load i32, i32* %tmp2, align 4
1036 %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
1037 store half 0xH7C00, half* %coerce, align 2
1038 %1 = load i32, i32* %tmp2, align 4
1039 %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
1042 ; CHECK-SPILL-RELOAD-LABEL: fn1:
1043 ; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
1044 ; CHECK-SPILL-RELOAD: bl fn2
1045 ; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
; External varargs callees for the -O0 spill/reload test (fn1 calls both via
; "bl fn2" / "bl fn3"); the calls force the half value to be spilled to and
; reloaded from the stack, which CHECK-SPILL-RELOAD verifies.
1048 declare dso_local i32 @fn2(...)
1049 declare dso_local i32 @fn3(...)