1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -enable-legalize-types-checking -mtriple=x86_64-linux-gnu -mattr=+f16c | FileCheck %s --check-prefix=F16C
3 ; RUN: llc < %s -enable-legalize-types-checking -mtriple=x86_64-linux-gnu -mattr=+avx512fp16 | FileCheck %s --check-prefix=FP16
4 ; RUN: llc < %s -enable-legalize-types-checking -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefix=X64
5 ; RUN: llc < %s -enable-legalize-types-checking -mtriple=i686-linux-gnu -mattr=sse2 | FileCheck %s --check-prefix=X86
7 ; Check all soft floating point library function calls.
9 define void @test_half_ceil(half %a0, ptr %p0) nounwind {
10 ; F16C-LABEL: test_half_ceil:
12 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
13 ; F16C-NEXT: vmovd %eax, %xmm0
14 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
15 ; F16C-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
16 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
17 ; F16C-NEXT: vmovd %xmm0, %eax
18 ; F16C-NEXT: movw %ax, (%rdi)
21 ; FP16-LABEL: test_half_ceil:
23 ; FP16-NEXT: vrndscalesh $10, %xmm0, %xmm0, %xmm0
24 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
27 ; X64-LABEL: test_half_ceil:
29 ; X64-NEXT: pushq %rbx
30 ; X64-NEXT: movq %rdi, %rbx
31 ; X64-NEXT: callq __extendhfsf2@PLT
32 ; X64-NEXT: callq ceilf@PLT
33 ; X64-NEXT: callq __truncsfhf2@PLT
34 ; X64-NEXT: pextrw $0, %xmm0, %eax
35 ; X64-NEXT: movw %ax, (%rbx)
39 ; X86-LABEL: test_half_ceil:
41 ; X86-NEXT: pushl %esi
42 ; X86-NEXT: subl $8, %esp
43 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
44 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
45 ; X86-NEXT: pextrw $0, %xmm0, %eax
46 ; X86-NEXT: movw %ax, (%esp)
47 ; X86-NEXT: calll __extendhfsf2
48 ; X86-NEXT: fstps (%esp)
49 ; X86-NEXT: calll ceilf
50 ; X86-NEXT: fstps (%esp)
51 ; X86-NEXT: calll __truncsfhf2
52 ; X86-NEXT: pextrw $0, %xmm0, %eax
53 ; X86-NEXT: movw %ax, (%esi)
54 ; X86-NEXT: addl $8, %esp
57 %res = call half @llvm.ceil.half(half %a0)
58 store half %res, ptr %p0, align 2
62 define void @test_half_copysign(half %a0, half %a1, ptr %p0) nounwind {
63 ; F16C-LABEL: test_half_copysign:
65 ; F16C-NEXT: vpextrw $0, %xmm1, %eax
66 ; F16C-NEXT: andl $32768, %eax # imm = 0x8000
67 ; F16C-NEXT: vpextrw $0, %xmm0, %ecx
68 ; F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
69 ; F16C-NEXT: orl %eax, %ecx
70 ; F16C-NEXT: movw %cx, (%rdi)
73 ; FP16-LABEL: test_half_copysign:
75 ; FP16-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
76 ; FP16-NEXT: vpternlogd $202, %xmm1, %xmm0, %xmm2
77 ; FP16-NEXT: vmovsh %xmm2, (%rdi)
80 ; X64-LABEL: test_half_copysign:
82 ; X64-NEXT: pextrw $0, %xmm1, %eax
83 ; X64-NEXT: andl $32768, %eax # imm = 0x8000
84 ; X64-NEXT: pextrw $0, %xmm0, %ecx
85 ; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
86 ; X64-NEXT: orl %eax, %ecx
87 ; X64-NEXT: movw %cx, (%rdi)
90 ; X86-LABEL: test_half_copysign:
92 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
93 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
94 ; X86-NEXT: andl $32768, %ecx # imm = 0x8000
95 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
96 ; X86-NEXT: andl $32767, %edx # imm = 0x7FFF
97 ; X86-NEXT: orl %ecx, %edx
98 ; X86-NEXT: movw %dx, (%eax)
100 %res = call half @llvm.copysign.half(half %a0, half %a1)
101 store half %res, ptr %p0, align 2
105 define void @test_half_cos(half %a0, ptr %p0) nounwind {
106 ; F16C-LABEL: test_half_cos:
108 ; F16C-NEXT: pushq %rbx
109 ; F16C-NEXT: movq %rdi, %rbx
110 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
111 ; F16C-NEXT: vmovd %eax, %xmm0
112 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
113 ; F16C-NEXT: callq cosf@PLT
114 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
115 ; F16C-NEXT: vmovd %xmm0, %eax
116 ; F16C-NEXT: movw %ax, (%rbx)
117 ; F16C-NEXT: popq %rbx
120 ; FP16-LABEL: test_half_cos:
122 ; FP16-NEXT: pushq %rbx
123 ; FP16-NEXT: movq %rdi, %rbx
124 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
125 ; FP16-NEXT: callq cosf@PLT
126 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
127 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
128 ; FP16-NEXT: popq %rbx
131 ; X64-LABEL: test_half_cos:
133 ; X64-NEXT: pushq %rbx
134 ; X64-NEXT: movq %rdi, %rbx
135 ; X64-NEXT: callq __extendhfsf2@PLT
136 ; X64-NEXT: callq cosf@PLT
137 ; X64-NEXT: callq __truncsfhf2@PLT
138 ; X64-NEXT: pextrw $0, %xmm0, %eax
139 ; X64-NEXT: movw %ax, (%rbx)
140 ; X64-NEXT: popq %rbx
143 ; X86-LABEL: test_half_cos:
145 ; X86-NEXT: pushl %esi
146 ; X86-NEXT: subl $8, %esp
147 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
148 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
149 ; X86-NEXT: pextrw $0, %xmm0, %eax
150 ; X86-NEXT: movw %ax, (%esp)
151 ; X86-NEXT: calll __extendhfsf2
152 ; X86-NEXT: fstps (%esp)
153 ; X86-NEXT: calll cosf
154 ; X86-NEXT: fstps (%esp)
155 ; X86-NEXT: calll __truncsfhf2
156 ; X86-NEXT: pextrw $0, %xmm0, %eax
157 ; X86-NEXT: movw %ax, (%esi)
158 ; X86-NEXT: addl $8, %esp
159 ; X86-NEXT: popl %esi
161 %res = call half @llvm.cos.half(half %a0)
162 store half %res, ptr %p0, align 2
166 define void @test_half_exp(half %a0, ptr %p0) nounwind {
167 ; F16C-LABEL: test_half_exp:
169 ; F16C-NEXT: pushq %rbx
170 ; F16C-NEXT: movq %rdi, %rbx
171 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
172 ; F16C-NEXT: vmovd %eax, %xmm0
173 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
174 ; F16C-NEXT: callq expf@PLT
175 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
176 ; F16C-NEXT: vmovd %xmm0, %eax
177 ; F16C-NEXT: movw %ax, (%rbx)
178 ; F16C-NEXT: popq %rbx
181 ; FP16-LABEL: test_half_exp:
183 ; FP16-NEXT: pushq %rbx
184 ; FP16-NEXT: movq %rdi, %rbx
185 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
186 ; FP16-NEXT: callq expf@PLT
187 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
188 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
189 ; FP16-NEXT: popq %rbx
192 ; X64-LABEL: test_half_exp:
194 ; X64-NEXT: pushq %rbx
195 ; X64-NEXT: movq %rdi, %rbx
196 ; X64-NEXT: callq __extendhfsf2@PLT
197 ; X64-NEXT: callq expf@PLT
198 ; X64-NEXT: callq __truncsfhf2@PLT
199 ; X64-NEXT: pextrw $0, %xmm0, %eax
200 ; X64-NEXT: movw %ax, (%rbx)
201 ; X64-NEXT: popq %rbx
204 ; X86-LABEL: test_half_exp:
206 ; X86-NEXT: pushl %esi
207 ; X86-NEXT: subl $8, %esp
208 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
209 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
210 ; X86-NEXT: pextrw $0, %xmm0, %eax
211 ; X86-NEXT: movw %ax, (%esp)
212 ; X86-NEXT: calll __extendhfsf2
213 ; X86-NEXT: fstps (%esp)
214 ; X86-NEXT: calll expf
215 ; X86-NEXT: fstps (%esp)
216 ; X86-NEXT: calll __truncsfhf2
217 ; X86-NEXT: pextrw $0, %xmm0, %eax
218 ; X86-NEXT: movw %ax, (%esi)
219 ; X86-NEXT: addl $8, %esp
220 ; X86-NEXT: popl %esi
222 %res = call half @llvm.exp.half(half %a0)
223 store half %res, ptr %p0, align 2
227 define void @test_half_exp2(half %a0, ptr %p0) nounwind {
228 ; F16C-LABEL: test_half_exp2:
230 ; F16C-NEXT: pushq %rbx
231 ; F16C-NEXT: movq %rdi, %rbx
232 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
233 ; F16C-NEXT: vmovd %eax, %xmm0
234 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
235 ; F16C-NEXT: callq exp2f@PLT
236 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
237 ; F16C-NEXT: vmovd %xmm0, %eax
238 ; F16C-NEXT: movw %ax, (%rbx)
239 ; F16C-NEXT: popq %rbx
242 ; FP16-LABEL: test_half_exp2:
244 ; FP16-NEXT: pushq %rbx
245 ; FP16-NEXT: movq %rdi, %rbx
246 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
247 ; FP16-NEXT: callq exp2f@PLT
248 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
249 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
250 ; FP16-NEXT: popq %rbx
253 ; X64-LABEL: test_half_exp2:
255 ; X64-NEXT: pushq %rbx
256 ; X64-NEXT: movq %rdi, %rbx
257 ; X64-NEXT: callq __extendhfsf2@PLT
258 ; X64-NEXT: callq exp2f@PLT
259 ; X64-NEXT: callq __truncsfhf2@PLT
260 ; X64-NEXT: pextrw $0, %xmm0, %eax
261 ; X64-NEXT: movw %ax, (%rbx)
262 ; X64-NEXT: popq %rbx
265 ; X86-LABEL: test_half_exp2:
267 ; X86-NEXT: pushl %esi
268 ; X86-NEXT: subl $8, %esp
269 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
270 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
271 ; X86-NEXT: pextrw $0, %xmm0, %eax
272 ; X86-NEXT: movw %ax, (%esp)
273 ; X86-NEXT: calll __extendhfsf2
274 ; X86-NEXT: fstps (%esp)
275 ; X86-NEXT: calll exp2f
276 ; X86-NEXT: fstps (%esp)
277 ; X86-NEXT: calll __truncsfhf2
278 ; X86-NEXT: pextrw $0, %xmm0, %eax
279 ; X86-NEXT: movw %ax, (%esi)
280 ; X86-NEXT: addl $8, %esp
281 ; X86-NEXT: popl %esi
283 %res = call half @llvm.exp2.half(half %a0)
284 store half %res, ptr %p0, align 2
288 define void @test_half_exp10(half %a0, ptr %p0) nounwind {
289 ; F16C-LABEL: test_half_exp10:
291 ; F16C-NEXT: pushq %rbx
292 ; F16C-NEXT: movq %rdi, %rbx
293 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
294 ; F16C-NEXT: vmovd %eax, %xmm0
295 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
296 ; F16C-NEXT: callq exp10f@PLT
297 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
298 ; F16C-NEXT: vmovd %xmm0, %eax
299 ; F16C-NEXT: movw %ax, (%rbx)
300 ; F16C-NEXT: popq %rbx
303 ; FP16-LABEL: test_half_exp10:
305 ; FP16-NEXT: pushq %rbx
306 ; FP16-NEXT: movq %rdi, %rbx
307 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
308 ; FP16-NEXT: callq exp10f@PLT
309 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
310 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
311 ; FP16-NEXT: popq %rbx
314 ; X64-LABEL: test_half_exp10:
316 ; X64-NEXT: pushq %rbx
317 ; X64-NEXT: movq %rdi, %rbx
318 ; X64-NEXT: callq __extendhfsf2@PLT
319 ; X64-NEXT: callq exp10f@PLT
320 ; X64-NEXT: callq __truncsfhf2@PLT
321 ; X64-NEXT: pextrw $0, %xmm0, %eax
322 ; X64-NEXT: movw %ax, (%rbx)
323 ; X64-NEXT: popq %rbx
326 ; X86-LABEL: test_half_exp10:
328 ; X86-NEXT: pushl %esi
329 ; X86-NEXT: subl $8, %esp
330 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
331 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
332 ; X86-NEXT: pextrw $0, %xmm0, %eax
333 ; X86-NEXT: movw %ax, (%esp)
334 ; X86-NEXT: calll __extendhfsf2
335 ; X86-NEXT: fstps (%esp)
336 ; X86-NEXT: calll exp10f
337 ; X86-NEXT: fstps (%esp)
338 ; X86-NEXT: calll __truncsfhf2
339 ; X86-NEXT: pextrw $0, %xmm0, %eax
340 ; X86-NEXT: movw %ax, (%esi)
341 ; X86-NEXT: addl $8, %esp
342 ; X86-NEXT: popl %esi
344 %res = call half @llvm.exp10.half(half %a0)
345 store half %res, ptr %p0, align 2
349 define void @test_half_fabs(half %a0, ptr %p0) nounwind {
350 ; F16C-LABEL: test_half_fabs:
352 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
353 ; F16C-NEXT: vmovd %eax, %xmm0
354 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
355 ; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
356 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
357 ; F16C-NEXT: vmovd %xmm0, %eax
358 ; F16C-NEXT: movw %ax, (%rdi)
361 ; FP16-LABEL: test_half_fabs:
363 ; FP16-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
364 ; FP16-NEXT: vpand %xmm1, %xmm0, %xmm0
365 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
368 ; X64-LABEL: test_half_fabs:
370 ; X64-NEXT: pushq %rbx
371 ; X64-NEXT: movq %rdi, %rbx
372 ; X64-NEXT: callq __extendhfsf2@PLT
373 ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
374 ; X64-NEXT: callq __truncsfhf2@PLT
375 ; X64-NEXT: pextrw $0, %xmm0, %eax
376 ; X64-NEXT: movw %ax, (%rbx)
377 ; X64-NEXT: popq %rbx
380 ; X86-LABEL: test_half_fabs:
382 ; X86-NEXT: pushl %esi
383 ; X86-NEXT: subl $8, %esp
384 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
385 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
386 ; X86-NEXT: pextrw $0, %xmm0, %eax
387 ; X86-NEXT: movw %ax, (%esp)
388 ; X86-NEXT: calll __extendhfsf2
389 ; X86-NEXT: fstps {{[0-9]+}}(%esp)
390 ; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
391 ; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
392 ; X86-NEXT: movd %xmm0, (%esp)
393 ; X86-NEXT: calll __truncsfhf2
394 ; X86-NEXT: pextrw $0, %xmm0, %eax
395 ; X86-NEXT: movw %ax, (%esi)
396 ; X86-NEXT: addl $8, %esp
397 ; X86-NEXT: popl %esi
399 %res = call half @llvm.fabs.half(half %a0)
400 store half %res, ptr %p0, align 2
404 define void @test_half_floor(half %a0, ptr %p0) nounwind {
405 ; F16C-LABEL: test_half_floor:
407 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
408 ; F16C-NEXT: vmovd %eax, %xmm0
409 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
410 ; F16C-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
411 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
412 ; F16C-NEXT: vmovd %xmm0, %eax
413 ; F16C-NEXT: movw %ax, (%rdi)
416 ; FP16-LABEL: test_half_floor:
418 ; FP16-NEXT: vrndscalesh $9, %xmm0, %xmm0, %xmm0
419 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
422 ; X64-LABEL: test_half_floor:
424 ; X64-NEXT: pushq %rbx
425 ; X64-NEXT: movq %rdi, %rbx
426 ; X64-NEXT: callq __extendhfsf2@PLT
427 ; X64-NEXT: callq floorf@PLT
428 ; X64-NEXT: callq __truncsfhf2@PLT
429 ; X64-NEXT: pextrw $0, %xmm0, %eax
430 ; X64-NEXT: movw %ax, (%rbx)
431 ; X64-NEXT: popq %rbx
434 ; X86-LABEL: test_half_floor:
436 ; X86-NEXT: pushl %esi
437 ; X86-NEXT: subl $8, %esp
438 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
439 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
440 ; X86-NEXT: pextrw $0, %xmm0, %eax
441 ; X86-NEXT: movw %ax, (%esp)
442 ; X86-NEXT: calll __extendhfsf2
443 ; X86-NEXT: fstps (%esp)
444 ; X86-NEXT: calll floorf
445 ; X86-NEXT: fstps (%esp)
446 ; X86-NEXT: calll __truncsfhf2
447 ; X86-NEXT: pextrw $0, %xmm0, %eax
448 ; X86-NEXT: movw %ax, (%esi)
449 ; X86-NEXT: addl $8, %esp
450 ; X86-NEXT: popl %esi
452 %res = call half @llvm.floor.half(half %a0)
453 store half %res, ptr %p0, align 2
457 define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
458 ; F16C-LABEL: test_half_fma:
460 ; F16C-NEXT: pushq %rbx
461 ; F16C-NEXT: movq %rdi, %rbx
462 ; F16C-NEXT: vpextrw $0, %xmm2, %eax
463 ; F16C-NEXT: vpextrw $0, %xmm1, %ecx
464 ; F16C-NEXT: vpextrw $0, %xmm0, %edx
465 ; F16C-NEXT: vmovd %edx, %xmm0
466 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
467 ; F16C-NEXT: vmovd %ecx, %xmm1
468 ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
469 ; F16C-NEXT: vmovd %eax, %xmm2
470 ; F16C-NEXT: vcvtph2ps %xmm2, %xmm2
471 ; F16C-NEXT: callq fmaf@PLT
472 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
473 ; F16C-NEXT: vmovd %xmm0, %eax
474 ; F16C-NEXT: movw %ax, (%rbx)
475 ; F16C-NEXT: popq %rbx
478 ; FP16-LABEL: test_half_fma:
480 ; FP16-NEXT: vfmadd213sh %xmm2, %xmm1, %xmm0
481 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
484 ; X64-LABEL: test_half_fma:
486 ; X64-NEXT: pushq %rbx
487 ; X64-NEXT: subq $16, %rsp
488 ; X64-NEXT: movq %rdi, %rbx
489 ; X64-NEXT: movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
490 ; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
491 ; X64-NEXT: movaps %xmm1, %xmm0
492 ; X64-NEXT: callq __extendhfsf2@PLT
493 ; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
494 ; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
495 ; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
496 ; X64-NEXT: callq __extendhfsf2@PLT
497 ; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
498 ; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
499 ; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
500 ; X64-NEXT: callq __extendhfsf2@PLT
501 ; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
502 ; X64-NEXT: # xmm1 = mem[0],zero,zero,zero
503 ; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
504 ; X64-NEXT: # xmm2 = mem[0],zero,zero,zero
505 ; X64-NEXT: callq fmaf@PLT
506 ; X64-NEXT: callq __truncsfhf2@PLT
507 ; X64-NEXT: pextrw $0, %xmm0, %eax
508 ; X64-NEXT: movw %ax, (%rbx)
509 ; X64-NEXT: addq $16, %rsp
510 ; X64-NEXT: popq %rbx
513 ; X86-LABEL: test_half_fma:
515 ; X86-NEXT: pushl %esi
516 ; X86-NEXT: subl $72, %esp
517 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
518 ; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
519 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
520 ; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
521 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
522 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
523 ; X86-NEXT: pextrw $0, %xmm0, %eax
524 ; X86-NEXT: movw %ax, (%esp)
525 ; X86-NEXT: calll __extendhfsf2
526 ; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
527 ; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
528 ; X86-NEXT: pextrw $0, %xmm0, %eax
529 ; X86-NEXT: movw %ax, (%esp)
530 ; X86-NEXT: calll __extendhfsf2
531 ; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
532 ; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
533 ; X86-NEXT: pextrw $0, %xmm0, %eax
534 ; X86-NEXT: movw %ax, (%esp)
535 ; X86-NEXT: calll __extendhfsf2
536 ; X86-NEXT: fstps {{[0-9]+}}(%esp)
537 ; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
538 ; X86-NEXT: fstps {{[0-9]+}}(%esp)
539 ; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
540 ; X86-NEXT: fstps (%esp)
541 ; X86-NEXT: calll fmaf
542 ; X86-NEXT: fstps (%esp)
543 ; X86-NEXT: calll __truncsfhf2
544 ; X86-NEXT: pextrw $0, %xmm0, %eax
545 ; X86-NEXT: movw %ax, (%esi)
546 ; X86-NEXT: addl $72, %esp
547 ; X86-NEXT: popl %esi
549 %res = call half @llvm.fma.half(half %a0, half %a1, half %a2)
550 store half %res, ptr %p0, align 2
554 define void @test_half_fneg(half %a0, ptr %p0) nounwind {
555 ; F16C-LABEL: test_half_fneg:
557 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
558 ; F16C-NEXT: vmovd %eax, %xmm0
559 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
560 ; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
561 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
562 ; F16C-NEXT: vmovd %xmm0, %eax
563 ; F16C-NEXT: movw %ax, (%rdi)
566 ; FP16-LABEL: test_half_fneg:
568 ; FP16-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
569 ; FP16-NEXT: vpxor %xmm1, %xmm0, %xmm0
570 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
573 ; X64-LABEL: test_half_fneg:
575 ; X64-NEXT: pushq %rbx
576 ; X64-NEXT: movq %rdi, %rbx
577 ; X64-NEXT: callq __extendhfsf2@PLT
578 ; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
579 ; X64-NEXT: callq __truncsfhf2@PLT
580 ; X64-NEXT: pextrw $0, %xmm0, %eax
581 ; X64-NEXT: movw %ax, (%rbx)
582 ; X64-NEXT: popq %rbx
585 ; X86-LABEL: test_half_fneg:
587 ; X86-NEXT: pushl %esi
588 ; X86-NEXT: subl $8, %esp
589 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
590 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
591 ; X86-NEXT: pextrw $0, %xmm0, %eax
592 ; X86-NEXT: movw %ax, (%esp)
593 ; X86-NEXT: calll __extendhfsf2
594 ; X86-NEXT: fstps {{[0-9]+}}(%esp)
595 ; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
596 ; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
597 ; X86-NEXT: movd %xmm0, (%esp)
598 ; X86-NEXT: calll __truncsfhf2
599 ; X86-NEXT: pextrw $0, %xmm0, %eax
600 ; X86-NEXT: movw %ax, (%esi)
601 ; X86-NEXT: addl $8, %esp
602 ; X86-NEXT: popl %esi
605 store half %res, ptr %p0, align 2
609 define void @test_half_log(half %a0, ptr %p0) nounwind {
610 ; F16C-LABEL: test_half_log:
612 ; F16C-NEXT: pushq %rbx
613 ; F16C-NEXT: movq %rdi, %rbx
614 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
615 ; F16C-NEXT: vmovd %eax, %xmm0
616 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
617 ; F16C-NEXT: callq logf@PLT
618 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
619 ; F16C-NEXT: vmovd %xmm0, %eax
620 ; F16C-NEXT: movw %ax, (%rbx)
621 ; F16C-NEXT: popq %rbx
624 ; FP16-LABEL: test_half_log:
626 ; FP16-NEXT: pushq %rbx
627 ; FP16-NEXT: movq %rdi, %rbx
628 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
629 ; FP16-NEXT: callq logf@PLT
630 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
631 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
632 ; FP16-NEXT: popq %rbx
635 ; X64-LABEL: test_half_log:
637 ; X64-NEXT: pushq %rbx
638 ; X64-NEXT: movq %rdi, %rbx
639 ; X64-NEXT: callq __extendhfsf2@PLT
640 ; X64-NEXT: callq logf@PLT
641 ; X64-NEXT: callq __truncsfhf2@PLT
642 ; X64-NEXT: pextrw $0, %xmm0, %eax
643 ; X64-NEXT: movw %ax, (%rbx)
644 ; X64-NEXT: popq %rbx
647 ; X86-LABEL: test_half_log:
649 ; X86-NEXT: pushl %esi
650 ; X86-NEXT: subl $8, %esp
651 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
652 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
653 ; X86-NEXT: pextrw $0, %xmm0, %eax
654 ; X86-NEXT: movw %ax, (%esp)
655 ; X86-NEXT: calll __extendhfsf2
656 ; X86-NEXT: fstps (%esp)
657 ; X86-NEXT: calll logf
658 ; X86-NEXT: fstps (%esp)
659 ; X86-NEXT: calll __truncsfhf2
660 ; X86-NEXT: pextrw $0, %xmm0, %eax
661 ; X86-NEXT: movw %ax, (%esi)
662 ; X86-NEXT: addl $8, %esp
663 ; X86-NEXT: popl %esi
665 %res = call half @llvm.log.half(half %a0)
666 store half %res, ptr %p0, align 2
670 define void @test_half_log2(half %a0, ptr %p0) nounwind {
671 ; F16C-LABEL: test_half_log2:
673 ; F16C-NEXT: pushq %rbx
674 ; F16C-NEXT: movq %rdi, %rbx
675 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
676 ; F16C-NEXT: vmovd %eax, %xmm0
677 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
678 ; F16C-NEXT: callq log2f@PLT
679 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
680 ; F16C-NEXT: vmovd %xmm0, %eax
681 ; F16C-NEXT: movw %ax, (%rbx)
682 ; F16C-NEXT: popq %rbx
685 ; FP16-LABEL: test_half_log2:
687 ; FP16-NEXT: pushq %rbx
688 ; FP16-NEXT: movq %rdi, %rbx
689 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
690 ; FP16-NEXT: callq log2f@PLT
691 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
692 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
693 ; FP16-NEXT: popq %rbx
696 ; X64-LABEL: test_half_log2:
698 ; X64-NEXT: pushq %rbx
699 ; X64-NEXT: movq %rdi, %rbx
700 ; X64-NEXT: callq __extendhfsf2@PLT
701 ; X64-NEXT: callq log2f@PLT
702 ; X64-NEXT: callq __truncsfhf2@PLT
703 ; X64-NEXT: pextrw $0, %xmm0, %eax
704 ; X64-NEXT: movw %ax, (%rbx)
705 ; X64-NEXT: popq %rbx
708 ; X86-LABEL: test_half_log2:
710 ; X86-NEXT: pushl %esi
711 ; X86-NEXT: subl $8, %esp
712 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
713 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
714 ; X86-NEXT: pextrw $0, %xmm0, %eax
715 ; X86-NEXT: movw %ax, (%esp)
716 ; X86-NEXT: calll __extendhfsf2
717 ; X86-NEXT: fstps (%esp)
718 ; X86-NEXT: calll log2f
719 ; X86-NEXT: fstps (%esp)
720 ; X86-NEXT: calll __truncsfhf2
721 ; X86-NEXT: pextrw $0, %xmm0, %eax
722 ; X86-NEXT: movw %ax, (%esi)
723 ; X86-NEXT: addl $8, %esp
724 ; X86-NEXT: popl %esi
726 %res = call half @llvm.log2.half(half %a0)
727 store half %res, ptr %p0, align 2
731 define void @test_half_log10(half %a0, ptr %p0) nounwind {
732 ; F16C-LABEL: test_half_log10:
734 ; F16C-NEXT: pushq %rbx
735 ; F16C-NEXT: movq %rdi, %rbx
736 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
737 ; F16C-NEXT: vmovd %eax, %xmm0
738 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
739 ; F16C-NEXT: callq log10f@PLT
740 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
741 ; F16C-NEXT: vmovd %xmm0, %eax
742 ; F16C-NEXT: movw %ax, (%rbx)
743 ; F16C-NEXT: popq %rbx
746 ; FP16-LABEL: test_half_log10:
748 ; FP16-NEXT: pushq %rbx
749 ; FP16-NEXT: movq %rdi, %rbx
750 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
751 ; FP16-NEXT: callq log10f@PLT
752 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
753 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
754 ; FP16-NEXT: popq %rbx
757 ; X64-LABEL: test_half_log10:
759 ; X64-NEXT: pushq %rbx
760 ; X64-NEXT: movq %rdi, %rbx
761 ; X64-NEXT: callq __extendhfsf2@PLT
762 ; X64-NEXT: callq log10f@PLT
763 ; X64-NEXT: callq __truncsfhf2@PLT
764 ; X64-NEXT: pextrw $0, %xmm0, %eax
765 ; X64-NEXT: movw %ax, (%rbx)
766 ; X64-NEXT: popq %rbx
769 ; X86-LABEL: test_half_log10:
771 ; X86-NEXT: pushl %esi
772 ; X86-NEXT: subl $8, %esp
773 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
774 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
775 ; X86-NEXT: pextrw $0, %xmm0, %eax
776 ; X86-NEXT: movw %ax, (%esp)
777 ; X86-NEXT: calll __extendhfsf2
778 ; X86-NEXT: fstps (%esp)
779 ; X86-NEXT: calll log10f
780 ; X86-NEXT: fstps (%esp)
781 ; X86-NEXT: calll __truncsfhf2
782 ; X86-NEXT: pextrw $0, %xmm0, %eax
783 ; X86-NEXT: movw %ax, (%esi)
784 ; X86-NEXT: addl $8, %esp
785 ; X86-NEXT: popl %esi
787 %res = call half @llvm.log10.half(half %a0)
788 store half %res, ptr %p0, align 2
792 define void @test_half_nearbyint(half %a0, ptr %p0) nounwind {
793 ; F16C-LABEL: test_half_nearbyint:
795 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
796 ; F16C-NEXT: vmovd %eax, %xmm0
797 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
798 ; F16C-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
799 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
800 ; F16C-NEXT: vmovd %xmm0, %eax
801 ; F16C-NEXT: movw %ax, (%rdi)
804 ; FP16-LABEL: test_half_nearbyint:
806 ; FP16-NEXT: vrndscalesh $12, %xmm0, %xmm0, %xmm0
807 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
810 ; X64-LABEL: test_half_nearbyint:
812 ; X64-NEXT: pushq %rbx
813 ; X64-NEXT: movq %rdi, %rbx
814 ; X64-NEXT: callq __extendhfsf2@PLT
815 ; X64-NEXT: callq nearbyintf@PLT
816 ; X64-NEXT: callq __truncsfhf2@PLT
817 ; X64-NEXT: pextrw $0, %xmm0, %eax
818 ; X64-NEXT: movw %ax, (%rbx)
819 ; X64-NEXT: popq %rbx
822 ; X86-LABEL: test_half_nearbyint:
824 ; X86-NEXT: pushl %esi
825 ; X86-NEXT: subl $8, %esp
826 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
827 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
828 ; X86-NEXT: pextrw $0, %xmm0, %eax
829 ; X86-NEXT: movw %ax, (%esp)
830 ; X86-NEXT: calll __extendhfsf2
831 ; X86-NEXT: fstps (%esp)
832 ; X86-NEXT: calll nearbyintf
833 ; X86-NEXT: fstps (%esp)
834 ; X86-NEXT: calll __truncsfhf2
835 ; X86-NEXT: pextrw $0, %xmm0, %eax
836 ; X86-NEXT: movw %ax, (%esi)
837 ; X86-NEXT: addl $8, %esp
838 ; X86-NEXT: popl %esi
840 %res = call half @llvm.nearbyint.half(half %a0)
841 store half %res, ptr %p0, align 2
845 define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind {
846 ; F16C-LABEL: test_half_pow:
848 ; F16C-NEXT: pushq %rbx
849 ; F16C-NEXT: movq %rdi, %rbx
850 ; F16C-NEXT: vpextrw $0, %xmm1, %eax
851 ; F16C-NEXT: vpextrw $0, %xmm0, %ecx
852 ; F16C-NEXT: vmovd %ecx, %xmm0
853 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
854 ; F16C-NEXT: vmovd %eax, %xmm1
855 ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
856 ; F16C-NEXT: callq powf@PLT
857 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
858 ; F16C-NEXT: vmovd %xmm0, %eax
859 ; F16C-NEXT: movw %ax, (%rbx)
860 ; F16C-NEXT: popq %rbx
863 ; FP16-LABEL: test_half_pow:
865 ; FP16-NEXT: pushq %rbx
866 ; FP16-NEXT: movq %rdi, %rbx
867 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
868 ; FP16-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
869 ; FP16-NEXT: callq powf@PLT
870 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
871 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
872 ; FP16-NEXT: popq %rbx
875 ; X64-LABEL: test_half_pow:
877 ; X64-NEXT: pushq %rbx
878 ; X64-NEXT: subq $16, %rsp
879 ; X64-NEXT: movq %rdi, %rbx
880 ; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
881 ; X64-NEXT: movaps %xmm1, %xmm0
882 ; X64-NEXT: callq __extendhfsf2@PLT
883 ; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
884 ; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
885 ; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
886 ; X64-NEXT: callq __extendhfsf2@PLT
887 ; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
888 ; X64-NEXT: # xmm1 = mem[0],zero,zero,zero
889 ; X64-NEXT: callq powf@PLT
890 ; X64-NEXT: callq __truncsfhf2@PLT
891 ; X64-NEXT: pextrw $0, %xmm0, %eax
892 ; X64-NEXT: movw %ax, (%rbx)
893 ; X64-NEXT: addq $16, %rsp
894 ; X64-NEXT: popq %rbx
897 ; X86-LABEL: test_half_pow:
899 ; X86-NEXT: pushl %esi
900 ; X86-NEXT: subl $56, %esp
901 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
902 ; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
903 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
904 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
905 ; X86-NEXT: pextrw $0, %xmm0, %eax
906 ; X86-NEXT: movw %ax, (%esp)
907 ; X86-NEXT: calll __extendhfsf2
908 ; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
909 ; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
910 ; X86-NEXT: pextrw $0, %xmm0, %eax
911 ; X86-NEXT: movw %ax, (%esp)
912 ; X86-NEXT: calll __extendhfsf2
913 ; X86-NEXT: fstps {{[0-9]+}}(%esp)
914 ; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
915 ; X86-NEXT: fstps (%esp)
916 ; X86-NEXT: calll powf
917 ; X86-NEXT: fstps (%esp)
918 ; X86-NEXT: calll __truncsfhf2
919 ; X86-NEXT: pextrw $0, %xmm0, %eax
920 ; X86-NEXT: movw %ax, (%esi)
921 ; X86-NEXT: addl $56, %esp
922 ; X86-NEXT: popl %esi
924 %res = call half @llvm.pow.half(half %a0, half %a1)
925 store half %res, ptr %p0, align 2
929 define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
930 ; F16C-LABEL: test_half_powi:
932 ; F16C-NEXT: pushq %rbx
933 ; F16C-NEXT: movq %rsi, %rbx
934 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
935 ; F16C-NEXT: vmovd %eax, %xmm0
936 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
937 ; F16C-NEXT: callq __powisf2@PLT
938 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
939 ; F16C-NEXT: vmovd %xmm0, %eax
940 ; F16C-NEXT: movw %ax, (%rbx)
941 ; F16C-NEXT: popq %rbx
944 ; FP16-LABEL: test_half_powi:
946 ; FP16-NEXT: pushq %rbx
947 ; FP16-NEXT: movq %rsi, %rbx
948 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
949 ; FP16-NEXT: callq __powisf2@PLT
950 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
951 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
952 ; FP16-NEXT: popq %rbx
955 ; X64-LABEL: test_half_powi:
957 ; X64-NEXT: pushq %rbp
958 ; X64-NEXT: pushq %rbx
959 ; X64-NEXT: pushq %rax
960 ; X64-NEXT: movq %rsi, %rbx
961 ; X64-NEXT: movl %edi, %ebp
962 ; X64-NEXT: callq __extendhfsf2@PLT
963 ; X64-NEXT: movl %ebp, %edi
964 ; X64-NEXT: callq __powisf2@PLT
965 ; X64-NEXT: callq __truncsfhf2@PLT
966 ; X64-NEXT: pextrw $0, %xmm0, %eax
967 ; X64-NEXT: movw %ax, (%rbx)
968 ; X64-NEXT: addq $8, %rsp
969 ; X64-NEXT: popq %rbx
970 ; X64-NEXT: popq %rbp
973 ; X86-LABEL: test_half_powi:
975 ; X86-NEXT: pushl %edi
976 ; X86-NEXT: pushl %esi
977 ; X86-NEXT: subl $20, %esp
978 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
979 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
980 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
981 ; X86-NEXT: pextrw $0, %xmm0, %eax
982 ; X86-NEXT: movw %ax, (%esp)
983 ; X86-NEXT: calll __extendhfsf2
984 ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
985 ; X86-NEXT: fstps (%esp)
986 ; X86-NEXT: calll __powisf2
987 ; X86-NEXT: fstps (%esp)
988 ; X86-NEXT: calll __truncsfhf2
989 ; X86-NEXT: pextrw $0, %xmm0, %eax
990 ; X86-NEXT: movw %ax, (%esi)
991 ; X86-NEXT: addl $20, %esp
992 ; X86-NEXT: popl %esi
993 ; X86-NEXT: popl %edi
995 %res = call half @llvm.powi.half(half %a0, i32 %a1)
996 store half %res, ptr %p0, align 2
1000 define void @test_half_rint(half %a0, ptr %p0) nounwind {
1001 ; F16C-LABEL: test_half_rint:
1003 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
1004 ; F16C-NEXT: vmovd %eax, %xmm0
1005 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1006 ; F16C-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
1007 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1008 ; F16C-NEXT: vmovd %xmm0, %eax
1009 ; F16C-NEXT: movw %ax, (%rdi)
1012 ; FP16-LABEL: test_half_rint:
1014 ; FP16-NEXT: vrndscalesh $4, %xmm0, %xmm0, %xmm0
1015 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
1018 ; X64-LABEL: test_half_rint:
1020 ; X64-NEXT: pushq %rbx
1021 ; X64-NEXT: movq %rdi, %rbx
1022 ; X64-NEXT: callq __extendhfsf2@PLT
1023 ; X64-NEXT: callq rintf@PLT
1024 ; X64-NEXT: callq __truncsfhf2@PLT
1025 ; X64-NEXT: pextrw $0, %xmm0, %eax
1026 ; X64-NEXT: movw %ax, (%rbx)
1027 ; X64-NEXT: popq %rbx
1030 ; X86-LABEL: test_half_rint:
1032 ; X86-NEXT: pushl %esi
1033 ; X86-NEXT: subl $8, %esp
1034 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1035 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1036 ; X86-NEXT: pextrw $0, %xmm0, %eax
1037 ; X86-NEXT: movw %ax, (%esp)
1038 ; X86-NEXT: calll __extendhfsf2
1039 ; X86-NEXT: fstps (%esp)
1040 ; X86-NEXT: calll rintf
1041 ; X86-NEXT: fstps (%esp)
1042 ; X86-NEXT: calll __truncsfhf2
1043 ; X86-NEXT: pextrw $0, %xmm0, %eax
1044 ; X86-NEXT: movw %ax, (%esi)
1045 ; X86-NEXT: addl $8, %esp
1046 ; X86-NEXT: popl %esi
1048 %res = call half @llvm.rint.half(half %a0)
1049 store half %res, ptr %p0, align 2
; llvm.sin.half has no native lowering on any of the tested configurations,
; so it is always softened to a sinf() float libcall. The CHECK lines below
; pin the expected shape per target:
;  - F16C:  extend half->float via vpextrw/vmovd/vcvtph2ps, call sinf,
;           truncate back with vcvtps2ph, store the 16-bit result.
;  - FP16:  extend/truncate with the scalar vcvtsh2ss/vcvtss2sh forms
;           around the sinf call; store with vmovsh.
;  - X64:   no F16C, so the half<->float conversions themselves become
;           libcalls (__extendhfsf2 / __truncsfhf2) around sinf.
;  - X86:   32-bit cdecl: argument passed on the stack, x87 return spilled
;           with fstps between the __extendhfsf2 / sinf / __truncsfhf2 calls.
; The pointer argument is preserved across the calls in a callee-saved
; register (rbx / esi) so the result can be stored afterwards.
1053 define void @test_half_sin(half %a0, ptr %p0) nounwind {
1054 ; F16C-LABEL: test_half_sin:
1056 ; F16C-NEXT: pushq %rbx
1057 ; F16C-NEXT: movq %rdi, %rbx
1058 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
1059 ; F16C-NEXT: vmovd %eax, %xmm0
1060 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1061 ; F16C-NEXT: callq sinf@PLT
1062 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1063 ; F16C-NEXT: vmovd %xmm0, %eax
1064 ; F16C-NEXT: movw %ax, (%rbx)
1065 ; F16C-NEXT: popq %rbx
1068 ; FP16-LABEL: test_half_sin:
1070 ; FP16-NEXT: pushq %rbx
1071 ; FP16-NEXT: movq %rdi, %rbx
1072 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
1073 ; FP16-NEXT: callq sinf@PLT
1074 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
1075 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
1076 ; FP16-NEXT: popq %rbx
1079 ; X64-LABEL: test_half_sin:
1081 ; X64-NEXT: pushq %rbx
1082 ; X64-NEXT: movq %rdi, %rbx
1083 ; X64-NEXT: callq __extendhfsf2@PLT
1084 ; X64-NEXT: callq sinf@PLT
1085 ; X64-NEXT: callq __truncsfhf2@PLT
1086 ; X64-NEXT: pextrw $0, %xmm0, %eax
1087 ; X64-NEXT: movw %ax, (%rbx)
1088 ; X64-NEXT: popq %rbx
1091 ; X86-LABEL: test_half_sin:
1093 ; X86-NEXT: pushl %esi
1094 ; X86-NEXT: subl $8, %esp
1095 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1096 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1097 ; X86-NEXT: pextrw $0, %xmm0, %eax
1098 ; X86-NEXT: movw %ax, (%esp)
1099 ; X86-NEXT: calll __extendhfsf2
1100 ; X86-NEXT: fstps (%esp)
1101 ; X86-NEXT: calll sinf
1102 ; X86-NEXT: fstps (%esp)
1103 ; X86-NEXT: calll __truncsfhf2
1104 ; X86-NEXT: pextrw $0, %xmm0, %eax
1105 ; X86-NEXT: movw %ax, (%esi)
1106 ; X86-NEXT: addl $8, %esp
1107 ; X86-NEXT: popl %esi
1109 %res = call half @llvm.sin.half(half %a0)
1110 store half %res, ptr %p0, align 2
; Unlike the trig cases, llvm.sqrt.half never needs a sqrtf() libcall:
; every tested configuration has a hardware sqrt instruction. Only the
; half<->float conversions differ per target:
;  - F16C:  vcvtph2ps / vsqrtss / vcvtps2ph, all inline — no calls, so no
;           stack frame is needed and the store goes straight to (%rdi).
;  - FP16:  a single native vsqrtsh on the half value; shortest form.
;  - X64:   conversion libcalls (__extendhfsf2 / __truncsfhf2) surround an
;           inline sqrtss on the float value.
;  - X86:   same, but the x87 return of __extendhfsf2 is spilled (fstps)
;           and reloaded into %xmm0 (movss) so SSE sqrtss can be used.
1114 define void @test_half_sqrt(half %a0, ptr %p0) nounwind {
1115 ; F16C-LABEL: test_half_sqrt:
1117 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
1118 ; F16C-NEXT: vmovd %eax, %xmm0
1119 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1120 ; F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
1121 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1122 ; F16C-NEXT: vmovd %xmm0, %eax
1123 ; F16C-NEXT: movw %ax, (%rdi)
1126 ; FP16-LABEL: test_half_sqrt:
1128 ; FP16-NEXT: vsqrtsh %xmm0, %xmm0, %xmm0
1129 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
1132 ; X64-LABEL: test_half_sqrt:
1134 ; X64-NEXT: pushq %rbx
1135 ; X64-NEXT: movq %rdi, %rbx
1136 ; X64-NEXT: callq __extendhfsf2@PLT
1137 ; X64-NEXT: sqrtss %xmm0, %xmm0
1138 ; X64-NEXT: callq __truncsfhf2@PLT
1139 ; X64-NEXT: pextrw $0, %xmm0, %eax
1140 ; X64-NEXT: movw %ax, (%rbx)
1141 ; X64-NEXT: popq %rbx
1144 ; X86-LABEL: test_half_sqrt:
1146 ; X86-NEXT: pushl %esi
1147 ; X86-NEXT: subl $8, %esp
1148 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1149 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1150 ; X86-NEXT: pextrw $0, %xmm0, %eax
1151 ; X86-NEXT: movw %ax, (%esp)
1152 ; X86-NEXT: calll __extendhfsf2
1153 ; X86-NEXT: fstps {{[0-9]+}}(%esp)
1154 ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1155 ; X86-NEXT: sqrtss %xmm0, %xmm0
1156 ; X86-NEXT: movss %xmm0, (%esp)
1157 ; X86-NEXT: calll __truncsfhf2
1158 ; X86-NEXT: pextrw $0, %xmm0, %eax
1159 ; X86-NEXT: movw %ax, (%esi)
1160 ; X86-NEXT: addl $8, %esp
1161 ; X86-NEXT: popl %esi
1163 %res = call half @llvm.sqrt.half(half %a0)
1164 store half %res, ptr %p0, align 2
; llvm.tan.half follows the same soft-float pattern as sin: there is no
; native tan instruction on any configuration, so all four targets emit a
; tanf() float libcall, differing only in how the half argument/result is
; converted:
;  - F16C:  inline vcvtph2ps before and vcvtps2ph after the tanf call.
;  - FP16:  inline scalar vcvtsh2ss / vcvtss2sh around the tanf call.
;  - X64:   conversion libcalls __extendhfsf2 / __truncsfhf2 around tanf.
;  - X86:   32-bit cdecl stack calls with fstps spills of the x87 returns.
; The destination pointer survives the calls in %rbx / %esi.
1168 define void @test_half_tan(half %a0, ptr %p0) nounwind {
1169 ; F16C-LABEL: test_half_tan:
1171 ; F16C-NEXT: pushq %rbx
1172 ; F16C-NEXT: movq %rdi, %rbx
1173 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
1174 ; F16C-NEXT: vmovd %eax, %xmm0
1175 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1176 ; F16C-NEXT: callq tanf@PLT
1177 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1178 ; F16C-NEXT: vmovd %xmm0, %eax
1179 ; F16C-NEXT: movw %ax, (%rbx)
1180 ; F16C-NEXT: popq %rbx
1183 ; FP16-LABEL: test_half_tan:
1185 ; FP16-NEXT: pushq %rbx
1186 ; FP16-NEXT: movq %rdi, %rbx
1187 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
1188 ; FP16-NEXT: callq tanf@PLT
1189 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
1190 ; FP16-NEXT: vmovsh %xmm0, (%rbx)
1191 ; FP16-NEXT: popq %rbx
1194 ; X64-LABEL: test_half_tan:
1196 ; X64-NEXT: pushq %rbx
1197 ; X64-NEXT: movq %rdi, %rbx
1198 ; X64-NEXT: callq __extendhfsf2@PLT
1199 ; X64-NEXT: callq tanf@PLT
1200 ; X64-NEXT: callq __truncsfhf2@PLT
1201 ; X64-NEXT: pextrw $0, %xmm0, %eax
1202 ; X64-NEXT: movw %ax, (%rbx)
1203 ; X64-NEXT: popq %rbx
1206 ; X86-LABEL: test_half_tan:
1208 ; X86-NEXT: pushl %esi
1209 ; X86-NEXT: subl $8, %esp
1210 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1211 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1212 ; X86-NEXT: pextrw $0, %xmm0, %eax
1213 ; X86-NEXT: movw %ax, (%esp)
1214 ; X86-NEXT: calll __extendhfsf2
1215 ; X86-NEXT: fstps (%esp)
1216 ; X86-NEXT: calll tanf
1217 ; X86-NEXT: fstps (%esp)
1218 ; X86-NEXT: calll __truncsfhf2
1219 ; X86-NEXT: pextrw $0, %xmm0, %eax
1220 ; X86-NEXT: movw %ax, (%esi)
1221 ; X86-NEXT: addl $8, %esp
1222 ; X86-NEXT: popl %esi
1224 %res = call half @llvm.tan.half(half %a0)
1225 store half %res, ptr %p0, align 2
1229 define void @test_half_trunc(half %a0, ptr %p0) nounwind {
1230 ; F16C-LABEL: test_half_trunc:
1232 ; F16C-NEXT: vpextrw $0, %xmm0, %eax
1233 ; F16C-NEXT: vmovd %eax, %xmm0
1234 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
1235 ; F16C-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
1236 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1237 ; F16C-NEXT: vmovd %xmm0, %eax
1238 ; F16C-NEXT: movw %ax, (%rdi)
1241 ; FP16-LABEL: test_half_trunc:
1243 ; FP16-NEXT: vrndscalesh $11, %xmm0, %xmm0, %xmm0
1244 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
1247 ; X64-LABEL: test_half_trunc:
1249 ; X64-NEXT: pushq %rbx
1250 ; X64-NEXT: movq %rdi, %rbx
1251 ; X64-NEXT: callq __extendhfsf2@PLT
1252 ; X64-NEXT: callq truncf@PLT
1253 ; X64-NEXT: callq __truncsfhf2@PLT
1254 ; X64-NEXT: pextrw $0, %xmm0, %eax
1255 ; X64-NEXT: movw %ax, (%rbx)
1256 ; X64-NEXT: popq %rbx
1259 ; X86-LABEL: test_half_trunc:
1261 ; X86-NEXT: pushl %esi
1262 ; X86-NEXT: subl $8, %esp
1263 ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1264 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1265 ; X86-NEXT: pextrw $0, %xmm0, %eax
1266 ; X86-NEXT: movw %ax, (%esp)
1267 ; X86-NEXT: calll __extendhfsf2
1268 ; X86-NEXT: fstps (%esp)
1269 ; X86-NEXT: calll truncf
1270 ; X86-NEXT: fstps (%esp)
1271 ; X86-NEXT: calll __truncsfhf2
1272 ; X86-NEXT: pextrw $0, %xmm0, %eax
1273 ; X86-NEXT: movw %ax, (%esi)
1274 ; X86-NEXT: addl $8, %esp
1275 ; X86-NEXT: popl %esi
1277 %res = call half @llvm.trunc.half(half %a0)
1278 store half %res, ptr %p0, align 2