/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*	  All Rights Reserved						*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*	  All Rights Reserved						*/

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#if defined(__lint)
#include <sys/types.h>
#include <sys/fp.h>
#else
#include "assym.h"
#endif
#if defined(__lint)

uint_t
fpu_initial_probe(void)
{ return (0); }

#else	/* __lint */

/*
 * Returns zero if x87 "chip" is present(!)
 */
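/*
 * Mechanism, briefly: fninit puts a present x87 into its initial state,
 * so the following fnstsw read-back should be zero.  If no coprocessor
 * responds, the status read does not come back as zero (on old hardware
 * the bus value effectively floats), so a nonzero low byte is taken to
 * mean "no x87".
 */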
	ENTRY_NP(fpu_initial_probe)
	CLTS
	fninit
	fnstsw	%ax
	movzbl	%al, %eax
	ret
	SET_SIZE(fpu_initial_probe)

#endif	/* __lint */
#if defined(__lint)

/*ARGSUSED*/
void
fxsave_insn(struct fxsave_state *fx)
{}

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fxsave_insn)
	FXSAVEQ	((%rdi))
	ret
	SET_SIZE(fxsave_insn)

#elif defined(__i386)

	ENTRY_NP(fxsave_insn)
	movl	4(%esp), %eax
	fxsave	(%eax)
	ret
	SET_SIZE(fxsave_insn)

#endif

#endif	/* __lint */
#if defined(__i386)

/*
 * If (num1/num2 > num1/num3) the FPU has the FDIV bug.
 */
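/*
 * Worked through with the constants below: num1/num2 = 4.999999/15.0
 * and num1/num3 = 4.999999/14.999999 differ only around the eighth
 * decimal digit, with num1/num2 strictly smaller on a correct divider.
 * The flawed Pentium FDIV unit can be wrong as early as the fourth
 * significant digit for susceptible operands, which is more than
 * enough to invert the comparison made below.
 */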
#if defined(__lint)

uint_t
fpu_probe_pentium_fdivbug(void)
{ return (0); }

#else	/* __lint */
	ENTRY_NP(fpu_probe_pentium_fdivbug)
	fldl	.num1
	fldl	.num2
	fdivr	%st(1), %st
	fxch	%st(1)
	fdivl	.num3
	fcompp
	fstsw	%ax
	sahf
	jae	0f
	movl	$1, %eax
	ret

0:	xorl	%eax, %eax
	ret

	.align	4
.num1:	.4byte	0xbce4217d	/* 4.999999 */
	.4byte	0x4013ffff
.num2:	.4byte	0x0		/* 15.0 */
	.4byte	0x402e0000
.num3:	.4byte	0xde7210bf	/* 14.999999 */
	.4byte	0x402dffff
	SET_SIZE(fpu_probe_pentium_fdivbug)

#endif	/* __lint */
/*
 * To cope with processors that do not implement fxsave/fxrstor
 * instructions, patch hot paths in the kernel to use them only
 * when that feature has been detected.
 */
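/*
 * In rough C terms, each _HOT_PATCH expansion below behaves like this
 * sketch (patch_bytes() is illustrative only; hot_patch_kernel_text()
 * is the real kernel routine that rewrites kernel text):
 *
 *	extern void hot_patch_kernel_text(caddr_t, uint32_t, uint_t);
 *
 *	static void
 *	patch_bytes(caddr_t dst, const uchar_t *src, uint_t len)
 *	{
 *		uint_t i;
 *
 *		for (i = 0; i < len; i++)
 *			hot_patch_kernel_text(dst + i, src[i], 1);
 *	}
 *
 * The amd64 patch_xsave() further down open-codes exactly this loop.
 */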
#if defined(__lint)

void
patch_sse(void)
{}

void
patch_sse2(void)
{}

void
patch_xsave(void)
{}

#else	/* __lint */
	ENTRY_NP(patch_sse)
	_HOT_PATCH_PROLOG
	/ frstor (%ebx); nop -> fxrstor (%ebx)
	_HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
	/ lock; xorl $0, (%esp) -> sfence; ret
	_HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
	_HOT_PATCH_EPILOG
	ret
_fxrstor_ebx_insn:			/ see ndptrap_frstor()
	fxrstor	(%ebx)
_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
	ldmxcsr	(%ebx)
_sfence_ret_insn:			/ see membar_producer()
	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
	ret
	SET_SIZE(patch_sse)

	ENTRY_NP(patch_sse2)
	_HOT_PATCH_PROLOG
	/ lock; xorl $0, (%esp) -> lfence; ret
	_HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
	_HOT_PATCH_EPILOG
	ret
_lfence_ret_insn:			/ see membar_consumer()
	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
	ret
	SET_SIZE(patch_sse2)
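/*
 * The sfence/lfence patches above exist because, once SSE/SSE2 are
 * known to be present, a plain fence is a cheaper memory barrier for
 * membar_producer()/membar_consumer() than the locked read-modify-write
 * ("lock; xorl $0, (%esp)") that the unpatched code falls back on.
 */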
/*
 * Patch lazy fp restore instructions in the trap handler
 * to use xrstor instead of frstor.
 */
	ENTRY_NP(patch_xsave)
	_HOT_PATCH_PROLOG
	/ frstor (%ebx); nop -> xrstor (%ebx)
	_HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3)
	_HOT_PATCH_EPILOG
	ret
_xrstor_ebx_insn:			/ see ndptrap_frstor()
	#xrstor (%ebx)
	.byte	0x0f, 0xae, 0x2b
	SET_SIZE(patch_xsave)

#endif	/* __lint */
#endif	/* __i386 */
#if defined(__amd64)
#if defined(__lint)

void
patch_xsave(void)
{}

#else	/* __lint */
/*
 * Patch lazy fp restore instructions in the trap handler
 * to use xrstor instead of fxrstorq.
 */
	ENTRY_NP(patch_xsave)
	pushq	%rbx
	pushq	%rbp
	pushq	%r15

	/ FXRSTORQ (%rbx); -> xrstor (%rbx)
	/ hot_patch(_xrstor_rbx_insn, _patch_xrstorq_rbx, 4)

	leaq	_patch_xrstorq_rbx(%rip), %rbx
	leaq	_xrstor_rbx_insn(%rip), %rbp
	movq	$4, %r15
1:
	movq	%rbx, %rdi		/* patch address */
	movzbq	(%rbp), %rsi		/* instruction byte */
	movq	$1, %rdx		/* count */
	call	hot_patch_kernel_text
	addq	$1, %rbx
	addq	$1, %rbp
	subq	$1, %r15
	jnz	1b

	popq	%r15
	popq	%rbp
	popq	%rbx
	ret
_xrstor_rbx_insn:			/ see ndptrap_frstor()
	#rex.W=1 (.byte 0x48)
	#xrstor (%rbx)
	.byte	0x48, 0x0f, 0xae, 0x2b
	SET_SIZE(patch_xsave)

#endif	/* __lint */
#endif	/* __amd64 */
/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch.
 */
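/*
 * In outline, each save routine implements the following (a sketch
 * only; the C names mirror the assym offsets used in the assembly):
 *
 *	void
 *	fp_save_ctxt(struct fpu_ctx *fp)
 *	{
 *		if (fp->fpu_flags != FPU_EN)
 *			return;			/* nothing live to save */
 *		fp->fpu_flags = FPU_VALID | FPU_EN;
 *		save_state(&fp->fpu_regs);	/* fnsave/fxsave/xsave */
 *		setts();			/* CR0.TS: next use traps */
 *	}
 *
 * fp_save_ctxt(), save_state() and setts() are illustrative names; the
 * real code uses the STTS() macro and the per-mechanism save
 * instruction directly.
 */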
#if defined(__lint)

/*ARGSUSED*/
void
xsave_ctxt(void *arg)
{}

/*ARGSUSED*/
void
fpxsave_ctxt(void *arg)
{}

/*ARGSUSED*/
void
fpnsave_ctxt(void *arg)
{}

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fpxsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	FXSAVEQ	(FPU_CTX_FPU_REGS(%rdi))

	/*
	 * On certain AMD processors, the "exception pointers" (i.e. the last
	 * instruction pointer, last data pointer, and last opcode)
	 * are saved by the fxsave instruction ONLY if the exception summary
	 * bit is set.
	 *
	 * To ensure that we don't leak these values into the next context
	 * on the cpu, we could just issue an fninit here, but that's
	 * rather slow and so we issue an instruction sequence that
	 * clears them more quickly, if a little obscurely.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)
	ENTRY_NP(xsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)

	/*
	 * Set up the xsave component mask in EDX:EAX; xsave stores only
	 * the state components selected here.
	 */
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	leaq	FPU_CTX_FPU_REGS(%rdi), %rsi
	#xsave (%rsi)
	.byte	0x0f, 0xae, 0x26

	/*
	 * (see notes above about "exception pointers")
	 * TODO: does it apply to any machine that uses xsave?
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)
#elif defined(__i386)

	ENTRY_NP(fpnsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fnsave	FPU_CTX_FPU_REGS(%eax)
			/* (fnsave also reinitializes x87 state) */
	STTS(%edx)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpnsave_ctxt)

	ENTRY_NP(fpxsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fxsave	FPU_CTX_FPU_REGS(%eax)
			/* (see notes above about "exception pointers") */
	btw	$7, FXSAVE_STATE_FSW(%eax)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const
			/* dummy load changes all exception pointers */
	STTS(%edx)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)
	ENTRY_NP(xsave_ctxt)
	movl	4(%esp), %ecx		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
	movl	FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
	leal	FPU_CTX_FPU_REGS(%ecx), %ecx
	#xsave (%ecx)
	.byte	0x0f, 0xae, 0x21

	/*
	 * (see notes above about "exception pointers")
	 * TODO: does it apply to any machine that uses xsave?
	 */
	btw	$7, FXSAVE_STATE_FSW(%ecx)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const
			/* dummy load changes all exception pointers */
	STTS(%edx)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)

#endif	/* __i386 */

	.align	8
.fpzero_const:
	.4byte	0x0
	.4byte	0x0

#endif	/* __lint */
#if defined(__lint)

/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}

/*ARGSUSED*/
void
xsave(struct xsave_state *f, uint64_t m)
{}

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fpxsave)
	CLTS
	FXSAVEQ	((%rdi))
	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

	ENTRY_NP(xsave)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	#xsave (%rdi)
	.byte	0x0f, 0xae, 0x27
	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)
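/*
 * Note on xsave(f, m): the mask m loaded into EDX:EAX above tells the
 * xsave instruction which state components to store; only components
 * enabled both in m and in the XCR0 feature-enable register are written
 * to the xsave_state area.
 */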
#elif defined(__i386)

	ENTRY_NP(fpsave)
	CLTS
	movl	4(%esp), %eax
	fnsave	(%eax)
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpsave)

	ENTRY_NP(fpxsave)
	CLTS
	movl	4(%esp), %eax
	fxsave	(%eax)
	fninit				/* clear exceptions, init x87 tags */
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

	ENTRY_NP(xsave)
	CLTS
	movl	4(%esp), %ecx
	movl	8(%esp), %eax
	movl	12(%esp), %edx
	#xsave (%ecx)
	.byte	0x0f, 0xae, 0x21
	fninit				/* clear exceptions, init x87 tags */
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)

#endif	/* __i386 */
#endif	/* __lint */
#if defined(__lint)

/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}

/*ARGSUSED*/
void
xrestore(struct xsave_state *f, uint64_t m)
{}

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fpxrestore)
	CLTS
	FXRSTORQ	((%rdi))
	ret
	SET_SIZE(fpxrestore)

	ENTRY_NP(xrestore)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	#xrstor (%rdi)
	.byte	0x0f, 0xae, 0x2f
	ret
	SET_SIZE(xrestore)
#elif defined(__i386)

	ENTRY_NP(fprestore)
	CLTS
	movl	4(%esp), %eax
	frstor	(%eax)
	ret
	SET_SIZE(fprestore)

	ENTRY_NP(fpxrestore)
	CLTS
	movl	4(%esp), %eax
	fxrstor	(%eax)
	ret
	SET_SIZE(fpxrestore)

	ENTRY_NP(xrestore)
	CLTS
	movl	4(%esp), %ecx
	movl	8(%esp), %eax
	movl	12(%esp), %edx
	#xrstor (%ecx)
	.byte	0x0f, 0xae, 0x29
	ret
	SET_SIZE(xrestore)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * Disable the floating point unit.
 */

#if defined(__lint)

void
fpdisable(void)
{}

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fpdisable)
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)

#elif defined(__i386)

	ENTRY_NP(fpdisable)
	STTS(%eax)
	ret
	SET_SIZE(fpdisable)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * Initialize the fpu hardware.
 */

#if defined(__lint)

void
fpinit(void)
{}

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_XSAVE, fp_save_mech
	je	1f

	/* fxsave */
	leaq	sse_initial(%rip), %rax
	FXRSTORQ	((%rax))	/* load clean initial state */
	ret

1:	/* xsave */
	leaq	avx_initial(%rip), %rcx
	xorl	%edx, %edx
	movl	$XFEATURE_AVX, %eax
	bt	$X86FSET_AVX, x86_featureset
	cmovael	%edx, %eax		/* drop AVX bit if CPU lacks AVX */
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	/* xrstor (%rcx) */
	.byte	0x0f, 0xae, 0x29	/* load clean initial state */
	ret
	SET_SIZE(fpinit)
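/*
 * The bt/cmovael sequence above computes, in effect (illustrative C):
 *
 *	uint32_t mask = XFEATURE_LEGACY_FP | XFEATURE_SSE;
 *	if (is_x86_feature(x86_featureset, X86FSET_AVX))
 *		mask |= XFEATURE_AVX;
 *
 * %edx stays zero, so this is the full EDX:EAX mask handed to xrstor
 * when loading the clean initial state.
 */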
#elif defined(__i386)

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_FXSAVE, fp_save_mech
	je	1f
	cmpl	$FP_XSAVE, fp_save_mech
	je	2f

	/* fnsave */
	fninit
	movl	$x87_initial, %eax
	frstor	(%eax)			/* load clean initial state */
	ret

1:	/* fxsave */
	movl	$sse_initial, %eax
	fxrstor	(%eax)			/* load clean initial state */
	ret

2:	/* xsave */
	movl	$avx_initial, %ecx
	xorl	%edx, %edx
	movl	$XFEATURE_AVX, %eax
	bt	$X86FSET_AVX, x86_featureset
	cmovael	%edx, %eax		/* drop AVX bit if CPU lacks AVX */
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	/* xrstor (%ecx) */
	.byte	0x0f, 0xae, 0x29	/* load clean initial state */
	ret
	SET_SIZE(fpinit)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * Clears FPU exception state.
 * Returns the FP status word.
 */

#if defined(__lint)

uint32_t
fperr_reset(void)
{ return (0); }

uint32_t
fpxerr_reset(void)
{ return (0); }

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax
	fnstsw	%ax
	fnclex
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* clear processor exceptions */
	leave
	ret
	SET_SIZE(fpxerr_reset)

#elif defined(__i386)

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax
	fnstsw	%ax
	fnclex
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	CLTS
	subl	$4, %esp		/* make some temporary space */
	stmxcsr	(%esp)
	movl	(%esp), %eax
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
	ldmxcsr	(%esp)			/* clear processor exceptions */
	addl	$4, %esp
	ret
	SET_SIZE(fpxerr_reset)

#endif	/* __i386 */
#endif	/* __lint */
#if defined(__lint)

uint32_t
fpgetcwsw(void)
{
	return (0);
}

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	fnstsw	(%rsp)			/* store the status word */
	fnstcw	2(%rsp)			/* store the control word */
	movl	(%rsp), %eax		/* put both in %eax */
	leave
	ret
	SET_SIZE(fpgetcwsw)
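/*
 * Layout note: fnstsw stores the 16-bit status word at offset 0 and
 * fnstcw the 16-bit control word at offset 2, so on this little-endian
 * machine the single movl returns (cw << 16) | sw in %eax.
 */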
#elif defined(__i386)

	ENTRY_NP(fpgetcwsw)
	CLTS
	subl	$4, %esp		/* make some temporary space */
	fnstsw	(%esp)			/* store the status word */
	fnstcw	2(%esp)			/* store the control word */
	movl	(%esp), %eax		/* put both in %eax */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetcwsw)

#endif	/* __i386 */
#endif	/* __lint */
/*
 * Returns the MXCSR register.
 */

#if defined(__lint)

uint32_t
fpgetmxcsr(void)
{
	return (0);
}

#else	/* __lint */
#if defined(__amd64)

	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax
	leave
	ret
	SET_SIZE(fpgetmxcsr)

#elif defined(__i386)

	ENTRY_NP(fpgetmxcsr)
	CLTS
	subl	$4, %esp		/* make some temporary space */
	stmxcsr	(%esp)
	movl	(%esp), %eax
	addl	$4, %esp
	ret
	SET_SIZE(fpgetmxcsr)

#endif	/* __i386 */
#endif	/* __lint */