4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
28 /* All Rights Reserved */
30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 /* All Rights Reserved */
34 * Copyright (c) 2009, Intel Corporation.
35 * All rights reserved.
38 #include <sys/asm_linkage.h>
39 #include <sys/asm_misc.h>
40 #include <sys/regset.h>
41 #include <sys/privregs.h>
42 #include <sys/x86_archext.h>
45 #include <sys/types.h>
/*
 * fpu_initial_probe -- what appears to be the C-side (lint) prototype,
 * followed by the assembler ENTRY_NP/SET_SIZE markers for the same symbol.
 * The routine takes no arguments.  The instructions that belong between
 * ENTRY_NP and SET_SIZE are not present in this excerpt, so the probe
 * sequence itself cannot be documented from here.
 */
54 fpu_initial_probe
(void
)
60 * Returns zero if x87 "chip" is present(!)
62 ENTRY_NP
(fpu_initial_probe
)
68 SET_SIZE
(fpu_initial_probe
)
/*
 * fxsave_insn -- prototype fragment only: one parameter, a pointer to a
 * struct fxsave_state.  No assembler body for this symbol is visible in
 * this excerpt.
 */
76 fxsave_insn
(struct fxsave_state
*fx
)
/*
 * fpu_probe_pentium_fdivbug -- takes no arguments.  Per the rule stated
 * just below, the FDIV bug is reported when num1/num2 > num1/num3, where
 * .num1/.num2/.num3 are the constants emitted inside the routine.
 * The divide-and-compare instructions are not present in this excerpt.
 */
103 * If (num1/num2 > num1/num3) the FPU has the FDIV bug.
109 fpu_probe_pentium_fdivbug
(void
)
114 ENTRY_NP
(fpu_probe_pentium_fdivbug
)
/*
 * Operand table.  NOTE(review): only one 32-bit word per label is visible,
 * and none of them matches its annotated value as a single-precision
 * number (0x0 is 0.0, not 15.0).  Each is presumably the low half of a
 * double whose high word sits on a line missing from this excerpt --
 * confirm against the full file before touching these values.
 */
131 .num1: .4byte 0xbce4217d /* 4.999999 */
133 .num2: .4byte 0x0 /* 15.0 */
135 .num3: .4byte 0xde7210bf /* 14.999999 */
137 SET_SIZE
(fpu_probe_pentium_fdivbug
)
142 * To cope with processors that do not implement fxsave/fxrstor
143 * instructions, patch hot paths in the kernel to use them only
144 * when that feature has been detected.
/*
 * Hot-patch requests visible in this excerpt (32-bit register names):
 *   _HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
 *   _HOT_PATCH(_sfence_ret_insn,  _patch_sfence_ret,  4)
 * NOTE(review): _HOT_PATCH is defined elsewhere.  The arguments look like
 * (label of replacement bytes, label of the patch point, byte count) --
 * confirm against the macro definition.
 * The ENTRY_NP/SET_SIZE pair that encloses these requests and any feature
 * checks that gate them are not visible here.
 */
166 / frstor
(%ebx
);
nop -> fxrstor
(%ebx
)
168 _HOT_PATCH
(_fxrstor_ebx_insn
, _patch_fxrstor_ebx
, 3)
170 / lock; xorl $
0, (%esp
) -> sfence; ret
172 _HOT_PATCH
(_sfence_ret_insn
, _patch_sfence_ret
, 4)
/*
 * Replacement-instruction templates.  Each label is annotated with the
 * routine that contains the matching patch point.  The instruction that
 * follows _fxrstor_ebx_insn and _ldmxcsr_ebx_insn is not visible in this
 * excerpt, nor is a _HOT_PATCH request for _ldmxcsr_ebx_insn.
 */
175 _fxrstor_ebx_insn
: / see ndptrap_frstor
()
177 _ldmxcsr_ebx_insn
: / see resume_from_zombie
()
179 _sfence_ret_insn
: / see membar_producer
()
/*
 * 0F AE F8 encodes sfence.  The request above copies 4 bytes and its
 * comment says "sfence; ret", so a one-byte ret presumably follows these
 * three bytes on a line missing from this excerpt -- confirm.
 */
180 .byte 0xf, 0xae, 0xf8 / [sfence instruction]
/*
 * Hot-patch request replacing "lock; xorl $0, (%esp)" with "lfence; ret"
 * at _patch_lfence_ret; the template label is annotated as belonging to
 * membar_consumer().  4 bytes are requested.
 * NOTE(review): the enclosing ENTRY_NP/SET_SIZE is not visible here, and
 * the _HOT_PATCH argument order is inferred -- confirm.
 */
187 / lock; xorl $
0, (%esp
) -> lfence; ret
189 _HOT_PATCH
(_lfence_ret_insn
, _patch_lfence_ret
, 4)
192 _lfence_ret_insn
: / see membar_consumer
()
/*
 * 0F AE E8 encodes lfence.  Only three of the four requested bytes are
 * visible; the trailing ret byte is presumably on a missing line.
 */
193 .byte 0xf, 0xae, 0xe8 / [lfence instruction]
198 * Patch lazy fp restore instructions in the trap handler
199 * to use xrstor instead of frstor
/*
 * patch_xsave (variant using 32-bit register names).
 * Requests a 3-byte hot patch at _patch_xrstor_ebx using the bytes at
 * _xrstor_ebx_insn, turning "frstor (%ebx); nop" into "xrstor (%ebx)".
 * NOTE(review): the return instruction and any register save/restore
 * around _HOT_PATCH are not visible in this excerpt.
 */
201 ENTRY_NP
(patch_xsave
)
204 / frstor
(%ebx
);
nop -> xrstor
(%ebx
)
206 _HOT_PATCH
(_xrstor_ebx_insn
, _patch_xrstor_ebx
, 3)
209 _xrstor_ebx_insn
: / see ndptrap_frstor
()
/*
 * Hand-encoded template: 0F AE with ModRM 0x2b (reg field 5, rm = %ebx)
 * is xrstor (%ebx).
 */
211 .byte 0x0f, 0xae, 0x2b
212 SET_SIZE
(patch_xsave
)
227 * Patch lazy fp restore instructions in the trap handler
228 * to use xrstor instead of fxrstorq
/*
 * patch_xsave (variant using 64-bit register names).
 * Copies the bytes at _xrstor_rbx_insn over the instruction at
 * _patch_xrstorq_rbx by calling hot_patch_kernel_text.
 */
230 ENTRY_NP
(patch_xsave
)
235 / FXRSTORQ
(%rbx
);
-> xrstor
(%rbx
)
236 / hot_patch
(_xrstor_rbx_insn
, _patch_xrstorq_rbx
, 4)
/*
 * %rbx = address to patch, %rbp = address of the replacement bytes.
 * NOTE(review): %rbx and %rbp are callee-saved in the SysV AMD64 ABI, and
 * their save/restore is not visible in this excerpt -- confirm it exists.
 */
238 leaq _patch_xrstorq_rbx
(%rip
), %rbx
239 leaq _xrstor_rbx_insn
(%rip
), %rbp
/*
 * Arguments for one hot_patch_kernel_text call: %rdi = patch address,
 * %rsi = one replacement byte zero-extended from (%rbp), %rdx = 1.
 * NOTE(review): the comment above says 4 bytes are patched, while each
 * call passes a count of 1.  This call presumably sits inside a per-byte
 * loop whose counter and pointer updates are on lines missing from this
 * excerpt -- confirm.
 */
242 movq
%rbx
, %rdi
/* patch address */
243 movzbq
(%rbp
), %rsi
/* instruction byte */
244 movq $
1, %rdx
/* count */
245 call hot_patch_kernel_text
256 _xrstor_rbx_insn
: / see ndptrap_frstor
()
257 #rex.W=1 (.byte 0x48)
/*
 * Hand-encoded template: REX.W prefix 0x48, then 0F AE with ModRM 0x2b
 * (reg field 5, rm = %rbx), i.e. the 64-bit form of xrstor (%rbx).
 */
259 .byte 0x48, 0x0f, 0xae, 0x2b
260 SET_SIZE
(patch_xsave
)
266 * One of these routines is called from any lwp with floating
267 * point context as part of the prolog of a context switch.
/*
 * Prototype fragments for the three context-save routines.  Each takes a
 * single untyped pointer argument; the assembler bodies visible later in
 * the file treat it as the base for FPU_CTX_* offsets.
 */
274 xsave_ctxt
(void
*arg
)
279 fpxsave_ctxt
(void
*arg
)
284 fpnsave_ctxt
(void
*arg
)
/*
 * fpxsave_ctxt (variant using 64-bit register names).
 * In:  %rdi = base address used for the FPU_CTX_* offsets.
 * Compares the flags word at FPU_CTX_FPU_FLAGS(%rdi) with FPU_EN, stores
 * FPU_VALID|FPU_EN there, saves state with FXSAVEQ into
 * FPU_CTX_FPU_REGS(%rdi), scrubs the x87 exception pointers, then invokes
 * STTS.
 * NOTE(review): the conditional branch that consumes the cmpl result is
 * not visible in this excerpt.  Label 1 is described as a branch target,
 * so the routine presumably skips straight to the return when the flags
 * differ from FPU_EN -- confirm.
 */
291 ENTRY_NP
(fpxsave_ctxt
)
292 cmpl $FPU_EN
, FPU_CTX_FPU_FLAGS
(%rdi
)
295 movl $_CONST
(FPU_VALID|FPU_EN
), FPU_CTX_FPU_FLAGS
(%rdi
)
296 FXSAVEQ
(FPU_CTX_FPU_REGS
(%rdi
))
/*
 * NOTE(review): the third sentence of the explanation below is cut off
 * after "exception summary".  The code tests the saved ES bit, so it
 * presumably continued "bit is set" -- confirm.
 */
299 * On certain AMD processors, the "exception pointers" i.e. the last
300 * instruction pointer, last data pointer, and last opcode
301 * are saved by the fxsave instruction ONLY if the exception summary
304 * To ensure that we don't leak these values into the next context
305 * on the cpu, we could just issue an fninit here, but that's
306 * rather slow and so we issue an instruction sequence that
307 * clears them more quickly, if a little obscurely.
/*
 * NOTE(review): the state was saved at FPU_CTX_FPU_REGS(%rdi), but the
 * status word is tested at FXSAVE_STATE_FSW(%rdi).  The two agree only if
 * FPU_CTX_FPU_REGS is 0 -- confirm against the offsets header.
 */
309 btw $
7, FXSAVE_STATE_FSW
(%rdi
) /* Test saved ES bit */
310 jnc
0f
/* jump if ES = 0 */
311 fnclex
/* clear pending x87 exceptions */
312 0: ffree
%st(7) /* clear tag bit to remove possible stack overflow */
313 fildl
.fpzero_const(%rip)
314 /* dummy load changes all exception pointers */
/* STTS is a macro defined elsewhere; %rsi is presumably scratch for it. */
315 STTS
(%rsi
) /* trap on next fpu touch */
316 1: rep; ret
/* use 2 byte return instruction when branch target */
317 /* AMD Software Optimization Guide - Section 6.2 */
318 SET_SIZE
(fpxsave_ctxt
)
/*
 * XSAVE-based context save (variant using 64-bit register names).
 * NOTE(review): the ENTRY_NP, return and SET_SIZE lines are missing from
 * this excerpt.  Judging by the prototypes and the xsave encoding below,
 * this is presumably xsave_ctxt -- confirm.
 * In:  %rdi = base address used for the FPU_CTX_* offsets.
 * The branch that consumes the cmpl result is also not visible here.
 */
321 cmpl $FPU_EN
, FPU_CTX_FPU_FLAGS
(%rdi
)
323 movl $_CONST
(FPU_VALID|FPU_EN
), FPU_CTX_FPU_FLAGS
(%rdi
)
325 * Setup xsave flags in EDX:EAX
/*
 * %eax = low 32 bits and %edx = high 32 bits of the 64-bit mask stored at
 * FPU_CTX_FPU_XSAVE_MASK(%rdi); %rsi = address of the save area.
 */
327 movl FPU_CTX_FPU_XSAVE_MASK
(%rdi
), %eax
328 movl FPU_CTX_FPU_XSAVE_MASK+
4(%rdi
), %edx
329 leaq FPU_CTX_FPU_REGS
(%rdi
), %rsi
/*
 * Hand-encoded instruction: 0F AE with ModRM 0x26 (reg field 4,
 * rm = %rsi) is xsave (%rsi).
 */
331 .byte 0x0f, 0xae, 0x26
334 * (see notes above about "exception pointers")
335 * TODO: does it apply to any machine that uses xsave?
/*
 * NOTE(review): the save area address is in %rsi, yet the status word is
 * tested relative to %rdi.  That is correct only if FPU_CTX_FPU_REGS is
 * 0 -- confirm against the offsets header.
 */
337 btw $
7, FXSAVE_STATE_FSW
(%rdi
) /* Test saved ES bit */
338 jnc
0f
/* jump if ES = 0 */
339 fnclex
/* clear pending x87 exceptions */
340 0: ffree
%st(7) /* clear tag bit to remove possible stack overflow */
341 fildl
.fpzero_const(%rip)
342 /* dummy load changes all exception pointers */
/* %rsi is not read again after the xsave, so STTS may reuse it. */
343 STTS
(%rsi
) /* trap on next fpu touch */
347 #elif defined(__i386)
/*
 * fpnsave_ctxt (variant using 32-bit register names, stack-passed arg).
 * In:  4(%esp) = pointer loaded into %eax and used as the base for the
 *      FPU_CTX_* offsets.
 * Compares the flags word with FPU_EN, stores FPU_VALID|FPU_EN, saves the
 * x87 state with fnsave into FPU_CTX_FPU_REGS(%eax), then invokes STTS.
 * NOTE(review): the conditional branch that consumes the cmpl result is
 * not visible in this excerpt.  Label 1 is described as a branch target,
 * so the routine presumably skips to the return when the flags differ
 * from FPU_EN -- confirm.
 */
349 ENTRY_NP
(fpnsave_ctxt
)
350 movl
4(%esp
), %eax
/* a struct fpu_ctx */
351 cmpl $FPU_EN
, FPU_CTX_FPU_FLAGS
(%eax
)
354 movl $_CONST
(FPU_VALID|FPU_EN
), FPU_CTX_FPU_FLAGS
(%eax
)
355 fnsave FPU_CTX_FPU_REGS
(%eax
)
356 /* (fnsave also reinitializes x87 state) */
/* STTS is a macro defined elsewhere; %edx is presumably scratch for it. */
357 STTS
(%edx
) /* trap on next fpu touch */
358 1: rep; ret
/* use 2 byte return instruction when branch target */
359 /* AMD Software Optimization Guide - Section 6.2 */
360 SET_SIZE
(fpnsave_ctxt
)
/*
 * fpxsave_ctxt (variant using 32-bit register names, stack-passed arg).
 * In:  4(%esp) = pointer loaded into %eax and used as the base for the
 *      FPU_CTX_* offsets.
 * Compares the flags word with FPU_EN, stores FPU_VALID|FPU_EN, saves
 * state with fxsave into FPU_CTX_FPU_REGS(%eax), scrubs the x87 exception
 * pointers, then invokes STTS.
 * NOTE(review): the conditional branch that consumes the cmpl result is
 * not visible in this excerpt -- confirm it targets label 1.
 */
362 ENTRY_NP
(fpxsave_ctxt
)
363 movl
4(%esp
), %eax
/* a struct fpu_ctx */
364 cmpl $FPU_EN
, FPU_CTX_FPU_FLAGS
(%eax
)
367 movl $_CONST
(FPU_VALID|FPU_EN
), FPU_CTX_FPU_FLAGS
(%eax
)
368 fxsave FPU_CTX_FPU_REGS
(%eax
)
369 /* (see notes above about "exception pointers") */
/*
 * NOTE(review): the state was saved at FPU_CTX_FPU_REGS(%eax), but the
 * status word is tested at FXSAVE_STATE_FSW(%eax).  The two agree only if
 * FPU_CTX_FPU_REGS is 0 -- confirm against the offsets header.
 */
370 btw $
7, FXSAVE_STATE_FSW
(%eax
) /* Test saved ES bit */
371 jnc
0f
/* jump if ES = 0 */
372 fnclex
/* clear pending x87 exceptions */
373 0: ffree
%st(7) /* clear tag bit to remove possible stack overflow */
/*
 * NOTE(review): the "dummy load" instruction that the next comment
 * describes is not present in this excerpt.
 */
375 /* dummy load changes all exception pointers */
376 STTS
(%edx
) /* trap on next fpu touch */
377 1: rep; ret
/* use 2 byte return instruction when branch target */
378 /* AMD Software Optimization Guide - Section 6.2 */
379 SET_SIZE
(fpxsave_ctxt
)
/*
 * XSAVE-based context save (variant using 32-bit register names).
 * NOTE(review): the ENTRY_NP, return and SET_SIZE lines are missing from
 * this excerpt.  Judging by the prototypes and the xsave encoding below,
 * this is presumably xsave_ctxt -- confirm.
 * In:  4(%esp) = pointer loaded into %ecx and used as the base for the
 *      FPU_CTX_* offsets.
 * The branch that consumes the cmpl result is also not visible here.
 */
382 movl
4(%esp
), %ecx
/* a struct fpu_ctx */
383 cmpl $FPU_EN
, FPU_CTX_FPU_FLAGS
(%ecx
)
386 movl $_CONST
(FPU_VALID|FPU_EN
), FPU_CTX_FPU_FLAGS
(%ecx
)
/*
 * %eax = low 32 bits and %edx = high 32 bits of the mask stored at
 * FPU_CTX_FPU_XSAVE_MASK(%ecx).  After the leal, %ecx no longer holds the
 * original base: it holds the address of the save area itself.
 */
387 movl FPU_CTX_FPU_XSAVE_MASK
(%ecx
), %eax
388 movl FPU_CTX_FPU_XSAVE_MASK+
4(%ecx
), %edx
389 leal FPU_CTX_FPU_REGS
(%ecx
), %ecx
/*
 * Hand-encoded instruction: 0F AE with ModRM 0x21 (reg field 4,
 * rm = %ecx) is xsave (%ecx).
 */
391 .byte 0x0f, 0xae, 0x21
394 * (see notes above about "exception pointers")
395 * TODO: does it apply to any machine that uses xsave?
/* %ecx addresses the save area here, so the FSW offset applies directly. */
397 btw $
7, FXSAVE_STATE_FSW
(%ecx
) /* Test saved ES bit */
398 jnc
0f
/* jump if ES = 0 */
399 fnclex
/* clear pending x87 exceptions */
400 0: ffree
%st(7) /* clear tag bit to remove possible stack overflow */
/*
 * NOTE(review): the "dummy load" instruction that the next comment
 * describes is not present in this excerpt.
 */
402 /* dummy load changes all exception pointers */
/* The mask in %edx was already consumed by the xsave; STTS may reuse it. */
403 STTS
(%edx
) /* trap on next fpu touch */
/*
 * Prototype fragments for the explicit save routines:
 *   fpsave  - takes a struct fnsave_state pointer
 *   fpxsave - takes a struct fxsave_state pointer
 *   xsave   - takes a struct xsave_state pointer plus a 64-bit mask m
 * Only the prototypes are visible at this point in the excerpt.
 */
421 fpsave
(struct fnsave_state
*f
)
426 fpxsave
(struct fxsave_state
*f
)
431 xsave
(struct xsave_state
*f
, uint64_t m
)
/*
 * Tail fragments of the save routines (variant using 64-bit registers).
 * NOTE(review): the ENTRY_NP/SET_SIZE lines, the save instruction of the
 * first routine and all returns are missing from this excerpt.  Which
 * routine each fragment belongs to is inferred from the prototypes --
 * confirm.
 * First fragment: reinitialise the x87 unit, then invoke STTS.
 */
441 fninit
/* clear exceptions, init x87 tags */
442 STTS
(%rdi
) /* set TS bit in %cr0 (disable FPU) */
/*
 * Second fragment: copies %esi into %eax as the low half of the mask.
 * Under the SysV AMD64 convention %esi is the low half of the second
 * argument.  The setup of %edx is not visible here.
 */
448 movl
%esi
, %eax
/* bv mask */
/*
 * Hand-encoded instruction: 0F AE with ModRM 0x27 (reg field 4,
 * rm = %rdi) is xsave (%rdi).
 */
452 .byte 0x0f, 0xae, 0x27
454 fninit
/* clear exceptions, init x87 tags */
/* %rdi has already been used as the save address; STTS may reuse it. */
455 STTS
(%rdi
) /* set TS bit in %cr0 (disable FPU) */
459 #elif defined(__i386)
/*
 * Tail fragments of the save routines (variant using 32-bit registers).
 * NOTE(review): the ENTRY_NP/SET_SIZE lines, argument loads, most save
 * instructions and all returns are missing from this excerpt, so the
 * mapping of fragments to routines is inferred -- confirm.
 * Each fragment ends by invoking STTS with %eax.
 */
465 STTS
(%eax
) /* set TS bit in %cr0 (disable FPU) */
473 fninit
/* clear exceptions, init x87 tags */
474 STTS
(%eax
) /* set TS bit in %cr0 (disable FPU) */
/*
 * Hand-encoded instruction: 0F AE with ModRM 0x21 (reg field 4,
 * rm = %ecx) is xsave (%ecx).
 */
484 .byte 0x0f, 0xae, 0x21
486 fninit
/* clear exceptions, init x87 tags */
487 STTS
(%eax
) /* set TS bit in %cr0 (disable FPU) */
/*
 * Prototype fragments for the explicit restore routines:
 *   fprestore  - takes a struct fnsave_state pointer
 *   fpxrestore - takes a struct fxsave_state pointer
 *   xrestore   - takes a struct xsave_state pointer plus a 64-bit mask m
 * Only the prototypes are visible at this point in the excerpt.
 */
498 fprestore
(struct fnsave_state
*f
)
503 fpxrestore
(struct fxsave_state
*f
)
508 xrestore
(struct xsave_state
*f
, uint64_t m
)
/*
 * Fragment of an XRSTOR-based restore (variant using 64-bit registers).
 * NOTE(review): presumably the body of xrestore -- its ENTRY_NP, the %edx
 * setup, the return and SET_SIZE are missing from this excerpt.
 * %esi is copied into %eax as the low half of the mask.
 */
523 movl
%esi
, %eax
/* bv mask */
/*
 * Hand-encoded instruction: 0F AE with ModRM 0x2f (reg field 5,
 * rm = %rdi) is xrstor (%rdi).
 */
527 .byte 0x0f, 0xae, 0x2f
531 #elif defined(__i386)
/*
 * Hand-encoded instruction: 0F AE with ModRM 0x29 (reg field 5,
 * rm = %ecx) is xrstor (%ecx).
 * NOTE(review): the surrounding routine (presumably the 32-bit xrestore)
 * is not visible in this excerpt.
 */
553 .byte 0x0f, 0xae, 0x29
561 * Disable the floating point unit.
/*
 * Only the STTS invocation of this routine is visible.  Its ENTRY_NP,
 * return and SET_SIZE lines are missing from this excerpt.
 * NOTE(review): STTS is a macro defined elsewhere; %rdi is presumably a
 * scratch register for it -- confirm.
 */
575 STTS
(%rdi
) /* set TS bit in %cr0 (disable FPU) */
579 #elif defined(__i386)
590 * Initialize the fpu hardware.
/*
 * Variant using 64-bit registers.  Selects between two clean-state loads
 * based on fp_save_mech.
 * NOTE(review): the ENTRY_NP, the branch that consumes the cmpl result,
 * the returns and SET_SIZE are missing from this excerpt.
 */
605 cmpl $FP_XSAVE
, fp_save_mech
/* FXRSTORQ path: load the image at sse_initial. */
609 leaq sse_initial
(%rip
), %rax
610 FXRSTORQ
((%rax
)) /* load clean initial state */
/*
 * XRSTOR path: %rcx = address of avx_initial.  %eax starts as
 * XFEATURE_AVX, the AVX bit of x86_featureset is tested, and
 * XFEATURE_LEGACY_FP | XFEATURE_SSE is OR-ed in.
 * NOTE(review): the instruction that acts on the carry flag set by bt,
 * and the setup of %edx, are not visible here.  Presumably the AVX bit is
 * dropped from the mask when the feature is absent -- confirm.
 */
614 leaq avx_initial
(%rip
), %rcx
616 movl $XFEATURE_AVX
, %eax
617 bt $X86FSET_AVX
, x86_featureset
619 orl $
(XFEATURE_LEGACY_FP | XFEATURE_SSE
), %eax
/* 0F AE with ModRM 0x29 (reg field 5, rm = %rcx) is xrstor (%rcx). */
621 .byte 0x0f, 0xae, 0x29 /* load clean initial state */
625 #elif defined(__i386)
/*
 * FPU initialisation, variant using 32-bit registers.  Chooses one of
 * three clean-state loads based on fp_save_mech (FP_FXSAVE, FP_XSAVE, or
 * neither).
 * NOTE(review): the ENTRY_NP, the branches that consume the two cmpl
 * results, the returns and SET_SIZE are missing from this excerpt.
 */
629 cmpl $FP_FXSAVE
, fp_save_mech
631 cmpl $FP_XSAVE
, fp_save_mech
/* frstor path: load the image at x87_initial. */
636 movl $x87_initial
, %eax
637 frstor
(%eax
) /* load clean initial state */
/* fxrstor path: load the image at sse_initial. */
641 movl $sse_initial
, %eax
642 fxrstor
(%eax
) /* load clean initial state */
/*
 * XRSTOR path: %ecx = address of avx_initial.  %eax starts as
 * XFEATURE_AVX, the AVX bit of x86_featureset is tested, and
 * XFEATURE_LEGACY_FP | XFEATURE_SSE is OR-ed in.
 * NOTE(review): the instruction that acts on the carry flag set by bt,
 * and the setup of %edx, are not visible here -- confirm.
 */
646 movl $avx_initial
, %ecx
648 movl $XFEATURE_AVX
, %eax
649 bt $X86FSET_AVX
, x86_featureset
651 orl $
(XFEATURE_LEGACY_FP | XFEATURE_SSE
), %eax
/* 0F AE with ModRM 0x29 (reg field 5, rm = %ecx) is xrstor (%ecx). */
653 .byte 0x0f, 0xae, 0x29 /* load clean initial state */
661 * Clears FPU exception state.
662 * Returns the FP status word.
/*
 * fperr_reset (variant using 64-bit registers): only the ENTRY_NP and
 * SET_SIZE markers are visible; the body is missing from this excerpt.
 */
679 ENTRY_NP
(fperr_reset
)
685 SET_SIZE
(fperr_reset
)
/*
 * fpxerr_reset (variant using 64-bit registers).
 * Reserves 16 bytes of stack, clears the SSE_MXCSR_EFLAGS bits in the
 * 32-bit word at (%rsp) and loads that word into MXCSR.
 * NOTE(review): the instruction that first stores MXCSR to (%rsp), the
 * load of the return value, the stack release and the return are not
 * visible in this excerpt.
 */
687 ENTRY_NP
(fpxerr_reset
)
690 subq $
0x10, %rsp
/* make some temporary space */
694 andl $_BITNOT
(SSE_MXCSR_EFLAGS
), (%rsp
)
695 ldmxcsr
(%rsp
) /* clear processor exceptions */
698 SET_SIZE
(fpxerr_reset
)
700 #elif defined(__i386)
/*
 * fperr_reset (variant using 32-bit registers): only the ENTRY_NP and
 * SET_SIZE markers are visible; the body is missing from this excerpt.
 */
702 ENTRY_NP
(fperr_reset
)
708 SET_SIZE
(fperr_reset
)
/*
 * fpxerr_reset (variant using 32-bit registers).
 * Reserves 4 bytes of stack, clears the SSE_MXCSR_EFLAGS bits in the word
 * at (%esp) and loads that word into MXCSR.
 * NOTE(review): the instruction that first stores MXCSR to (%esp), the
 * load of the return value, the stack release and the return are not
 * visible in this excerpt.
 */
710 ENTRY_NP
(fpxerr_reset
)
712 subl $
4, %esp
/* make some temporary space */
715 andl $_BITNOT
(SSE_MXCSR_EFLAGS
), (%esp
)
716 ldmxcsr
(%esp
) /* clear processor exceptions */
719 SET_SIZE
(fpxerr_reset
)
/*
 * Reads the x87 status and control words into %eax (variant using 64-bit
 * registers).  The status word is stored at (%rsp) and the control word
 * at 2(%rsp), so after the 32-bit load the status word occupies bits 0-15
 * of %eax and the control word bits 16-31.
 * NOTE(review): the ENTRY_NP (routine name), the stack release, the
 * return and SET_SIZE are missing from this excerpt.
 */
739 subq $
0x10, %rsp
/* make some temporary space */
741 fnstsw
(%rsp
) /* store the status word */
742 fnstcw
2(%rsp
) /* store the control word */
743 movl
(%rsp
), %eax
/* put both in %eax */
748 #elif defined(__i386)
/*
 * Reads the x87 status and control words into %eax (variant using 32-bit
 * registers).  The status word is stored at (%esp) and the control word
 * at 2(%esp), so after the 32-bit load the status word occupies bits 0-15
 * of %eax and the control word bits 16-31.
 * NOTE(review): the ENTRY_NP (routine name), the stack release, the
 * return and SET_SIZE are missing from this excerpt.
 */
752 subl $
4, %esp
/* make some temporary space */
753 fnstsw
(%esp
) /* store the status word */
754 fnstcw
2(%esp
) /* store the control word */
755 movl
(%esp
), %eax
/* put both in %eax */
764 * Returns the MXCSR register.
/*
 * Only the 16-byte stack reservation of this routine (variant using
 * 64-bit registers) is visible.
 * NOTE(review): the ENTRY_NP (routine name), the MXCSR store, the load
 * into the return register, the stack release and the return are missing
 * from this excerpt.
 */
782 subq $
0x10, %rsp
/* make some temporary space */
790 #elif defined(__i386)
794 subl $
4, %esp
/* make some temporary space */