usr/src/uts/i86pc/ml/locore.s
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
26 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
29 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
31 /* All Rights Reserved */
33 /* Copyright (c) 1987, 1988 Microsoft Corporation */
34 /* All Rights Reserved */
37 #include <sys/asm_linkage.h>
38 #include <sys/asm_misc.h>
39 #include <sys/regset.h>
40 #include <sys/privregs.h>
41 #include <sys/psw.h>
42 #include <sys/reboot.h>
43 #include <sys/x86_archext.h>
44 #include <sys/machparam.h>
46 #if defined(__lint)
48 #include <sys/types.h>
49 #include <sys/thread.h>
50 #include <sys/systm.h>
51 #include <sys/lgrp.h>
52 #include <sys/regset.h>
53 #include <sys/link.h>
54 #include <sys/bootconf.h>
55 #include <sys/bootsvcs.h>
57 #else /* __lint */
59 #include <sys/segments.h>
60 #include <sys/pcb.h>
61 #include <sys/trap.h>
62 #include <sys/ftrace.h>
63 #include <sys/traptrace.h>
64 #include <sys/clock.h>
65 #include <sys/cmn_err.h>
66 #include <sys/pit.h>
67 #include <sys/panic.h>
69 #if defined(__xpv)
70 #include <sys/hypervisor.h>
71 #endif
73 #include "assym.h"
76 * Our assumptions:
77 * - We are running in protected-paged mode.
78 * - Interrupts are disabled.
80 * - The GDT and IDT are the caller's; we need our own copies.
80 * - The kernel's text, initialized data and bss are mapped.
82 * Our actions:
83 * - Save arguments
84 * - Initialize our stack pointer to the thread 0 stack (t0stack)
85 * and leave room for a phony "struct regs".
86 * - Our GDT and IDT need to get munged.
87 * - Since we are using the boot's GDT descriptors, we need
88 * to copy them into our GDT before we switch to ours.
89 * - We start using our GDT by loading correct values in the
90 * selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
91 * gs=KGS_SEL).
92 * - The default LDT entry for syscall is set.
93 * - We load the default LDT into the hardware LDT register.
94 * - We load the default TSS into the hardware task register.
95 * - Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
96 * - mlsetup(%esp) gets called.
97 * - We change our appearance to look like the real thread 0.
98 * (NOTE: making ourselves a real thread may be a no-op)
99 * - main() gets called. (NOTE: main() never returns).
101 * NOW, the real code!
104 * The very first thing in the kernel's text segment must be a jump
105 * to the os/fakebop.c startup code.
107 .text
108 jmp _start
111 * Globals:
113 .globl _locore_start
114 .globl mlsetup
115 .globl main
116 .globl panic
117 .globl t0stack
118 .globl t0
119 .globl sysp
120 .globl edata
123 * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
125 .globl bootops
126 .globl bootopsp
129 * NOTE: t0stack should be the first thing in the data section so that
130 * if it ever overflows, it will fault on the last kernel text page.
132 .data
133 .comm t0stack, DEFAULTSTKSZ, 32
134 .comm t0, 4094, 32
136 #endif /* __lint */
139 #if defined(__amd64)
141 #if defined(__lint)
143 /* ARGSUSED */
144 void
145 _locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
148 #else /* __lint */
151 * kobj_init() vectors us back to here with (note) a slightly different
152 * set of arguments than _start is given (see lint prototypes above).
154 * XXX Make this less vile, please.
156 ENTRY_NP(_locore_start)
159 * %rdi = boot services (should die someday)
160 * %rdx = bootops
161 * end
164 leaq edata(%rip), %rbp /* reference edata for ksyms */
165 movq $0, (%rbp) /* limit stack back trace */
168 * Initialize our stack pointer to the thread 0 stack (t0stack)
169 * and leave room for a "struct regs" for lwp0. Note that the
170 * stack doesn't actually align to a 16-byte boundary until just
171 * before we call mlsetup because we want to use %rsp to point at
172 * our regs structure.
174 leaq t0stack(%rip), %rsp
175 addq $_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
176 #if (REGSIZE & 15) == 0
177 subq $8, %rsp
178 #endif
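In C terms, the stack computation above is roughly the following (a sketch only; stack_base, stksz and regsize stand in for the t0stack/DEFAULTSTKSZ/REGSIZE assym constants, and the function shape is illustrative). If REGSIZE is a multiple of 16, the 8-byte bias leaves %rsp == 8 (mod 16), so the later pushq %rbp re-aligns the stack to a 16-byte boundary exactly at the call to mlsetup(), as the amd64 ABI expects.

#include <stdint.h>
#include <stddef.h>

static uintptr_t
initial_rsp(uintptr_t stack_base, size_t stksz, size_t regsize)
{
	/* leave room for the phony "struct regs" at the top of the stack */
	uintptr_t rsp = stack_base + stksz - regsize;

	if ((regsize & 15) == 0)
		rsp -= 8;	/* bias so pushq %rbp yields 16-byte alignment */
	return (rsp);
}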
180 * Save call back for special x86 boot services vector
182 movq %rdi, sysp(%rip)
184 movq %rdx, bootops(%rip) /* save bootops */
185 movq $bootops, bootopsp(%rip)
188 * Save arguments and flags, if only for debugging ..
190 movq %rdi, REGOFF_RDI(%rsp)
191 movq %rsi, REGOFF_RSI(%rsp)
192 movq %rdx, REGOFF_RDX(%rsp)
193 movq %rcx, REGOFF_RCX(%rsp)
194 movq %r8, REGOFF_R8(%rsp)
195 movq %r9, REGOFF_R9(%rsp)
196 pushf
197 popq %r11
198 movq %r11, REGOFF_RFL(%rsp)
200 #if !defined(__xpv)
202 * Enable write protect and alignment check faults.
204 movq %cr0, %rax
205 orq $_CONST(CR0_WP|CR0_AM), %rax
206 andq $_BITNOT(CR0_WT|CR0_CE), %rax
207 movq %rax, %cr0
208 #endif /* __xpv */
211 * (We just assert this works by virtue of being here)
213 bts $X86FSET_CPUID, x86_featureset(%rip)
216 * mlsetup() gets called with a struct regs as argument, while
217 * main takes no args and should never return.
219 xorl %ebp, %ebp
220 movq %rsp, %rdi
221 pushq %rbp
222 /* (stack pointer now aligned on 16-byte boundary right here) */
223 movq %rsp, %rbp
224 call mlsetup
225 call main
226 /* NOTREACHED */
227 leaq __return_from_main(%rip), %rdi
228 xorl %eax, %eax
229 call panic
230 SET_SIZE(_locore_start)
232 #endif /* __amd64 */
233 #endif /* __lint */
235 #if !defined(__lint)
237 __return_from_main:
238 .string "main() returned"
239 __unsupported_cpu:
240 .string "486 style cpu detected - no longer supported!"
242 #endif /* !__lint */
244 #if !defined(__amd64)
246 #if defined(__lint)
248 /* ARGSUSED */
249 void
250 _locore_start(struct boot_syscalls *sysp, struct bootops *bop)
253 #else /* __lint */
256 * kobj_init() vectors us back to here with (note) a slightly different
257 * set of arguments than _start is given (see lint prototypes above).
259 * XXX Make this less vile, please.
261 ENTRY_NP(_locore_start)
264 * %ecx = boot services (should die someday)
265 * %ebx = bootops
267 mov $edata, %ebp / edata needs to be defined for ksyms
268 movl $0, (%ebp) / limit stack back trace
271 * Initialize our stack pointer to the thread 0 stack (t0stack)
272 * and leave room for a phony "struct regs".
274 movl $t0stack + DEFAULTSTKSZ - REGSIZE, %esp
277 * Save call back for special x86 boot services vector
279 mov %ecx, sysp / save call back for boot services
281 mov %ebx, bootops / save bootops
282 movl $bootops, bootopsp
286 * Save all registers and flags
288 pushal
289 pushfl
291 #if !defined(__xpv)
293 * Override bios settings and enable write protect and
294 * alignment check faults.
296 movl %cr0, %eax
299 * enable WP for detecting faults, and enable alignment checking.
301 orl $_CONST(CR0_WP|CR0_AM), %eax
302 andl $_BITNOT(CR0_WT|CR0_CE), %eax
303 movl %eax, %cr0 / set the cr0 register correctly and
304 / override the BIOS setup
307 * If bit 21 of eflags can be flipped, then cpuid is present
308 * and enabled.
310 pushfl
311 popl %ecx
312 movl %ecx, %eax
313 xorl $PS_ID, %eax / try complemented bit
314 pushl %eax
315 popfl
316 pushfl
317 popl %eax
318 cmpl %eax, %ecx
319 jne have_cpuid
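The same EFLAGS.ID probe as a C sketch, assuming a 32-bit build and GCC-style inline assembly (has_cpuid() is illustrative, not a kernel interface):

#include <stdint.h>

#define	PS_ID	0x200000	/* EFLAGS bit 21 */

static int
has_cpuid(void)
{
	uint32_t before, after;

	__asm__ __volatile__(
	    "pushfl\n\t"
	    "popl %0\n\t"		/* before = EFLAGS */
	    "movl %0, %1\n\t"
	    "xorl %2, %1\n\t"		/* flip the ID bit */
	    "pushl %1\n\t"
	    "popfl\n\t"			/* try to install it */
	    "pushfl\n\t"
	    "popl %1"			/* after = EFLAGS as the cpu kept it */
	    : "=&r" (before), "=&r" (after)
	    : "ri" ((uint32_t)PS_ID)
	    : "cc");
	return (before != after);	/* toggled -> cpuid present and enabled */
}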
322 * cpuid may be disabled on Cyrix; try to detect Cyrix by the 5/2 test,
323 * since div does not modify the cc flags on Cyrix. Even though this may
324 * also be true for other vendors, it is generally true only for
325 * newer models from those vendors that support and do not disable
326 * cpuid (usually because cpuid cannot be disabled)
330 * clear cc flags
332 xorb %ah, %ah
333 sahf
336 * perform 5/2 test
338 movw $5, %ax
339 movb $2, %bl
340 divb %bl
342 lahf
343 cmpb $2, %ah
344 jne cpu_486
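The 5/2 test just performed, as a hedged C sketch (GCC-style inline assembly; cyrix_div_test() is illustrative). After sahf the flag image in %ah is 0x02, since bit 1 of EFLAGS always reads as 1; if lahf still reads 0x02 after the divb, the divide left the flags untouched, which is the Cyrix signature:

static int
cyrix_div_test(void)
{
	unsigned int flags_image;

	__asm__ __volatile__(
	    "xorb %%ah, %%ah\n\t"	/* clear the arithmetic flags... */
	    "sahf\n\t"			/* ...by loading them from %ah */
	    "movw $5, %%ax\n\t"
	    "movb $2, %%bl\n\t"
	    "divb %%bl\n\t"		/* perform 5 / 2 */
	    "lahf\n\t"			/* capture the flags */
	    "movzbl %%ah, %0"
	    : "=r" (flags_image)
	    :
	    : "eax", "ebx", "cc");
	return (flags_image == 0x02);	/* flags unchanged -> likely Cyrix */
}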
347 * div did not modify the cc flags, chances are the vendor is Cyrix
348 * assume the vendor is Cyrix and use the CCR's to enable cpuid
350 .set CYRIX_CRI, 0x22 / CR Index Register
351 .set CYRIX_CRD, 0x23 / CR Data Register
353 .set CYRIX_CCR3, 0xc3 / Config Control Reg 3
354 .set CYRIX_CCR4, 0xe8 / Config Control Reg 4
355 .set CYRIX_DIR0, 0xfe / Device Identification Reg 0
356 .set CYRIX_DIR1, 0xff / Device Identification Reg 1
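All of the probing below leans on one idiom: write a register index to CYRIX_CRI (0x22), then read or write the data at CYRIX_CRD (0x23). As a C sketch (inb()/outb() are assumed port-I/O helpers, declared here only for illustration):

#include <stdint.h>

extern uint8_t inb(int port);			/* assumed helper */
extern void outb(int port, uint8_t value);	/* assumed helper */

static uint8_t
cyrix_cr_read(uint8_t index)
{
	outb(0x22, index);		/* CYRIX_CRI: select the register */
	return (inb(0x23));		/* CYRIX_CRD: fetch its contents */
}

static void
cyrix_cr_write(uint8_t index, uint8_t value)
{
	outb(0x22, index);
	outb(0x23, value);
}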
359 * even if the cpu vendor is Cyrix and the motherboard/chipset
360 * vendor decided to ignore lines A1-A4 for I/O addresses, I/O port
361 * 0x21 corresponds with 0x23 and since 0x22 is still untouched,
362 * the reads and writes of 0x21 are guaranteed to be off-chip of
363 * the cpu
367 * enable read of ISR at I/O port 0x20
369 movb $0xb, %al
370 outb $MCMD_PORT
373 * read IMR and store in %bl
375 inb $MIMR_PORT
376 movb %al, %bl
379 * mask out all interrupts so that ISR will not change
381 movb $0xff, %al
382 outb $MIMR_PORT
385 * reads of I/O port 0x22 on Cyrix are always directed off-chip
386 * make use of I/O pull-up to test for an unknown device on 0x22
388 inb $CYRIX_CRI
389 cmpb $0xff, %al
390 je port_22_free
393 * motherboard/chipset vendor may be ignoring line A1 of the I/O address
395 movb %al, %cl
398 * if the ISR and the value read from 0x22 do not match then we have
399 * detected some unknown device, probably a chipset, at 0x22
401 inb $MCMD_PORT
402 cmpb %al, %cl
403 jne restore_IMR
405 port_22_free:
407 * now test to see if some unknown device is using I/O port 0x23
409 * read the external I/O port at 0x23
411 inb $CYRIX_CRD
414 * Test for pull-up at 0x23 or if I/O address line A1 is being ignored.
415 * IMR is 0xff so both tests are performed simultaneously.
417 cmpb $0xff, %al
418 jne restore_IMR
421 * We are a Cyrix part. In case we are some model of Cx486 or a Cx586,
422 * record the type and fix it later if not.
424 movl $X86_VENDOR_Cyrix, x86_vendor
425 movl $X86_TYPE_CYRIX_486, x86_type
428 * Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3.
430 * load CCR3 index into CCR index register
433 movb $CYRIX_CCR3, %al
434 outb $CYRIX_CRI
437 * If we are not a Cyrix cpu, then we have performed an external I/O
438 * cycle. If the CCR index was not valid for this Cyrix model, we may
439 * have performed an external I/O cycle as well. In these cases and
440 * if the motherboard/chipset vendor ignores I/O address line A1,
441 * then the PIC will have IRQ3 set at the lowest priority as a side
442 * effect of the above outb. We are reasonably confident that there
443 * is not an unknown device on I/O port 0x22, so there should have been
444 * no unpredictable side-effect of the above outb.
448 * read CCR3
450 inb $CYRIX_CRD
453 * If we are not a Cyrix cpu the inb above produced an external I/O
454 * cycle. If we are a Cyrix model that does not support CCR3 we
455 * produced an external I/O cycle. In all known Cyrix models 6x86 and
456 * above, bit 3 of CCR3 is reserved and cannot be set to 1. In all
457 * Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are
458 * reserved as well. It is highly unlikely that CCR3 contains the value
459 * 0xff. We test whether I/O port 0x23 reads as the pull-up value or as
460 * the IMR, and if so deduce that we are not a Cyrix with cpuid support.
462 cmpb $0xff, %al
463 je restore_PIC
466 * There exist 486 ISA Cyrix chips that support CCR3 but do not support
467 * DIR0 and DIR1. If we try to read DIR0, we may generate external I/O
468 * cycles, the exact behavior is model specific and undocumented.
469 * Unfortunately these external I/O cycles may confuse some PIC's beyond
470 * recovery. Fortunately we can use the following undocumented trick:
471 * if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported.
472 * Pleasantly MAPEN contains bit 4 of CCR3, so this trick is guaranteed
473 * to work on all Cyrix cpu's which support cpuid.
475 movb %al, %dl
476 xorb $0x10, %dl
477 movb %al, %cl
480 * write CCR3 back with bit 4 toggled
482 movb $CYRIX_CCR3, %al
483 outb $CYRIX_CRI
485 movb %dl, %al
486 outb $CYRIX_CRD
489 * read CCR3
491 movb $CYRIX_CCR3, %al
492 outb $CYRIX_CRI
493 inb $CYRIX_CRD
494 movb %al, %dl
497 * restore CCR3
499 movb $CYRIX_CCR3, %al
500 outb $CYRIX_CRI
502 movb %cl, %al
503 outb $CYRIX_CRD
506 * if bit 4 was not toggled DIR0 and DIR1 are not supported in which
507 * case we do not have cpuid anyway
509 andb $0x10, %al
510 andb $0x10, %dl
511 cmpb %al, %dl
512 je restore_PIC
515 * read DIR0
517 movb $CYRIX_DIR0, %al
518 outb $CYRIX_CRI
519 inb $CYRIX_CRD
522 * test for pull-up
524 cmpb $0xff, %al
525 je restore_PIC
528 * Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for
529 * future use. If Cyrix ever produces a cpu that supports cpuid with
530 * these ids, the following test will have to change. For now we remain
531 * pessimistic since the formats of the CCR's may be different by then.
533 * test for at least a 6x86, to see if we support both MAPEN and CPUID
535 cmpb $0x30, %al
536 jb restore_IMR
539 * enable MAPEN
541 movb $CYRIX_CCR3, %al
542 outb $CYRIX_CRI
544 andb $0xf, %cl
545 movb %cl, %al
546 orb $0x10, %al
547 outb $CYRIX_CRD
550 * select CCR4
552 movb $CYRIX_CCR4, %al
553 outb $CYRIX_CRI
556 * read CCR4
558 inb $CYRIX_CRD
561 * enable cpuid
563 orb $0x80, %al
564 movb %al, %dl
567 * select CCR4
569 movb $CYRIX_CCR4, %al
570 outb $CYRIX_CRI
573 * write CCR4
575 movb %dl, %al
576 outb $CYRIX_CRD
579 * select CCR3
581 movb $CYRIX_CCR3, %al
582 outb $CYRIX_CRI
585 * disable MAPEN and write CCR3
587 movb %cl, %al
588 outb $CYRIX_CRD
591 * restore IMR
593 movb %bl, %al
594 outb $MIMR_PORT
597 * test to see if cpuid available
599 pushfl
600 popl %ecx
601 movl %ecx, %eax
602 xorl $PS_ID, %eax / try complemented bit
603 pushl %eax
604 popfl
605 pushfl
606 popl %eax
607 cmpl %eax, %ecx
608 jne have_cpuid
609 jmp cpu_486
611 restore_PIC:
613 * In case the motherboard/chipset vendor is ignoring line A1 of the
614 * I/O address, we set the PIC priorities to sane values.
616 movb $0xc7, %al / irq 7 lowest priority
617 outb $MCMD_PORT
619 restore_IMR:
620 movb %bl, %al
621 outb $MIMR_PORT
622 jmp cpu_486
624 have_cpuid:
626 * cpuid instruction present
628 bts $X86FSET_CPUID, x86_featureset / Just to set; Ignore the CF
629 movl $0, %eax
630 cpuid
632 movl %ebx, cpu_vendor
633 movl %edx, cpu_vendor+4
634 movl %ecx, cpu_vendor+8
637 * early cyrix cpus are somewhat strange and need to be
638 * probed in curious ways to determine their identity
641 leal cpu_vendor, %esi
642 leal CyrixInstead, %edi
643 movl $12, %ecx
644 repz
645 cmpsb
646 je vendor_is_cyrix
648 / let mlsetup()/cpuid_pass1() handle everything else in C
650 jmp cpu_done
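For reference, the three stores above assemble the 12-byte vendor string that cpuid leaf 0 returns split across %ebx, %edx and %ecx, in that order. A self-contained C sketch (cpuid_vendor() is illustrative):

#include <stdint.h>
#include <string.h>

static void
cpuid_vendor(char vendor[13])
{
	uint32_t eax, ebx, ecx, edx;

	__asm__ __volatile__("cpuid"
	    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
	    : "0" (0));
	(void) memcpy(&vendor[0], &ebx, 4);	/* e.g. "Cyri" */
	(void) memcpy(&vendor[4], &edx, 4);	/*      "xIns" */
	(void) memcpy(&vendor[8], &ecx, 4);	/*      "tead" */
	vendor[12] = '\0';
}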
652 is486:
654 * test to see if we have a useful cpuid
656 testl %eax, %eax
657 jz isa486
659 movl $1, %eax
660 cpuid
662 movl %eax, %ebx
663 andl $0xF00, %ebx
664 cmpl $0x400, %ebx
665 je isa486
667 rep; ret /* use 2 byte return instruction */
668 /* AMD Software Optimization Guide - Section 6.2 */
669 isa486:
671 * lose the return address
673 popl %eax
674 jmp cpu_486
676 vendor_is_cyrix:
677 call is486
680 * Processor signature and feature flags for Cyrix are insane.
681 * BIOS can play with semi-documented registers, so cpuid must be used
682 * cautiously. Since we are a Cyrix that has cpuid, we have DIR0 and DIR1.
683 * Keep the family in %ebx and feature flags in %edx until not needed
687 * read DIR0
689 movb $CYRIX_DIR0, %al
690 outb $CYRIX_CRI
691 inb $CYRIX_CRD
694 * First we handle the cases where we are a 6x86 or 6x86L.
695 * The 6x86 is basically a 486, the only reliable bit in the
696 * feature flags is for FPU. The 6x86L is better, unfortunately
697 * there is no really good way to distinguish between these two
698 * cpu's. We are pessimistic and when in doubt assume 6x86.
701 cmpb $0x40, %al
702 jae maybeGX
705 * We are an M1, either a 6x86 or 6x86L.
707 cmpb $0x30, %al
708 je maybe6x86L
709 cmpb $0x31, %al
710 je maybe6x86L
711 cmpb $0x34, %al
712 je maybe6x86L
713 cmpb $0x35, %al
714 je maybe6x86L
717 * although it is possible that we are a 6x86L, the cpu and
718 * documentation are so buggy, we just do not care.
720 jmp likely6x86
722 maybe6x86L:
724 * read DIR1
726 movb $CYRIX_DIR1, %al
727 outb $CYRIX_CRI
728 inb $CYRIX_CRD
729 cmpb $0x22, %al
730 jb likely6x86
733 * We are a 6x86L, or at least a 6x86 with honest cpuid feature flags
735 movl $X86_TYPE_CYRIX_6x86L, x86_type
736 jmp coma_bug
738 likely6x86:
740 * We are likely a 6x86, or a 6x86L without a way of knowing
742 * The 6x86 has NO Pentium or Pentium Pro compatible features even
743 * though it claims to be a Pentium Pro compatible!
745 * The 6x86 core used in the 6x86 may have most of the Pentium system
746 * registers and largely conform to the Pentium System Programming
747 * Reference. Documentation on these parts is long gone. Treat it as
748 * a crippled Pentium and hope for the best.
751 movl $X86_TYPE_CYRIX_6x86, x86_type
752 jmp coma_bug
754 maybeGX:
756 * Now we check whether we are a MediaGX or GXm. We have particular
757 * reason for concern here. Even though most of the GXm's
758 * report having TSC in the cpuid feature flags, the TSC may be
759 * horribly broken. What is worse, is that MediaGX's are basically
760 * 486's while the good GXm's are more like Pentium Pro's!
763 cmpb $0x50, %al
764 jae maybeM2
767 * We are either a MediaGX (sometimes called a Gx86) or GXm
770 cmpb $41, %al
771 je maybeMediaGX
773 cmpb $44, %al
774 jb maybeGXm
776 cmpb $47, %al
777 jbe maybeMediaGX
780 * We do not honestly know what we are, so assume a MediaGX
782 jmp media_gx
784 maybeGXm:
786 * It is still possible we are either a MediaGX or GXm, trust cpuid
787 * family should be 5 on a GXm
789 cmpl $0x500, %ebx
790 je GXm
793 * BIOS/Cyrix might set family to 6 on a GXm
795 cmpl $0x600, %ebx
796 jne media_gx
798 GXm:
799 movl $X86_TYPE_CYRIX_GXm, x86_type
800 jmp cpu_done
802 maybeMediaGX:
804 * read DIR1
806 movb $CYRIX_DIR1, %al
807 outb $CYRIX_CRI
808 inb $CYRIX_CRD
810 cmpb $0x30, %al
811 jae maybeGXm
814 * we are a MediaGX for which we do not trust cpuid
816 media_gx:
817 movl $X86_TYPE_CYRIX_MediaGX, x86_type
818 jmp cpu_486
820 maybeM2:
822 * Now we check whether we are a 6x86MX or MII. These cpu's are
823 * virtually identical, but we care because for the 6x86MX, we
824 * must work around the coma bug. Also for 6x86MX prior to revision
825 * 1.4, the TSC may have serious bugs.
828 cmpb $0x60, %al
829 jae maybeM3
832 * family should be 6, but BIOS/Cyrix might set it to 5
834 cmpl $0x600, %ebx
835 ja cpu_486
838 * read DIR1
840 movb $CYRIX_DIR1, %al
841 outb $CYRIX_CRI
842 inb $CYRIX_CRD
844 cmpb $0x8, %al
845 jb cyrix6x86MX
846 cmpb $0x80, %al
847 jb MII
849 cyrix6x86MX:
851 * It is altogether unclear how the revision stamped on the cpu
852 * maps to the values in DIR0 and DIR1. Just assume TSC is broken.
854 movl $X86_TYPE_CYRIX_6x86MX, x86_type
855 jmp coma_bug
857 MII:
858 movl $X86_TYPE_CYRIX_MII, x86_type
859 likeMII:
860 jmp cpu_done
862 maybeM3:
864 * We are some chip that we cannot identify yet, an MIII perhaps.
865 * We will be optimistic and hope that the chip is much like an MII,
866 * and that cpuid is sane. Cyrix seemed to have gotten it right in
867 * time for the MII, we can only hope it stayed that way.
868 * Maybe the BIOS or Cyrix is trying to hint at something
870 cmpl $0x500, %ebx
871 je GXm
873 cmpb $0x80, %al
874 jae likelyM3
877 * Just test for the features Cyrix is known for
880 jmp MII
882 likelyM3:
884 * DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka
885 * the Cyrix MIII. There may be parts later that use the same ranges
886 * for DIR0 with special values in DIR1, maybe the VIA CIII, but for
887 * now we will call anything with a DIR0 of 0x80 or higher an MIII.
888 * The MIII is supposed to support large pages, but we will believe
889 * it when we see it. For now we just enable and test for MII features.
891 movl $X86_TYPE_VIA_CYRIX_III, x86_type
892 jmp likeMII
894 coma_bug:
897 * With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some
898 * bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus
899 * cycles except page table accesses and interrupt ACK cycles do not assert
900 * LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0.
901 * Due to a bug in the cpu core involving over-optimization of branch
902 * prediction, register renaming, and execution of instructions down both the
903 * X and Y pipes for the xchgl instruction, short loops can be written that
904 * never de-assert LOCK# from one invocation of the loop to the next, ad
905 * infinitum. The undesirable effect of this situation is that interrupts are
906 * not serviced. The ideal workaround to this bug would be to set NO_LOCK to
907 * 1. Unfortunately bus cycles that would otherwise have asserted LOCK# no
908 * longer do, unless they are page table accesses or interrupt ACK cycles.
909 * With LOCK# not asserted, these bus cycles are now cached. This can cause
910 * undesirable behaviour if the ARR's are not configured correctly. Solaris
911 * does not configure the ARR's, nor does it provide any useful mechanism for
912 * doing so, thus the ideal workaround is not viable. Fortunately, the only
913 * known exploits for this bug involve the xchgl instruction specifically.
914 * There is a group of undocumented registers on Cyrix 6x86, 6x86L, and
915 * 6x86MX cpu's which can be used to specify one instruction as a serializing
916 * instruction. With the xchgl instruction serialized, LOCK# is still
917 * asserted, but it is the sole instruction for which LOCK# is asserted.
918 * There is now some added penalty for the xchgl instruction, but the usual
919 * bus locking is preserved. This ingenious workaround was discovered by
920 * disassembling a binary provided by Cyrix as a workaround for this bug on
921 * Windows, but it's not documented anywhere by Cyrix, nor is the bug actually
922 * mentioned in any public errata! The only concern for this workaround is
923 * that there may be similar undiscovered bugs with other instructions that
924 * assert LOCK# that may be leveraged to similar ends. The fact that Cyrix
925 * fixed this bug sometime late in 1997 and no other exploits other than
926 * xchgl have been discovered is a good indication that this workaround is
927 * reasonable.
930 .set CYRIX_DBR0, 0x30 / Debug Register 0
931 .set CYRIX_DBR1, 0x31 / Debug Register 1
932 .set CYRIX_DBR2, 0x32 / Debug Register 2
933 .set CYRIX_DBR3, 0x33 / Debug Register 3
934 .set CYRIX_DOR, 0x3c / Debug Opcode Register
937 * What is known about DBR1, DBR2, DBR3, and DOR is that for normal
938 * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode
939 * serialization, DBR1, DBR2, and DBR3 are loaded with 0xb8, 0x7f,
940 * and 0xff. Then, DOR is loaded with the one byte opcode.
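Using the cyrix_cr_read()/cyrix_cr_write() helpers sketched earlier, the sequence implemented in assembly below comes out roughly as follows (a sketch; the register indices and values are taken from the code itself, 0x87 being the one-byte xchgl opcode):

static void
cyrix_serialize_xchgl(void)
{
	uint8_t ccr3, dbr0;

	ccr3 = cyrix_cr_read(0xc3) & 0x0f;	/* CCR3 with MAPEN masked */
	cyrix_cr_write(0xc3, ccr3 | 0x10);	/* enable MAPEN */

	dbr0 = cyrix_cr_read(0x30) | 0x80;	/* DBR0 with MATCH disabled */
	cyrix_cr_write(0x30, dbr0);
	cyrix_cr_write(0x31, 0xf8);		/* DBR1 */
	cyrix_cr_write(0x32, 0x7f);		/* DBR2 */
	cyrix_cr_write(0x33, 0x00);		/* DBR3 */
	cyrix_cr_write(0x3c, 0x87);		/* DOR: the opcode to serialize */

	cyrix_cr_write(0x30, dbr0 & 0x7f);	/* re-enable MATCH */
	cyrix_cr_write(0xc3, ccr3);		/* disable MAPEN */
}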
944 * select CCR3
946 movb $CYRIX_CCR3, %al
947 outb $CYRIX_CRI
950 * read CCR3 and mask out MAPEN
952 inb $CYRIX_CRD
953 andb $0xf, %al
956 * save masked CCR3 in %ah
958 movb %al, %ah
961 * select CCR3
963 movb $CYRIX_CCR3, %al
964 outb $CYRIX_CRI
967 * enable MAPEN
969 movb %ah, %al
970 orb $0x10, %al
971 outb $CYRIX_CRD
974 * read DBR0
976 movb $CYRIX_DBR0, %al
977 outb $CYRIX_CRI
978 inb $CYRIX_CRD
981 * disable MATCH and save in %bh
983 orb $0x80, %al
984 movb %al, %bh
987 * write DBR0
989 movb $CYRIX_DBR0, %al
990 outb $CYRIX_CRI
991 movb %bh, %al
992 outb $CYRIX_CRD
995 * write DBR1
997 movb $CYRIX_DBR1, %al
998 outb $CYRIX_CRI
999 movb $0xf8, %al
1000 outb $CYRIX_CRD
1003 * write DBR2
1005 movb $CYRIX_DBR2, %al
1006 outb $CYRIX_CRI
1007 movb $0x7f, %al
1008 outb $CYRIX_CRD
1011 * write DBR3
1013 movb $CYRIX_DBR3, %al
1014 outb $CYRIX_CRI
1015 xorb %al, %al
1016 outb $CYRIX_CRD
1019 * write DOR
1021 movb $CYRIX_DOR, %al
1022 outb $CYRIX_CRI
1023 movb $0x87, %al
1024 outb $CYRIX_CRD
1027 * enable MATCH
1029 movb $CYRIX_DBR0, %al
1030 outb $CYRIX_CRI
1031 movb %bh, %al
1032 andb $0x7f, %al
1033 outb $CYRIX_CRD
1036 * disable MAPEN
1038 movb $0xc3, %al
1039 outb $CYRIX_CRI
1040 movb %ah, %al
1041 outb $CYRIX_CRD
1043 jmp cpu_done
1045 cpu_done:
1047 popfl /* Restore original FLAGS */
1048 popal /* Restore all registers */
1050 #endif /* !__xpv */
1053 * mlsetup(%esp) gets called.
1055 pushl %esp
1056 call mlsetup
1057 addl $4, %esp
1060 * We change our appearance to look like the real thread 0.
1061 * (NOTE: making ourselves a real thread may be a no-op)
1062 * main() gets called. (NOTE: main() never returns).
1064 call main
1065 /* NOTREACHED */
1066 pushl $__return_from_main
1067 call panic
1069 /* NOTREACHED */
1070 cpu_486:
1071 pushl $__unsupported_cpu
1072 call panic
1073 SET_SIZE(_locore_start)
1075 #endif /* __lint */
1076 #endif /* !__amd64 */
1080 * For stack layout, see privregs.h
1081 * When cmntrap gets called, the error code and trap number have been pushed.
1082 * When cmntrap_pushed gets called, the entire struct regs has been pushed.
1085 #if defined(__lint)
1087 /* ARGSUSED */
1088 void
1089 cmntrap()
1092 #else /* __lint */
1094 .globl trap /* C handler called below */
1096 #if defined(__amd64)
1098 ENTRY_NP2(cmntrap, _cmntrap)
1100 INTR_PUSH
1102 ALTENTRY(cmntrap_pushed)
1104 movq %rsp, %rbp
1107 * - if this is a #pf i.e. T_PGFLT, %r15 is live
1108 * and contains the faulting address i.e. a copy of %cr2
1110 * - if this is a #db i.e. T_SGLSTP, %r15 is live
1111 * and contains the value of %db6
1114 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1115 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1116 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1119 * We must first check if DTrace has set its NOFAULT bit. This
1120 * regrettably must happen before the trap stack is recorded, because
1121 * this requires a call to getpcstack() and may induce recursion if an
1122 * fbt::getpcstack: enabling is inducing the bad load.
1124 movl %gs:CPU_ID, %eax
1125 shlq $CPU_CORE_SHIFT, %rax
1126 leaq cpu_core(%rip), %r8
1127 addq %r8, %rax
1128 movw CPUC_DTRACE_FLAGS(%rax), %cx
1129 testw $CPU_DTRACE_NOFAULT, %cx
1130 jnz .dtrace_induced
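In C the check just made looks roughly like this (a sketch; cpu_core, cpuc_dtrace_flags and CPU_DTRACE_NOFAULT are the kernel's own symbols, as used above, but the header locations and function shape are assumed):

#include <sys/cpuvar.h>
#include <sys/dtrace_impl.h>

static int
dtrace_nofault_set(void)
{
	cpu_core_t *cp = &cpu_core[CPU->cpu_id];

	return ((cp->cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0);
}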
1132 TRACE_STACK(%rdi)
1134 movq %rbp, %rdi
1135 movq %r15, %rsi
1136 movl %gs:CPU_ID, %edx
1139 * We know that this isn't a DTrace non-faulting load; we can now safely
1140 * reenable interrupts. (In the case of pagefaults, we enter through an
1141 * interrupt gate.)
1143 ENABLE_INTR_FLAGS
1145 call trap /* trap(rp, addr, cpuid) handles all traps */
1146 jmp _sys_rtt
1148 .dtrace_induced:
1149 cmpw $KCS_SEL, REGOFF_CS(%rbp) /* test CS for user-mode trap */
1150 jne 3f /* if from user, panic */
1152 cmpl $T_PGFLT, REGOFF_TRAPNO(%rbp)
1153 je 1f
1155 cmpl $T_GPFLT, REGOFF_TRAPNO(%rbp)
1156 je 0f
1158 cmpl $T_ILLINST, REGOFF_TRAPNO(%rbp)
1159 je 0f
1161 cmpl $T_ZERODIV, REGOFF_TRAPNO(%rbp)
1162 jne 4f /* if not PF/GP/UD/DE, panic */
1164 orw $CPU_DTRACE_DIVZERO, %cx
1165 movw %cx, CPUC_DTRACE_FLAGS(%rax)
1166 jmp 2f
1169 * If we've taken a GPF, we don't (unfortunately) have the address that
1170 * induced the fault. So instead of setting the fault to BADADDR,
1171 * we'll set the fault to ILLOP.
1173 0:
1174 orw $CPU_DTRACE_ILLOP, %cx
1175 movw %cx, CPUC_DTRACE_FLAGS(%rax)
1176 jmp 2f
1177 1:
1178 orw $CPU_DTRACE_BADADDR, %cx
1179 movw %cx, CPUC_DTRACE_FLAGS(%rax) /* set fault to bad addr */
1180 movq %r15, CPUC_DTRACE_ILLVAL(%rax)
1181 /* fault addr is illegal value */
1182 2:
1183 movq REGOFF_RIP(%rbp), %rdi
1184 movq %rdi, %r12
1185 call dtrace_instr_size
1186 addq %rax, %r12
1187 movq %r12, REGOFF_RIP(%rbp)
1188 INTR_POP
1189 IRET
1190 /*NOTREACHED*/
1191 3:
1192 leaq dtrace_badflags(%rip), %rdi
1193 xorl %eax, %eax
1194 call panic
1195 4:
1196 leaq dtrace_badtrap(%rip), %rdi
1197 xorl %eax, %eax
1198 call panic
1199 SET_SIZE(cmntrap)
1200 SET_SIZE(_cmntrap)
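The heart of the .dtrace_induced path above, sketched in C (struct regs, the cpuc_dtrace_* fields and dtrace_instr_size() are the kernel's; the function shape and header choices are ours): record the fault for DTrace, then resume at the instruction after the faulting one.

#include <sys/types.h>
#include <sys/privregs.h>
#include <sys/dtrace_impl.h>

extern int dtrace_instr_size(uchar_t *);

static void
dtrace_nofault_skip(struct regs *rp, cpu_core_t *cp, uintptr_t addr)
{
	cp->cpuc_dtrace_flags |= CPU_DTRACE_BADADDR;
	cp->cpuc_dtrace_illval = addr;		/* the "illegal value" */
	rp->r_pc += dtrace_instr_size((uchar_t *)rp->r_pc);
}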
1202 #elif defined(__i386)
1205 ENTRY_NP2(cmntrap, _cmntrap)
1207 INTR_PUSH
1209 ALTENTRY(cmntrap_pushed)
1211 movl %esp, %ebp
1214 * - if this is a #pf i.e. T_PGFLT, %esi is live
1215 * and contains the faulting address i.e. a copy of %cr2
1217 * - if this is a #db i.e. T_SGLSTP, %esi is live
1218 * and contains the value of %db6
1221 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1222 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1223 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1226 * We must first check if DTrace has set its NOFAULT bit. This
1227 * regrettably must happen before the trap stack is recorded, because
1228 * this requires a call to getpcstack() and may induce recursion if an
1229 * fbt::getpcstack: enabling is inducing the bad load.
1231 movl %gs:CPU_ID, %eax
1232 shll $CPU_CORE_SHIFT, %eax
1233 addl $cpu_core, %eax
1234 movw CPUC_DTRACE_FLAGS(%eax), %cx
1235 testw $CPU_DTRACE_NOFAULT, %cx
1236 jnz .dtrace_induced
1238 TRACE_STACK(%edi)
1240 pushl %gs:CPU_ID
1241 pushl %esi /* fault address for PGFLTs */
1242 pushl %ebp /* &regs */
1245 * We know that this isn't a DTrace non-faulting load; we can now safely
1246 * reenable interrupts. (In the case of pagefaults, we enter through an
1247 * interrupt gate.)
1249 ENABLE_INTR_FLAGS
1251 call trap /* trap(rp, addr, cpuid) handles all traps */
1252 addl $12, %esp /* get argument off stack */
1253 jmp _sys_rtt
1255 .dtrace_induced:
1256 cmpw $KCS_SEL, REGOFF_CS(%ebp) /* test CS for user-mode trap */
1257 jne 3f /* if from user, panic */
1259 cmpl $T_PGFLT, REGOFF_TRAPNO(%ebp)
1260 je 1f
1262 cmpl $T_GPFLT, REGOFF_TRAPNO(%ebp)
1263 je 0f
1265 cmpl $T_ZERODIV, REGOFF_TRAPNO(%ebp)
1266 jne 4f /* if not PF/GP/UD/DE, panic */
1268 orw $CPU_DTRACE_DIVZERO, %cx
1269 movw %cx, CPUC_DTRACE_FLAGS(%eax)
1270 jmp 2f
1272 0:
1274 * If we've taken a GPF, we don't (unfortunately) have the address that
1275 * induced the fault. So instead of setting the fault to BADADDR,
1276 * we'll set the fault to ILLOP.
1278 orw $CPU_DTRACE_ILLOP, %cx
1279 movw %cx, CPUC_DTRACE_FLAGS(%eax)
1280 jmp 2f
1281 1:
1282 orw $CPU_DTRACE_BADADDR, %cx
1283 movw %cx, CPUC_DTRACE_FLAGS(%eax) /* set fault to bad addr */
1284 movl %esi, CPUC_DTRACE_ILLVAL(%eax)
1285 /* fault addr is illegal value */
1286 2:
1287 pushl REGOFF_EIP(%ebp)
1288 call dtrace_instr_size
1289 addl $4, %esp
1290 movl REGOFF_EIP(%ebp), %ecx
1291 addl %eax, %ecx
1292 movl %ecx, REGOFF_EIP(%ebp)
1293 INTR_POP_KERNEL
1294 IRET
1295 /*NOTREACHED*/
1296 3:
1297 pushl $dtrace_badflags
1298 call panic
1299 4:
1300 pushl $dtrace_badtrap
1301 call panic
1302 SET_SIZE(cmntrap)
1303 SET_SIZE(_cmntrap)
1305 #endif /* __i386 */
1308 * Declare a uintptr_t which has the size of _cmntrap to enable stack
1309 * traceback code to know when a regs structure is on the stack.
1311 .globl _cmntrap_size
1312 .align CLONGSIZE
1313 _cmntrap_size:
1314 .NWORD . - _cmntrap
1315 .type _cmntrap_size, @object
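A traceback consumer might use the pair like this (a sketch; the comparison shape is ours): a saved PC inside [_cmntrap, _cmntrap + _cmntrap_size) implies a struct regs sits on the stack.

#include <stdint.h>

extern char _cmntrap[];			/* start of the trap glue */
extern const uintptr_t _cmntrap_size;	/* its size, declared above */

static int
pc_in_cmntrap(uintptr_t pc)
{
	return (pc >= (uintptr_t)_cmntrap &&
	    pc < (uintptr_t)_cmntrap + _cmntrap_size);
}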
1317 dtrace_badflags:
1318 .string "bad DTrace flags"
1320 dtrace_badtrap:
1321 .string "bad DTrace trap"
1323 #endif /* __lint */
1325 #if defined(__lint)
1327 /* ARGSUSED */
1328 void
1329 cmninttrap()
1332 #if !defined(__xpv)
1333 void
1334 bop_trap_handler(void)
1336 #endif
1338 #else /* __lint */
1340 .globl trap /* C handler called below */
1342 #if defined(__amd64)
1344 ENTRY_NP(cmninttrap)
1346 INTR_PUSH
1347 INTGATE_INIT_KERNEL_FLAGS
1349 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1350 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1351 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1353 movq %rsp, %rbp
1355 movl %gs:CPU_ID, %edx
1356 xorl %esi, %esi
1357 movq %rsp, %rdi
1358 call trap /* trap(rp, addr, cpuid) handles all traps */
1359 jmp _sys_rtt
1360 SET_SIZE(cmninttrap)
1362 #if !defined(__xpv)
1364 * Handle traps early in boot. Just revectors into C quickly as
1365 * these are always fatal errors.
1367 * Adjust %rsp to get the same stack layout as in 32-bit mode for bop_trap().
1369 ENTRY(bop_trap_handler)
1370 movq %rsp, %rdi
1371 sub $8, %rsp
1372 call bop_trap
1373 SET_SIZE(bop_trap_handler)
1374 #endif
1376 #elif defined(__i386)
1378 ENTRY_NP(cmninttrap)
1380 INTR_PUSH
1381 INTGATE_INIT_KERNEL_FLAGS
1383 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1384 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1385 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1387 movl %esp, %ebp
1389 TRACE_STACK(%edi)
1391 pushl %gs:CPU_ID
1392 pushl $0
1393 pushl %ebp
1394 call trap /* trap(rp, addr, cpuid) handles all traps */
1395 addl $12, %esp
1396 jmp _sys_rtt
1397 SET_SIZE(cmninttrap)
1399 #if !defined(__xpv)
1401 * Handle traps early in boot. Just revectors into C quickly as
1402 * these are always fatal errors.
1404 ENTRY(bop_trap_handler)
1405 movl %esp, %eax
1406 pushl %eax
1407 call bop_trap
1408 SET_SIZE(bop_trap_handler)
1409 #endif
1411 #endif /* __i386 */
1413 #endif /* __lint */
1415 #if defined(__lint)
1417 /* ARGSUSED */
1418 void
1419 dtrace_trap()
1422 #else /* __lint */
1424 .globl dtrace_user_probe
1426 #if defined(__amd64)
1428 ENTRY_NP(dtrace_trap)
1430 INTR_PUSH
1432 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1433 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1434 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1436 movq %rsp, %rbp
1438 movl %gs:CPU_ID, %edx
1439 #if defined(__xpv)
1440 movq %gs:CPU_VCPU_INFO, %rsi
1441 movq VCPU_INFO_ARCH_CR2(%rsi), %rsi
1442 #else
1443 movq %cr2, %rsi
1444 #endif
1445 movq %rsp, %rdi
1447 ENABLE_INTR_FLAGS
1449 call dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
1450 jmp _sys_rtt
1452 SET_SIZE(dtrace_trap)
1454 #elif defined(__i386)
1456 ENTRY_NP(dtrace_trap)
1458 INTR_PUSH
1460 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1461 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1462 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1464 movl %esp, %ebp
1466 pushl %gs:CPU_ID
1467 #if defined(__xpv)
1468 movl %gs:CPU_VCPU_INFO, %eax
1469 movl VCPU_INFO_ARCH_CR2(%eax), %eax
1470 #else
1471 movl %cr2, %eax
1472 #endif
1473 pushl %eax
1474 pushl %ebp
1476 ENABLE_INTR_FLAGS
1478 call dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
1479 addl $12, %esp /* get argument off stack */
1481 jmp _sys_rtt
1482 SET_SIZE(dtrace_trap)
1484 #endif /* __i386 */
1486 #endif /* __lint */
1489 * Return from _sys_trap routine.
1492 #if defined(__lint)
1494 void
1495 lwp_rtt_initial(void)
1498 void
1499 lwp_rtt(void)
1502 void
1503 _sys_rtt(void)
1506 #else /* __lint */
1508 #if defined(__amd64)
1510 ENTRY_NP(lwp_rtt_initial)
1511 movq %gs:CPU_THREAD, %r15
1512 movq T_STACK(%r15), %rsp /* switch to the thread stack */
1513 movq %rsp, %rbp
1514 call __dtrace_probe___proc_start
1515 jmp _lwp_rtt
1517 ENTRY_NP(lwp_rtt)
1520 * r14 lwp
1521 * rdx lwp->lwp_procp
1522 * r15 curthread
1525 movq %gs:CPU_THREAD, %r15
1526 movq T_STACK(%r15), %rsp /* switch to the thread stack */
1527 movq %rsp, %rbp
1528 _lwp_rtt:
1529 call __dtrace_probe___proc_lwp__start
1530 movq %gs:CPU_LWP, %r14
1531 movq LWP_PROCP(%r14), %rdx
1534 * XX64 Is the stack misaligned correctly at this point?
1535 * If not, we need to do a push before calling anything ..
1538 #if defined(DEBUG)
1540 * If we were to run lwp_savectx at this point -without-
1541 * pcb_rupdate being set to 1, we'd end up sampling the hardware
1542 * state left by the previous running lwp, rather than setting
1543 * the values requested by the lwp creator. Bad.
1545 testb $0x1, PCB_RUPDATE(%r14)
1546 jne 1f
1547 leaq _no_pending_updates(%rip), %rdi
1548 movl $__LINE__, %esi
1549 movq %r14, %rdx
1550 xorl %eax, %eax
1551 call panic
1552 _no_pending_updates:
1553 .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
1554 1:
1555 #endif
1558 * If agent lwp, clear %fs and %gs
1560 cmpq %r15, P_AGENTTP(%rdx)
1561 jne 1f
1562 xorl %ecx, %ecx
1563 movq %rcx, REGOFF_FS(%rsp)
1564 movq %rcx, REGOFF_GS(%rsp)
1565 movw %cx, LWP_PCB_FS(%r14)
1566 movw %cx, LWP_PCB_GS(%r14)
1567 1:
1568 call dtrace_systrace_rtt
1569 movq REGOFF_RDX(%rsp), %rsi
1570 movq REGOFF_RAX(%rsp), %rdi
1571 call post_syscall /* post_syscall(rval1, rval2) */
1574 * set up to take fault on first use of fp
1576 STTS(%rdi)
1579 * XXX - may want a fast path that avoids sys_rtt_common in the
1580 * most common case.
1582 ALTENTRY(_sys_rtt)
1583 CLI(%rax) /* disable interrupts */
1584 ALTENTRY(_sys_rtt_ints_disabled)
1585 movq %rsp, %rdi /* pass rp to sys_rtt_common */
1586 call sys_rtt_common /* do common sys_rtt tasks */
1587 testq %rax, %rax /* returning to userland? */
1588 jz sr_sup
1591 * Return to user
1593 ASSERT_UPCALL_MASK_IS_SET
1594 cmpw $UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
1595 je sys_rtt_syscall
1598 * Return to 32-bit userland
1600 ALTENTRY(sys_rtt_syscall32)
1601 USER32_POP
1602 IRET
1603 /*NOTREACHED*/
1605 ALTENTRY(sys_rtt_syscall)
1607 * Return to 64-bit userland
1609 USER_POP
1610 ALTENTRY(nopop_sys_rtt_syscall)
1611 IRET
1612 /*NOTREACHED*/
1613 SET_SIZE(nopop_sys_rtt_syscall)
1616 * Return to supervisor
1617 * NOTE: to make the check in trap() that tests if we are executing
1618 * segment register fixup/restore code work properly, sr_sup MUST be
1619 * after _sys_rtt .
1621 ALTENTRY(sr_sup)
1623 * Restore regs before doing iretq to kernel mode
1625 INTR_POP
1626 IRET
1627 .globl _sys_rtt_end
1628 _sys_rtt_end:
1629 /*NOTREACHED*/
1630 SET_SIZE(sr_sup)
1631 SET_SIZE(_sys_rtt_end)
1632 SET_SIZE(lwp_rtt)
1633 SET_SIZE(lwp_rtt_initial)
1634 SET_SIZE(_sys_rtt_ints_disabled)
1635 SET_SIZE(_sys_rtt)
1636 SET_SIZE(sys_rtt_syscall)
1637 SET_SIZE(sys_rtt_syscall32)
1639 #elif defined(__i386)
1641 ENTRY_NP(lwp_rtt_initial)
1642 movl %gs:CPU_THREAD, %eax
1643 movl T_STACK(%eax), %esp /* switch to the thread stack */
1644 movl %esp, %ebp
1645 call __dtrace_probe___proc_start
1646 jmp _lwp_rtt
1648 ENTRY_NP(lwp_rtt)
1649 movl %gs:CPU_THREAD, %eax
1650 movl T_STACK(%eax), %esp /* switch to the thread stack */
1651 movl %esp, %ebp
1652 _lwp_rtt:
1653 call __dtrace_probe___proc_lwp__start
1656 * If agent lwp, clear %fs and %gs.
1658 movl %gs:CPU_LWP, %eax
1659 movl LWP_PROCP(%eax), %edx
1661 cmpl %eax, P_AGENTTP(%edx)
1662 jne 1f
1663 movl $0, REGOFF_FS(%esp)
1664 movl $0, REGOFF_GS(%esp)
1665 1:
1666 call dtrace_systrace_rtt
1667 movl REGOFF_EDX(%esp), %edx
1668 movl REGOFF_EAX(%esp), %eax
1669 pushl %edx
1670 pushl %eax
1671 call post_syscall /* post_syscall(rval1, rval2) */
1672 addl $8, %esp
1675 * set up to take fault on first use of fp
1677 STTS(%eax)
1680 * XXX - may want a fast path that avoids sys_rtt_common in the
1681 * most common case.
1683 ALTENTRY(_sys_rtt)
1684 CLI(%eax) /* disable interrupts */
1685 ALTENTRY(_sys_rtt_ints_disabled)
1686 pushl %esp /* pass rp to sys_rtt_common */
1687 call sys_rtt_common
1688 addl $4, %esp /* pop arg */
1689 testl %eax, %eax /* test for return to user mode */
1690 jz sr_sup
1693 * Return to User.
1695 ALTENTRY(sys_rtt_syscall)
1696 INTR_POP_USER
1699 * There can be no instructions between this label and IRET or
1700 * we could end up breaking linux brand support. See label usage
1701 * in lx_brand_int80_callback for an example.
1703 ALTENTRY(nopop_sys_rtt_syscall)
1704 IRET
1705 /*NOTREACHED*/
1706 SET_SIZE(nopop_sys_rtt_syscall)
1708 ALTENTRY(_sys_rtt_end)
1711 * Return to supervisor
1713 ALTENTRY(sr_sup)
1716 * Restore regs before doing iret to kernel mode
1718 INTR_POP_KERNEL
1719 IRET
1720 /*NOTREACHED*/
1722 SET_SIZE(sr_sup)
1723 SET_SIZE(_sys_rtt_end)
1724 SET_SIZE(lwp_rtt)
1725 SET_SIZE(lwp_rtt_initial)
1726 SET_SIZE(_sys_rtt_ints_disabled)
1727 SET_SIZE(_sys_rtt)
1728 SET_SIZE(sys_rtt_syscall)
1730 #endif /* __i386 */
1732 #endif /* __lint */
1734 #if defined(__lint)
1737 * So why do we have to deal with all this crud in the world of ia32?
1739 * Basically there are four classes of ia32 implementations, those that do not
1740 * have a TSC, those that have a marginal TSC that is broken to the extent
1741 * that it is useless, those that have a marginal TSC that is not quite so
1742 * horribly broken and can be used with some care, and those that have a
1743 * reliable TSC. This crud has to be here in order to sift through all the
1744 * variants.
1747 /*ARGSUSED*/
1748 uint64_t
1749 freq_tsc(uint32_t *pit_counter)
1751 return (0);
1754 #else /* __lint */
1756 #if defined(__amd64)
1759 * XX64 quick and dirty port from the i386 version. Since we
1760 * believe the amd64 tsc is more reliable, could this code be
1761 * simpler?
1763 ENTRY_NP(freq_tsc)
1764 pushq %rbp
1765 movq %rsp, %rbp
1766 movq %rdi, %r9 /* save pit_counter */
1767 pushq %rbx
1769 / We have a TSC, but we have no way in general to know how reliable it is.
1770 / Usually a marginal TSC behaves appropriately unless not enough time
1771 / elapses between reads. A reliable TSC can be read as often and as rapidly
1772 / as desired. The simplistic approach of reading the TSC counter and
1773 / correlating to the PIT counter cannot be naively followed. Instead estimates
1774 / have to be taken to successively refine a guess at the speed of the cpu
1775 / and then the TSC and PIT counter are correlated. In practice very rarely
1776 / is more than one quick loop required for an estimate. Measures have to be
1777 / taken to prevent the PIT counter from wrapping beyond its resolution and for
1778 / measuring the clock rate of very fast processors.
1780 / The following constant can be tuned. It should be such that the loop does
1781 / not take too many nor too few PIT counts to execute. If this value is too
1782 / large, then on slow machines the loop will take a long time, or the PIT
1783 / counter may even wrap. If this value is too small, then on fast machines
1784 / the PIT counter may count so few ticks that the resolution of the PIT
1785 / itself causes a bad guess. Because this code is used in machines with
1786 / marginal TSC's and/or IO, if this value is too small on those, it may
1787 / cause the calculated cpu frequency to vary slightly from boot to boot.
1789 / In all cases even if this constant is set inappropriately, the algorithm
1790 / will still work and the caller should be able to handle variances in the
1791 / calculation of cpu frequency, but the calculation will be inefficient and
1792 / take a disproportionate amount of time relative to a well selected value.
1793 / As the slowest supported cpu becomes faster, this constant should be
1794 / carefully increased.
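Structurally, the loop below does roughly the following (a C sketch; pit_start_countdown(), pit_ticks_elapsed(), pit_wrapped() and delay_loop() are hypothetical stand-ins for the PIT programming and the timed body, and the doubling path taken when the count was never loaded into the CE is omitted):

#include <stdint.h>

extern void pit_start_countdown(uint16_t count);	/* stand-in */
extern uint32_t pit_ticks_elapsed(void);	/* elapsed count + 1, mode 0 */
extern int pit_wrapped(void);			/* stand-in */
extern void delay_loop(uint32_t loops);		/* stand-in */

static inline uint64_t
rdtsc(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32 | lo);
}

uint64_t
freq_tsc_sketch(uint32_t *pit_counter)
{
	uint32_t loops = 0x8000;

	for (;;) {
		pit_start_countdown(0xffff);	/* mode 0 count-down */
		uint64_t t0 = rdtsc();
		delay_loop(loops);		/* the timed body */
		uint64_t t1 = rdtsc();
		uint32_t ticks = pit_ticks_elapsed();

		if (pit_wrapped()) {
			loops >>= 1;	/* pass took too long: halve */
		} else if (ticks < 0xe000) {
			/* too short: rescale toward ~0xf000 PIT ticks */
			loops = (uint32_t)((uint64_t)loops * 0xf000 / ticks);
		} else {
			*pit_counter = ticks;	/* PIT ticks consumed */
			return (t1 - t0);	/* TSC ticks consumed */
		}
	}
}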
1796 movl $0x8000, %ecx
1798 / to make sure the instruction cache has been warmed
1799 clc
1801 jmp freq_tsc_loop
1803 / The following block of code up to and including the latching of the PIT
1804 / counter after freq_tsc_perf_loop is very critical and very carefully
1805 / written, it should only be modified with great care. freq_tsc_loop to
1806 / freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
1807 / freq_tsc_perf_loop up to the unlatching of the PIT counter.
1809 .align 32
1810 freq_tsc_loop:
1811 / save the loop count in %ebx
1812 movl %ecx, %ebx
1814 / initialize the PIT counter and start a count down
1815 movb $PIT_LOADMODE, %al
1816 outb $PITCTL_PORT
1817 movb $0xff, %al
1818 outb $PITCTR0_PORT
1819 outb $PITCTR0_PORT
1821 / read the TSC and store the TS in %edi:%esi
1822 rdtsc
1823 movl %eax, %esi
1825 freq_tsc_perf_loop:
1826 movl %edx, %edi
1827 movl %eax, %esi
1828 movl %edx, %edi
1829 loop freq_tsc_perf_loop
1831 / read the TSC and store the LSW in %ecx
1832 rdtsc
1833 movl %eax, %ecx
1835 / latch the PIT counter and status
1836 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
1837 outb $PITCTL_PORT
1839 / remember if the icache has been warmed
1840 setc %ah
1842 / read the PIT status
1843 inb $PITCTR0_PORT
1844 shll $8, %eax
1846 / read PIT count
1847 inb $PITCTR0_PORT
1848 shll $8, %eax
1849 inb $PITCTR0_PORT
1850 bswap %eax
1852 / check to see if the PIT count was loaded into the CE
1853 btw $_CONST(PITSTAT_NULLCNT+8), %ax
1854 jc freq_tsc_increase_count
1856 / check to see if PIT counter wrapped
1857 btw $_CONST(PITSTAT_OUTPUT+8), %ax
1858 jnc freq_tsc_pit_did_not_wrap
1860 / halve count
1861 shrl $1, %ebx
1862 movl %ebx, %ecx
1864 / the instruction cache has been warmed
1865 stc
1867 jmp freq_tsc_loop
1869 freq_tsc_increase_count:
1870 shll $1, %ebx
1871 jc freq_tsc_too_fast
1873 movl %ebx, %ecx
1875 / the instruction cache has been warmed
1876 stc
1878 jmp freq_tsc_loop
1880 freq_tsc_pit_did_not_wrap:
1881 roll $16, %eax
1883 cmpw $0x2000, %ax
1884 notw %ax
1885 jb freq_tsc_sufficient_duration
1887 freq_tsc_calculate:
1888 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
1889 / then on the second CLK pulse the CE is decremented, therefore mode 0
1890 / is really a (count + 1) counter, ugh
1891 xorl %esi, %esi
1892 movw %ax, %si
1893 incl %esi
1895 movl $0xf000, %eax
1896 mull %ebx
1898 / tuck away (target_pit_count * loop_count)
1899 movl %edx, %ecx
1900 movl %eax, %ebx
1902 movl %esi, %eax
1903 movl $0xffffffff, %edx
1904 mull %edx
1906 addl %esi, %eax
1907 adcl $0, %edx
1909 cmpl %ecx, %edx
1910 ja freq_tsc_div_safe
1911 jb freq_tsc_too_fast
1913 cmpl %ebx, %eax
1914 jbe freq_tsc_too_fast
1916 freq_tsc_div_safe:
1917 movl %ecx, %edx
1918 movl %ebx, %eax
1920 movl %esi, %ecx
1921 divl %ecx
1923 movl %eax, %ecx
1925 / the instruction cache has been warmed
1926 stc
1928 jmp freq_tsc_loop
1930 freq_tsc_sufficient_duration:
1931 / test to see if the icache has been warmed
1932 btl $16, %eax
1933 jnc freq_tsc_calculate
1935 / recall mode 0 is a (count + 1) counter
1936 andl $0xffff, %eax
1937 incl %eax
1939 / save the number of PIT counts
1940 movl %eax, (%r9)
1942 / calculate the number of TS's that elapsed
1943 movl %ecx, %eax
1944 subl %esi, %eax
1945 sbbl %edi, %edx
1947 jmp freq_tsc_end
1949 freq_tsc_too_fast:
1950 / return 0 as a 64 bit quantity
1951 xorl %eax, %eax
1952 xorl %edx, %edx
1954 freq_tsc_end:
1955 shlq $32, %rdx
1956 orq %rdx, %rax
1958 popq %rbx
1959 leaveq
1960 ret
1961 SET_SIZE(freq_tsc)
1963 #elif defined(__i386)
1965 ENTRY_NP(freq_tsc)
1966 pushl %ebp
1967 movl %esp, %ebp
1968 pushl %edi
1969 pushl %esi
1970 pushl %ebx
1972 / We have a TSC, but we have no way in general to know how reliable it is.
1973 / Usually a marginal TSC behaves appropriately unless not enough time
1974 / elapses between reads. A reliable TSC can be read as often and as rapidly
1975 / as desired. The simplistic approach of reading the TSC counter and
1976 / correlating to the PIT counter cannot be naively followed. Instead estimates
1977 / have to be taken to successively refine a guess at the speed of the cpu
1978 / and then the TSC and PIT counter are correlated. In practice very rarely
1979 / is more than one quick loop required for an estimate. Measures have to be
1980 / taken to prevent the PIT counter from wrapping beyond its resolution and for
1981 / measuring the clock rate of very fast processors.
1983 / The following constant can be tuned. It should be such that the loop does
1984 / not take too many nor too few PIT counts to execute. If this value is too
1985 / large, then on slow machines the loop will take a long time, or the PIT
1986 / counter may even wrap. If this value is too small, then on fast machines
1987 / the PIT counter may count so few ticks that the resolution of the PIT
1988 / itself causes a bad guess. Because this code is used in machines with
1989 / marginal TSC's and/or IO, if this value is too small on those, it may
1990 / cause the calculated cpu frequency to vary slightly from boot to boot.
1992 / In all cases even if this constant is set inappropriately, the algorithm
1993 / will still work and the caller should be able to handle variances in the
1994 / calculation of cpu frequency, but the calculation will be inefficient and
1995 / take a disproportionate amount of time relative to a well selected value.
1996 / As the slowest supported cpu becomes faster, this constant should be
1997 / carefully increased.
1999 movl $0x8000, %ecx
2001 / to make sure the instruction cache has been warmed
2002 clc
2004 jmp freq_tsc_loop
2006 / The following block of code up to and including the latching of the PIT
2007 / counter after freq_tsc_perf_loop is very critical and very carefully
2008 / written, it should only be modified with great care. freq_tsc_loop to
2009 / freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
2010 / freq_tsc_perf_loop up to the unlatching of the PIT counter.
2012 .align 32
2013 freq_tsc_loop:
2014 / save the loop count in %ebx
2015 movl %ecx, %ebx
2017 / initialize the PIT counter and start a count down
2018 movb $PIT_LOADMODE, %al
2019 outb $PITCTL_PORT
2020 movb $0xff, %al
2021 outb $PITCTR0_PORT
2022 outb $PITCTR0_PORT
2024 / read the TSC and store the TS in %edi:%esi
2025 rdtsc
2026 movl %eax, %esi
2028 freq_tsc_perf_loop:
2029 movl %edx, %edi
2030 movl %eax, %esi
2031 movl %edx, %edi
2032 loop freq_tsc_perf_loop
2034 / read the TSC and store the LSW in %ecx
2035 rdtsc
2036 movl %eax, %ecx
2038 / latch the PIT counter and status
2039 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
2040 outb $PITCTL_PORT
2042 / remember if the icache has been warmed
2043 setc %ah
2045 / read the PIT status
2046 inb $PITCTR0_PORT
2047 shll $8, %eax
2049 / read PIT count
2050 inb $PITCTR0_PORT
2051 shll $8, %eax
2052 inb $PITCTR0_PORT
2053 bswap %eax
2055 / check to see if the PIT count was loaded into the CE
2056 btw $_CONST(PITSTAT_NULLCNT+8), %ax
2057 jc freq_tsc_increase_count
2059 / check to see if PIT counter wrapped
2060 btw $_CONST(PITSTAT_OUTPUT+8), %ax
2061 jnc freq_tsc_pit_did_not_wrap
2063 / halve count
2064 shrl $1, %ebx
2065 movl %ebx, %ecx
2067 / the instruction cache has been warmed
2068 stc
2070 jmp freq_tsc_loop
2072 freq_tsc_increase_count:
2073 shll $1, %ebx
2074 jc freq_tsc_too_fast
2076 movl %ebx, %ecx
2078 / the instruction cache has been warmed
2079 stc
2081 jmp freq_tsc_loop
2083 freq_tsc_pit_did_not_wrap:
2084 roll $16, %eax
2086 cmpw $0x2000, %ax
2087 notw %ax
2088 jb freq_tsc_sufficient_duration
2090 freq_tsc_calculate:
2091 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
2092 / then on the second CLK pulse the CE is decremented, therefore mode 0
2093 / is really a (count + 1) counter, ugh
2094 xorl %esi, %esi
2095 movw %ax, %si
2096 incl %esi
2098 movl $0xf000, %eax
2099 mull %ebx
2101 / tuck away (target_pit_count * loop_count)
2102 movl %edx, %ecx
2103 movl %eax, %ebx
2105 movl %esi, %eax
2106 movl $0xffffffff, %edx
2107 mull %edx
2109 addl %esi, %eax
2110 adcl $0, %edx
2112 cmpl %ecx, %edx
2113 ja freq_tsc_div_safe
2114 jb freq_tsc_too_fast
2116 cmpl %ebx, %eax
2117 jbe freq_tsc_too_fast
2119 freq_tsc_div_safe:
2120 movl %ecx, %edx
2121 movl %ebx, %eax
2123 movl %esi, %ecx
2124 divl %ecx
2126 movl %eax, %ecx
2128 / the instruction cache has been warmed
2129 stc
2131 jmp freq_tsc_loop
2133 freq_tsc_sufficient_duration:
2134 / test to see if the icache has been warmed
2135 btl $16, %eax
2136 jnc freq_tsc_calculate
2138 / recall mode 0 is a (count + 1) counter
2139 andl $0xffff, %eax
2140 incl %eax
2142 / save the number of PIT counts
2143 movl 8(%ebp), %ebx
2144 movl %eax, (%ebx)
2146 / calculate the number of TS's that elapsed
2147 movl %ecx, %eax
2148 subl %esi, %eax
2149 sbbl %edi, %edx
2151 jmp freq_tsc_end
2153 freq_tsc_too_fast:
2154 / return 0 as a 64 bit quantity
2155 xorl %eax, %eax
2156 xorl %edx, %edx
2158 freq_tsc_end:
2159 popl %ebx
2160 popl %esi
2161 popl %edi
2162 popl %ebp
2163 ret
2164 SET_SIZE(freq_tsc)
2166 #endif /* __i386 */
2167 #endif /* __lint */
2169 #if !defined(__amd64)
2170 #if defined(__lint)
2173 * We do not have a TSC so we use a block of instructions with well known
2174 * timings.
2177 /*ARGSUSED*/
2178 uint64_t
2179 freq_notsc(uint32_t *pit_counter)
2181 return (0);
2184 #else /* __lint */
2185 ENTRY_NP(freq_notsc)
2186 pushl %ebp
2187 movl %esp, %ebp
2188 pushl %edi
2189 pushl %esi
2190 pushl %ebx
2192 / initial count for the idivl loop
2193 movl $0x1000, %ecx
2195 / load the divisor
2196 movl $1, %ebx
2198 jmp freq_notsc_loop
2200 .align 16
2201 freq_notsc_loop:
2202 / set high 32 bits of dividend to zero
2203 xorl %edx, %edx
2205 / save the loop count in %edi
2206 movl %ecx, %edi
2208 / initialize the PIT counter and start a count down
2209 movb $PIT_LOADMODE, %al
2210 outb $PITCTL_PORT
2211 movb $0xff, %al
2212 outb $PITCTR0_PORT
2213 outb $PITCTR0_PORT
2215 / set low 32 bits of dividend to zero
2216 xorl %eax, %eax
2218 / It is vital that the arguments to idivl be set appropriately because on some
2219 / cpu's this instruction takes more or less clock ticks depending on its
2220 / arguments.
2221 freq_notsc_perf_loop:
2222 idivl %ebx
2223 idivl %ebx
2224 idivl %ebx
2225 idivl %ebx
2226 idivl %ebx
2227 loop freq_notsc_perf_loop
2229 / latch the PIT counter and status
2230 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
2231 outb $PITCTL_PORT
2233 / read the PIT status
2234 inb $PITCTR0_PORT
2235 shll $8, %eax
2237 / read PIT count
2238 inb $PITCTR0_PORT
2239 shll $8, %eax
2240 inb $PITCTR0_PORT
2241 bswap %eax
2243 / check to see if the PIT count was loaded into the CE
2244 btw $_CONST(PITSTAT_NULLCNT+8), %ax
2245 jc freq_notsc_increase_count
2247 / check to see if PIT counter wrapped
2248 btw $_CONST(PITSTAT_OUTPUT+8), %ax
2249 jnc freq_notsc_pit_did_not_wrap
2251 / halve count
2252 shrl $1, %edi
2253 movl %edi, %ecx
2255 jmp freq_notsc_loop
2257 freq_notsc_increase_count:
2258 shll $1, %edi
2259 jc freq_notsc_too_fast
2261 movl %edi, %ecx
2263 jmp freq_notsc_loop
2265 freq_notsc_pit_did_not_wrap:
2266 shrl $16, %eax
2268 cmpw $0x2000, %ax
2269 notw %ax
2270 jb freq_notsc_sufficient_duration
2272 freq_notsc_calculate:
2273 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
2274 / then on the second CLK pulse the CE is decremented, therefore mode 0
2275 / is really a (count + 1) counter, ugh
2276 xorl %esi, %esi
2277 movw %ax, %si
2278 incl %esi
2280 movl %edi, %eax
2281 movl $0xf000, %ecx
2282 mull %ecx
2284 / tuck away (target_pit_count * loop_count)
2285 movl %edx, %edi
2286 movl %eax, %ecx
2288 movl %esi, %eax
2289 movl $0xffffffff, %edx
2290 mull %edx
2292 addl %esi, %eax
2293 adcl $0, %edx
2295 cmpl %edi, %edx
2296 ja freq_notsc_div_safe
2297 jb freq_notsc_too_fast
2299 cmpl %ecx, %eax
2300 jbe freq_notsc_too_fast
2302 freq_notsc_div_safe:
2303 movl %edi, %edx
2304 movl %ecx, %eax
2306 movl %esi, %ecx
2307 divl %ecx
2309 movl %eax, %ecx
2311 jmp freq_notsc_loop
2313 freq_notsc_sufficient_duration:
2314 / recall mode 0 is a (count + 1) counter
2315 incl %eax
2317 / save the number of PIT counts
2318 movl 8(%ebp), %ebx
2319 movl %eax, (%ebx)
2321 / calculate the number of cpu clock ticks that elapsed
2322 cmpl $X86_VENDOR_Cyrix, x86_vendor
2323 jz freq_notsc_notcyrix
2325 / freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores
2326 movl $86, %eax
2327 jmp freq_notsc_calculate_tsc
2329 freq_notsc_notcyrix:
2330 / freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums
2331 movl $237, %eax
2333 freq_notsc_calculate_tsc:
2334 mull %edi
2336 jmp freq_notsc_end
2338 freq_notsc_too_fast:
2339 / return 0 as a 64 bit quantity
2340 xorl %eax, %eax
2341 xorl %edx, %edx
2343 freq_notsc_end:
2344 popl %ebx
2345 popl %esi
2346 popl %edi
2347 popl %ebp
2348 ret
2350 SET_SIZE(freq_notsc)
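A caller can convert what freq_notsc() returns into a clock rate roughly as follows (a sketch; the routine returns cycles_per_pass * loop_count and stores the PIT tick count, and both measure the same wall-clock interval):

#include <stdint.h>

#define	PIT_HZ	1193182ULL	/* 8254 input clock, ~1.19318 MHz */

static uint64_t
cpu_hz_from_freq_notsc(uint64_t cpu_cycles, uint32_t pit_ticks)
{
	return (cpu_cycles * PIT_HZ / pit_ticks);
}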
2352 #endif /* __lint */
2353 #endif /* !__amd64 */
2355 #if !defined(__lint)
2356 .data
2357 #if !defined(__amd64)
2358 .align 4
2359 cpu_vendor:
2360 .long 0, 0, 0 /* Vendor ID string returned */
2362 .globl CyrixInstead
2364 .globl x86_featureset
2365 .globl x86_type
2366 .globl x86_vendor
2367 #endif
2369 #endif /* __lint */