/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */
/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*	  All Rights Reserved						*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*	  All Rights Reserved						*/
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/x86_archext.h>
#include <sys/machbrand.h>
#include <sys/privregs.h>

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>

#include <sys/segments.h>

#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/panic.h>
/*
 * We implement two flavours of system call entry points
 *
 * -	{int,lcall}/iret	(i386)
 * -	sysenter/sysexit	(Pentium II and beyond)
 *
 * The basic pattern used in the handlers is to check to see if we can
 * do a fast (simple) version of the system call; if we can't we use
 * various C routines that handle corner cases and debugging.
 *
 * To reduce the amount of assembler replication, yet keep the system
 * call implementations vaguely comprehensible, the common code in the
 * body of the handlers is broken up into a set of preprocessor
 * definitions below.
 */
/*
 * When we have SYSCALLTRACE defined, we sneak an extra
 * predicate into a couple of tests.
 */
#if defined(SYSCALLTRACE)
#define	ORL_SYSCALLTRACE(r32)	\
	orl	syscalltrace, r32
#else
#define	ORL_SYSCALLTRACE(r32)
#endif
/*
 * This check is false whenever we want to go fast i.e.
 *
 *	if (code >= NSYSCALL ||
 *	    t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0)
 *		do full version
 *
 * Preconditions:
 * -	t is curthread
 * -	code contains the syscall number
 * Postconditions:
 * -	%ecx and %edi are smashed
 * -	condition code flag ZF is cleared if pre-sys is too complex
 */
#define	CHECK_PRESYS_NE(t, code)		\
	movzbl	T_PRE_SYS(t), %edi;		\
	movzwl	T_PROC_FLAG(t), %ecx;		\
	andl	$TP_WATCHPT, %ecx;		\
	orl	%ecx, %edi;			\
	cmpl	$NSYSCALL, code;		\
	setae	%cl;				\
	movzbl	%cl, %ecx;			\
	orl	%ecx, %edi;			\
	ORL_SYSCALLTRACE(%edi)
/*
 * Check if a brand_mach_ops callback is defined for the specified
 * callback_id type.  If so invoke it with the user's %gs value loaded
 * and the following data on the stack:
 *
 *	   --------------------------------------
 *	   | user's %ss				|
 *	|  | user's %esp			|
 *	|  | EFLAGS register			|
 *	v  | user's %cs				|
 *	   | user's %eip (user return address)	|
 *	   | 'scratch space'			|
 *	   | user's %ebx			|
 *	   | user's %gs selector		|
 *	   | lwp pointer			|
 *	   | callback wrapper return addr	|
 *	   --------------------------------------
 *
 * If the brand code returns, we assume that we are meant to execute the
 * normal system call path.
 *
 * The interface to the brand callbacks on the 32-bit kernel assumes %ebx
 * is available as a scratch register within the callback.  If the
 * callback returns within the kernel then this macro will restore %ebx.
 * If the callback is going to return directly to userland then it
 * should restore %ebx before returning to userland.
 */
#define	BRAND_CALLBACK(callback_id) \
	subl	$4, %esp		/* save some scratch space	*/ ;\
	pushl	%ebx			/* save %ebx to use for scratch	*/ ;\
	pushl	%gs			/* save the user %gs		*/ ;\
	movl	$KGS_SEL, %ebx ;\
	movw	%bx, %gs		/* switch to the kernel's %gs	*/ ;\
	movl	%gs:CPU_THREAD, %ebx	/* load the thread pointer	*/ ;\
	movl	T_LWP(%ebx), %ebx	/* load the lwp pointer		*/ ;\
	pushl	%ebx			/* push the lwp pointer		*/ ;\
	movl	LWP_PROCP(%ebx), %ebx	/* load the proc pointer	*/ ;\
	movl	P_BRAND(%ebx), %ebx	/* load the brand pointer	*/ ;\
	movl	B_MACHOPS(%ebx), %ebx	/* load the machops pointer	*/ ;\
	movl	_CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx ;\
	cmpl	$0, %ebx ;\
	je	1f ;\
	movl	%ebx, 12(%esp)		/* save callback to scratch	*/ ;\
	movl	4(%esp), %ebx		/* grab the user %gs		*/ ;\
	movw	%bx, %gs		/* restore the user %gs		*/ ;\
	call	*12(%esp)		/* call callback in scratch	*/ ;\
1:	movl	4(%esp), %ebx		/* restore user %gs (re-do if	*/ ;\
	movw	%bx, %gs		/* branch due to no callback)	*/ ;\
	movl	8(%esp), %ebx		/* restore user's %ebx		*/ ;\
	addl	$16, %esp		/* restore stack ptr		*/
#define	MSTATE_TRANSITION(from, to)		\
	pushl	$to;				\
	pushl	$from;				\
	call	syscall_mstate;			\
	addl	$0x8, %esp
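/*
 * i.e. the C call syscall_mstate(from, to).  Note this is an ordinary
 * cdecl call: the caller-saved registers %eax, %ecx and %edx do not
 * survive it, which is why the call sites below preserve %eax around
 * the transition.
 */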
/*
 * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1)
 * This must be called with interrupts or preemption disabled.
 */
#define	CPU_STATS_SYS_SYSCALL_INC			\
	addl	$1, %gs:CPU_STATS_SYS_SYSCALL;		\
	adcl	$0, %gs:CPU_STATS_SYS_SYSCALL+4;
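/*
 * The addl/adcl pair is a 64-bit increment composed from 32-bit
 * halves: add 1 to the low word, then fold the carry into the high
 * word.  Migrating to another CPU between the two instructions would
 * split the update across two per-CPU counters, hence the restriction
 * noted above.
 */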
/*
 * ASSERT(lwptoregs(lwp) == rp);
 *
 * this may seem obvious, but very odd things happen if this
 * assertion is false
 *
 * Postconditions (if assertion is true):
 *	%esi and %edi are smashed
 */
#if defined(DEBUG)

__lwptoregs_msg:
	.string	"syscall_asm.s:%d lwptoregs(%p) [%p] != rp [%p]"

#define	ASSERT_LWPTOREGS(t, rp)				\
	movl	T_LWP(t), %esi;				\
	movl	LWP_REGS(%esi), %edi;			\
	cmpl	rp, %edi;				\
	je	7f;					\
	pushl	rp;					\
	pushl	%edi;					\
	pushl	%esi;					\
	pushl	$__LINE__;				\
	pushl	$__lwptoregs_msg;			\
	call	panic;					\
7:
#else
#define	ASSERT_LWPTOREGS(t, rp)
#endif
/*
 * This is an assembler version of this fragment:
 *
 * lwp->lwp_state = LWP_SYS;
 * lwp->lwp_ru.sysc++;
 * lwp->lwp_eosys = NORMALRETURN;
 * lwp->lwp_ap = argp;
 */
#define	SET_LWP(lwp, argp)				\
	movb	$LWP_SYS, LWP_STATE(lwp);		\
	addl	$1, LWP_RU_SYSC(lwp);			\
	adcl	$0, LWP_RU_SYSC+4(lwp);			\
	movb	$NORMALRETURN, LWP_EOSYS(lwp);		\
	movl	argp, LWP_AP(lwp)
/*
 * Set up the thread, lwp, find the handler, and copy
 * in the arguments from userland to the kernel stack.
 *
 * Preconditions:
 * -	%eax contains the syscall number
 * Postconditions:
 * -	%eax contains a pointer to the sysent structure
 * -	%esi, %edi are smashed
 * -	%esp is SYS_DROPped ready for the syscall
 */
#define	SIMPLE_SYSCALL_PRESYS(t, faultlabel)		\
	movl	T_LWP(t), %esi;				\
	movw	%ax, T_SYSNUM(t);			\
	subl	$SYS_DROP, %esp;			\
	shll	$SYSENT_SIZE_SHIFT, %eax;		\
	SET_LWP(%esi, %esp);				\
	leal	sysent(%eax), %eax;			\
	movzbl	SY_NARG(%eax), %ecx;			\
	testl	%ecx, %ecx;				\
	jz	4f;					\
	movl	%esp, %edi;				\
	movl	SYS_DROP + REGOFF_UESP(%esp), %esi;	\
	movl	$faultlabel, T_LOFAULT(t);		\
	addl	$4, %esi;	/* skip the user return address */	\
	rep;						\
	  smovl;	/* copy the args; leaves %ecx == 0 */	\
	movl	%ecx, T_LOFAULT(t);	/* i.e. clear t_lofault */	\
4:
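/*
 * The tail of the macro is, in effect, this fragment executed under
 * the t_lofault protection set up above (a sketch, not the literal
 * C path):
 *
 *	if (callp->sy_narg > 0)
 *		copyin((caddr_t)rp->r_uesp + sizeof (long),
 *		    lwp->lwp_ap, callp->sy_narg * sizeof (long));
 */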
/*
 * Check to see if a simple return is possible i.e.
 *
 *	if ((t->t_post_sys_ast | syscalltrace) != 0)
 *		do full version;
 *
 * Postconditions:
 * -	condition code NE is set if post-sys is too complex
 * -	rtmp is zeroed if it isn't (we rely on this!)
 */
#define	CHECK_POSTSYS_NE(t, rtmp)	\
	xorl	rtmp, rtmp;		\
	ORL_SYSCALLTRACE(rtmp);		\
	orl	T_POST_SYS_AST(t), rtmp
/*
 * Fix up the lwp, thread, and eflags for a successful return
 *
 * Preconditions:
 * -	zwreg contains zero
 * Postconditions:
 * -	%esp has been unSYS_DROPped
 * -	%esi is smashed (points to lwp)
 */
#define	SIMPLE_SYSCALL_POSTSYS(t, zwreg)		\
	movl	T_LWP(t), %esi;				\
	addl	$SYS_DROP, %esp;			\
	movw	zwreg, T_SYSNUM(t);			\
	movb	$LWP_USER, LWP_STATE(%esi);		\
	andb	$_CONST(0xffff - PS_C), REGOFF_EFL(%esp)
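/*
 * Clearing PS_C (carry) in the saved EFLAGS matters because carry is
 * the error indicator seen by the userland syscall stubs, which do
 * roughly:
 *
 *	lcall	$0x27, $0	/ through the call gate
 *	jae	1f		/ carry clear: success
 *	jmp	__cerror	/ carry set: %eax holds an errno
 * 1:
 */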
/*
 * System call handler.  This is the destination of both the call
 * gate (lcall 0x27) _and_ the interrupt gate (int 0x91).  For our
 * purposes, there are two significant differences between an interrupt
 * gate and a call gate:
 *
 * 1) An interrupt gate runs the handler with interrupts disabled, whereas
 * a call gate runs the handler with whatever EFLAGS settings were in
 * effect at the time of the call.
 *
 * 2) An interrupt gate pushes the contents of the EFLAGS register at the
 * time of the interrupt onto the stack, whereas a call gate does not.
 *
 * Because we use the following code sequence to handle system calls made
 * from _both_ a call gate _and_ an interrupt gate, these two differences
 * must be respected.  With regard to difference 1), the handler must
 * ensure that a sane EFLAGS snapshot is stored on the stack so that when
 * the kernel returns to the user via iret (which returns with the EFLAGS
 * value saved on the stack), interrupts are re-enabled.
 *
 * With regard to difference 2), the handler must always put a current
 * snapshot of EFLAGS onto the stack in the appropriate place.  If we came
 * in via an interrupt gate, we will be clobbering the EFLAGS value that
 * was pushed by the interrupt gate.  This is OK, as the only bit that was
 * changed by the hardware was the IE (interrupt enable) bit, which for an
 * interrupt gate is now off.  If we were to do nothing, the stack would
 * contain an EFLAGS with IE off, resulting in us eventually returning to
 * the user with interrupts disabled.  The solution is to turn on the IE
 * bit in the EFLAGS value saved on the stack.
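 *
 * Once the register frame is laid out, that fix is a single instruction
 * against the saved EFLAGS slot (the sysenter path below does the
 * equivalent to the EFLAGS copy it pushes by hand):
 *
 *	orl	$PS_IE, REGOFF_EFL(%esp)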
 *
 * Another subtlety which deserves mention is the difference between the
 * two descriptors.  The call gate descriptor is set to instruct the
 * hardware to copy one parameter from the user stack to the kernel
 * stack, whereas the interrupt gate descriptor doesn't use the parameter
 * passing mechanism at all.  The kernel doesn't actually use the
 * parameter that is copied by the hardware; the only reason it does this
 * is so that there is a space on the stack large enough to hold an
 * EFLAGS register value, which happens to be in the correct place for
 * use by iret when we go back to userland.  How convenient.
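 *
 * (The "copy one parameter" behaviour is the call gate descriptor's
 * count field at work: the CPU copies that many 32-bit words from the
 * user stack to the kernel stack as part of the privilege transition.)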
 *
 * Stack frame description in syscall() and callees.
 *
 *	|------------|
 *	| regs	     | +(8*4)+4		registers
 *	|------------|
 *	| 8 args     | <- %esp		MAXSYSARGS (currently 8) arguments
 *	|------------|
 *
 */
#define	SYS_DROP	_CONST(_MUL(MAXSYSARGS, 4))
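/*
 * i.e. SYS_DROP = MAXSYSARGS * sizeof (long) = 8 * 4 = 32 bytes of
 * argument scratch carved out of the stack by SIMPLE_SYSCALL_PRESYS
 * and handed back by SIMPLE_SYSCALL_POSTSYS.
 */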
#if defined(__lint)

size_t	_allsyscalls_size;

#else	/* __lint */
	ENTRY_NP2(brand_sys_call, _allsyscalls)
	BRAND_CALLBACK(BRAND_CB_SYSCALL)

	ALTENTRY(sys_call)
	/ on entry	eax = system call number

	/ set up the stack to look as in reg.h
	subl	$8, %esp	/ pad the stack with ERRCODE and TRAPNO

	SYSCALL_PUSH

#ifdef TRAPTRACE
	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL)	/ Uses labels "8" and "9"
	TRACE_REGS(%edi, %esp, %ebx, %ecx)		/ Uses label "9"
	pushl	%eax
	TRACE_STAMP(%edi)		/ Clobbers %eax, %edx, uses "9"
	popl	%eax
	movl	%eax, TTR_SYSNUM(%edi)
#endif
	/
	/ Interrupts may be enabled here, so we must make sure this thread
	/ doesn't migrate off the CPU while it updates the CPU stats.
	/
	/ XXX This is only true if we got here via call gate thru the LDT for
	/ old style syscalls. Perhaps this preempt++-- will go away soon?
	movl	%gs:CPU_THREAD, %ebx
	addb	$1, T_PREEMPT(%ebx)
	CPU_STATS_SYS_SYSCALL_INC
	subb	$1, T_PREEMPT(%ebx)
_watch_do_syscall:
	pushl	%eax		/ preserve across mstate call
	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
	popl	%eax

	movl	%gs:CPU_THREAD, %ebx

	ASSERT_LWPTOREGS(%ebx, %esp)

	CHECK_PRESYS_NE(%ebx, %eax)
	jne	_full_syscall_presys
	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)

_syslcall_call:
	call	*SY_CALLC(%eax)		/ invoke the handler from sysent

_syslcall_rtt:
	CHECK_POSTSYS_NE(%ebx, %ecx)
	jne	_full_syscall_postsys
	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
	movl	%eax, REGOFF_EAX(%esp)	/ rval1
	movl	%edx, REGOFF_EDX(%esp)	/ rval2

	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
	/
	/ get back via iret
	/
	CLI(%edx)
	jmp	sys_rtt_syscall

_full_syscall_presys:
	movl	T_LWP(%ebx), %esi
	subl	$SYS_DROP, %esp
	movb	$LWP_SYS, LWP_STATE(%esi)
	pushl	%esp			/ argument area
	pushl	%ebx			/ thread
	call	syscall_entry		/ sysent pointer returned in %eax
	addl	$8, %esp
	jmp	_syslcall_call

_full_syscall_postsys:
	addl	$SYS_DROP, %esp
	pushl	%edx			/ rval2
	pushl	%eax			/ rval1
	pushl	%ebx			/ thread
	call	syscall_exit
	addl	$12, %esp

	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

	/
	/ get back via iret
	/
	CLI(%edx)
	jmp	sys_rtt_syscall

_syscall_fault:
	pushl	$0xe			/ EFAULT
	call	set_errno
	addl	$4, %esp
	xorl	%eax, %eax		/ fake syscall_err()
	xorl	%edx, %edx
	jmp	_syslcall_rtt

	SET_SIZE(sys_call)
	SET_SIZE(brand_sys_call)
/*
 * System call handler via the sysenter instruction
 *
 * Here's how syscall entry usually works (see sys_call for details).
 *
 * There, the caller (lcall or int) in userland has arranged that:
 *
 * -	%eax contains the syscall number
 * -	the user stack contains the args to the syscall
 *
 * Normally the lcall instruction into the call gate causes the processor
 * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack.
 * The sys_call handler then leaves space for r_trapno and r_err, and
 * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed
 * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack.
 * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally
 * extracts %efl and puts it into r_efl (which happens to live at the
 * offset that <top-of-stack> was copied into).  Note that the value in
 * r_efl has the IF (interrupt enable) flag turned on.  (The int
 * instruction into the interrupt gate does essentially the same thing,
 * only instead of <top-of-stack> we get eflags - see comment above.)
 *
 * In the sysenter case, things are a lot more primitive.
 *
 * The caller in userland has arranged that:
 *
 * -	%eax contains the syscall number
 * -	%ecx contains the user %esp
 * -	%edx contains the return %eip
 * -	the user stack contains the args to the syscall
 *
 * e.g.
 *	<args on the stack>
 *	mov	$SYS_callnum, %eax
 *	mov	$1f, %edx	/ return %eip
 *	mov	%esp, %ecx	/ return %esp
 *	sysenter
 * 1:
 *
 * Hardware and (privileged) initialization code have arranged that by
 * the time the sysenter instruction completes:
 *
 * -	%eip is pointing to sys_sysenter (below).
 * -	%cs and %ss are set to kernel text and stack (data) selectors.
 * -	%esp is pointing at the lwp's stack
 * -	Interrupts have been disabled.
 *
 * The task for the sysenter handler is:
 *
 * -	recreate the same regs structure on the stack and the same
 *	kernel state as if we'd come in on an lcall
 * -	do the normal work of a syscall
 * -	execute the system call epilogue, use sysexit to return to userland.
 *
 * Note that we are unable to return both "rvals" to userland with this
 * call, as %edx is used by the sysexit instruction.
 *
 * One final complication in this routine is its interaction with
 * single-stepping in a debugger.  For most of the system call mechanisms,
 * the CPU automatically clears the single-step flag before we enter the
 * kernel.  The sysenter mechanism does not clear the flag, so a user
 * single-stepping through a libc routine may suddenly find themselves
 * single-stepping through the kernel.  To detect this, kmdb compares the
 * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
 * If it finds that we have single-stepped to a sysenter entry point, it
 * explicitly clears the flag and executes the sys_sysenter routine.
 *
 * One final complication in this final complication is the fact that we
 * have two different entry points for sysenter: brand_sys_sysenter and
 * sys_sysenter.  If we enter at brand_sys_sysenter and start
 * single-stepping through the kernel with kmdb, we will eventually hit
 * the instruction at sys_sysenter.  kmdb cannot distinguish between that
 * valid single-step and the undesirable one mentioned above.  To avoid
 * this situation, we simply add a jump over the instruction at
 * sys_sysenter to make it impossible to single-step to it.
 */
	ENTRY_NP(brand_sys_sysenter)
	pushl	%edx			/ save the return %eip
	BRAND_CALLBACK(BRAND_CB_SYSENTER)
	popl	%edx

	/*
	 * Jump over sys_sysenter to allow single-stepping as described
	 * above.
	 */
	jmp	1f

	ALTENTRY(sys_sysenter)
	nop
1:
	/
	/ do what the call gate would've done to the stack ..
	/
	pushl	$UDS_SEL	/ (really %ss, but it's the same ..)
	pushl	%ecx		/ userland makes this a copy of %esp
	pushfl			/ EFLAGS
	orl	$PS_IE, (%esp)	/ turn interrupts on when we return to user
	pushl	$UCS_SEL	/ user %cs
	pushl	%edx		/ userland makes this a copy of %eip

	/ done.  finish building the stack frame
	subl	$8, %esp	/ leave space for ERR and TRAPNO

	SYSENTER_PUSH

#ifdef TRAPTRACE
	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER)	/ uses labels 8 and 9
	TRACE_REGS(%edi, %esp, %ebx, %ecx)		/ uses label 9
	pushl	%eax
	TRACE_STAMP(%edi)		/ clobbers %eax, %edx, uses label 9
	popl	%eax
	movl	%eax, TTR_SYSNUM(%edi)
#endif
	CPU_STATS_SYS_SYSCALL_INC

	pushl	%eax		/ preserve across mstate call
	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
	popl	%eax

	movl	%gs:CPU_THREAD, %ebx

	ASSERT_LWPTOREGS(%ebx, %esp)

	CHECK_PRESYS_NE(%ebx, %eax)
	jne	_full_syscall_presys
	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)

	call	*SY_CALLC(%eax)		/ invoke the handler from sysent

	CHECK_POSTSYS_NE(%ebx, %ecx)
	jne	_full_syscall_postsys
	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
	/
	/ sysexit uses %edx to restore %eip, so we can't use it
	/ to return a value, sigh.
	/
	movl	%eax, REGOFF_EAX(%esp)
	/ movl	%edx, REGOFF_EDX(%esp)

	/ Interrupts will be turned on by the 'sti' executed just before
	/ sysexit.  The following ensures that restoring the user's EFLAGS
	/ doesn't enable interrupts too soon.
	andl	$_BITNOT(PS_IE), REGOFF_EFL(%esp)
	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

	/
	/ restore the user's registers and return via sysexit
	/
	popl	%gs
	popl	%fs
	popl	%es
	popl	%ds
	popal				/ %eax comes back holding rval1
	addl	$8, %esp		/ skip TRAPNO and ERR

	popl	%edx			/ sysexit: %edx -> %eip
	addl	$4, %esp		/ get CS off the stack
	popfl				/ EFL (with PS_IE still clear)
	popl	%ecx			/ sysexit: %ecx -> %esp

	sti				/ sti's shadow carries us through sysexit
	sysexit
	SET_SIZE(sys_sysenter)
	SET_SIZE(brand_sys_sysenter)
/*
 * Declare a uintptr_t which covers the entire pc range of syscall
 * handlers for the stack walkers that need this.
 */
	.align	CPTRSIZE
	.globl	_allsyscalls_size
	.type	_allsyscalls_size, @object
_allsyscalls_size:
	.NWORD	. - _allsyscalls
	SET_SIZE(_allsyscalls_size)

#endif	/* __lint */
/*
 * These are the thread context handlers for lwps using sysenter/sysexit.
 */

#if defined(__lint)

/*ARGSUSED*/
void
sep_save(void *ksp)
{}

/*ARGSUSED*/
void
sep_restore(void *ksp)
{}

#else	/* __lint */

	/*
	 * setting this value to zero as we switch away causes the
	 * stack-pointer-on-sysenter to be NULL, ensuring that we
	 * don't silently corrupt another (preempted) thread stack
	 * when running an lwp that (somehow) didn't get sep_restore'd
	 */
	ENTRY_NP(sep_save)
	xorl	%edx, %edx
	xorl	%eax, %eax
	movl	$MSR_INTC_SEP_ESP, %ecx
	wrmsr
	ret
	SET_SIZE(sep_save)
	/*
	 * Update the kernel stack pointer as we resume onto this cpu.
	 */
	ENTRY_NP(sep_restore)
	movl	4(%esp), %eax		/* per-lwp kernel sp */
	xorl	%edx, %edx
	movl	$MSR_INTC_SEP_ESP, %ecx
	wrmsr
	ret
	SET_SIZE(sep_restore)

#endif	/* __lint */
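/*
 * Both routines boil down to (roughly) the C call
 *
 *	wrmsr(MSR_INTC_SEP_ESP, (uint64_t)(uintptr_t)ksp);
 *
 * with ksp == NULL in the sep_save case: %ecx selects the MSR and
 * %edx:%eax supply the 64-bit value to write.
 */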
/*
 * Call syscall().  Called from trap() on watchpoint at lcall 0,7
 */

	ENTRY_NP(watch_syscall)
	movl	%gs:CPU_THREAD, %ebx
	movl	T_STACK(%ebx), %esp	/ switch to the thread stack
	movl	REGOFF_EAX(%esp), %eax	/ recover original syscall#
	jmp	_watch_do_syscall
	SET_SIZE(watch_syscall)