/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */
17 #include <linux/cpu.h>
18 #include <linux/errno.h>
19 #include <linux/sched.h>
21 #include <linux/kernel.h>
23 #include <linux/elfcore.h>
24 #include <linux/smp.h>
25 #include <linux/slab.h>
26 #include <linux/user.h>
27 #include <linux/interrupt.h>
28 #include <linux/delay.h>
29 #include <linux/module.h>
30 #include <linux/ptrace.h>
31 #include <linux/notifier.h>
32 #include <linux/kprobes.h>
33 #include <linux/kdebug.h>
34 #include <linux/prctl.h>
35 #include <linux/uaccess.h>
37 #include <linux/ftrace.h>
39 #include <asm/pgtable.h>
40 #include <asm/processor.h>
42 #include <asm/fpu-internal.h>
43 #include <asm/mmu_context.h>
44 #include <asm/prctl.h>
46 #include <asm/proto.h>
49 #include <asm/syscalls.h>
50 #include <asm/debugreg.h>
51 #include <asm/switch_to.h>
52 #include <asm/xen/hypervisor.h>
54 asmlinkage
extern void ret_from_fork(void);
56 asmlinkage
DEFINE_PER_CPU(unsigned long, old_rsp
);
/* Also prints some state that is not saved in pt_regs. */
/*
 * __show_regs() - print the register state in @regs to the kernel log.
 *
 * Visible behaviour: prints RIP (followed by printk_address() on regs->ip),
 * RSP/EFLAGS and the general-purpose registers taken from @regs.  It then
 * reads the live %ds/%cs/%es/%fs/%gs selectors with inline asm and the FS,
 * GS and kernel-GS base MSRs with rdmsrl(), and prints those too.  The
 * trailing printk() calls format the control and debug register values.
 *
 * NOTE(review): the embedded line numbers jump (89 -> 99, 101 -> 103 -> 113,
 * 115 -> 118).  The statements that use @all, that load cr0..cr4 and
 * d0..d7, and the remaining printk() arguments are not part of this
 * listing and cannot be confirmed from here.
 */
59 void __show_regs(struct pt_regs
*regs
, int all
)
61 unsigned long cr0
= 0L, cr2
= 0L, cr3
= 0L, cr4
= 0L, fs
, gs
, shadowgs
;
62 unsigned long d0
, d1
, d2
, d3
, d6
, d7
;
63 unsigned int fsindex
, gsindex
;
64 unsigned int ds
, cs
, es
;
66 printk(KERN_DEFAULT
"RIP: %04lx:[<%016lx>] ", regs
->cs
& 0xffff, regs
->ip
);
67 printk_address(regs
->ip
);
68 printk(KERN_DEFAULT
"RSP: %04lx:%016lx EFLAGS: %08lx\n", regs
->ss
,
69 regs
->sp
, regs
->flags
);
70 printk(KERN_DEFAULT
"RAX: %016lx RBX: %016lx RCX: %016lx\n",
71 regs
->ax
, regs
->bx
, regs
->cx
);
72 printk(KERN_DEFAULT
"RDX: %016lx RSI: %016lx RDI: %016lx\n",
73 regs
->dx
, regs
->si
, regs
->di
);
74 printk(KERN_DEFAULT
"RBP: %016lx R08: %016lx R09: %016lx\n",
75 regs
->bp
, regs
->r8
, regs
->r9
);
76 printk(KERN_DEFAULT
"R10: %016lx R11: %016lx R12: %016lx\n",
77 regs
->r10
, regs
->r11
, regs
->r12
);
78 printk(KERN_DEFAULT
"R13: %016lx R14: %016lx R15: %016lx\n",
79 regs
->r13
, regs
->r14
, regs
->r15
);
81 asm("movl %%ds,%0" : "=r" (ds
));
82 asm("movl %%cs,%0" : "=r" (cs
));
83 asm("movl %%es,%0" : "=r" (es
));
84 asm("movl %%fs,%0" : "=r" (fsindex
));
85 asm("movl %%gs,%0" : "=r" (gsindex
));
87 rdmsrl(MSR_FS_BASE
, fs
);
88 rdmsrl(MSR_GS_BASE
, gs
);
89 rdmsrl(MSR_KERNEL_GS_BASE
, shadowgs
);
99 printk(KERN_DEFAULT
"FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
100 fs
, fsindex
, gs
, gsindex
, shadowgs
);
101 printk(KERN_DEFAULT
"CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs
, ds
,
103 printk(KERN_DEFAULT
"CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2
, cr3
,
113 /* Only print out debug registers if they are in their non-default state. */
114 if ((d0
== 0) && (d1
== 0) && (d2
== 0) && (d3
== 0) &&
115 (d6
== DR6_RESERVED
) && (d7
== 0x400))
118 printk(KERN_DEFAULT
"DR0: %016lx DR1: %016lx DR2: %016lx\n", d0
, d1
, d2
);
119 printk(KERN_DEFAULT
"DR3: %016lx DR6: %016lx DR7: %016lx\n", d3
, d6
, d7
);
/*
 * release_thread() - check @dead_task for a leftover LDT.
 *
 * The visible code emits a pr_warn() when dead_task->mm->context.ldt is
 * still set, passing the LDT pointer and its ->size as arguments.
 *
 * NOTE(review): embedded lines 124-125, 128 and everything after 130 are
 * absent from this listing.  Any guard on dead_task->mm, the argument for
 * the "%s" conversion, and whatever follows the warning cannot be seen.
 */
123 void release_thread(struct task_struct
*dead_task
)
126 if (dead_task
->mm
->context
.ldt
) {
127 pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
129 dead_task
->mm
->context
.ldt
,
130 dead_task
->mm
->context
.ldt
->size
);
/*
 * set_32bit_tls() - helper taking a task @t, a TLS slot index @tls and a
 * 32-bit address @addr.
 *
 * Only the start of a struct user_desc initialiser and a descriptor
 * pointer derived from t->thread.tls_array are visible.
 *
 * NOTE(review): embedded lines 139-144 and 146-148 are missing, so how
 * @tls and @addr are consumed is not shown.  Presumably this fills the
 * selected TLS descriptor with base @addr -- TODO confirm against the
 * complete source.
 */
136 static inline void set_32bit_tls(struct task_struct
*t
, int tls
, u32 addr
)
138 struct user_desc ud
= {
145 struct desc_struct
*desc
= t
->thread
.tls_array
;
150 static inline u32
read_32bit_tls(struct task_struct
*t
, int tls
)
152 return get_desc_base(&t
->thread
.tls_array
[tls
]);
/*
 * copy_thread() - set up the thread state of the new task @p.
 *
 * Steps visible in this listing, in order:
 *  - thread.sp0 is set to the top of p's stack page (+ THREAD_SIZE),
 *    childregs to task_pt_regs(p), and thread.sp to childregs;
 *  - thread.usersp is copied from the current task and TIF_FORK is set;
 *  - io_bitmap_ptr is cleared, the live gs/fs/es/ds selectors are saved
 *    into p->thread, and thread.gs/thread.fs are inherited from the
 *    current task only when the matching selector is zero;
 *  - ptrace_bps is zeroed;
 *  - for PF_KTHREAD tasks childregs is zeroed and filled with kernel
 *    cs/ss, flags and bx = @sp (the "function");
 *  - *childregs is copied from current_pt_regs();
 *  - when the current task has TIF_IO_BITMAP, its I/O bitmap is
 *    duplicated with kmemdup() and TIF_IO_BITMAP is set on @p;
 *  - with CLONE_SETTLS, TLS is set through do_set_thread_area() (TIF_IA32
 *    case) or do_arch_prctl(ARCH_SET_FS, childregs->r8);
 *  - if err is set and an I/O bitmap was allocated, it is freed and
 *    io_bitmap_max is reset.
 *
 * NOTE(review): many embedded lines are missing (e.g. 157-158, 184,
 * 188-189, 191-196, 204-205, 217-218, 220-224, 228+).  The declaration of
 * err, the use of @arg, the else/return structure and the return value
 * are not visible, so the exact control flow between the steps above must
 * be confirmed against the complete source.
 */
155 int copy_thread(unsigned long clone_flags
, unsigned long sp
,
156 unsigned long arg
, struct task_struct
*p
)
159 struct pt_regs
*childregs
;
160 struct task_struct
*me
= current
;
162 p
->thread
.sp0
= (unsigned long)task_stack_page(p
) + THREAD_SIZE
;
163 childregs
= task_pt_regs(p
);
164 p
->thread
.sp
= (unsigned long) childregs
;
165 p
->thread
.usersp
= me
->thread
.usersp
;
166 set_tsk_thread_flag(p
, TIF_FORK
);
168 p
->thread
.io_bitmap_ptr
= NULL
;
170 savesegment(gs
, p
->thread
.gsindex
);
171 p
->thread
.gs
= p
->thread
.gsindex
? 0 : me
->thread
.gs
;
172 savesegment(fs
, p
->thread
.fsindex
);
173 p
->thread
.fs
= p
->thread
.fsindex
? 0 : me
->thread
.fs
;
174 savesegment(es
, p
->thread
.es
);
175 savesegment(ds
, p
->thread
.ds
);
176 memset(p
->thread
.ptrace_bps
, 0, sizeof(p
->thread
.ptrace_bps
));
178 if (unlikely(p
->flags
& PF_KTHREAD
)) {
180 memset(childregs
, 0, sizeof(struct pt_regs
));
181 childregs
->sp
= (unsigned long)childregs
;
182 childregs
->ss
= __KERNEL_DS
;
183 childregs
->bx
= sp
; /* function */
185 childregs
->orig_ax
= -1;
186 childregs
->cs
= __KERNEL_CS
| get_kernel_rpl();
187 childregs
->flags
= X86_EFLAGS_IF
| X86_EFLAGS_FIXED
;
190 *childregs
= *current_pt_regs();
197 memset(p
->thread
.ptrace_bps
, 0, sizeof(p
->thread
.ptrace_bps
));
199 if (unlikely(test_tsk_thread_flag(me
, TIF_IO_BITMAP
))) {
200 p
->thread
.io_bitmap_ptr
= kmemdup(me
->thread
.io_bitmap_ptr
,
201 IO_BITMAP_BYTES
, GFP_KERNEL
);
202 if (!p
->thread
.io_bitmap_ptr
) {
203 p
->thread
.io_bitmap_max
= 0;
206 set_tsk_thread_flag(p
, TIF_IO_BITMAP
);
210 * Set a new TLS for the child thread?
212 if (clone_flags
& CLONE_SETTLS
) {
213 #ifdef CONFIG_IA32_EMULATION
214 if (test_thread_flag(TIF_IA32
))
215 err
= do_set_thread_area(p
, -1,
216 (struct user_desc __user
*)childregs
->si
, 0);
219 err
= do_arch_prctl(p
, ARCH_SET_FS
, childregs
->r8
);
225 if (err
&& p
->thread
.io_bitmap_ptr
) {
226 kfree(p
->thread
.io_bitmap_ptr
);
227 p
->thread
.io_bitmap_max
= 0;
/*
 * start_thread_common() - shared helper behind start_thread() and
 * start_thread_ia32().
 *
 * Visible behaviour: loads @_ds into both %es and %ds, records @new_sp in
 * current->thread.usersp and in the per-CPU old_rsp slot, and sets
 * regs->flags to X86_EFLAGS_IF.
 *
 * NOTE(review): embedded lines 233, 237-238, 241, 243-244, 246-247 and
 * 249+ are missing.  The return type and the uses of @new_ip, @_cs and
 * @_ss are therefore not shown in this listing.
 */
234 start_thread_common(struct pt_regs
*regs
, unsigned long new_ip
,
235 unsigned long new_sp
,
236 unsigned int _cs
, unsigned int _ss
, unsigned int _ds
)
239 loadsegment(es
, _ds
);
240 loadsegment(ds
, _ds
);
242 current
->thread
.usersp
= new_sp
;
245 this_cpu_write(old_rsp
, new_sp
);
248 regs
->flags
= X86_EFLAGS_IF
;
/*
 * start_thread() - forward @regs, @new_ip and @new_sp to
 * start_thread_common() with __USER_CS as _cs, __USER_DS as _ss and 0 as
 * _ds.
 *
 * NOTE(review): embedded line 251 (the return type) and the brace lines
 * are missing from this listing.
 */
252 start_thread(struct pt_regs
*regs
, unsigned long new_ip
, unsigned long new_sp
)
254 start_thread_common(regs
, new_ip
, new_sp
,
255 __USER_CS
, __USER_DS
, 0);
#ifdef CONFIG_IA32_EMULATION
/*
 * start_thread_ia32() - compat counterpart of start_thread().
 *
 * Passes __USER_CS as the code segment when the current thread has
 * TIF_X32 set and __USER32_CS otherwise; __USER_DS is used for both the
 * stack and data segment arguments of start_thread_common().
 */
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	unsigned int user_cs;

	if (test_thread_flag(TIF_X32))
		user_cs = __USER_CS;
	else
		user_cs = __USER32_CS;

	start_thread_common(regs, new_ip, new_sp,
			    user_cs, __USER_DS, __USER_DS);
}
#endif
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes are not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported either.
 */
/*
 * __switch_to() - switch CPU thread state from @prev_p to @next_p.
 *
 * Steps visible in this listing, in order:
 *  - switch_fpu_prepare() is called and its result kept in fpu;
 *  - %es and %ds are saved into prev and reloaded from next when either
 *    the old or the new selector is non-zero;
 *  - the live %fs/%gs selectors are saved into locals;
 *  - arch_end_context_switch(next_p) is called;
 *  - %fs is reloaded from next->fsindex when the live selector,
 *    next->fsindex or prev->fs is non-zero; MSR_FS_BASE is written from
 *    next->fs and prev->fsindex is updated from the saved selector;
 *  - the same is done for %gs via load_gs_index() and
 *    MSR_KERNEL_GS_BASE;
 *  - switch_fpu_finish() is called;
 *  - the per-CPU old_rsp, current_task and kernel_stack are switched over
 *    to @next_p;
 *  - __switch_to_xtra() is called when either task has context-switch
 *    work flags set;
 *  - on Xen PV, the IOPL mask is updated when prev->iopl != next->iopl.
 *
 * NOTE(review): many embedded lines are missing (e.g. 286-287, 292-296,
 * 326-329, 336, 340-343, 345, 351-354, 387+).  The declaration of fpu, the
 * conditions guarding the two wrmsrl() calls, the use of tss before
 * __switch_to_xtra() and the return statement are not visible here.
 */
278 __visible __notrace_funcgraph
struct task_struct
*
279 __switch_to(struct task_struct
*prev_p
, struct task_struct
*next_p
)
281 struct thread_struct
*prev
= &prev_p
->thread
;
282 struct thread_struct
*next
= &next_p
->thread
;
283 int cpu
= smp_processor_id();
284 struct tss_struct
*tss
= &per_cpu(init_tss
, cpu
);
285 unsigned fsindex
, gsindex
;
288 fpu
= switch_fpu_prepare(prev_p
, next_p
, cpu
);
291 * Reload esp0, LDT and the page table pointer:
297 * This won't pick up thread selector changes, but I guess that is ok.
299 savesegment(es
, prev
->es
);
300 if (unlikely(next
->es
| prev
->es
))
301 loadsegment(es
, next
->es
);
303 savesegment(ds
, prev
->ds
);
304 if (unlikely(next
->ds
| prev
->ds
))
305 loadsegment(ds
, next
->ds
);
308 /* We must save %fs and %gs before load_TLS() because
309 * %fs and %gs may be cleared by load_TLS().
311 * (e.g. xen_load_tls())
313 savesegment(fs
, fsindex
);
314 savesegment(gs
, gsindex
);
319 * Leave lazy mode, flushing any hypercalls made here.
320 * This must be done before restoring TLS segments so
321 * the GDT and LDT are properly updated, and must be
322 * done before math_state_restore, so the TS bit is up
325 arch_end_context_switch(next_p
);
330 * Segment register != 0 always requires a reload. Also
331 * reload when it has changed. When prev process used 64bit
332 * base always reload to avoid an information leak.
334 if (unlikely(fsindex
| next
->fsindex
| prev
->fs
)) {
335 loadsegment(fs
, next
->fsindex
);
337 * Check if the user used a selector != 0; if yes
338 * clear 64bit base, since overloaded base is always
339 * mapped to the Null selector
344 /* when next process has a 64bit base use it */
346 wrmsrl(MSR_FS_BASE
, next
->fs
);
347 prev
->fsindex
= fsindex
;
349 if (unlikely(gsindex
| next
->gsindex
| prev
->gs
)) {
350 load_gs_index(next
->gsindex
);
355 wrmsrl(MSR_KERNEL_GS_BASE
, next
->gs
);
356 prev
->gsindex
= gsindex
;
358 switch_fpu_finish(next_p
, fpu
);
361 * Switch the PDA and FPU contexts.
363 prev
->usersp
= this_cpu_read(old_rsp
);
364 this_cpu_write(old_rsp
, next
->usersp
);
365 this_cpu_write(current_task
, next_p
);
367 this_cpu_write(kernel_stack
,
368 (unsigned long)task_stack_page(next_p
) +
369 THREAD_SIZE
- KERNEL_STACK_OFFSET
);
372 * Now maybe reload the debug registers and handle I/O bitmaps
374 if (unlikely(task_thread_info(next_p
)->flags
& _TIF_WORK_CTXSW_NEXT
||
375 task_thread_info(prev_p
)->flags
& _TIF_WORK_CTXSW_PREV
))
376 __switch_to_xtra(prev_p
, next_p
, tss
);
380 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
381 * current_pt_regs()->flags may not match the current task's
382 * intended IOPL. We need to switch it manually.
384 if (unlikely(xen_pv_domain() &&
385 prev
->iopl
!= next
->iopl
))
386 xen_set_iopl_mask(next
->iopl
);
/*
 * set_personality_64bit() - put the current thread into 64-bit mode.
 *
 * Clears TIF_IA32, TIF_ADDR32 and TIF_X32 on the current thread, resets
 * current->mm->context.ia32_compat to 0 and drops READ_IMPLIES_EXEC from
 * current->personality.
 *
 * NOTE(review): embedded line 402, immediately before the ia32_compat
 * store, is missing from this listing.  Whether that store is guarded
 * (e.g. by a NULL check on current->mm) cannot be confirmed from here.
 */
392 void set_personality_64bit(void)
394 /* inherit personality from parent */
396 /* Make sure to be in 64bit mode */
397 clear_thread_flag(TIF_IA32
);
398 clear_thread_flag(TIF_ADDR32
);
399 clear_thread_flag(TIF_X32
);
401 /* Ensure the corresponding mm is not marked. */
403 current
->mm
->context
.ia32_compat
= 0;
405 /* TBD: overwrites user setup. Should have two bits.
406 But 64bit processes have always behaved this way,
407 so it's not too bad. The main problem is just that
408 32bit childs are affected again. */
409 current
->personality
&= ~READ_IMPLIES_EXEC
;
/*
 * set_personality_ia32() - put the current thread into 32-bit address
 * mode; exported with EXPORT_SYMBOL_GPL.
 *
 * Visible behaviour: sets TIF_ADDR32 and marks
 * current->mm->context.ia32_compat = 1.  Two groups of statements follow:
 *  - clear TIF_IA32, set TIF_X32, drop READ_IMPLIES_EXEC from the
 *    personality and clear TS_COMPAT in the thread status;
 *  - set TIF_IA32, clear TIF_X32, OR force_personality32 into the
 *    personality and set TS_COMPAT.
 *
 * NOTE(review): embedded lines 420, 423 and 430 are missing, so the
 * branch selecting between the two groups is not visible.  Presumably
 * @x32 selects the first group -- TODO confirm.  Any guard around the
 * ia32_compat store is likewise not shown.
 */
412 void set_personality_ia32(bool x32
)
414 /* inherit personality from parent */
416 /* Make sure to be in 32bit mode */
417 set_thread_flag(TIF_ADDR32
);
419 /* Mark the associated mm as containing 32-bit tasks. */
421 current
->mm
->context
.ia32_compat
= 1;
424 clear_thread_flag(TIF_IA32
);
425 set_thread_flag(TIF_X32
);
426 current
->personality
&= ~READ_IMPLIES_EXEC
;
427 /* is_compat_task() uses the presence of the x32
428 syscall bit flag to determine compat status */
429 current_thread_info()->status
&= ~TS_COMPAT
;
431 set_thread_flag(TIF_IA32
);
432 clear_thread_flag(TIF_X32
);
433 current
->personality
|= force_personality32
;
434 /* Prepare the first "return" to user space */
435 current_thread_info()->status
|= TS_COMPAT
;
438 EXPORT_SYMBOL_GPL(set_personality_ia32
);
/*
 * get_wchan() - walk the saved frame pointers of task @p.
 *
 * Visible behaviour: a first check covers @p being NULL, being the
 * current task, or being in TASK_RUNNING.  The base of p's stack page is
 * then taken and p->thread.sp is range-checked against
 * [stack, stack + THREAD_SIZE).  fp is loaded from the word at
 * p->thread.sp and a do/while loop runs while count++ < 16, range-checking
 * fp against the same stack bounds and testing ip with
 * in_sched_functions().
 *
 * NOTE(review): embedded lines 441-445, 447, 450, 452, 455-456, 458-459
 * and 461+ are missing.  The local declarations, how ip and the next fp
 * are derived, and every return value are not shown in this listing.
 */
440 unsigned long get_wchan(struct task_struct
*p
)
446 if (!p
|| p
== current
|| p
->state
== TASK_RUNNING
)
448 stack
= (unsigned long)task_stack_page(p
);
449 if (p
->thread
.sp
< stack
|| p
->thread
.sp
>= stack
+THREAD_SIZE
)
451 fp
= *(u64
*)(p
->thread
.sp
);
453 if (fp
< (unsigned long)stack
||
454 fp
>= (unsigned long)stack
+THREAD_SIZE
)
457 if (!in_sched_functions(ip
))
460 } while (count
++ < 16);
/*
 * do_arch_prctl() - get or set the FS/GS base of @task.
 *
 * doit is true when @task is the current task.  Four groups of
 * statements are visible:
 *  - GS set: @addr is compared against TASK_SIZE_OF(task).  Bases that
 *    fit in 32 bits go through set_32bit_tls(GS_TLS), load_TLS(),
 *    load_gs_index(GS_TLS_SEL) and gsindex = GS_TLS_SEL.  A second set of
 *    statements clears gsindex, stores @addr in thread.gs and writes
 *    MSR_KERNEL_GS_BASE with wrmsrl_safe().
 *  - FS set: the same pattern with FS_TLS, loadsegment(fs, FS_TLS_SEL),
 *    thread.fs and MSR_FS_BASE.
 *  - FS get: base comes from read_32bit_tls(FS_TLS), rdmsrl(MSR_FS_BASE)
 *    or thread.fs, and is copied to user space with put_user() at @addr.
 *  - GS get: base comes from read_32bit_tls(GS_TLS),
 *    rdmsrl(MSR_KERNEL_GS_BASE) or thread.gs, and is copied with
 *    put_user() at @addr.
 *
 * NOTE(review): the lines dispatching on @code (case labels), the
 * else/doit conditions, the declarations of ret, cpu, base and gsindex,
 * and the return statements are missing from this listing.  The mapping
 * of each group to a specific @code value is inferred from the MSR and
 * TLS names only and must be confirmed against the complete source.
 */
464 long do_arch_prctl(struct task_struct
*task
, int code
, unsigned long addr
)
467 int doit
= task
== current
;
472 if (addr
>= TASK_SIZE_OF(task
))
475 /* handle small bases via the GDT because that's faster to
477 if (addr
<= 0xffffffff) {
478 set_32bit_tls(task
, GS_TLS
, addr
);
480 load_TLS(&task
->thread
, cpu
);
481 load_gs_index(GS_TLS_SEL
);
483 task
->thread
.gsindex
= GS_TLS_SEL
;
486 task
->thread
.gsindex
= 0;
487 task
->thread
.gs
= addr
;
490 ret
= wrmsrl_safe(MSR_KERNEL_GS_BASE
, addr
);
496 /* Not strictly needed for fs, but do it for symmetry
498 if (addr
>= TASK_SIZE_OF(task
))
501 /* handle small bases via the GDT because that's faster to
503 if (addr
<= 0xffffffff) {
504 set_32bit_tls(task
, FS_TLS
, addr
);
506 load_TLS(&task
->thread
, cpu
);
507 loadsegment(fs
, FS_TLS_SEL
);
509 task
->thread
.fsindex
= FS_TLS_SEL
;
512 task
->thread
.fsindex
= 0;
513 task
->thread
.fs
= addr
;
515 /* set the selector to 0 to not confuse
518 ret
= wrmsrl_safe(MSR_FS_BASE
, addr
);
525 if (task
->thread
.fsindex
== FS_TLS_SEL
)
526 base
= read_32bit_tls(task
, FS_TLS
);
528 rdmsrl(MSR_FS_BASE
, base
);
530 base
= task
->thread
.fs
;
531 ret
= put_user(base
, (unsigned long __user
*)addr
);
537 if (task
->thread
.gsindex
== GS_TLS_SEL
)
538 base
= read_32bit_tls(task
, GS_TLS
);
540 savesegment(gs
, gsindex
);
542 rdmsrl(MSR_KERNEL_GS_BASE
, base
);
544 base
= task
->thread
.gs
;
546 base
= task
->thread
.gs
;
547 ret
= put_user(base
, (unsigned long __user
*)addr
);
559 long sys_arch_prctl(int code
, unsigned long addr
)
561 return do_arch_prctl(current
, code
, addr
);
564 unsigned long KSTK_ESP(struct task_struct
*task
)
566 return (test_tsk_thread_flag(task
, TIF_IA32
)) ?
567 (task_pt_regs(task
)->sp
) : ((task
)->thread
.usersp
);